framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,window_size,beam_width,attn_dtype,kv_cache_dtype,step,latency
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,1,64,128,1,float16,float16,0,8.484858830769857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,1,64,128,1,float16,fp8,0,8.395306905110678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,1,64,128,1,fp8,fp8,0,10.977802276611328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,2,64,128,1,float16,float16,0,8.587765375773111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,2,64,128,1,float16,fp8,0,8.530314763387045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,2,64,128,1,fp8,fp8,0,11.09988784790039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,4,64,128,1,float16,float16,0,8.662805557250977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,1,64,0,1,float16,float16,0,57.39753723144531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,1,64,0,1,fp8,fp8,0,51.64247131347656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,1,64,0,1,float16,fp8,0,57.41568501790365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,4,64,128,1,float16,fp8,0,8.534144083658854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,4,64,128,1,fp8,fp8,0,11.133188883463541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,2,64,0,1,float16,float16,0,57.11286417643229
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,8,64,128,1,float16,float16,0,8.666410446166992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,2,64,0,1,float16,fp8,0,57.64348856608073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,2,64,0,1,fp8,fp8,0,51.539449055989586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,8,64,128,1,float16,fp8,0,8.605541229248047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,8,64,128,1,fp8,fp8,0,11.120372772216797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,96,64,128,1,float16,float16,0,4.811013221740723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,96,64,128,1,float16,fp8,0,4.780111948649089
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,4,64,0,1,float16,float16,0,58.2540283203125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,96,64,128,1,fp8,fp8,0,6.204426447550456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,4,64,0,1,fp8,fp8,0,52.03538513183594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,4,64,0,1,float16,fp8,0,57.40426127115885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,96,64,0,1,float16,float16,0,28.88585154215495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,1,64,128,1,float16,float16,0,4.306069374084473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,8,64,0,1,float16,float16,0,57.78606160481771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,1,64,128,1,float16,fp8,0,4.264464060465495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,1,64,128,1,fp8,fp8,0,5.556671778361003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,8,64,0,1,float16,fp8,0,57.777862548828125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,96,64,0,1,float16,fp8,0,29.498138427734375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,2,64,128,1,float16,float16,0,4.349743843078613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,8,64,0,1,fp8,fp8,0,51.596394856770836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,96,64,0,1,fp8,fp8,0,26.4986089070638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,2,64,128,1,float16,fp8,0,4.340517361958821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,1,64,0,1,float16,float16,0,28.695823669433594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,2,64,128,1,fp8,fp8,0,5.631093343098958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,4,64,128,1,float16,float16,0,4.383690516153972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,1,64,0,1,fp8,fp8,0,25.81292724609375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,1,64,0,1,float16,fp8,0,28.582361857096355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,4,64,128,1,float16,fp8,0,4.295061429341634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,4,64,128,1,fp8,fp8,0,5.678997039794922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,8,64,128,1,float16,float16,0,4.375781377156575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,2,64,0,1,float16,float16,0,28.713022867838543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,8,64,128,1,float16,fp8,0,4.315818786621094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,2,64,0,1,fp8,fp8,0,25.904645284016926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,2,64,0,1,float16,fp8,0,28.71449025472005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,8,64,128,1,fp8,fp8,0,5.712064107259114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,4,64,0,1,float16,float16,0,28.763470967610676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,96,64,128,1,float16,float16,0,2.498821258544922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,4,64,0,1,fp8,fp8,0,25.925702412923176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,96,64,128,1,float16,fp8,0,2.5293386777242026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,4,64,0,1,float16,fp8,0,28.34826151529948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,96,64,128,1,fp8,fp8,0,3.2294880549112954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,8,64,0,1,float16,float16,0,29.21800994873047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,96,64,0,1,float16,float16,0,14.820964813232422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,1,64,128,1,float16,float16,0,2.315999984741211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,8,64,0,1,float16,fp8,0,28.994405110677082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,8,64,0,1,fp8,fp8,0,25.94775390625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,96,64,0,1,float16,fp8,0,14.751541137695312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,96,64,0,1,fp8,fp8,0,13.53546142578125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,1,64,128,1,float16,fp8,0,2.2885546684265137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,1,64,128,1,fp8,fp8,0,2.9408534367879233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,1,64,0,1,float16,float16,0,14.441791534423828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,2,64,128,1,float16,float16,0,2.327669302622477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,1,64,0,1,float16,fp8,0,14.332480112711588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,2,64,128,1,float16,fp8,0,2.3006399472554526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,2,64,128,1,fp8,fp8,0,2.9649387995402017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,1,64,0,1,fp8,fp8,0,13.113749186197916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,2,64,0,1,float16,float16,0,14.569727579752604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,4,64,128,1,float16,float16,0,2.331077257792155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,2,64,0,1,float16,fp8,0,14.432197570800781
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,4,64,128,1,float16,fp8,0,2.2980480194091797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,2,64,0,1,fp8,fp8,0,13.124746958414713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,4,64,128,1,fp8,fp8,0,2.968597412109375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,4,64,0,1,float16,float16,0,14.356698354085287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,8,64,128,1,float16,float16,0,2.3345386187235513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,4,64,0,1,float16,fp8,0,14.511002858479818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,8,64,128,1,float16,fp8,0,2.306111971537272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,96,64,128,1,float16,float16,0,1.622554620107015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,8,64,128,1,fp8,fp8,0,2.974735895792643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,96,64,128,1,float16,fp8,0,1.6144107182820637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,4,64,0,1,fp8,fp8,0,13.125199635823568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,96,64,128,1,fp8,fp8,0,1.932378609975179
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,96,64,0,1,float16,float16,0,7.751925150553386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,8,64,0,1,float16,float16,0,14.576175689697266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,96,64,0,1,float16,fp8,0,7.789013544718425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,96,64,0,1,fp8,fp8,0,7.109077453613281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,8,64,0,1,fp8,fp8,0,13.135743459065756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,8,64,0,1,float16,fp8,0,14.64190928141276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,1,64,128,1,float16,float16,0,1.6184906959533691
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,1,64,128,1,float16,fp8,0,1.6144107182820637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,1,64,128,1,fp8,fp8,0,1.9308640162150066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,1,64,0,1,float16,float16,0,7.730560302734375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,2,64,128,1,float16,float16,0,1.6142826080322266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,1,64,0,1,fp8,fp8,0,7.086106618245442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,1,64,0,1,float16,fp8,0,7.790224075317383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,2,64,128,1,float16,fp8,0,1.6183573404947917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,2,64,128,1,fp8,fp8,0,1.920090675354004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,2,64,0,1,float16,float16,0,7.728576024373372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,2,64,0,1,float16,fp8,0,7.783626556396484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,2,64,0,1,fp8,fp8,0,7.036949157714844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,4,64,128,1,float16,float16,0,1.6147146224975586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,4,64,128,1,float16,fp8,0,1.6186025937398274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,4,64,128,1,fp8,fp8,0,1.9314506848653157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,4,64,0,1,float16,float16,0,7.734064102172852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,4,64,0,1,float16,fp8,0,7.7259947458903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,4,64,0,1,fp8,fp8,0,7.096597035725911
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,8,64,128,1,float16,float16,0,1.6205013593037922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,8,64,128,1,float16,fp8,0,1.615514596303304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,8,64,0,1,float16,float16,0,7.733845392862956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,8,64,128,1,fp8,fp8,0,1.9287892977396648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,8,64,0,1,float16,fp8,0,7.78335444132487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,8,64,0,1,fp8,fp8,0,7.062858581542969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,1,64,128,1,float16,float16,0,6.327754974365234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,1,64,128,1,float16,fp8,0,6.259429295857747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,1,64,128,1,fp8,fp8,0,8.19160016377767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,2,64,128,1,float16,float16,0,6.458954493204753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,2,64,128,1,float16,fp8,0,6.353872299194336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,2,64,128,1,fp8,fp8,0,8.322229385375977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,1,64,0,1,float16,float16,0,34.55213928222656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,4,64,128,1,float16,float16,0,6.450389226277669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,4,64,128,1,float16,fp8,0,6.414992014567058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,1,64,0,1,float16,fp8,0,34.57055409749349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,1,64,0,1,fp8,fp8,0,29.947535196940105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,4,64,128,1,fp8,fp8,0,8.29252815246582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,2,64,0,1,float16,float16,0,34.8279774983724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,2,64,0,1,float16,fp8,0,35.060132344563804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,8,64,128,1,float16,float16,0,6.506053288777669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,2,64,0,1,fp8,fp8,0,30.05310821533203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,8,64,128,1,float16,fp8,0,6.4545440673828125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,8,64,128,1,fp8,fp8,0,8.387077331542969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,4,64,0,1,float16,float16,0,35.307604471842446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,96,64,128,1,float16,float16,0,3.5389601389567056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,96,64,128,1,float16,fp8,0,3.5776853561401367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,96,64,128,1,fp8,fp8,0,4.64027722676595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,4,64,0,1,fp8,fp8,0,30.054468790690105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,4,64,0,1,float16,fp8,0,35.049914042154946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,1,64,128,1,float16,float16,0,3.2195841471354165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,96,64,0,1,float16,float16,0,16.89557393391927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,8,64,0,1,float16,float16,0,35.984212239583336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,1,64,128,1,float16,fp8,0,3.159173329671224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,1,64,128,1,fp8,fp8,0,4.149642626444499
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,8,64,0,1,fp8,fp8,0,30.10997772216797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,96,64,0,1,float16,fp8,0,17.31468327840169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,8,64,0,1,float16,fp8,0,35.1328379313151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,96,64,0,1,fp8,fp8,0,15.675210316975912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,2,64,128,1,float16,float16,0,3.2325865427652993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,1,64,0,1,float16,float16,0,16.83037821451823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,2,64,128,1,float16,fp8,0,3.1922613779703775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,1,64,0,1,float16,fp8,0,16.760026295979817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,1,64,0,1,fp8,fp8,0,15.043888092041016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,2,64,128,1,fp8,fp8,0,4.190133412679036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,4,64,128,1,float16,float16,0,3.2515573501586914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,2,64,0,1,float16,float16,0,16.709557851155598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,4,64,128,1,float16,fp8,0,3.1825974782307944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,4,64,128,1,fp8,fp8,0,4.177034695943196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,2,64,0,1,fp8,fp8,0,15.079269409179688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,8,64,128,1,float16,float16,0,3.2844212849934897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,2,64,0,1,float16,fp8,0,16.66003163655599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,8,64,128,1,float16,fp8,0,3.196885426839193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,4,64,0,1,float16,float16,0,16.79198964436849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,8,64,128,1,fp8,fp8,0,4.176266670227051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,4,64,0,1,fp8,fp8,0,15.118831634521484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,4,64,0,1,float16,fp8,0,16.52008056640625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,96,64,128,1,float16,float16,0,1.856005350748698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,8,64,0,1,float16,float16,0,16.91104507446289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,8,64,0,1,float16,fp8,0,16.554283142089844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,96,64,128,1,float16,fp8,0,1.8891359965006511
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,96,64,0,1,float16,float16,0,8.738576253255209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,8,64,0,1,fp8,fp8,0,15.231552124023438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,96,64,128,1,fp8,fp8,0,2.4242773056030273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,96,64,0,1,float16,fp8,0,8.6636962890625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,96,64,0,1,fp8,fp8,0,7.9081064860026045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,1,64,128,1,float16,float16,0,1.740928014119466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,1,64,128,1,float16,fp8,0,1.7160852750142415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,1,64,128,1,fp8,fp8,0,2.222928047180176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,1,64,0,1,float16,float16,0,8.518106460571289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,1,64,0,1,float16,fp8,0,8.433290481567383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,2,64,128,1,float16,float16,0,1.7498985926310222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,1,64,0,1,fp8,fp8,0,7.693983713785808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,2,64,0,1,float16,float16,0,8.467157363891602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,2,64,128,1,float16,fp8,0,1.7149386405944824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,2,64,128,1,fp8,fp8,0,2.2151519457499185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,2,64,0,1,float16,fp8,0,8.564863840738932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,4,64,128,1,float16,float16,0,1.7466346422831218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,4,64,128,1,float16,fp8,0,1.7269919713338215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,2,64,0,1,fp8,fp8,0,7.738485336303711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,4,64,128,1,fp8,fp8,0,2.2284266153971353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,8,64,128,1,float16,float16,0,1.7758132616678874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,4,64,0,1,float16,float16,0,8.532938639322916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,8,64,128,1,float16,fp8,0,1.7348747253417969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,8,64,128,1,fp8,fp8,0,2.237818717956543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,4,64,0,1,fp8,fp8,0,7.656821568806966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,4,64,0,1,float16,fp8,0,8.454986572265625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,96,64,128,1,float16,float16,0,1.2256320317586262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,8,64,0,1,float16,float16,0,8.471765518188477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,96,64,128,1,float16,fp8,0,1.2216320037841797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,96,64,128,1,fp8,fp8,0,1.4469812711079915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,8,64,0,1,float16,fp8,0,8.441845575968424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,8,64,0,1,fp8,fp8,0,7.716992060343425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,96,64,0,1,float16,float16,0,4.625103950500488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,96,64,0,1,float16,fp8,0,4.611093203226726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,96,64,0,1,fp8,fp8,0,4.231631914774577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,1,64,128,1,float16,float16,0,1.2167786757151287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,1,64,0,1,float16,float16,0,4.610074679056804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,1,64,128,1,float16,fp8,0,1.218000014623006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,1,64,128,1,fp8,fp8,0,1.4575039545694988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,1,64,0,1,float16,fp8,0,4.637263933817546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,2,64,128,1,float16,float16,0,1.2165760199228923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,1,64,0,1,fp8,fp8,0,4.220063845316569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,2,64,0,1,float16,float16,0,4.6047414143880205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,2,64,128,1,float16,fp8,0,1.218058665593465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,2,64,128,1,fp8,fp8,0,1.456650733947754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,2,64,0,1,float16,fp8,0,4.553258577982585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,2,64,0,1,fp8,fp8,0,4.242442766825358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,4,64,128,1,float16,float16,0,1.221354643503825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,4,64,0,1,float16,float16,0,4.609642664591472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,4,64,128,1,float16,fp8,0,1.221226692199707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,4,64,128,1,fp8,fp8,0,1.455407937367757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,4,64,0,1,float16,fp8,0,4.603082656860352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,4,64,0,1,fp8,fp8,0,4.231520016988118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,8,64,128,1,float16,float16,0,1.2177813053131104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,8,64,0,1,float16,float16,0,4.612895965576172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,8,64,128,1,float16,fp8,0,1.2216853300730388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,8,64,128,1,fp8,fp8,0,1.4503040313720703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,8,64,0,1,fp8,fp8,0,4.24780813852946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,8,64,0,1,float16,fp8,0,4.587802569071452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,1,64,128,1,float16,float16,0,5.237658818562825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,1,64,128,1,float16,fp8,0,5.143802642822266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,1,64,128,1,fp8,fp8,0,6.824506759643555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,2,64,128,1,float16,float16,0,5.341429392496745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,2,64,128,1,float16,fp8,0,5.253696123758952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,1,64,0,1,float16,float16,0,23.53497568766276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,2,64,128,1,fp8,fp8,0,6.899141311645508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,1,64,0,1,fp8,fp8,0,21.3516362508138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,4,64,128,1,float16,float16,0,5.339199701944987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,1,64,0,1,float16,fp8,0,23.469706217447918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,2,64,0,1,float16,float16,0,23.563369750976562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,4,64,128,1,float16,fp8,0,5.2805226643880205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,2,64,0,1,fp8,fp8,0,21.449803670247395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,4,64,128,1,fp8,fp8,0,6.8827253977457685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,2,64,0,1,float16,fp8,0,24.071487426757812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,4,64,0,1,float16,float16,0,23.872833251953125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,8,64,128,1,float16,float16,0,5.361818949381511
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,8,64,128,1,float16,fp8,0,5.322490692138672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,4,64,0,1,float16,fp8,0,23.669748942057293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,4,64,0,1,fp8,fp8,0,21.459259033203125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,8,64,0,1,float16,float16,0,23.498026529947918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,8,64,128,1,fp8,fp8,0,6.9295304616292315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,96,64,128,1,float16,float16,0,2.9414453506469727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,96,64,0,1,float16,float16,0,12.242421468098959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,96,64,128,1,float16,fp8,0,2.9402879079182944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,8,64,0,1,float16,fp8,0,24.131296793619793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,8,64,0,1,fp8,fp8,0,21.494527180989582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,96,64,128,1,fp8,fp8,0,3.8428481419881186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,96,64,0,1,float16,fp8,0,12.0447146097819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,1,64,128,1,float16,float16,0,2.697493235270182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,96,64,0,1,fp8,fp8,0,11.181573232014975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,1,64,128,1,float16,fp8,0,2.6377013524373374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,1,64,0,1,float16,float16,0,11.832389831542969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,1,64,128,1,fp8,fp8,0,3.4687201182047525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,2,64,128,1,float16,float16,0,2.690458615620931
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,1,64,0,1,float16,fp8,0,11.80172856648763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,1,64,0,1,fp8,fp8,0,10.748560587565104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,2,64,128,1,float16,fp8,0,2.6428640683492026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,2,64,128,1,fp8,fp8,0,3.4807840983072915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,2,64,0,1,float16,float16,0,11.836981455485025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,4,64,128,1,float16,float16,0,2.700394630432129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,4,64,128,1,float16,fp8,0,2.6611146926879883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,4,64,128,1,fp8,fp8,0,3.466032028198242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,2,64,0,1,float16,fp8,0,11.775381724039713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,8,64,128,1,float16,float16,0,2.707834561665853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,2,64,0,1,fp8,fp8,0,10.790949503580729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,4,64,0,1,float16,float16,0,11.83730697631836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,8,64,128,1,float16,fp8,0,2.6588853200276694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,4,64,0,1,fp8,fp8,0,10.773775736490885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,4,64,0,1,float16,fp8,0,11.80942408243815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,8,64,128,1,fp8,fp8,0,3.50437863667806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,8,64,0,1,float16,float16,0,11.936912536621094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,96,64,128,1,float16,float16,0,1.5570665995279949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,8,64,0,1,fp8,fp8,0,10.818767547607422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,8,64,0,1,float16,fp8,0,11.798314412434896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,96,64,128,1,float16,fp8,0,1.575445334116618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,96,64,0,1,float16,float16,0,6.24941889444987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,96,64,128,1,fp8,fp8,0,2.024336020151774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,96,64,0,1,float16,fp8,0,6.196277618408203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,1,64,128,1,float16,float16,0,1.460800011952718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,96,64,0,1,fp8,fp8,0,5.724677403767903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,1,64,128,1,float16,fp8,0,1.4312480290730794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,1,64,0,1,float16,float16,0,6.126213073730469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,1,64,128,1,fp8,fp8,0,1.844805399576823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,1,64,0,1,fp8,fp8,0,5.559194564819336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,1,64,0,1,float16,fp8,0,6.062815984090169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,2,64,128,1,float16,float16,0,1.4617865880330403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,2,64,0,1,float16,float16,0,6.106570561726888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,2,64,128,1,float16,fp8,0,1.4403947194417317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,2,64,128,1,fp8,fp8,0,1.8503999710083008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,2,64,0,1,float16,fp8,0,6.126181284586589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,2,64,0,1,fp8,fp8,0,5.517322540283203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,4,64,128,1,float16,float16,0,1.4601066907246907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,4,64,128,1,float16,fp8,0,1.4429012934366863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,4,64,0,1,float16,float16,0,6.069040298461914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,4,64,128,1,fp8,fp8,0,1.8533813158671062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,8,64,128,1,float16,float16,0,1.462480068206787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,4,64,0,1,fp8,fp8,0,5.490447998046875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,4,64,0,1,float16,fp8,0,6.0393117268880205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,8,64,128,1,float16,fp8,0,1.4487253824869792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,8,64,0,1,float16,float16,0,6.070117314656575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,8,64,128,1,fp8,fp8,0,1.8719093004862468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,8,64,0,1,float16,fp8,0,6.045141220092773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,96,64,128,1,float16,float16,0,1.0216426849365234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,8,64,0,1,fp8,fp8,0,5.57366943359375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,96,64,128,1,float16,fp8,0,1.022063970565796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,96,64,128,1,fp8,fp8,0,1.2204533418019612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,96,64,0,1,float16,float16,0,3.2943201065063477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,1,64,128,1,float16,float16,0,1.0177600383758545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,96,64,0,1,float16,fp8,0,3.316608111063639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,96,64,0,1,fp8,fp8,0,3.080479939778646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,1,64,128,1,float16,fp8,0,1.020906686782837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,1,64,128,1,fp8,fp8,0,1.2193653583526611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,1,64,0,1,float16,float16,0,3.3429012298583984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,2,64,128,1,float16,float16,0,1.0175413290659587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,1,64,0,1,fp8,fp8,0,3.0696051915486655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,1,64,0,1,float16,fp8,0,3.345973332722982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,2,64,128,1,float16,fp8,0,1.01800537109375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,2,64,128,1,fp8,fp8,0,1.2177226543426514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,2,64,0,1,float16,float16,0,3.3236160278320312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,2,64,0,1,float16,fp8,0,3.3439947764078775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,4,64,128,1,float16,float16,0,1.0171573162078857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,2,64,0,1,fp8,fp8,0,3.090655962626139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,4,64,0,1,float16,float16,0,3.3149760564168296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,4,64,128,1,float16,fp8,0,1.0217759609222412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,4,64,128,1,fp8,fp8,0,1.2189599672953289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,4,64,0,1,float16,fp8,0,3.3414719899495444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,4,64,0,1,fp8,fp8,0,3.0705385208129883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,8,64,128,1,float16,float16,0,1.0218613147735596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,8,64,0,1,float16,float16,0,3.320197423299154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,8,64,128,1,float16,fp8,0,1.022602637608846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,8,64,128,1,fp8,fp8,0,1.215663989384969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,8,64,0,1,float16,fp8,0,3.3461599349975586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,8,64,0,1,fp8,fp8,0,3.092111905415853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,1,64,128,1,float16,float16,0,8.374490737915039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,1,64,128,1,float16,fp8,0,8.255989074707031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,1,64,128,1,fp8,fp8,0,10.714271545410156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,2,64,128,1,float16,float16,0,8.477466583251953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,1,64,0,1,float16,float16,0,31.475972493489582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,2,64,128,1,float16,fp8,0,8.435696283976236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,1,64,0,1,fp8,fp8,0,28.28942362467448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,2,64,128,1,fp8,fp8,0,10.866682688395182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,1,64,0,1,float16,fp8,0,32.07099151611328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,4,64,128,1,float16,float16,0,8.512085596720377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,2,64,0,1,float16,float16,0,32.05784606933594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,4,64,128,1,float16,fp8,0,8.393840154012045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,2,64,0,1,fp8,fp8,0,28.392283121744793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,2,64,0,1,float16,fp8,0,32.758750915527344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,4,64,128,1,fp8,fp8,0,10.896415710449219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,8,64,128,1,float16,float16,0,8.548234939575195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,8,64,128,1,float16,fp8,0,8.459519704182943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,4,64,0,1,float16,float16,0,31.466720581054688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,8,64,128,1,fp8,fp8,0,10.99569574991862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,96,64,128,1,float16,float16,0,4.699114799499512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,4,64,0,1,fp8,fp8,0,28.499659220377605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,4,64,0,1,float16,fp8,0,31.994517008463543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,96,64,128,1,float16,fp8,0,4.639034589131673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,96,64,128,1,fp8,fp8,0,6.021749496459961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,8,64,0,1,float16,float16,0,31.64122772216797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,96,64,0,1,float16,float16,0,16.197546641031902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,1,64,128,1,float16,float16,0,4.146186510721843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,8,64,0,1,float16,fp8,0,32.293871561686196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,1,64,128,1,float16,fp8,0,4.101855913798015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,8,64,0,1,fp8,fp8,0,28.459083557128906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,1,64,128,1,fp8,fp8,0,5.447439829508464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,96,64,0,1,float16,fp8,0,16.274171193440754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,96,64,0,1,fp8,fp8,0,14.848052978515625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,1,64,0,1,float16,float16,0,15.537291208902994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,2,64,128,1,float16,float16,0,4.181290626525879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,1,64,0,1,float16,fp8,0,15.668277740478516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,1,64,0,1,fp8,fp8,0,14.176469167073568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,2,64,128,1,float16,fp8,0,4.155029296875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,2,64,128,1,fp8,fp8,0,5.484661102294922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,2,64,0,1,float16,float16,0,15.622688293457031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,4,64,128,1,float16,float16,0,4.196624120076497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,4,64,128,1,float16,fp8,0,4.169626553853353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,2,64,0,1,fp8,fp8,0,14.237055460611979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,2,64,0,1,float16,fp8,0,15.79697036743164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,4,64,128,1,fp8,fp8,0,5.503967920939128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,4,64,0,1,float16,float16,0,15.801600138346354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,8,64,128,1,float16,float16,0,4.227295875549316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,4,64,0,1,float16,fp8,0,15.677482604980469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,8,64,128,1,float16,fp8,0,4.185999870300293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,4,64,0,1,fp8,fp8,0,14.251834869384766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,8,64,128,1,fp8,fp8,0,5.544090906778972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,8,64,0,1,float16,float16,0,15.803354899088541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,8,64,0,1,float16,fp8,0,15.580608367919922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,96,64,128,1,float16,float16,0,2.3345866203308105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,8,64,0,1,fp8,fp8,0,14.284250895182291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,96,64,128,1,float16,fp8,0,2.3613972663879395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,96,64,0,1,float16,float16,0,8.104949315388998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,96,64,128,1,fp8,fp8,0,3.043269475301107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,1,64,128,1,float16,float16,0,2.1592532793680825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,1,64,128,1,float16,fp8,0,2.109989325205485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,96,64,0,1,float16,fp8,0,8.204085032145182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,96,64,0,1,fp8,fp8,0,7.473050435384114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,1,64,128,1,fp8,fp8,0,2.7744693756103516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,1,64,0,1,float16,float16,0,7.821695963541667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,1,64,0,1,float16,fp8,0,7.891978581746419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,1,64,0,1,fp8,fp8,0,7.16374397277832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,2,64,128,1,float16,float16,0,2.1534293492635093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,2,64,128,1,float16,fp8,0,2.1172213554382324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,2,64,128,1,fp8,fp8,0,2.782682736714681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,4,64,128,1,float16,float16,0,2.158991972605387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,4,64,128,1,float16,fp8,0,2.1191412607828775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,2,64,0,1,float16,float16,0,7.966501235961914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,4,64,128,1,fp8,fp8,0,2.77509339650472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,2,64,0,1,fp8,fp8,0,7.133994420369466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,2,64,0,1,float16,fp8,0,7.8359629313151045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,4,64,0,1,float16,float16,0,7.882799784342448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,4,64,0,1,fp8,fp8,0,7.241903940836589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,8,64,128,1,float16,float16,0,2.1631360054016113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,4,64,0,1,float16,fp8,0,7.908202489217122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,8,64,128,1,float16,fp8,0,2.142437299092611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,8,64,0,1,float16,float16,0,7.984266916910808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,8,64,128,1,fp8,fp8,0,2.7914772033691406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,96,64,128,1,float16,float16,0,1.2472106615702312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,8,64,0,1,float16,fp8,0,7.853514353434245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,96,64,128,1,float16,fp8,0,1.2660480340321858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,8,64,0,1,fp8,fp8,0,7.24124272664388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,96,64,0,1,float16,float16,0,4.098645210266113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,96,64,128,1,fp8,fp8,0,1.6267040570576985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,96,64,0,1,fp8,fp8,0,3.853717486063639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,96,64,0,1,float16,fp8,0,4.175296147664388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,1,64,128,1,float16,float16,0,1.1704479853312175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,1,64,128,1,float16,fp8,0,1.156394640604655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,1,64,128,1,fp8,fp8,0,1.490575949350993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,1,64,0,1,float16,float16,0,4.103343963623047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,1,64,0,1,float16,fp8,0,4.045674641927083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,2,64,128,1,float16,float16,0,1.1705333391825359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,1,64,0,1,fp8,fp8,0,3.7349812189737954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,2,64,128,1,float16,fp8,0,1.153322696685791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,2,64,0,1,float16,float16,0,4.14961592356364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,2,64,128,1,fp8,fp8,0,1.4933973948160808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,4,64,128,1,float16,float16,0,1.1722773710886638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,2,64,0,1,fp8,fp8,0,3.7261387507120767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,2,64,0,1,float16,fp8,0,4.030672073364258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,4,64,0,1,float16,float16,0,4.023263931274414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,4,64,128,1,float16,fp8,0,1.1554453372955322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,4,64,128,1,fp8,fp8,0,1.4953385988871257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,4,64,0,1,float16,fp8,0,4.038746515909831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,8,64,128,1,float16,float16,0,1.1749866803487141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,4,64,0,1,fp8,fp8,0,3.712592124938965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,8,64,0,1,float16,float16,0,4.057632128397624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,8,64,128,1,float16,fp8,0,1.1639520327250164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,8,64,128,1,fp8,fp8,0,1.4970134099324544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,8,64,0,1,float16,fp8,0,4.061936060587565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,96,64,128,1,float16,float16,0,0.8202079931894938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,8,64,0,1,fp8,fp8,0,3.7209866841634116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,96,64,0,1,float16,float16,0,2.2609333992004395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,96,64,128,1,float16,fp8,0,0.8233706951141357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,96,64,128,1,fp8,fp8,0,0.9838079611460367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,96,64,0,1,float16,fp8,0,2.274709383646647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,96,64,0,1,fp8,fp8,0,2.1067147254943848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,1,64,128,1,float16,float16,0,0.8193279902140299
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,1,64,0,1,float16,float16,0,2.261338710784912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,1,64,128,1,float16,fp8,0,0.8201013406117758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,1,64,128,1,fp8,fp8,0,0.9780693054199219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,1,64,0,1,float16,fp8,0,2.2798293431599936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,2,64,128,1,float16,float16,0,0.8217493693033854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,1,64,0,1,fp8,fp8,0,2.111738681793213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,2,64,128,1,float16,fp8,0,0.8200213114420573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,2,64,0,1,float16,float16,0,2.2685546875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,2,64,128,1,fp8,fp8,0,0.9779679775238037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,2,64,0,1,float16,fp8,0,2.2653120358784995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,2,64,0,1,fp8,fp8,0,2.09934933980306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,4,64,128,1,float16,float16,0,0.8227466742197672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,4,64,0,1,float16,float16,0,2.278432051340739
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,4,64,128,1,float16,fp8,0,0.8216799894968668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,4,64,128,1,fp8,fp8,0,0.9794346491495768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,4,64,0,1,float16,fp8,0,2.2668587366739907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,4,64,0,1,fp8,fp8,0,2.100048065185547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,8,64,128,1,float16,float16,0,0.820192019144694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,8,64,128,1,float16,fp8,0,0.8230186303456625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,8,64,0,1,float16,float16,0,2.265775998433431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,8,64,128,1,fp8,fp8,0,0.9823626677195231
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,8,64,0,1,float16,fp8,0,2.2816480000813804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,8,64,0,1,fp8,fp8,0,2.100927988688151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,1,64,128,1,float16,float16,0,6.201375961303711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,1,64,128,1,float16,fp8,0,6.107119878133138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,1,64,128,1,fp8,fp8,0,8.061808268229166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,1,64,0,1,float16,float16,0,18.830181121826172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,2,64,128,1,float16,float16,0,6.360240300496419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,2,64,128,1,float16,fp8,0,6.2750504811604815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,1,64,0,1,fp8,fp8,0,16.87548828125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,1,64,0,1,float16,fp8,0,18.394400278727215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,2,64,128,1,fp8,fp8,0,8.191418965657553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,2,64,0,1,float16,float16,0,18.96679941813151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,4,64,128,1,float16,float16,0,6.392592112223308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,2,64,0,1,float16,fp8,0,18.753130594889324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,2,64,0,1,fp8,fp8,0,16.98797353108724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,4,64,128,1,float16,fp8,0,6.321098963419597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,4,64,128,1,fp8,fp8,0,8.162256240844727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,8,64,128,1,float16,float16,0,6.415791829427083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,4,64,0,1,float16,float16,0,18.656475067138672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,4,64,0,1,float16,fp8,0,18.650330861409504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,8,64,128,1,float16,fp8,0,6.329231897989909
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,4,64,0,1,fp8,fp8,0,17.045525868733723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,8,64,128,1,fp8,fp8,0,8.197317123413086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,96,64,128,1,float16,float16,0,3.4762293497721353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,8,64,0,1,float16,float16,0,18.783370971679688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,96,64,128,1,float16,fp8,0,3.437018712361654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,96,64,128,1,fp8,fp8,0,4.505615870157878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,96,64,0,1,float16,float16,0,9.687013626098633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,8,64,0,1,fp8,fp8,0,17.05691146850586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,8,64,0,1,float16,fp8,0,18.601776123046875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,96,64,0,1,float16,fp8,0,9.677679697672525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,1,64,128,1,float16,float16,0,3.115472157796224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,1,64,128,1,float16,fp8,0,3.048992156982422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,96,64,0,1,fp8,fp8,0,8.99452273050944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,1,64,128,1,fp8,fp8,0,4.053226788838704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,1,64,0,1,float16,float16,0,9.333920160929361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,2,64,128,1,float16,float16,0,3.136176109313965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,1,64,0,1,float16,fp8,0,9.202981313069662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,1,64,0,1,fp8,fp8,0,8.453376134236654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,2,64,128,1,float16,fp8,0,3.065317471822103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,2,64,128,1,fp8,fp8,0,4.056506792704265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,2,64,0,1,float16,float16,0,9.270602544148764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,2,64,0,1,float16,fp8,0,9.287151972452799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,2,64,0,1,fp8,fp8,0,8.46504020690918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,4,64,128,1,float16,float16,0,3.138458569844564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,4,64,128,1,float16,fp8,0,3.083045323689779
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,4,64,128,1,fp8,fp8,0,4.046698570251465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,4,64,0,1,float16,float16,0,9.222389221191406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,8,64,128,1,float16,float16,0,3.144960085550944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,4,64,0,1,float16,fp8,0,9.302080154418945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,4,64,0,1,fp8,fp8,0,8.479951858520508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,8,64,0,1,float16,float16,0,9.364954630533854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,8,64,128,1,float16,fp8,0,3.085834821065267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,8,64,128,1,fp8,fp8,0,4.083946545918782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,96,64,128,1,float16,float16,0,1.760826587677002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,8,64,0,1,fp8,fp8,0,8.489994684855143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,8,64,0,1,float16,fp8,0,9.41102409362793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,96,64,128,1,float16,fp8,0,1.769605318705241
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,96,64,0,1,float16,float16,0,4.8875732421875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,96,64,128,1,fp8,fp8,0,2.2828799883524575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,96,64,0,1,float16,fp8,0,4.865023930867513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,1,64,128,1,float16,float16,0,1.6250346501668294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,96,64,0,1,fp8,fp8,0,4.542319933573405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,1,64,0,1,float16,float16,0,4.709989229838054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,1,64,128,1,float16,fp8,0,1.5895627339680989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,1,64,128,1,fp8,fp8,0,2.0851786931355796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,1,64,0,1,fp8,fp8,0,4.3043521245320635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,1,64,0,1,float16,fp8,0,4.739034652709961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,2,64,128,1,float16,float16,0,1.62009064356486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,2,64,128,1,float16,fp8,0,1.5988106727600098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,2,64,0,1,float16,float16,0,4.768656094868978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,2,64,128,1,fp8,fp8,0,2.0797279675801597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,2,64,0,1,float16,fp8,0,4.775125185648601
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,4,64,128,1,float16,float16,0,1.6229920387268066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,2,64,0,1,fp8,fp8,0,4.305781364440918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,4,64,0,1,float16,float16,0,4.75211747487386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,4,64,128,1,float16,fp8,0,1.5964800516764324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,4,64,128,1,fp8,fp8,0,2.0949014027913413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,4,64,0,1,float16,fp8,0,4.77346674601237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,4,64,0,1,fp8,fp8,0,4.316624005635579
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,8,64,128,1,float16,float16,0,1.6370399792989094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,8,64,128,1,float16,fp8,0,1.612005392710368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,8,64,0,1,float16,float16,0,4.754879951477051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,8,64,128,1,fp8,fp8,0,2.1044373512268066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,8,64,0,1,float16,fp8,0,4.706159909566243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,8,64,0,1,fp8,fp8,0,4.351200103759766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,96,64,128,1,float16,float16,0,0.9445707003275553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,96,64,0,1,float16,float16,0,2.486453374226888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,96,64,128,1,float16,fp8,0,0.9605066776275635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,96,64,128,1,fp8,fp8,0,1.2273493607838948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,96,64,0,1,float16,fp8,0,2.4977919260660806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,96,64,0,1,fp8,fp8,0,2.3456106185913086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,1,64,128,1,float16,float16,0,0.8826399644215902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,1,64,0,1,float16,float16,0,2.446458657582601
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,1,64,128,1,float16,fp8,0,0.8725866476694742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,1,64,128,1,fp8,fp8,0,1.1212960084279378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,1,64,0,1,float16,fp8,0,2.4321440060933432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,1,64,0,1,fp8,fp8,0,2.2570133209228516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,2,64,128,1,float16,float16,0,0.8841866652170817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,2,64,0,1,float16,float16,0,2.4714667002360025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,2,64,128,1,float16,fp8,0,0.8703359762827555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,2,64,128,1,fp8,fp8,0,1.1265920003255208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,2,64,0,1,float16,fp8,0,2.4311200777689614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,2,64,0,1,fp8,fp8,0,2.2472586631774902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,4,64,128,1,float16,float16,0,0.8894879817962646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,4,64,0,1,float16,float16,0,2.4494825998942056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,4,64,128,1,float16,fp8,0,0.8719147046407064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,4,64,128,1,fp8,fp8,0,1.1306506792704265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,4,64,0,1,float16,fp8,0,2.419621308644613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,4,64,0,1,fp8,fp8,0,2.2605387369791665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,8,64,128,1,float16,float16,0,0.8915839989980062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,8,64,0,1,float16,float16,0,2.444197336832682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,8,64,128,1,float16,fp8,0,0.8803359667460123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,8,64,128,1,fp8,fp8,0,1.1337920029958088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,8,64,0,1,float16,fp8,0,2.423189322153727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,8,64,0,1,fp8,fp8,0,2.2529120445251465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,96,64,128,1,float16,float16,0,0.6211520036061605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,96,64,0,1,float16,float16,0,1.4146933555603027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,1,64,0,1,float16,float16,0,1.4082454045613606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,96,64,128,1,float16,fp8,0,0.6238079865773519
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,96,64,128,1,fp8,fp8,0,0.7436319986979166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,96,64,0,1,float16,fp8,0,1.4171679814656575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,96,64,0,1,fp8,fp8,0,1.3209386666615803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,1,64,128,1,float16,float16,0,0.6202506621678671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,1,64,128,1,float16,fp8,0,0.621946652730306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,1,64,128,1,fp8,fp8,0,0.7415040334065756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,1,64,0,1,float16,fp8,0,1.4094506899515789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,1,64,0,1,fp8,fp8,0,1.318714698155721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,2,64,128,1,float16,float16,0,0.6209866603215536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,2,64,0,1,float16,float16,0,1.4167893727620442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,2,64,128,1,float16,fp8,0,0.6234560012817383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,2,64,128,1,fp8,fp8,0,0.74125870068868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,2,64,0,1,float16,fp8,0,1.4154879252115886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,4,64,128,1,float16,float16,0,0.6214773257573446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,2,64,0,1,fp8,fp8,0,1.3110720316569011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,4,64,128,1,float16,fp8,0,0.6214880148569742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,4,64,128,1,fp8,fp8,0,0.7460800011952718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,4,64,0,1,float16,float16,0,1.416202704111735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,4,64,0,1,float16,fp8,0,1.4178026517232258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,4,64,0,1,fp8,fp8,0,1.3103946844736736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,8,64,128,1,float16,float16,0,0.6218400001525879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,8,64,128,1,float16,fp8,0,0.6202986637751261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,8,64,0,1,float16,float16,0,1.408906618754069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,8,64,128,1,fp8,fp8,0,0.7434399922688802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,8,64,0,1,float16,fp8,0,1.421247959136963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,8,64,0,1,fp8,fp8,0,1.3188426494598389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,1,64,128,1,float16,float16,0,8.262079874674479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,1,64,128,1,float16,fp8,0,8.157407760620117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,1,64,0,1,float16,float16,0,18.334122975667317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,1,64,128,1,fp8,fp8,0,10.588885625203451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,1,64,0,1,float16,fp8,0,18.18846384684245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,2,64,128,1,float16,float16,0,8.42297617594401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,1,64,0,1,fp8,fp8,0,16.63497543334961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,2,64,128,1,float16,fp8,0,8.342986424763998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,2,64,0,1,float16,float16,0,18.42511494954427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,2,64,128,1,fp8,fp8,0,10.802501678466797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,4,64,128,1,float16,float16,0,8.449104309082031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,2,64,0,1,float16,fp8,0,18.580095926920574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,2,64,0,1,fp8,fp8,0,16.80803680419922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,4,64,128,1,float16,fp8,0,8.378960291544596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,4,64,128,1,fp8,fp8,0,10.781578063964844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,4,64,0,1,float16,float16,0,18.634485880533855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,8,64,128,1,float16,float16,0,8.538602828979492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,8,64,128,1,float16,fp8,0,8.416463851928711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,4,64,0,1,float16,fp8,0,18.503451029459637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,4,64,0,1,fp8,fp8,0,16.860052744547527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,8,64,128,1,fp8,fp8,0,10.846805572509766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,8,64,0,1,float16,float16,0,18.657071431477863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,96,64,128,1,float16,float16,0,4.677568117777507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,96,64,128,1,float16,fp8,0,4.6043039957682295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,96,64,0,1,float16,float16,0,9.677482604980469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,8,64,0,1,fp8,fp8,0,16.91713587443034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,8,64,0,1,float16,fp8,0,18.515477498372395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,1,64,128,1,float16,float16,0,4.072271982828776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,96,64,128,1,fp8,fp8,0,5.971466700236003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,96,64,0,1,fp8,fp8,0,8.982357025146484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,96,64,0,1,float16,fp8,0,9.548069636027018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,1,64,128,1,float16,fp8,0,4.025504112243652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,1,64,0,1,float16,float16,0,9.084298451741537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,1,64,128,1,fp8,fp8,0,5.270517349243164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,2,64,128,1,float16,float16,0,4.142223993937175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,1,64,0,1,float16,fp8,0,9.017845153808594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,1,64,0,1,fp8,fp8,0,8.346464157104492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,2,64,0,1,float16,float16,0,9.164426803588867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,2,64,128,1,float16,fp8,0,4.045237223307292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,2,64,128,1,fp8,fp8,0,5.35202153523763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,4,64,128,1,float16,float16,0,4.150874773661296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,2,64,0,1,fp8,fp8,0,8.405194600423178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,2,64,0,1,float16,fp8,0,9.133018493652344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,4,64,128,1,float16,fp8,0,4.077797253926595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,4,64,0,1,float16,float16,0,9.146570841471354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,4,64,128,1,fp8,fp8,0,5.3702239990234375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,4,64,0,1,float16,fp8,0,9.039712270100912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,8,64,128,1,float16,float16,0,4.172320048014323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,4,64,0,1,fp8,fp8,0,8.406549453735352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,8,64,128,1,float16,fp8,0,4.113701184590657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,8,64,128,1,fp8,fp8,0,5.424879709879558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,8,64,0,1,float16,float16,0,9.25104014078776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,8,64,0,1,float16,fp8,0,9.113370895385742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,96,64,128,1,float16,float16,0,2.284634590148926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,8,64,0,1,fp8,fp8,0,8.433834711710611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,96,64,128,1,float16,fp8,0,2.282378673553467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,96,64,0,1,float16,float16,0,4.864042599995931
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,96,64,128,1,fp8,fp8,0,2.9683411916097007
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,96,64,0,1,float16,fp8,0,4.8067626953125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,96,64,0,1,fp8,fp8,0,4.477658589680989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,1,64,128,1,float16,float16,0,2.0837705930074057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,1,64,0,1,float16,float16,0,4.5587412516276045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,1,64,128,1,float16,fp8,0,2.032927989959717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,1,64,128,1,fp8,fp8,0,2.6649492581685386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,1,64,0,1,float16,fp8,0,4.515920003255208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,1,64,0,1,fp8,fp8,0,4.208815892537435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,2,64,128,1,float16,float16,0,2.0769386291503906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,2,64,128,1,float16,fp8,0,2.050901254018148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,2,64,0,1,float16,fp8,0,4.594730695088704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,4,64,128,1,float16,float16,0,2.0884052912394204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,2,64,0,1,float16,float16,0,4.683189392089844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,2,64,128,1,fp8,fp8,0,2.678314526875814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,4,64,128,1,float16,fp8,0,2.0467093785603843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,2,64,0,1,fp8,fp8,0,4.193936030069987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,4,64,0,1,float16,float16,0,4.642789204915364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,4,64,128,1,fp8,fp8,0,2.695221265157064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,4,64,0,1,float16,fp8,0,4.57044792175293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,4,64,0,1,fp8,fp8,0,4.200757344563802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,8,64,128,1,float16,float16,0,2.0875093142191568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,8,64,128,1,float16,fp8,0,2.0536959966023765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,8,64,128,1,fp8,fp8,0,2.6899305979410806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,8,64,0,1,float16,float16,0,4.617429415384929
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,96,64,128,1,float16,float16,0,1.1788053512573242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,8,64,0,1,float16,fp8,0,4.579584121704102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,96,64,0,1,float16,float16,0,2.442176024119059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,8,64,0,1,fp8,fp8,0,4.220954577128093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,96,64,128,1,float16,fp8,0,1.1896320184071858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,96,64,128,1,fp8,fp8,0,1.5299733479817708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,96,64,0,1,float16,fp8,0,2.439290682474772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,96,64,0,1,fp8,fp8,0,2.293722629547119
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,1,64,128,1,float16,float16,0,1.088527997334798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,1,64,0,1,float16,float16,0,2.358448028564453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,1,64,128,1,float16,fp8,0,1.0728639761606853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,1,64,128,1,fp8,fp8,0,1.3909385999043782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,1,64,0,1,float16,fp8,0,2.3225812911987305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,1,64,0,1,fp8,fp8,0,2.154815991719564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,2,64,128,1,float16,float16,0,1.094714641571045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,2,64,0,1,float16,float16,0,2.348149299621582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,2,64,128,1,float16,fp8,0,1.0706346829732258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,2,64,128,1,fp8,fp8,0,1.4015040397644043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,2,64,0,1,float16,fp8,0,2.3370240529378257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,4,64,128,1,float16,float16,0,1.092901309331258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,2,64,0,1,fp8,fp8,0,2.156010627746582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,4,64,0,1,float16,float16,0,2.363258679707845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,4,64,128,1,float16,fp8,0,1.0782453219095867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,4,64,128,1,fp8,fp8,0,1.4066133499145508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,4,64,0,1,float16,fp8,0,2.3253706296284995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,4,64,0,1,fp8,fp8,0,2.1742080052693686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,8,64,128,1,float16,float16,0,1.099503993988037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,8,64,128,1,float16,fp8,0,1.078426678975423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,8,64,0,1,float16,float16,0,2.3499892552693686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,8,64,128,1,fp8,fp8,0,1.403498649597168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,96,64,128,1,float16,float16,0,0.6358453432718912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,8,64,0,1,float16,fp8,0,2.3441333770751953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,96,64,0,1,float16,float16,0,1.2769066492716472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,1,64,128,1,float16,float16,0,0.5973440011342367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,8,64,0,1,fp8,fp8,0,2.167685349782308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,96,64,128,1,float16,fp8,0,0.6493493318557739
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,96,64,128,1,fp8,fp8,0,0.8265706698099772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,96,64,0,1,float16,fp8,0,1.2803839842478435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,96,64,0,1,fp8,fp8,0,1.2088212966918945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,1,64,128,1,float16,fp8,0,0.5899786551793417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,1,64,0,1,float16,float16,0,1.2404106458028157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,1,64,128,1,fp8,fp8,0,0.7584693431854248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,1,64,0,1,float16,fp8,0,1.2246826489766438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,1,64,0,1,fp8,fp8,0,1.1502079963684082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,2,64,128,1,float16,float16,0,0.5982453425725301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,2,64,0,1,float16,float16,0,1.2345866362253826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,2,64,128,1,float16,fp8,0,0.5888319810231527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,2,64,128,1,fp8,fp8,0,0.7624906698862711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,2,64,0,1,float16,fp8,0,1.2312106291453044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,4,64,128,1,float16,float16,0,0.598906675974528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,2,64,0,1,fp8,fp8,0,1.142917315165202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,4,64,0,1,float16,float16,0,1.2419040203094482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,4,64,128,1,float16,fp8,0,0.5894773403803507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,4,64,128,1,fp8,fp8,0,0.7609866460164388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,4,64,0,1,float16,fp8,0,1.225157340367635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,4,64,0,1,fp8,fp8,0,1.151535987854004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,8,64,128,1,float16,float16,0,0.6022186676661173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,8,64,0,1,float16,float16,0,1.2362559636433919
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,8,64,128,1,float16,fp8,0,0.5926986535390218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,8,64,128,1,fp8,fp8,0,0.7677600383758545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,8,64,0,1,float16,fp8,0,1.2274986902872722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,8,64,0,1,fp8,fp8,0,1.1493919690450032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,96,64,128,1,float16,float16,0,0.4238933324813843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,96,64,0,1,float16,float16,0,0.7511413097381592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,96,64,128,1,float16,fp8,0,0.42563732465108234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,96,64,128,1,fp8,fp8,0,0.5100640058517456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,96,64,0,1,float16,fp8,0,0.7477173010508219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,96,64,0,1,fp8,fp8,0,0.6836746533711752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,1,64,128,1,float16,float16,0,0.42139732837677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,1,64,0,1,float16,float16,0,0.7487680117289225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,1,64,128,1,float16,fp8,0,0.4222453435262044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,1,64,128,1,fp8,fp8,0,0.509610652923584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,1,64,0,1,float16,fp8,0,0.7528586387634277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,1,64,0,1,fp8,fp8,0,0.6796800295511881
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,2,64,128,1,float16,float16,0,0.42256001631418866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,2,64,128,1,float16,fp8,0,0.42215466499328613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,2,64,0,1,float16,float16,0,0.7526933352152506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,2,64,128,1,fp8,fp8,0,0.506714661916097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,2,64,0,1,float16,fp8,0,0.7484906514485677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,2,64,0,1,fp8,fp8,0,0.6837279796600342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,4,64,128,1,float16,float16,0,0.423744002978007
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,4,64,0,1,float16,float16,0,0.7487626870473226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,4,64,128,1,float16,fp8,0,0.4228959878285726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,4,64,128,1,fp8,fp8,0,0.5053973197937012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,4,64,0,1,float16,fp8,0,0.7530720233917236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,4,64,0,1,fp8,fp8,0,0.6805973052978516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,8,64,128,1,float16,float16,0,0.4219040075937907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,8,64,0,1,float16,float16,0,0.7518880367279053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,8,64,128,1,float16,fp8,0,0.4244106610616048
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,8,64,128,1,fp8,fp8,0,0.5082720120747884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,8,64,0,1,float16,fp8,0,0.7487839857737223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,8,64,0,1,fp8,fp8,0,0.6794453461964926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,1,64,128,1,float16,float16,0,6.127893447875977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,1,64,0,1,float16,float16,0,11.245909372965494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,1,64,128,1,float16,fp8,0,6.037317276000977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,1,64,128,1,fp8,fp8,0,7.941658655802409
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,1,64,0,1,float16,fp8,0,11.224693298339844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,1,64,0,1,fp8,fp8,0,10.359551747639975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,2,64,128,1,float16,float16,0,6.286816279093425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,2,64,0,1,float16,float16,0,11.47546132405599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,2,64,128,1,float16,fp8,0,6.185573577880859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,2,64,128,1,fp8,fp8,0,8.033525466918945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,2,64,0,1,float16,fp8,0,11.367791493733725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,4,64,128,1,float16,float16,0,6.332101186116536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,4,64,128,1,float16,fp8,0,6.20460828145345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,2,64,0,1,fp8,fp8,0,10.499189376831055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,4,64,0,1,float16,float16,0,11.496592203776041
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,4,64,128,1,fp8,fp8,0,8.052602767944336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,8,64,128,1,float16,float16,0,6.324096043904622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,4,64,0,1,fp8,fp8,0,10.461786905924479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,8,64,128,1,float16,fp8,0,6.243824005126953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,4,64,0,1,float16,fp8,0,11.382938385009766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,8,64,0,1,float16,float16,0,11.499824523925781
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,8,64,128,1,fp8,fp8,0,8.134117126464844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,96,64,128,1,float16,float16,0,3.4450292587280273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,96,64,128,1,float16,fp8,0,3.4013919830322266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,8,64,0,1,float16,fp8,0,11.434132893880209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,96,64,0,1,float16,float16,0,6.004208246866862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,8,64,0,1,fp8,fp8,0,10.491813023885092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,96,64,128,1,fp8,fp8,0,4.446709314982097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,96,64,0,1,fp8,fp8,0,5.659247716267903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,96,64,0,1,float16,fp8,0,6.036037445068359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,1,64,128,1,float16,float16,0,3.039087931315104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,1,64,0,1,float16,float16,0,5.665546417236328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,1,64,128,1,float16,fp8,0,2.9882399241129556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,1,64,128,1,fp8,fp8,0,3.8911412556966147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,1,64,0,1,float16,fp8,0,5.593861262003581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,1,64,0,1,fp8,fp8,0,5.147583961486816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,2,64,128,1,float16,float16,0,3.0715627670288086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,2,64,0,1,float16,float16,0,5.668613433837891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,2,64,128,1,float16,fp8,0,3.023712158203125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,2,64,128,1,fp8,fp8,0,3.9458185831705728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,2,64,0,1,float16,fp8,0,5.669690450032552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,2,64,0,1,fp8,fp8,0,5.130400021870931
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,4,64,128,1,float16,float16,0,3.0590346654256186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,4,64,128,1,float16,fp8,0,3.0281120936075845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,4,64,0,1,float16,float16,0,5.6570078531901045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,4,64,128,1,fp8,fp8,0,3.972362518310547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,4,64,0,1,float16,fp8,0,5.610293070475261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,4,64,0,1,fp8,fp8,0,5.157669385274251
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,8,64,128,1,float16,float16,0,3.0801547368367515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,8,64,0,1,float16,float16,0,5.635040283203125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,8,64,128,1,float16,fp8,0,3.026442527770996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,8,64,128,1,fp8,fp8,0,3.9950294494628906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,8,64,0,1,float16,fp8,0,5.667552312215169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,8,64,0,1,fp8,fp8,0,5.175248146057129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,96,64,128,1,float16,float16,0,1.7136212984720867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,96,64,128,1,float16,fp8,0,1.722885290781657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,96,64,0,1,float16,float16,0,3.0243040720621743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,96,64,128,1,fp8,fp8,0,2.213007926940918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,1,64,128,1,float16,float16,0,1.5663305918375652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,96,64,0,1,float16,fp8,0,3.0059839884440103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,96,64,0,1,fp8,fp8,0,2.826330820719401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,1,64,128,1,float16,fp8,0,1.5314399401346843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,1,64,0,1,float16,float16,0,2.8602612813313804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,1,64,128,1,fp8,fp8,0,2.0147786140441895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,1,64,0,1,float16,fp8,0,2.833792050679525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,1,64,0,1,fp8,fp8,0,2.6059840520222983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,2,64,128,1,float16,float16,0,1.5705386797587078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,2,64,0,1,float16,float16,0,2.8452587127685547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,2,64,128,1,float16,fp8,0,1.5429226557413738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,2,64,128,1,fp8,fp8,0,2.005861282348633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,2,64,0,1,fp8,fp8,0,2.608661333719889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,2,64,0,1,float16,fp8,0,2.834837277730306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,4,64,128,1,float16,float16,0,1.5656372706095378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,4,64,0,1,float16,float16,0,2.850133260091146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,4,64,128,1,float16,fp8,0,1.5462932586669922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,4,64,128,1,fp8,fp8,0,2.008959929148356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,4,64,0,1,float16,fp8,0,2.8381760915120444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,4,64,0,1,fp8,fp8,0,2.6116159756978354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,8,64,128,1,float16,float16,0,1.5792959531148274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,8,64,0,1,float16,float16,0,2.872506777445475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,8,64,128,1,float16,fp8,0,1.5482880274454753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,8,64,128,1,fp8,fp8,0,2.0200319290161133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,8,64,0,1,float16,fp8,0,2.8505385716756186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,96,64,128,1,float16,float16,0,0.8950719833374023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,8,64,0,1,fp8,fp8,0,2.6195359230041504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,96,64,0,1,float16,float16,0,1.5341919263203938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,96,64,128,1,float16,fp8,0,0.9043253262837728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,96,64,128,1,fp8,fp8,0,1.1614346504211426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,96,64,0,1,float16,fp8,0,1.5469279289245605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,96,64,0,1,fp8,fp8,0,1.4570719401041667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,1,64,128,1,float16,float16,0,0.8264213403066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,1,64,0,1,float16,float16,0,1.4764960606892903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,1,64,128,1,float16,fp8,0,0.8076586723327637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,1,64,128,1,fp8,fp8,0,1.0509333610534668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,1,64,0,1,float16,fp8,0,1.4661493301391602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,1,64,0,1,fp8,fp8,0,1.359231948852539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,2,64,128,1,float16,float16,0,0.8245120048522949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,2,64,0,1,float16,float16,0,1.4735253651936848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,2,64,128,1,float16,fp8,0,0.8129599889119467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,2,64,128,1,fp8,fp8,0,1.0579573313395183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,2,64,0,1,float16,fp8,0,1.4559946060180664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,2,64,0,1,fp8,fp8,0,1.3529173533121746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,4,64,128,1,float16,float16,0,0.8310506343841553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,4,64,128,1,float16,fp8,0,0.8122773170471191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,4,64,0,1,float16,float16,0,1.4736107190450032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,4,64,128,1,fp8,fp8,0,1.062986691792806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,4,64,0,1,float16,fp8,0,1.466858704884847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,8,64,128,1,float16,float16,0,0.8306879997253418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,4,64,0,1,fp8,fp8,0,1.3557066917419434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,8,64,0,1,float16,float16,0,1.4738346735636394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,8,64,128,1,float16,fp8,0,0.819439967473348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,8,64,128,1,fp8,fp8,0,1.060970703760783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,8,64,0,1,float16,fp8,0,1.4653387069702148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,8,64,0,1,fp8,fp8,0,1.3693226178487141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,96,64,128,1,float16,float16,0,0.4858826796213786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,96,64,0,1,float16,float16,0,0.8154400189717611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,96,64,128,1,float16,fp8,0,0.4907360076904297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,96,64,128,1,fp8,fp8,0,0.627829353014628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,96,64,0,1,float16,fp8,0,0.826469341913859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,96,64,0,1,fp8,fp8,0,0.7790079911549886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,1,64,128,1,float16,float16,0,0.4550400177637736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,1,64,0,1,float16,float16,0,0.7889066537221273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,1,64,128,1,float16,fp8,0,0.4500053326288859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,1,64,128,1,fp8,fp8,0,0.5787733395894369
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,1,64,0,1,float16,fp8,0,0.7761066754659017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,1,64,0,1,fp8,fp8,0,0.7279893557230631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,2,64,128,1,float16,float16,0,0.4573119878768921
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,2,64,0,1,float16,float16,0,0.7851200103759766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,2,64,128,1,float16,fp8,0,0.44814932346343994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,2,64,128,1,fp8,fp8,0,0.580186684926351
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,2,64,0,1,float16,fp8,0,0.7817546526590983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,2,64,0,1,fp8,fp8,0,0.7322506904602051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,4,64,128,1,float16,float16,0,0.45606935024261475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,4,64,0,1,float16,float16,0,0.7858666578928629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,4,64,128,1,float16,fp8,0,0.45126934846242267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,4,64,128,1,fp8,fp8,0,0.5786826610565186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,4,64,0,1,float16,fp8,0,0.7790186405181885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,4,64,0,1,fp8,fp8,0,0.7331519921620687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,8,64,128,1,float16,float16,0,0.45875732103983563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,8,64,128,1,float16,fp8,0,0.4513173500696818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,8,64,0,1,float16,float16,0,0.7910827000935873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,8,64,128,1,fp8,fp8,0,0.580618659655253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,8,64,0,1,float16,fp8,0,0.7854506969451904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,8,64,0,1,fp8,fp8,0,0.7327946821848551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,96,64,128,1,float16,float16,0,0.323743999004364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,96,64,0,1,float16,float16,0,0.4957493146260579
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,96,64,128,1,float16,fp8,0,0.325109342734019
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,96,64,128,1,fp8,fp8,0,0.3895626862843831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,96,64,0,1,float16,fp8,0,0.495194673538208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,96,64,0,1,fp8,fp8,0,0.44863466421763104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,1,64,128,1,float16,float16,0,0.32344533999760944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,1,64,0,1,float16,float16,0,0.4947253465652466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,1,64,128,1,float16,fp8,0,0.32266666491826373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,1,64,128,1,fp8,fp8,0,0.3893973429997762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,1,64,0,1,float16,fp8,0,0.495471994082133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,1,64,0,1,fp8,fp8,0,0.44741864999135333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,2,64,128,1,float16,float16,0,0.3242453336715698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,2,64,0,1,float16,float16,0,0.49396268526713055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,2,64,128,1,float16,fp8,0,0.32440000772476196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,2,64,128,1,fp8,fp8,0,0.38883201281229657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,2,64,0,1,float16,fp8,0,0.49325335025787354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,2,64,0,1,fp8,fp8,0,0.4495573441187541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,4,64,128,1,float16,float16,0,0.32365866502126056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,4,64,0,1,float16,float16,0,0.49413331349690753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,4,64,128,1,float16,fp8,0,0.324837327003479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,4,64,128,1,fp8,fp8,0,0.3893226782480876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,4,64,0,1,float16,fp8,0,0.4967306852340698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,4,64,0,1,fp8,fp8,0,0.4485653241475423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,8,64,128,1,float16,float16,0,0.3226933280626933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,8,64,0,1,float16,float16,0,0.4965333143870036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,8,64,128,1,float16,fp8,0,0.3242986599604289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,8,64,128,1,fp8,fp8,0,0.38786133130391437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,8,64,0,1,float16,fp8,0,0.4966506560643514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,8,64,0,1,fp8,fp8,0,0.4473866621653239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,1,64,128,1,float16,float16,0,8.157578786214193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,1,64,128,1,float16,fp8,0,8.092117309570312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,1,64,0,1,float16,float16,0,11.920949300130209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,1,64,128,1,fp8,fp8,0,10.45676294962565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,1,64,0,1,float16,fp8,0,11.96667226155599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,2,64,128,1,float16,float16,0,8.19538688659668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,1,64,0,1,fp8,fp8,0,10.806026458740234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,2,64,128,1,float16,fp8,0,8.046944300333658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,2,64,0,1,float16,float16,0,12.014048258463541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,4,64,128,1,float16,float16,0,8.235941569010416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,2,64,128,1,fp8,fp8,0,10.594271977742514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,2,64,0,1,fp8,fp8,0,10.988090515136719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,2,64,0,1,float16,fp8,0,11.811920166015625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,4,64,0,1,float16,float16,0,12.05191421508789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,4,64,128,1,float16,fp8,0,8.061205546061197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,4,64,128,1,fp8,fp8,0,10.685663859049479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,4,64,0,1,float16,fp8,0,11.9049072265625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,4,64,0,1,fp8,fp8,0,11.088816324869791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,8,64,128,1,float16,float16,0,8.231562932332357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,8,64,0,1,float16,float16,0,12.06832504272461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,8,64,128,1,float16,fp8,0,8.074431737263998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,8,64,128,1,fp8,fp8,0,10.711104075113932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,8,64,0,1,float16,fp8,0,11.965034484863281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,96,64,128,1,float16,float16,0,4.578367869059245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,8,64,0,1,fp8,fp8,0,11.10055414835612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,96,64,0,1,float16,float16,0,6.539429346720378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,96,64,128,1,float16,fp8,0,4.551258722941081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,96,64,128,1,fp8,fp8,0,5.838805516560872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,96,64,0,1,float16,fp8,0,6.456096013387044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,1,64,128,1,float16,float16,0,4.004709243774414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,96,64,0,1,fp8,fp8,0,6.041466395060222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,1,64,0,1,float16,float16,0,5.969615936279297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,1,64,128,1,float16,fp8,0,3.9373547236124673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,1,64,128,1,fp8,fp8,0,5.154330571492513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,1,64,0,1,fp8,fp8,0,5.35153071085612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,1,64,0,1,float16,fp8,0,5.877311706542969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,2,64,128,1,float16,float16,0,4.054287910461426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,2,64,0,1,float16,float16,0,5.974341074625651
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,2,64,128,1,float16,fp8,0,3.980501174926758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,2,64,128,1,fp8,fp8,0,5.2194773356119795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,2,64,0,1,float16,fp8,0,5.91599973042806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,2,64,0,1,fp8,fp8,0,5.420890808105469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,4,64,128,1,float16,float16,0,4.0742292404174805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,4,64,0,1,float16,float16,0,5.990416208902995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,4,64,128,1,float16,fp8,0,3.9982773462931314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,4,64,128,1,fp8,fp8,0,5.237125396728516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,4,64,0,1,float16,fp8,0,5.927226384480794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,4,64,0,1,fp8,fp8,0,5.409445444742839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,8,64,128,1,float16,float16,0,4.078501383463542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,8,64,128,1,float16,fp8,0,4.038271903991699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,8,64,0,1,float16,float16,0,6.030874888102214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,8,64,128,1,fp8,fp8,0,5.250522613525391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,96,64,128,1,float16,float16,0,2.257802645365397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,96,64,0,1,float16,float16,0,3.2199414571126304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,8,64,0,1,fp8,fp8,0,5.458410898844401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,8,64,0,1,float16,fp8,0,5.872447967529297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,96,64,128,1,float16,fp8,0,2.2434080441792807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,96,64,128,1,fp8,fp8,0,2.902496019999186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,96,64,0,1,float16,fp8,0,3.2030293146769204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,1,64,128,1,float16,float16,0,2.0233920415242515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,96,64,0,1,fp8,fp8,0,2.988784154256185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,1,64,128,1,float16,fp8,0,1.98416535059611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,1,64,0,1,float16,float16,0,2.985760052998861
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,1,64,128,1,fp8,fp8,0,2.5960906346639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,1,64,0,1,float16,fp8,0,2.9344959259033203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,1,64,0,1,fp8,fp8,0,2.706437428792318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,2,64,128,1,float16,float16,0,2.0361119906107583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,2,64,0,1,float16,float16,0,2.974287986755371
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,2,64,128,1,float16,fp8,0,2.0034826596577964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,2,64,128,1,fp8,fp8,0,2.5998560587565103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,2,64,0,1,float16,fp8,0,2.9538825352986655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,2,64,0,1,fp8,fp8,0,2.701903978983561
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,4,64,128,1,float16,float16,0,2.0312320391337075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,4,64,128,1,float16,fp8,0,2.0061333974202475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,4,64,0,1,float16,float16,0,2.9935948053995767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,4,64,128,1,fp8,fp8,0,2.605840047200521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,4,64,0,1,float16,fp8,0,2.9624319076538086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,4,64,0,1,fp8,fp8,0,2.704533259073893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,8,64,128,1,float16,float16,0,2.0467732747395835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,8,64,128,1,float16,fp8,0,2.008608023325602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,8,64,0,1,float16,float16,0,2.9850292205810547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,8,64,128,1,fp8,fp8,0,2.6335573196411133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,8,64,0,1,float16,fp8,0,2.9693225224812827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,8,64,0,1,fp8,fp8,0,2.714639981587728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,96,64,128,1,float16,float16,0,1.1533866723378499
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,96,64,0,1,float16,float16,0,1.6240213712056477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,96,64,128,1,float16,fp8,0,1.1571626663208008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,96,64,128,1,fp8,fp8,0,1.4830560684204102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,96,64,0,1,float16,fp8,0,1.6293813387552898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,1,64,128,1,float16,float16,0,1.048789342244466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,96,64,0,1,fp8,fp8,0,1.529962698618571
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,1,64,0,1,float16,float16,0,1.5192054112752278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,1,64,128,1,float16,fp8,0,1.0308053493499756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,1,64,128,1,fp8,fp8,0,1.336400032043457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,1,64,0,1,float16,fp8,0,1.510287920633952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,2,64,128,1,float16,float16,0,1.0479466915130615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,1,64,0,1,fp8,fp8,0,1.3839573860168457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,2,64,128,1,float16,fp8,0,1.0323999722798665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,2,64,0,1,float16,float16,0,1.5301705996195476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,2,64,128,1,fp8,fp8,0,1.3416372934977214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,2,64,0,1,float16,fp8,0,1.515973409016927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,4,64,128,1,float16,float16,0,1.0550453662872314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,2,64,0,1,fp8,fp8,0,1.3874400456746419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,4,64,0,1,float16,float16,0,1.5295467376708984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,4,64,128,1,float16,fp8,0,1.03166397412618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,4,64,128,1,fp8,fp8,0,1.3502880732218425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,4,64,0,1,float16,fp8,0,1.5148639678955078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,4,64,0,1,fp8,fp8,0,1.3885760307312012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,8,64,128,1,float16,float16,0,1.0572906335194905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,8,64,0,1,float16,float16,0,1.5361013412475586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,8,64,128,1,float16,fp8,0,1.0366026560465496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,8,64,128,1,fp8,fp8,0,1.3506080309549968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,8,64,0,1,float16,fp8,0,1.5130400657653809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,8,64,0,1,fp8,fp8,0,1.403696060180664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,96,64,128,1,float16,float16,0,0.6034453312555949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,96,64,0,1,float16,float16,0,0.8399626413981119
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,96,64,128,1,float16,fp8,0,0.6098666588465372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,96,64,128,1,fp8,fp8,0,0.7813653151194254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,96,64,0,1,float16,fp8,0,0.8464960257212321
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,96,64,0,1,fp8,fp8,0,0.7987733681996664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,1,64,128,1,float16,float16,0,0.5557706753412882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,1,64,0,1,float16,float16,0,0.8001226584116617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,1,64,128,1,float16,fp8,0,0.5487146774927775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,1,64,128,1,fp8,fp8,0,0.7090933322906494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,1,64,0,1,float16,fp8,0,0.7912533283233643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,1,64,0,1,fp8,fp8,0,0.7329333623250326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,2,64,128,1,float16,float16,0,0.5559306542078654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,2,64,128,1,float16,fp8,0,0.5466986497243246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,2,64,0,1,float16,float16,0,0.797696034113566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,2,64,128,1,fp8,fp8,0,0.7142240206400553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,2,64,0,1,float16,fp8,0,0.7863946755727133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,2,64,0,1,fp8,fp8,0,0.7349387009938558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,4,64,128,1,float16,float16,0,0.5603413184483846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,4,64,0,1,float16,float16,0,0.8010613123575846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,4,64,128,1,float16,fp8,0,0.5478453238805135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,4,64,128,1,fp8,fp8,0,0.7149866422017416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,4,64,0,1,float16,fp8,0,0.7882080078125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,4,64,0,1,fp8,fp8,0,0.736629327138265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,8,64,128,1,float16,float16,0,0.5582186778386434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,8,64,0,1,float16,float16,0,0.8037226994832357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,8,64,128,1,float16,fp8,0,0.5537600119908651
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,8,64,128,1,fp8,fp8,0,0.7171786626180013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,8,64,0,1,float16,fp8,0,0.7907093365987142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,8,64,0,1,fp8,fp8,0,0.7356906731923422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,96,64,128,1,float16,float16,0,0.33053332567214966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,96,64,0,1,float16,float16,0,0.4557493527730306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,96,64,128,1,float16,fp8,0,0.3363200028737386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,96,64,128,1,fp8,fp8,0,0.43251200517018634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,96,64,0,1,float16,fp8,0,0.4612586498260498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,96,64,0,1,fp8,fp8,0,0.4283359845479329
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,1,64,128,1,float16,float16,0,0.31084267298380536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,1,64,0,1,float16,float16,0,0.43452266852060956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,1,64,128,1,float16,fp8,0,0.307151993115743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,1,64,128,1,fp8,fp8,0,0.3964213530222575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,1,64,0,1,float16,fp8,0,0.42974400520324707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,1,64,0,1,fp8,fp8,0,0.3937600056330363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,2,64,128,1,float16,float16,0,0.31299734115600586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,2,64,0,1,float16,float16,0,0.4371680021286011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,2,64,128,1,float16,fp8,0,0.30709866682688397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,2,64,128,1,fp8,fp8,0,0.3964746793111165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,2,64,0,1,float16,fp8,0,0.4326719840367635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,2,64,0,1,fp8,fp8,0,0.3946880102157593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,4,64,128,1,float16,float16,0,0.3135146697362264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,4,64,0,1,float16,float16,0,0.4378133217493693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,4,64,128,1,float16,fp8,0,0.30949334303538006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,4,64,128,1,fp8,fp8,0,0.3984266519546509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,4,64,0,1,float16,fp8,0,0.43169601758321124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,4,64,0,1,fp8,fp8,0,0.3938613335291545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,8,64,128,1,float16,float16,0,0.3131999969482422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,8,64,0,1,float16,float16,0,0.4375306765238444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,8,64,128,1,float16,fp8,0,0.3099093238512675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,8,64,128,1,fp8,fp8,0,0.40060798327128094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,8,64,0,1,float16,fp8,0,0.43537068367004395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,8,64,0,1,fp8,fp8,0,0.3967413504918416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,96,64,128,1,float16,float16,0,0.22592000166575113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,96,64,0,1,float16,float16,0,0.28221867481867474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,96,64,128,1,float16,fp8,0,0.22572267055511475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,96,64,128,1,fp8,fp8,0,0.27265065908432007
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,96,64,0,1,float16,fp8,0,0.28325867652893066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,96,64,0,1,fp8,fp8,0,0.26364799340566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,1,64,128,1,float16,float16,0,0.22401599089304605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,1,64,0,1,float16,float16,0,0.27668799956639606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,1,64,128,1,float16,fp8,0,0.22372267643610635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,1,64,128,1,fp8,fp8,0,0.2712586720784505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,1,64,0,1,float16,fp8,0,0.27538132667541504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,1,64,0,1,fp8,fp8,0,0.26172266403834027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,2,64,128,1,float16,float16,0,0.22445333003997803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,2,64,0,1,float16,float16,0,0.2762986620267232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,2,64,128,1,float16,fp8,0,0.22422399123509726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,2,64,128,1,fp8,fp8,0,0.2714879910151164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,2,64,0,1,float16,fp8,0,0.27552000681559247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,2,64,0,1,fp8,fp8,0,0.26185067494710285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,4,64,128,1,float16,float16,0,0.22444266080856323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,4,64,0,1,float16,float16,0,0.2760159969329834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,4,64,128,1,float16,fp8,0,0.22454400857289633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,4,64,128,1,fp8,fp8,0,0.27106134096781415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,4,64,0,1,float16,fp8,0,0.2765706578890483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,4,64,0,1,fp8,fp8,0,0.2624533375104268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,8,64,128,1,float16,float16,0,0.22332799434661865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,8,64,0,1,float16,float16,0,0.27638934055964154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,8,64,128,1,float16,fp8,0,0.22379199663798013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,8,64,128,1,fp8,fp8,0,0.27163199583689374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,8,64,0,1,float16,fp8,0,0.27582399050394696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,8,64,0,1,fp8,fp8,0,0.2624533375104268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,1,64,128,1,float16,float16,0,6.032501220703125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,1,64,0,1,float16,float16,0,7.778810501098633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,1,64,128,1,float16,fp8,0,5.9564158121744795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,1,64,128,1,fp8,fp8,0,7.698432286580403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,1,64,0,1,float16,fp8,0,7.7008851369222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,1,64,0,1,fp8,fp8,0,6.948869069417317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,2,64,128,1,float16,float16,0,6.024757385253906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,2,64,0,1,float16,float16,0,7.837962468465169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,2,64,128,1,float16,fp8,0,5.936229070027669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,2,64,128,1,fp8,fp8,0,7.734005610148112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,2,64,0,1,float16,fp8,0,7.688757578531901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,4,64,128,1,float16,float16,0,6.043850580851237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,2,64,0,1,fp8,fp8,0,6.996378580729167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,4,64,128,1,float16,fp8,0,5.927706400553386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,4,64,0,1,float16,float16,0,7.815088272094727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,4,64,128,1,fp8,fp8,0,7.751375834147136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,4,64,0,1,float16,fp8,0,7.777397155761719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,8,64,128,1,float16,float16,0,6.063776016235352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,4,64,0,1,fp8,fp8,0,7.052778879801433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,8,64,128,1,float16,fp8,0,6.001183827718099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,8,64,0,1,float16,float16,0,7.804789225260417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,8,64,128,1,fp8,fp8,0,7.869333267211914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,96,64,128,1,float16,float16,0,3.4039306640625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,8,64,0,1,float16,fp8,0,7.79853884379069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,8,64,0,1,fp8,fp8,0,7.210325241088867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,96,64,128,1,float16,fp8,0,3.356320063273112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,96,64,0,1,float16,float16,0,4.288069407145183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,96,64,128,1,fp8,fp8,0,4.336607933044434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,96,64,0,1,float16,fp8,0,4.237183888753255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,96,64,0,1,fp8,fp8,0,4.0032908121744795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,1,64,128,1,float16,float16,0,2.987504005432129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,1,64,0,1,float16,float16,0,3.8659305572509766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,1,64,128,1,float16,fp8,0,2.9335734049479165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,1,64,128,1,fp8,fp8,0,3.855621337890625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,1,64,0,1,float16,fp8,0,3.826047897338867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,1,64,0,1,fp8,fp8,0,3.4663893381754556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,2,64,128,1,float16,float16,0,2.976170539855957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,2,64,128,1,float16,fp8,0,2.956613222757975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,2,64,0,1,float16,float16,0,3.9078238805135093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,2,64,128,1,fp8,fp8,0,3.859893480936686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,2,64,0,1,float16,fp8,0,3.8460213343302407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,2,64,0,1,fp8,fp8,0,3.5107574462890625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,4,64,128,1,float16,float16,0,3.008474667867025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,4,64,0,1,float16,float16,0,3.886725425720215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,4,64,128,1,float16,fp8,0,2.932880083719889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,4,64,128,1,fp8,fp8,0,3.8680105209350586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,4,64,0,1,float16,fp8,0,3.864960034688314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,8,64,128,1,float16,float16,0,3.004021326700846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,4,64,0,1,fp8,fp8,0,3.5135574340820312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,8,64,0,1,float16,float16,0,3.9016799926757812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,8,64,128,1,float16,fp8,0,2.977871894836426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,8,64,128,1,fp8,fp8,0,3.8719094594319663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,8,64,0,1,float16,fp8,0,3.8544480005900064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,8,64,0,1,fp8,fp8,0,3.538474718729655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,96,64,128,1,float16,float16,0,1.682213306427002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,96,64,0,1,float16,float16,0,2.128671964009603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,96,64,128,1,float16,fp8,0,1.683301289876302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,96,64,128,1,fp8,fp8,0,2.1683947245279946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,96,64,0,1,float16,fp8,0,2.136026700337728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,96,64,0,1,fp8,fp8,0,1.9683680534362793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,1,64,128,1,float16,float16,0,1.5042239824930828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,1,64,0,1,float16,float16,0,1.9677440325419109
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,1,64,128,1,float16,fp8,0,1.482927958170573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,1,64,128,1,fp8,fp8,0,1.9366346995035808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,1,64,0,1,float16,fp8,0,1.9375200271606445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,1,64,0,1,fp8,fp8,0,1.7691787083943684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,2,64,128,1,float16,float16,0,1.522047996520996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,2,64,128,1,float16,fp8,0,1.4793492952982585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,2,64,0,1,float16,float16,0,1.9627200762430828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,2,64,128,1,fp8,fp8,0,1.9528053601582844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,2,64,0,1,float16,fp8,0,1.9373493194580078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,2,64,0,1,fp8,fp8,0,1.7608799934387207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,4,64,128,1,float16,float16,0,1.5242773691813152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,4,64,0,1,float16,float16,0,1.9789013862609863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,4,64,128,1,float16,fp8,0,1.498832066853841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,4,64,128,1,fp8,fp8,0,1.936255931854248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,4,64,0,1,float16,fp8,0,1.9387839635213215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,4,64,0,1,fp8,fp8,0,1.7765386899312336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,8,64,128,1,float16,float16,0,1.522271951039632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,8,64,0,1,float16,float16,0,1.9669866561889648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,8,64,128,1,float16,fp8,0,1.5071733792622883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,8,64,128,1,fp8,fp8,0,1.9752000172932942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,8,64,0,1,float16,fp8,0,1.9564533233642578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,8,64,0,1,fp8,fp8,0,1.7662453651428223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,96,64,128,1,float16,float16,0,0.862714687983195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,96,64,0,1,float16,float16,0,1.0926667054494221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,96,64,128,1,float16,fp8,0,0.8643733660380045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,96,64,128,1,fp8,fp8,0,1.1089599927266438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,96,64,0,1,float16,fp8,0,1.0932640234629314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,96,64,0,1,fp8,fp8,0,1.0188319683074951
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,1,64,128,1,float16,float16,0,0.7870720227559408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,1,64,0,1,float16,float16,0,1.0038560231526692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,1,64,128,1,float16,fp8,0,0.7703039646148682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,1,64,128,1,fp8,fp8,0,1.0093546708424885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,1,64,0,1,float16,fp8,0,0.9963733355204264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,1,64,0,1,fp8,fp8,0,0.9100693066914877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,2,64,128,1,float16,float16,0,0.7894240220387777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,2,64,0,1,float16,float16,0,1.0170239607493083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,2,64,128,1,float16,fp8,0,0.7758399645487467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,2,64,128,1,fp8,fp8,0,1.000762701034546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,2,64,0,1,float16,fp8,0,0.9984479745229086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,2,64,0,1,fp8,fp8,0,0.9167306423187256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,4,64,128,1,float16,float16,0,0.7875146865844727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,4,64,0,1,float16,float16,0,1.0102559725443523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,4,64,128,1,float16,fp8,0,0.7780586878458658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,4,64,128,1,fp8,fp8,0,1.0182560284932454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,4,64,0,1,float16,fp8,0,1.004581371943156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,4,64,0,1,fp8,fp8,0,0.9109973112742106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,8,64,128,1,float16,float16,0,0.7902133464813232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,8,64,0,1,float16,float16,0,1.0207893053690593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,8,64,128,1,float16,fp8,0,0.7803573608398438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,8,64,128,1,fp8,fp8,0,1.012826681137085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,8,64,0,1,float16,fp8,0,1.0071093241373699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,8,64,0,1,fp8,fp8,0,0.9249227046966553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,96,64,128,1,float16,float16,0,0.45576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,96,64,0,1,float16,float16,0,0.5673813422520956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,96,64,128,1,float16,fp8,0,0.4594026803970337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,96,64,128,1,fp8,fp8,0,0.5923519929250082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,96,64,0,1,float16,fp8,0,0.5735946496327718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,96,64,0,1,fp8,fp8,0,0.5373226801554362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,1,64,128,1,float16,float16,0,0.42113598187764484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,1,64,0,1,float16,float16,0,0.53875199953715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,1,64,128,1,float16,fp8,0,0.4145386616388957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,1,64,128,1,fp8,fp8,0,0.5346133311589559
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,1,64,0,1,float16,fp8,0,0.5264853239059448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,1,64,0,1,fp8,fp8,0,0.48574399948120117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,2,64,128,1,float16,float16,0,0.419648011525472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,2,64,0,1,float16,float16,0,0.5373119910558065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,2,64,128,1,float16,fp8,0,0.4153493245442708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,2,64,128,1,fp8,fp8,0,0.5431040128072103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,2,64,0,1,float16,fp8,0,0.5303786595662435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,2,64,0,1,fp8,fp8,0,0.48427732785542804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,4,64,128,1,float16,float16,0,0.42115732034047443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,4,64,0,1,float16,float16,0,0.5363466739654541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,4,64,128,1,float16,fp8,0,0.4145919879277547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,4,64,128,1,fp8,fp8,0,0.5414826472600301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,4,64,0,1,float16,fp8,0,0.5319519837697347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,4,64,0,1,fp8,fp8,0,0.4913333257039388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,8,64,128,1,float16,float16,0,0.4240640004475911
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,8,64,0,1,float16,float16,0,0.5350133180618286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,8,64,128,1,float16,fp8,0,0.4172533353169759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,8,64,128,1,fp8,fp8,0,0.5436746676762899
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,8,64,0,1,float16,fp8,0,0.5305706659952799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,8,64,0,1,fp8,fp8,0,0.4930880069732666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,96,64,128,1,float16,float16,0,0.2541653315226237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,96,64,0,1,float16,float16,0,0.31383466720581055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,96,64,128,1,float16,fp8,0,0.25763734181722003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,96,64,128,1,fp8,fp8,0,0.3279893398284912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,96,64,0,1,float16,fp8,0,0.31621332963307697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,96,64,0,1,fp8,fp8,0,0.2929706573486328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,1,64,128,1,float16,float16,0,0.2390026648839315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,1,64,0,1,float16,float16,0,0.29688533147176105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,1,64,128,1,float16,fp8,0,0.2355039914449056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,1,64,128,1,fp8,fp8,0,0.30636799335479736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,1,64,0,1,float16,fp8,0,0.2953653335571289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,1,64,0,1,fp8,fp8,0,0.26475733518600464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,2,64,128,1,float16,float16,0,0.23803732792536417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,2,64,0,1,float16,float16,0,0.29873067140579224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,2,64,128,1,float16,fp8,0,0.2355840007464091
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,2,64,128,1,fp8,fp8,0,0.30554133653640747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,2,64,0,1,float16,fp8,0,0.2950986623764038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,2,64,0,1,fp8,fp8,0,0.2683733304341634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,4,64,128,1,float16,float16,0,0.23914666970570883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,4,64,0,1,float16,float16,0,0.2950506607691447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,4,64,128,1,float16,fp8,0,0.2352799971898397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,4,64,128,1,fp8,fp8,0,0.3059306740760803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,4,64,0,1,float16,fp8,0,0.29361599683761597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,4,64,0,1,fp8,fp8,0,0.2683839996655782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,8,64,128,1,float16,float16,0,0.23987199862798056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,8,64,0,1,float16,float16,0,0.2995786666870117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,8,64,128,1,float16,fp8,0,0.2386186718940735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,8,64,128,1,fp8,fp8,0,0.3043733239173889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,8,64,0,1,float16,fp8,0,0.2959573268890381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,8,64,0,1,fp8,fp8,0,0.26951466004053753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,96,64,128,1,float16,float16,0,0.1750346620877584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,96,64,0,1,float16,float16,0,0.1979680061340332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,96,64,128,1,float16,fp8,0,0.1757919987042745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,96,64,128,1,fp8,fp8,0,0.21607999006907144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,96,64,0,1,float16,fp8,0,0.19785600900650024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,96,64,0,1,fp8,fp8,0,0.18474133809407553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,1,64,128,1,float16,float16,0,0.17293866475423178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,1,64,0,1,float16,float16,0,0.19365866978963217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,1,64,128,1,float16,fp8,0,0.17335466543833414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,1,64,128,1,fp8,fp8,0,0.21037334203720093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,1,64,0,1,float16,fp8,0,0.19426133235295615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,1,64,0,1,fp8,fp8,0,0.18450133005777994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,2,64,128,1,float16,float16,0,0.17349332571029663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,2,64,0,1,float16,float16,0,0.19343467553456625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,2,64,128,1,float16,fp8,0,0.17373865842819214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,2,64,128,1,fp8,fp8,0,0.20931732654571533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,2,64,0,1,float16,fp8,0,0.19431465864181519
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,2,64,0,1,fp8,fp8,0,0.18425067265828451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,4,64,128,1,float16,float16,0,0.17363733053207397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,4,64,0,1,float16,float16,0,0.1946559945742289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,4,64,128,1,float16,fp8,0,0.17416000366210938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,4,64,128,1,fp8,fp8,0,0.20901866753896078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,4,64,0,1,float16,fp8,0,0.19431465864181519
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,4,64,0,1,fp8,fp8,0,0.18386133511861166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,8,64,128,1,float16,float16,0,0.17292799552281699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,8,64,0,1,float16,float16,0,0.19504533211390176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,8,64,128,1,float16,fp8,0,0.17445866266886392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,8,64,128,1,fp8,fp8,0,0.21153066555658975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,8,64,0,1,float16,fp8,0,0.194432000319163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,8,64,0,1,fp8,fp8,0,0.1834933360417684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,1,64,128,1,float16,float16,0,7.336309432983398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,1,64,128,1,float16,fp8,0,7.3241761525472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,1,64,0,1,float16,float16,0,8.52468236287435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,1,64,128,1,fp8,fp8,0,9.60317866007487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,1,64,0,1,fp8,fp8,0,7.4436798095703125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,2,64,128,1,float16,float16,0,7.512661616007487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,1,64,0,1,float16,fp8,0,8.480480194091797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,2,64,128,1,float16,fp8,0,7.451194763183594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,2,64,0,1,float16,float16,0,8.664794921875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,2,64,128,1,fp8,fp8,0,9.57810147603353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,2,64,0,1,float16,fp8,0,8.579397201538086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,2,64,0,1,fp8,fp8,0,7.458725611368815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,4,64,128,1,float16,float16,0,7.540181477864583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,4,64,128,1,float16,fp8,0,7.506538391113281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,4,64,0,1,float16,float16,0,8.659733454386393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,4,64,0,1,float16,fp8,0,8.646629333496094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,4,64,128,1,fp8,fp8,0,9.668682734171549
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,4,64,0,1,fp8,fp8,0,7.568735758463542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,8,64,128,1,float16,float16,0,7.634688059488933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,8,64,0,1,float16,float16,0,8.76315180460612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,8,64,128,1,float16,fp8,0,7.584096272786458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,8,64,0,1,float16,fp8,0,8.774757385253906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,8,64,128,1,fp8,fp8,0,9.896095911661783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,96,64,128,1,float16,float16,0,4.198986689249675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,8,64,0,1,fp8,fp8,0,7.693861643473308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,96,64,0,1,float16,float16,0,4.7712907791137695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,96,64,128,1,float16,fp8,0,4.118560155232747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,96,64,0,1,float16,fp8,0,4.74623457590739
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,96,64,128,1,fp8,fp8,0,5.152053197224935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,96,64,0,1,fp8,fp8,0,4.067930539449056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,1,64,128,1,float16,float16,0,3.6727574666341147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,1,64,128,1,float16,fp8,0,3.6665865580240884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,1,64,0,1,float16,float16,0,4.25706672668457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,1,64,128,1,fp8,fp8,0,4.764634768168132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,1,64,0,1,float16,fp8,0,4.247973442077637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,1,64,0,1,fp8,fp8,0,3.731509208679199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,2,64,128,1,float16,float16,0,3.715984026590983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,2,64,0,1,float16,float16,0,4.23411210378011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,2,64,128,1,float16,fp8,0,3.6872212092081704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,2,64,128,1,fp8,fp8,0,4.810117403666179
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,2,64,0,1,float16,fp8,0,4.281488100687663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,2,64,0,1,fp8,fp8,0,3.712810516357422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,4,64,128,1,float16,float16,0,3.7340854008992515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,4,64,0,1,float16,float16,0,4.3405866622924805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,4,64,128,1,float16,fp8,0,3.7529919942220054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,4,64,128,1,fp8,fp8,0,4.798240025838216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,4,64,0,1,float16,fp8,0,4.33187198638916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,4,64,0,1,fp8,fp8,0,3.7703625361124673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,8,64,128,1,float16,float16,0,3.8169120152791343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,8,64,0,1,float16,float16,0,4.379237174987793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,8,64,128,1,float16,fp8,0,3.823984146118164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,96,64,128,1,float16,float16,0,2.0917760531107583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,96,64,0,1,float16,float16,0,2.3962079683939614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,96,64,128,1,float16,fp8,0,2.056645393371582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,8,64,128,1,fp8,fp8,0,4.973509470621745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,8,64,0,1,float16,fp8,0,4.409712155659993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,8,64,0,1,fp8,fp8,0,3.8218774795532227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,96,64,128,1,fp8,fp8,0,2.5684266090393066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,96,64,0,1,float16,fp8,0,2.35588804880778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,96,64,0,1,fp8,fp8,0,2.027242660522461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,1,64,128,1,float16,float16,0,1.8389387130737305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,1,64,0,1,float16,float16,0,2.138869285583496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,1,64,128,1,float16,fp8,0,1.8461119333902996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,1,64,128,1,fp8,fp8,0,2.4066720008850098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,1,64,0,1,float16,fp8,0,2.1119786898295083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,1,64,0,1,fp8,fp8,0,1.85153595606486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,2,64,128,1,float16,float16,0,1.8567147254943848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,2,64,0,1,float16,float16,0,2.1467466354370117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,2,64,128,1,float16,fp8,0,1.8432159423828125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,2,64,128,1,fp8,fp8,0,2.4138506253560386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,2,64,0,1,fp8,fp8,0,1.8713173866271973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,2,64,0,1,float16,fp8,0,2.149413267771403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,4,64,128,1,float16,float16,0,1.8376587231953938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,4,64,0,1,float16,float16,0,2.136042594909668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,4,64,128,1,float16,fp8,0,1.8565012613932292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,4,64,128,1,fp8,fp8,0,2.4312586784362793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,4,64,0,1,float16,fp8,0,2.143909295399984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,4,64,0,1,fp8,fp8,0,1.8806079228719075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,8,64,128,1,float16,float16,0,1.902714729309082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,8,64,0,1,float16,float16,0,2.203440030415853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,8,64,128,1,float16,fp8,0,1.8794453938802083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,8,64,128,1,fp8,fp8,0,2.4613119761149087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,8,64,0,1,float16,fp8,0,2.1980692545572915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,8,64,0,1,fp8,fp8,0,1.9278826713562012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,96,64,128,1,float16,float16,0,1.0479413668314617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,96,64,0,1,float16,float16,0,1.194170633951823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,96,64,128,1,float16,fp8,0,1.0409386952718098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,96,64,128,1,fp8,fp8,0,1.3048106829325359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,96,64,0,1,float16,fp8,0,1.168218692143758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,96,64,0,1,fp8,fp8,0,1.0171679655710857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,1,64,128,1,float16,float16,0,0.9348693688710531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,1,64,0,1,float16,float16,0,1.0805599689483643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,1,64,128,1,float16,fp8,0,0.9321599801381429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,1,64,128,1,fp8,fp8,0,1.2179839611053467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,1,64,0,1,float16,fp8,0,1.0845173199971516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,1,64,0,1,fp8,fp8,0,0.9450293382008871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,2,64,128,1,float16,float16,0,0.9328853289286295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,2,64,0,1,float16,float16,0,1.081600030263265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,2,64,128,1,float16,fp8,0,0.9391840298970541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,2,64,128,1,fp8,fp8,0,1.2238240242004395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,2,64,0,1,float16,fp8,0,1.0825119813283284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,2,64,0,1,fp8,fp8,0,0.9459146658579508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,4,64,128,1,float16,float16,0,0.9402240117390951
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,4,64,0,1,float16,float16,0,1.0915733178456624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,4,64,128,1,float16,fp8,0,0.931552012761434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,4,64,128,1,fp8,fp8,0,1.221274693806966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,4,64,0,1,float16,fp8,0,1.0899413426717122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,4,64,0,1,fp8,fp8,0,0.9499786694844564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,8,64,128,1,float16,float16,0,0.9470346768697103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,8,64,0,1,float16,float16,0,1.0971786975860596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,8,64,128,1,float16,fp8,0,0.9487573305765787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,8,64,128,1,fp8,fp8,0,1.246613343556722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,8,64,0,1,float16,fp8,0,1.0846292972564697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,96,64,128,1,float16,float16,0,0.5397813320159912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,8,64,0,1,fp8,fp8,0,0.9604852994283041
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,96,64,0,1,float16,float16,0,0.6118079821268717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,96,64,128,1,float16,fp8,0,0.5300960143407186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,96,64,128,1,fp8,fp8,0,0.6696853637695312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,96,64,0,1,float16,fp8,0,0.6063626607259115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,96,64,0,1,fp8,fp8,0,0.5253760019938151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,1,64,128,1,float16,float16,0,0.47565333048502606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,1,64,0,1,float16,float16,0,0.5526986519495646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,1,64,128,1,float16,fp8,0,0.4817386468251546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,1,64,128,1,fp8,fp8,0,0.6286826531092325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,1,64,0,1,float16,fp8,0,0.55676798025767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,1,64,0,1,fp8,fp8,0,0.4854346513748169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,2,64,128,1,float16,float16,0,0.48451733589172363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,2,64,0,1,float16,float16,0,0.5596373478571574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,2,64,128,1,float16,fp8,0,0.47915200392405194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,2,64,128,1,fp8,fp8,0,0.6290026505788168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,2,64,0,1,float16,fp8,0,0.5568319956461588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,2,64,0,1,fp8,fp8,0,0.4866666793823242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,4,64,128,1,float16,float16,0,0.4860373338063558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,4,64,0,1,float16,float16,0,0.5590720176696777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,4,64,128,1,float16,fp8,0,0.4856799840927124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,4,64,128,1,fp8,fp8,0,0.63318932056427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,4,64,0,1,float16,fp8,0,0.5534240007400513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,4,64,0,1,fp8,fp8,0,0.485701322555542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,8,64,128,1,float16,float16,0,0.48842668533325195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,8,64,0,1,float16,float16,0,0.5653119881947836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,8,64,128,1,float16,fp8,0,0.4877013365427653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,8,64,128,1,fp8,fp8,0,0.6348640124003092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,8,64,0,1,float16,fp8,0,0.564192016919454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,8,64,0,1,fp8,fp8,0,0.493557333946228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,96,64,128,1,float16,float16,0,0.2823839982350667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,96,64,0,1,float16,float16,0,0.3219573299090068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,96,64,128,1,float16,fp8,0,0.2811200022697449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,96,64,128,1,fp8,fp8,0,0.35231999556223553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,96,64,0,1,float16,fp8,0,0.3171306649843852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,96,64,0,1,fp8,fp8,0,0.2681173284848531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,1,64,128,1,float16,float16,0,0.2555733323097229
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,1,64,0,1,float16,float16,0,0.2938239971796672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,1,64,128,1,float16,fp8,0,0.2513440052668254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,1,64,128,1,fp8,fp8,0,0.3302239974339803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,1,64,0,1,float16,fp8,0,0.2942453424135844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,1,64,0,1,fp8,fp8,0,0.24987733364105225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,2,64,128,1,float16,float16,0,0.25407467285792035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,2,64,0,1,float16,float16,0,0.29371732473373413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,2,64,128,1,float16,fp8,0,0.2557920018831889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,2,64,128,1,fp8,fp8,0,0.3338346481323242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,2,64,0,1,float16,fp8,0,0.2898826599121094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,2,64,0,1,fp8,fp8,0,0.2504640022913615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,4,64,128,1,float16,float16,0,0.2570986747741699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,4,64,0,1,float16,float16,0,0.2937386631965637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,4,64,128,1,float16,fp8,0,0.2553760011990865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,4,64,128,1,fp8,fp8,0,0.3328160047531128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,4,64,0,1,float16,fp8,0,0.29356799523035687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,4,64,0,1,fp8,fp8,0,0.2521173357963562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,8,64,128,1,float16,float16,0,0.2539359927177429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,8,64,0,1,float16,float16,0,0.2940373420715332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,8,64,128,1,float16,fp8,0,0.2577706575393677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,8,64,128,1,fp8,fp8,0,0.33752532800038654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,8,64,0,1,float16,fp8,0,0.2935413320859273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,8,64,0,1,fp8,fp8,0,0.25490132967631024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,96,64,128,1,float16,float16,0,0.15600533286730447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,96,64,0,1,float16,float16,0,0.17145599921544394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,96,64,128,1,float16,fp8,0,0.1546293298403422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,96,64,128,1,fp8,fp8,0,0.19580266873041788
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,96,64,0,1,float16,fp8,0,0.17056532700856528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,96,64,0,1,fp8,fp8,0,0.1458239952723185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,1,64,128,1,float16,float16,0,0.1393280029296875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,1,64,0,1,float16,float16,0,0.15186132987340292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,1,64,128,1,float16,fp8,0,0.1395039955774943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,1,64,128,1,fp8,fp8,0,0.18435200055440268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,1,64,0,1,float16,fp8,0,0.14963733156522116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,1,64,0,1,fp8,fp8,0,0.13522133231163025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,2,64,128,1,float16,float16,0,0.14050133029619852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,2,64,0,1,float16,float16,0,0.1518880029519399
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,2,64,128,1,float16,fp8,0,0.14013333121935526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,2,64,128,1,fp8,fp8,0,0.18729599316914877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,2,64,0,1,float16,fp8,0,0.15373333295186362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,2,64,0,1,fp8,fp8,0,0.13622400164604187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,4,64,128,1,float16,float16,0,0.13870400190353394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,4,64,0,1,float16,float16,0,0.15220800042152405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,4,64,128,1,float16,fp8,0,0.142085333665212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,4,64,128,1,fp8,fp8,0,0.1848533352216085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,4,64,0,1,float16,fp8,0,0.1527733306090037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,4,64,0,1,fp8,fp8,0,0.13661866386731467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,8,64,128,1,float16,float16,0,0.14165866374969482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,8,64,0,1,float16,float16,0,0.1532853345076243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,8,64,128,1,float16,fp8,0,0.14007467031478882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,8,64,128,1,fp8,fp8,0,0.1850666602452596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,8,64,0,1,float16,fp8,0,0.1544373333454132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,8,64,0,1,fp8,fp8,0,0.13672000169754028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,96,64,128,1,float16,float16,0,0.09065600236256917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,96,64,0,1,float16,float16,0,0.09426132837931316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,96,64,128,1,float16,fp8,0,0.09096533060073853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,96,64,128,1,fp8,fp8,0,0.11525866389274597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,96,64,0,1,float16,fp8,0,0.09246399998664856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,96,64,0,1,fp8,fp8,0,0.08429333567619324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,1,64,128,1,float16,float16,0,0.07977599898974101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,1,64,0,1,float16,float16,0,0.08400000135103862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,1,64,128,1,float16,fp8,0,0.08065066734949748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,1,64,128,1,fp8,fp8,0,0.10262399911880493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,1,64,0,1,float16,fp8,0,0.08474666873613994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,1,64,0,1,fp8,fp8,0,0.07663999994595845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,2,64,128,1,float16,float16,0,0.07849066456158955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,2,64,0,1,float16,float16,0,0.08448533217112224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,2,64,128,1,float16,fp8,0,0.08054933448632558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,2,64,128,1,fp8,fp8,0,0.102101335922877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,2,64,0,1,float16,fp8,0,0.08591467142105103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,2,64,0,1,fp8,fp8,0,0.07580799857775371
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,4,64,128,1,float16,float16,0,0.07975466549396515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,4,64,0,1,float16,float16,0,0.08462400237719218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,4,64,128,1,float16,fp8,0,0.0792906681696574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,4,64,128,1,fp8,fp8,0,0.10195199648539226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,4,64,0,1,float16,fp8,0,0.08518933256467183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,4,64,0,1,fp8,fp8,0,0.07561600208282471
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,8,64,128,1,float16,float16,0,0.08044800162315369
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,8,64,0,1,float16,float16,0,0.08481599887212117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,8,64,128,1,float16,fp8,0,0.07971199850241344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,8,64,128,1,fp8,fp8,0,0.10444266597429912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,8,64,0,1,float16,fp8,0,0.08409066994984944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,8,64,0,1,fp8,fp8,0,0.07564266522725423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,1,64,128,1,float16,float16,0,6.928464253743489
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,1,64,0,1,float16,float16,0,6.916512171427409
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,1,64,128,1,float16,fp8,0,6.900271733601888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,1,64,128,1,fp8,fp8,0,9.025029500325521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,1,64,0,1,float16,fp8,0,6.889914830525716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,1,64,0,1,fp8,fp8,0,5.979317347208659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,2,64,128,1,float16,float16,0,7.022223790486653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,2,64,0,1,float16,float16,0,7.025472005208333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,2,64,128,1,float16,fp8,0,7.045178731282552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,2,64,0,1,float16,fp8,0,6.986378351847331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,2,64,128,1,fp8,fp8,0,9.060159683227539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,2,64,0,1,fp8,fp8,0,6.028432210286458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,4,64,128,1,float16,float16,0,7.113653182983398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,4,64,0,1,float16,float16,0,7.08892822265625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,4,64,128,1,float16,fp8,0,7.026106516520183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,4,64,128,1,fp8,fp8,0,9.076533635457357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,4,64,0,1,float16,fp8,0,7.0895735422770185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,4,64,0,1,fp8,fp8,0,6.075050354003906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,8,64,128,1,float16,float16,0,7.220437367757161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,8,64,0,1,float16,float16,0,7.220266977945964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,8,64,128,1,float16,fp8,0,7.211418787638347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,8,64,0,1,float16,fp8,0,7.145792007446289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,96,64,128,1,float16,float16,0,3.9968748092651367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,8,64,128,1,fp8,fp8,0,9.310133616129557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,8,64,0,1,fp8,fp8,0,6.263434727986653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,96,64,0,1,float16,float16,0,3.9460748036702475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,96,64,128,1,float16,fp8,0,3.925919850667318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,96,64,128,1,fp8,fp8,0,4.867066701253255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,96,64,0,1,float16,fp8,0,3.9016586939493814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,96,64,0,1,fp8,fp8,0,3.3194665908813477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,1,64,128,1,float16,float16,0,3.468463897705078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,1,64,0,1,float16,float16,0,3.452026685078939
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,1,64,128,1,float16,fp8,0,3.4517599741617837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,1,64,128,1,fp8,fp8,0,4.484794616699219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,1,64,0,1,fp8,fp8,0,2.9941813151041665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,1,64,0,1,float16,fp8,0,3.4506826400756836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,2,64,128,1,float16,float16,0,3.5143200556437173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,2,64,0,1,float16,float16,0,3.481114705403646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,2,64,128,1,float16,fp8,0,3.4867626825968423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,2,64,0,1,fp8,fp8,0,3.0026772816975913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,2,64,0,1,float16,fp8,0,3.4790080388387046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,2,64,128,1,fp8,fp8,0,4.536666552225749
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,4,64,128,1,float16,float16,0,3.532133420308431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,4,64,0,1,float16,float16,0,3.520106633504232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,4,64,128,1,float16,fp8,0,3.5384000142415366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,4,64,128,1,fp8,fp8,0,4.5348161061604815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,4,64,0,1,float16,fp8,0,3.5268214543660483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,4,64,0,1,fp8,fp8,0,3.0246238708496094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,8,64,128,1,float16,float16,0,3.5989761352539062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,8,64,0,1,float16,float16,0,3.602341334025065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,8,64,128,1,float16,fp8,0,3.606389363606771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,8,64,128,1,fp8,fp8,0,4.641077359517415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,8,64,0,1,float16,fp8,0,3.5965067545572915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,8,64,0,1,fp8,fp8,0,3.107050577799479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,96,64,128,1,float16,float16,0,2.0006027221679688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,96,64,0,1,float16,float16,0,1.9796373049418132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,96,64,128,1,float16,fp8,0,1.9547573725382488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,96,64,128,1,fp8,fp8,0,2.43778133392334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,96,64,0,1,float16,fp8,0,1.9493759473164876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,96,64,0,1,fp8,fp8,0,1.6649333635965984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,1,64,128,1,float16,float16,0,1.749733289082845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,1,64,0,1,float16,float16,0,1.7234560648600261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,1,64,128,1,float16,fp8,0,1.7504053115844727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,1,64,128,1,fp8,fp8,0,2.2511092821756997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,1,64,0,1,float16,fp8,0,1.7234400113423665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,1,64,0,1,fp8,fp8,0,1.4980160395304363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,2,64,128,1,float16,float16,0,1.7555360794067383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,2,64,0,1,float16,float16,0,1.741125265757243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,2,64,128,1,float16,fp8,0,1.7611039479573567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,2,64,128,1,fp8,fp8,0,2.2635253270467124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,2,64,0,1,float16,fp8,0,1.7393867174784343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,2,64,0,1,fp8,fp8,0,1.5031627019246419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,4,64,128,1,float16,float16,0,1.750874678293864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,4,64,0,1,float16,float16,0,1.7462986310323079
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,8,64,0,1,float16,float16,0,1.78876797358195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,4,64,128,1,float16,fp8,0,1.764560063680013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,4,64,128,1,fp8,fp8,0,2.2873706817626953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,4,64,0,1,float16,fp8,0,1.749872048695882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,4,64,0,1,fp8,fp8,0,1.5122507413228352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,8,64,128,1,float16,float16,0,1.8083680470784504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,8,64,128,1,float16,fp8,0,1.7975680033365886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,8,64,128,1,fp8,fp8,0,2.329909324645996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,8,64,0,1,float16,fp8,0,1.7906400362650554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,96,64,128,1,float16,float16,0,1.0064160029093425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,8,64,0,1,fp8,fp8,0,1.5586133003234863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,96,64,0,1,float16,float16,0,0.9932533105214437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,96,64,128,1,float16,fp8,0,0.9818133513132731
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,96,64,128,1,fp8,fp8,0,1.2202026844024658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,96,64,0,1,float16,fp8,0,0.9769120216369629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,96,64,0,1,fp8,fp8,0,0.8348906834920248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,1,64,128,1,float16,float16,0,0.8842079639434814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,1,64,0,1,float16,float16,0,0.8768746852874756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,1,64,128,1,float16,fp8,0,0.8874080181121826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,1,64,128,1,fp8,fp8,0,1.1511893272399902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,1,64,0,1,float16,fp8,0,0.8733920256296793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,1,64,0,1,fp8,fp8,0,0.7540853023529053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,2,64,128,1,float16,float16,0,0.8959840138753256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,2,64,0,1,float16,float16,0,0.8821866512298584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,2,64,128,1,float16,fp8,0,0.8902133305867513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,2,64,128,1,fp8,fp8,0,1.1496533552805583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,2,64,0,1,float16,fp8,0,0.8810506661732992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,2,64,0,1,fp8,fp8,0,0.7627200285593668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,4,64,128,1,float16,float16,0,0.890442689259847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,4,64,0,1,float16,float16,0,0.879472017288208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,4,64,128,1,float16,fp8,0,0.8956426779429117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,4,64,128,1,fp8,fp8,0,1.1551946798960369
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,4,64,0,1,float16,fp8,0,0.8835519949595133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,4,64,0,1,fp8,fp8,0,0.7651200294494629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,8,64,128,1,float16,float16,0,0.9049973487854004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,8,64,0,1,float16,float16,0,0.8932212988535563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,8,64,128,1,float16,fp8,0,0.8979946772257487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,8,64,128,1,fp8,fp8,0,1.1604639689127605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,8,64,0,1,float16,fp8,0,0.8919626871744791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,8,64,0,1,fp8,fp8,0,0.7749706904093424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,96,64,128,1,float16,float16,0,0.5158506631851196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,96,64,0,1,float16,float16,0,0.5081973473230997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,96,64,128,1,float16,fp8,0,0.5090239842732748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,96,64,128,1,fp8,fp8,0,0.6355306704839071
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,96,64,0,1,float16,fp8,0,0.4987200101216634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,96,64,0,1,fp8,fp8,0,0.4267306725184123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,1,64,128,1,float16,float16,0,0.45864001909891766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,1,64,0,1,float16,float16,0,0.45208533604939777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,1,64,128,1,float16,fp8,0,0.4587413469950358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,1,64,128,1,fp8,fp8,0,0.5921599864959717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,1,64,0,1,float16,fp8,0,0.45187731583913165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,1,64,0,1,fp8,fp8,0,0.3925439914067586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,2,64,128,1,float16,float16,0,0.45841066042582196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,2,64,0,1,float16,float16,0,0.45154134432474774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,2,64,128,1,float16,fp8,0,0.46036799748738605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,2,64,128,1,fp8,fp8,0,0.5928320089975992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,2,64,0,1,float16,fp8,0,0.45205334822336835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,2,64,0,1,fp8,fp8,0,0.391701340675354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,4,64,128,1,float16,float16,0,0.46074668566385907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,4,64,0,1,float16,float16,0,0.4538240035374959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,4,64,128,1,float16,fp8,0,0.4589173396428426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,4,64,128,1,fp8,fp8,0,0.5945226748784384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,4,64,0,1,float16,fp8,0,0.4551733334859212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,4,64,0,1,fp8,fp8,0,0.3951679865519206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,8,64,128,1,float16,float16,0,0.46536533037821454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,8,64,0,1,float16,float16,0,0.45923201243082684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,8,64,128,1,float16,fp8,0,0.4643626610438029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,8,64,128,1,fp8,fp8,0,0.6030720074971517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,8,64,0,1,float16,fp8,0,0.4558346668879191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,8,64,0,1,fp8,fp8,0,0.39826667308807373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,96,64,128,1,float16,float16,0,0.27349867423375446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,96,64,0,1,float16,float16,0,0.2674773335456848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,96,64,128,1,float16,fp8,0,0.2693120042483012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,96,64,128,1,fp8,fp8,0,0.3354080120722453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,96,64,0,1,float16,fp8,0,0.26313066482543945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,96,64,0,1,fp8,fp8,0,0.22419732809066772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,1,64,128,1,float16,float16,0,0.24165334304173788
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,1,64,0,1,float16,float16,0,0.2375146746635437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,1,64,128,1,float16,fp8,0,0.24422933657964072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,1,64,128,1,fp8,fp8,0,0.31486932436625165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,1,64,0,1,float16,fp8,0,0.2394933303197225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,1,64,0,1,fp8,fp8,0,0.2066719929377238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,2,64,128,1,float16,float16,0,0.24310400088628134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,2,64,0,1,float16,float16,0,0.23881600300470987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,2,64,128,1,float16,fp8,0,0.24329066276550293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,2,64,128,1,fp8,fp8,0,0.3126666744550069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,2,64,0,1,float16,fp8,0,0.24001065889994302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,2,64,0,1,fp8,fp8,0,0.20589866240819296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,4,64,128,1,float16,float16,0,0.24322134256362915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,4,64,0,1,float16,float16,0,0.2398773431777954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,4,64,128,1,float16,fp8,0,0.24458134174346924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,4,64,128,1,fp8,fp8,0,0.31574400266011554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,4,64,0,1,float16,fp8,0,0.23880533377329508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,4,64,0,1,fp8,fp8,0,0.20648000637690225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,8,64,128,1,float16,float16,0,0.2465333342552185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,8,64,0,1,float16,float16,0,0.2407360076904297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,8,64,128,1,float16,fp8,0,0.24472532669703165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,8,64,128,1,fp8,fp8,0,0.318938672542572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,8,64,0,1,float16,fp8,0,0.24170132478078207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,8,64,0,1,fp8,fp8,0,0.2095200022061666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,96,64,128,1,float16,float16,0,0.14973333477973938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,96,64,0,1,float16,float16,0,0.14855466286341348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,96,64,128,1,float16,fp8,0,0.15037866433461508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,96,64,128,1,fp8,fp8,0,0.18464533487955728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,96,64,0,1,float16,fp8,0,0.14778133233388266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,96,64,0,1,fp8,fp8,0,0.12392533818880717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,1,64,128,1,float16,float16,0,0.13314666350682577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,1,64,0,1,float16,float16,0,0.12634666760762533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,1,64,128,1,float16,fp8,0,0.13371200362841287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,1,64,128,1,fp8,fp8,0,0.17550400892893472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,1,64,0,1,float16,fp8,0,0.12807466586430868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,1,64,0,1,fp8,fp8,0,0.11365333199501038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,2,64,128,1,float16,float16,0,0.1344000001748403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,2,64,0,1,float16,float16,0,0.12777066230773926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,2,64,128,1,float16,fp8,0,0.13406399885813394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,2,64,128,1,fp8,fp8,0,0.17340266704559326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,2,64,0,1,float16,fp8,0,0.12714133659998575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,2,64,0,1,fp8,fp8,0,0.1136853297551473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,4,64,128,1,float16,float16,0,0.13492266337076822
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,4,64,0,1,float16,float16,0,0.12794666488965353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,4,64,128,1,float16,fp8,0,0.13614400227864584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,4,64,128,1,fp8,fp8,0,0.1743626594543457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,4,64,0,1,float16,fp8,0,0.12876266241073608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,4,64,0,1,fp8,fp8,0,0.11412800351778667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,8,64,128,1,float16,float16,0,0.13409599661827087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,8,64,0,1,float16,float16,0,0.12925333778063455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,8,64,128,1,float16,fp8,0,0.13596266508102417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,8,64,128,1,fp8,fp8,0,0.1762133240699768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,8,64,0,1,float16,fp8,0,0.12931733330090842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,8,64,0,1,fp8,fp8,0,0.11581333478291829
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,96,64,128,1,float16,float16,0,0.08763733506202698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,96,64,0,1,float16,float16,0,0.08163199822107951
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,96,64,128,1,float16,fp8,0,0.08725333213806152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,96,64,128,1,fp8,fp8,0,0.10970133543014526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,96,64,0,1,float16,fp8,0,0.08117333551247914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,96,64,0,1,fp8,fp8,0,0.07357866565386455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,1,64,128,1,float16,float16,0,0.07701333363850911
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,1,64,0,1,float16,float16,0,0.07151466608047485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,1,64,128,1,float16,fp8,0,0.07659733295440674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,1,64,128,1,fp8,fp8,0,0.09524800380071004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,1,64,0,1,float16,fp8,0,0.07206400235493977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,1,64,0,1,fp8,fp8,0,0.06407999992370605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,2,64,128,1,float16,float16,0,0.0771679977575938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,2,64,0,1,float16,float16,0,0.07217066486676534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,2,64,128,1,float16,fp8,0,0.07776533563931783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,2,64,128,1,fp8,fp8,0,0.0979306697845459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,2,64,0,1,float16,fp8,0,0.07218133409818013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,2,64,0,1,fp8,fp8,0,0.0642080008983612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,4,64,128,1,float16,float16,0,0.0772213339805603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,4,64,0,1,float16,float16,0,0.07146133482456207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,4,64,128,1,float16,fp8,0,0.07793599863847096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,4,64,128,1,fp8,fp8,0,0.09762133161226909
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,4,64,0,1,float16,fp8,0,0.07267733414967854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,4,64,0,1,fp8,fp8,0,0.06542400022347768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,8,64,128,1,float16,float16,0,0.07753600180149078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,8,64,0,1,float16,float16,0,0.07246933380762736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,8,64,128,1,float16,fp8,0,0.07851733267307281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,8,64,128,1,fp8,fp8,0,0.09946667154630025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,8,64,0,1,float16,fp8,0,0.07358400026957194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,8,64,0,1,fp8,fp8,0,0.0650079995393753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,96,64,128,1,float16,float16,0,0.055120001236597695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,96,64,0,1,float16,float16,0,0.05017599960168203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,96,64,128,1,float16,fp8,0,0.053360000252723694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,96,64,128,1,fp8,fp8,0,0.07016533116499583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,96,64,0,1,float16,fp8,0,0.04985600213209788
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,96,64,0,1,fp8,fp8,0,0.04595200220743815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,1,64,128,1,float16,float16,0,0.05123733480771383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,1,64,0,1,float16,float16,0,0.047040000557899475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,1,64,128,1,float16,fp8,0,0.05208533505598704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,1,64,128,1,fp8,fp8,0,0.06467733283837636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,1,64,0,1,float16,fp8,0,0.04635733366012573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,2,64,0,1,float16,fp8,0,0.04734933376312256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,1,64,0,1,fp8,fp8,0,0.0425600012143453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,2,64,128,1,float16,float16,0,0.051114668448766075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,2,64,0,1,float16,float16,0,0.04655999938646952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,2,64,128,1,float16,fp8,0,0.05194666484991709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,2,64,128,1,fp8,fp8,0,0.06369066735108693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,2,64,0,1,fp8,fp8,0,0.04303466777006785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,4,64,128,1,float16,float16,0,0.05151999990145365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,4,64,0,1,float16,float16,0,0.04691733419895172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,4,64,128,1,float16,fp8,0,0.05091733237107595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,4,64,128,1,fp8,fp8,0,0.06467733283837636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,4,64,0,1,float16,fp8,0,0.04673600196838379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,4,64,0,1,fp8,fp8,0,0.043375998735427856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,8,64,128,1,float16,float16,0,0.05173333485921224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,8,64,0,1,float16,float16,0,0.04744000236193339
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,8,64,128,1,float16,fp8,0,0.052015999952952065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,8,64,128,1,fp8,fp8,0,0.06552533308664958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,8,64,0,1,float16,fp8,0,0.04724800089995066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,8,64,0,1,fp8,fp8,0,0.04322666426499685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,1,64,128,1,float16,float16,0,2.88590939839681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,1,64,0,1,float16,float16,0,2.795813242594401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,1,64,128,1,float16,fp8,0,2.8825012842814126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,1,64,0,1,float16,fp8,0,2.7933225631713867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,1,64,128,1,fp8,fp8,0,3.9401652018229165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,1,64,0,1,fp8,fp8,0,2.5514559745788574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,2,64,128,1,float16,float16,0,2.911856015523275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,2,64,0,1,float16,float16,0,2.8193012873331704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,2,64,128,1,float16,fp8,0,2.935152053833008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,2,64,128,1,fp8,fp8,0,3.9632479349772134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,2,64,0,1,float16,fp8,0,2.8292160034179688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,2,64,0,1,fp8,fp8,0,2.5847946802775064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,4,64,128,1,float16,float16,0,2.9524799982706704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,4,64,0,1,float16,float16,0,2.8592265446980796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,4,64,128,1,float16,fp8,0,2.955648104349772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,4,64,128,1,fp8,fp8,0,3.9820213317871094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,4,64,0,1,float16,fp8,0,2.8764638900756836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,4,64,0,1,fp8,fp8,0,2.6094560623168945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,8,64,128,1,float16,float16,0,3.0321172078450522
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,8,64,0,1,float16,float16,0,2.9478880564371743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,8,64,128,1,float16,fp8,0,3.036527951558431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,8,64,128,1,fp8,fp8,0,4.058570543924968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,8,64,0,1,float16,fp8,0,2.926426569620768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,8,64,0,1,fp8,fp8,0,2.689157485961914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,96,64,128,1,float16,float16,0,1.6815253893534343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,96,64,0,1,float16,float16,0,1.645535945892334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,96,64,128,1,float16,fp8,0,1.6365440686543782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,96,64,128,1,fp8,fp8,0,2.145909309387207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,96,64,0,1,float16,fp8,0,1.616927941640218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,96,64,0,1,fp8,fp8,0,1.4498292605082195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,1,64,128,1,float16,float16,0,1.4427092870076497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,1,64,0,1,float16,float16,0,1.4022666613260906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,1,64,128,1,float16,fp8,0,1.4494773546854656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,1,64,128,1,fp8,fp8,0,1.9737760225931804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,1,64,0,1,float16,fp8,0,1.3962880770365398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,1,64,0,1,fp8,fp8,0,1.2821386655171711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,2,64,128,1,float16,float16,0,1.468821366628011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,2,64,0,1,float16,float16,0,1.4142452875773113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,2,64,128,1,float16,fp8,0,1.4588106473286946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,2,64,0,1,float16,fp8,0,1.4228746096293132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,2,64,0,1,fp8,fp8,0,1.2889599800109863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,2,64,128,1,fp8,fp8,0,1.9811679522196453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,4,64,128,1,float16,float16,0,1.4636106491088867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,4,64,0,1,float16,float16,0,1.4288214047749836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,4,64,128,1,float16,fp8,0,1.4707519213358562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,4,64,128,1,fp8,fp8,0,1.9841705958048503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,4,64,0,1,float16,fp8,0,1.4186347325642903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,4,64,0,1,fp8,fp8,0,1.3050506909688313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,8,64,128,1,float16,float16,0,1.4964267412821453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,8,64,0,1,float16,float16,0,1.459664026896159
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,8,64,128,1,float16,fp8,0,1.4888532956441243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,8,64,128,1,fp8,fp8,0,2.023039976755778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,8,64,0,1,float16,fp8,0,1.4489760398864746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,96,64,128,1,float16,float16,0,0.8451786835988363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,8,64,0,1,fp8,fp8,0,1.3449920018513997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,96,64,0,1,float16,float16,0,0.8273066679636637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,96,64,128,1,float16,fp8,0,0.8300000031789144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,96,64,128,1,fp8,fp8,0,1.0734346707661946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,96,64,0,1,float16,fp8,0,0.8101440270741781
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,96,64,0,1,fp8,fp8,0,0.7273706595102946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,1,64,128,1,float16,float16,0,0.7331360181172689
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,1,64,0,1,float16,float16,0,0.7099200089772543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,1,64,128,1,float16,fp8,0,0.7315839926401774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,1,64,128,1,fp8,fp8,0,0.9954346815745035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,1,64,0,1,float16,fp8,0,0.7123626867930094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,1,64,0,1,fp8,fp8,0,0.6527359882990519
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,2,64,128,1,float16,float16,0,0.7338773409525553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,2,64,0,1,float16,float16,0,0.7192320028940836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,2,64,128,1,float16,fp8,0,0.7383786837259928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,2,64,128,1,fp8,fp8,0,0.9969973564147949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,2,64,0,1,float16,fp8,0,0.7142613728841146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,2,64,0,1,fp8,fp8,0,0.6548746824264526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,4,64,128,1,float16,float16,0,0.7380479971567789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,4,64,0,1,float16,float16,0,0.7188159624735514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,4,64,128,1,float16,fp8,0,0.7369279861450195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,4,64,128,1,fp8,fp8,0,1.0068960189819336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,4,64,0,1,float16,fp8,0,0.7208106517791748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,4,64,0,1,fp8,fp8,0,0.6567039887110392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,8,64,128,1,float16,float16,0,0.7459359963734945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,8,64,0,1,float16,float16,0,0.7274986902872721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,8,64,128,1,float16,fp8,0,0.7465279897054037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,8,64,128,1,fp8,fp8,0,1.0111040274302165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,8,64,0,1,float16,fp8,0,0.7239573001861572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,96,64,128,1,float16,float16,0,0.43779198328653973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,8,64,0,1,fp8,fp8,0,0.6726400057474772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,96,64,0,1,float16,float16,0,0.4283039967219035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,96,64,128,1,float16,fp8,0,0.4296319882074992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,96,64,128,1,fp8,fp8,0,0.5466986497243246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,96,64,0,1,float16,fp8,0,0.4218826691309611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,96,64,0,1,fp8,fp8,0,0.3715466658274333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,1,64,128,1,float16,float16,0,0.3768693208694458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,1,64,0,1,float16,float16,0,0.3685386578241984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,1,64,128,1,float16,fp8,0,0.3794186512629191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,1,64,128,1,fp8,fp8,0,0.5056426525115967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,1,64,0,1,float16,fp8,0,0.36854398250579834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,1,64,0,1,fp8,fp8,0,0.3391679922739665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,2,64,128,1,float16,float16,0,0.3800746599833171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,2,64,0,1,float16,float16,0,0.3699359893798828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,2,64,128,1,float16,fp8,0,0.3786826531092326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,2,64,128,1,fp8,fp8,0,0.5111573139826456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,2,64,0,1,float16,fp8,0,0.3710613250732422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,2,64,0,1,fp8,fp8,0,0.3386880159378052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,4,64,128,1,float16,float16,0,0.3806133270263672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,4,64,0,1,float16,float16,0,0.3720000187555949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,4,64,128,1,float16,fp8,0,0.3813600142796834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,4,64,128,1,fp8,fp8,0,0.5096319913864136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,4,64,0,1,float16,fp8,0,0.36901867389678955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,4,64,0,1,fp8,fp8,0,0.34384000301361084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,8,64,128,1,float16,float16,0,0.385103980700175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,8,64,0,1,float16,float16,0,0.37511467933654785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,8,64,128,1,float16,fp8,0,0.3851360082626343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,8,64,128,1,fp8,fp8,0,0.5144480069478353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,8,64,0,1,float16,fp8,0,0.3752640088399251
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,8,64,0,1,fp8,fp8,0,0.34353065490722656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,96,64,128,1,float16,float16,0,0.23082667589187622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,96,64,0,1,float16,float16,0,0.22985066970189413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,96,64,128,1,float16,fp8,0,0.2294666568438212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,96,64,128,1,fp8,fp8,0,0.28198399146397907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,96,64,0,1,float16,fp8,0,0.2262186606725057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,96,64,0,1,fp8,fp8,0,0.19849065939585367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,1,64,128,1,float16,float16,0,0.1997013290723165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,1,64,0,1,float16,float16,0,0.19357866048812866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,1,64,128,1,float16,fp8,0,0.19924799601236978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,1,64,128,1,fp8,fp8,0,0.26710400978724164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,1,64,0,1,float16,fp8,0,0.1929546594619751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,1,64,0,1,fp8,fp8,0,0.18105065822601318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,2,64,128,1,float16,float16,0,0.19839467604955038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,2,64,0,1,float16,float16,0,0.19419733683268228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,2,64,128,1,float16,fp8,0,0.20021865765253702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,2,64,128,1,fp8,fp8,0,0.2653013269106547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,2,64,0,1,float16,fp8,0,0.19320533672968546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,2,64,0,1,fp8,fp8,0,0.18308266003926596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,4,64,128,1,float16,float16,0,0.19965332746505737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,4,64,0,1,float16,float16,0,0.19377066691716513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,4,64,128,1,float16,fp8,0,0.2015893260637919
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,4,64,128,1,fp8,fp8,0,0.266975998878479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,4,64,0,1,float16,fp8,0,0.19537599881490073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,4,64,0,1,fp8,fp8,0,0.18254933754603067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,8,64,128,1,float16,float16,0,0.20195732514063516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,8,64,0,1,float16,float16,0,0.19723733266194662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,8,64,128,1,float16,fp8,0,0.20165334145228067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,8,64,128,1,fp8,fp8,0,0.2674506704012553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,8,64,0,1,float16,fp8,0,0.19734932978947958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,8,64,0,1,fp8,fp8,0,0.18468799193700156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,96,64,128,1,float16,float16,0,0.12829867005348206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,96,64,0,1,float16,float16,0,0.1269493301709493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,96,64,128,1,float16,fp8,0,0.12599999705950418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,1,64,128,1,fp8,fp8,0,0.14193066954612732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,96,64,128,1,fp8,fp8,0,0.15345600247383118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,96,64,0,1,float16,fp8,0,0.12475200494130452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,96,64,0,1,fp8,fp8,0,0.11044800281524658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,1,64,128,1,float16,float16,0,0.10602133472760518
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,1,64,0,1,float16,float16,0,0.10566932956377666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,1,64,128,1,float16,fp8,0,0.10639466842015584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,1,64,0,1,float16,fp8,0,0.10387200117111206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,1,64,0,1,fp8,fp8,0,0.1013866662979126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,2,64,128,1,float16,float16,0,0.1069653332233429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,2,64,0,1,float16,float16,0,0.10398933291435242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,2,64,128,1,float16,fp8,0,0.10662933190663655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,2,64,128,1,fp8,fp8,0,0.1423786679903666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,2,64,0,1,float16,fp8,0,0.1039520005385081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,2,64,0,1,fp8,fp8,0,0.10140800476074219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,4,64,128,1,float16,float16,0,0.10669333736101787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,4,64,0,1,float16,float16,0,0.10404266913731892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,4,64,128,1,float16,fp8,0,0.107232004404068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,4,64,128,1,fp8,fp8,0,0.14317333698272705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,4,64,0,1,float16,fp8,0,0.1043839951356252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,4,64,0,1,fp8,fp8,0,0.10274133086204529
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,8,64,128,1,float16,float16,0,0.1083626647790273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,8,64,0,1,float16,float16,0,0.10499733686447144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,8,64,128,1,float16,fp8,0,0.10772266983985901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,8,64,128,1,fp8,fp8,0,0.1434346636136373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,8,64,0,1,float16,fp8,0,0.10541333754857381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,8,64,0,1,fp8,fp8,0,0.10422399640083313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,96,64,128,1,float16,float16,0,0.07375999788443248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,96,64,0,1,float16,float16,0,0.07576533158620198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,96,64,128,1,float16,fp8,0,0.07369066774845123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,96,64,128,1,fp8,fp8,0,0.0872213343779246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,96,64,0,1,float16,fp8,0,0.0738613357146581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,96,64,0,1,fp8,fp8,0,0.06604266663392384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,1,64,128,1,float16,float16,0,0.0634080022573471
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,1,64,0,1,float16,float16,0,0.06363200147946675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,1,64,128,1,float16,fp8,0,0.06267733375231425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,1,64,128,1,fp8,fp8,0,0.07821866869926453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,1,64,0,1,float16,fp8,0,0.06355733176072438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,1,64,0,1,fp8,fp8,0,0.05780800183614095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,2,64,128,1,float16,float16,0,0.06177066763242086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,2,64,0,1,float16,float16,0,0.06242666641871134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,2,64,128,1,float16,fp8,0,0.062261333068211876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,2,64,128,1,fp8,fp8,0,0.07825600107510884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,2,64,0,1,float16,fp8,0,0.06352533400058746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,2,64,0,1,fp8,fp8,0,0.057850668827692665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,4,64,128,1,float16,float16,0,0.06225066880385081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,4,64,0,1,float16,float16,0,0.06195733447869619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,4,64,128,1,float16,fp8,0,0.06289066871007283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,4,64,128,1,fp8,fp8,0,0.0780266672372818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,4,64,0,1,float16,fp8,0,0.06502933303515117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,4,64,0,1,fp8,fp8,0,0.05752533177534739
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,8,64,128,1,float16,float16,0,0.06314133107662201
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,8,64,0,1,float16,float16,0,0.06380266447861989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,8,64,128,1,float16,fp8,0,0.06306133170922597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,8,64,128,1,fp8,fp8,0,0.0792799989382426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,8,64,0,1,float16,fp8,0,0.06289066871007283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,8,64,0,1,fp8,fp8,0,0.05773333211739858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,96,64,128,1,float16,float16,0,0.04418666660785675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,96,64,0,1,float16,float16,0,0.04264533519744873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,96,64,128,1,float16,fp8,0,0.04404800136884054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,96,64,128,1,fp8,fp8,0,0.05207466582457224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,96,64,0,1,float16,fp8,0,0.042490666111310325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,96,64,0,1,fp8,fp8,0,0.041365332901477814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,1,64,128,1,float16,float16,0,0.04098666707674662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,1,64,0,1,float16,float16,0,0.039808000127474465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,1,64,128,1,float16,fp8,0,0.04161066561937332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,1,64,128,1,fp8,fp8,0,0.04811733464399973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,1,64,0,1,float16,fp8,0,0.04022400081157684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,1,64,0,1,fp8,fp8,0,0.0383093332250913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,2,64,128,1,float16,float16,0,0.04068800061941147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,2,64,0,1,float16,float16,0,0.0401653324564298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,2,64,128,1,float16,fp8,0,0.04141333450873693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,2,64,128,1,fp8,fp8,0,0.04805333415667216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,2,64,0,1,float16,fp8,0,0.04038933416207632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,2,64,0,1,fp8,fp8,0,0.03772266705830892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,4,64,128,1,float16,float16,0,0.040922666589419045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,4,64,0,1,float16,float16,0,0.03992533435424169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,4,64,128,1,float16,fp8,0,0.041349334021409355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,4,64,128,1,fp8,fp8,0,0.047775998711586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,4,64,0,1,float16,fp8,0,0.040549332896868386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,4,64,0,1,fp8,fp8,0,0.038549333810806274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,8,64,128,1,float16,float16,0,0.04127999891837438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,8,64,0,1,float16,float16,0,0.04030933231115341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,8,64,128,1,float16,fp8,0,0.0418453315893809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,8,64,128,1,fp8,fp8,0,0.0484746644894282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,8,64,0,1,float16,fp8,0,0.0403413325548172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,8,64,0,1,fp8,fp8,0,0.03832533210515976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,96,64,128,1,float16,float16,0,0.031114667654037476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,96,64,0,1,float16,float16,0,0.03107200066248576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,96,64,128,1,float16,fp8,0,0.03176533430814743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,96,64,128,1,fp8,fp8,0,0.03669866671164831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,96,64,0,1,float16,fp8,0,0.030975999931494396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,96,64,0,1,fp8,fp8,0,0.029802667597929638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,1,64,128,1,float16,float16,0,0.029157333076000214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,1,64,0,1,float16,float16,0,0.02914133419593175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,1,64,128,1,float16,fp8,0,0.029450667401154835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,1,64,128,1,fp8,fp8,0,0.03570133447647095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,1,64,0,1,float16,fp8,0,0.02916266769170761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,1,64,0,1,fp8,fp8,0,0.028864001234372456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,2,64,128,1,float16,float16,0,0.029109333952267964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,2,64,0,1,float16,float16,0,0.02868266652027766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,2,64,128,1,float16,fp8,0,0.029135999580224354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,2,64,128,1,fp8,fp8,0,0.03491200009981791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,2,64,0,1,float16,fp8,0,0.029146666328112285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,2,64,0,1,fp8,fp8,0,0.028965334097544353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,4,64,128,1,float16,float16,0,0.02941333254178365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,4,64,0,1,float16,float16,0,0.029050665597120922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,4,64,128,1,float16,fp8,0,0.029882666965325672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,4,64,128,1,fp8,fp8,0,0.0354666660229365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,4,64,0,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,4,64,0,1,fp8,fp8,0,0.02824000020821889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,8,64,128,1,float16,float16,0,0.029477333029111225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,8,64,0,1,float16,float16,0,0.02882133424282074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,8,64,128,1,float16,fp8,0,0.02992533395687739
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,8,64,128,1,fp8,fp8,0,0.03586666782697042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,8,64,0,1,float16,fp8,0,0.029109333952267964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,8,64,0,1,fp8,fp8,0,0.02922133356332779
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,1,64,128,1,float16,float16,0,1.0430293083190918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,1,64,0,1,float16,float16,0,1.0470293362935383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,1,64,128,1,float16,fp8,0,1.0478613376617432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,1,64,128,1,fp8,fp8,0,1.2702240149180095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,1,64,0,1,float16,fp8,0,1.0428640047709148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,1,64,0,1,fp8,fp8,0,1.270522673924764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,2,64,128,1,float16,float16,0,1.0707733631134033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,2,64,0,1,float16,float16,0,1.0669173399607341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,2,64,128,1,float16,fp8,0,1.0636213620503743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,2,64,128,1,fp8,fp8,0,1.2786933581034343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,2,64,0,1,float16,fp8,0,1.0603626569112141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,4,64,128,1,float16,float16,0,1.066314697265625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,2,64,0,1,fp8,fp8,0,1.2767893473307292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,4,64,0,1,float16,float16,0,1.0713706811269124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,4,64,128,1,float16,fp8,0,1.062933365503947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,4,64,128,1,fp8,fp8,0,1.2783786455790203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,4,64,0,1,float16,fp8,0,1.06112535794576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,4,64,0,1,fp8,fp8,0,1.2743573188781738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,8,64,128,1,float16,float16,0,1.104698657989502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,8,64,0,1,float16,float16,0,1.1074293454488118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,8,64,128,1,float16,fp8,0,1.1030453046162922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,8,64,128,1,fp8,fp8,0,1.3272373676300049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,8,64,0,1,float16,fp8,0,1.1020533243815105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,8,64,0,1,fp8,fp8,0,1.3288906415303547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,96,64,128,1,float16,float16,0,0.645855983098348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,96,64,0,1,float16,float16,0,0.6474239826202393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,96,64,128,1,float16,fp8,0,0.6349279880523682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,96,64,128,1,fp8,fp8,0,0.7134079933166504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,96,64,0,1,float16,fp8,0,0.6301546494166056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,96,64,0,1,fp8,fp8,0,0.720800002415975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,1,64,128,1,float16,float16,0,0.5313119888305664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,1,64,0,1,float16,float16,0,0.530074675877889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,1,64,128,1,float16,fp8,0,0.529802680015564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,1,64,128,1,fp8,fp8,0,0.6437439918518066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,1,64,0,1,float16,fp8,0,0.5318826834360758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,1,64,0,1,fp8,fp8,0,0.6413813432057699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,2,64,128,1,float16,float16,0,0.5390293200810751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,2,64,0,1,float16,float16,0,0.5386773347854614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,2,64,128,1,float16,fp8,0,0.5368959903717041
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,2,64,128,1,fp8,fp8,0,0.6480960051218668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,2,64,0,1,float16,fp8,0,0.535210649172465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,2,64,0,1,fp8,fp8,0,0.6472586790720621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,4,64,128,1,float16,float16,0,0.5383573373158773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,4,64,0,1,float16,float16,0,0.5369386672973633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,4,64,128,1,float16,fp8,0,0.5360053380330404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,4,64,128,1,fp8,fp8,0,0.648698647816976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,4,64,0,1,float16,fp8,0,0.5386666854222616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,4,64,0,1,fp8,fp8,0,0.6494933366775513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,8,64,128,1,float16,float16,0,0.5467040141423544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,96,64,128,1,float16,float16,0,0.3332266608874003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,8,64,0,1,float16,float16,0,0.5508053302764893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,8,64,128,1,float16,fp8,0,0.5462079842885336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,8,64,128,1,fp8,fp8,0,0.6665066480636597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,8,64,0,1,float16,fp8,0,0.5457760095596313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,8,64,0,1,fp8,fp8,0,0.6698026657104492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,96,64,0,1,float16,float16,0,0.333568016688029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,96,64,128,1,float16,fp8,0,0.3272213339805603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,96,64,128,1,fp8,fp8,0,0.36937065919240314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,96,64,0,1,float16,fp8,0,0.3269333243370056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,96,64,0,1,fp8,fp8,0,0.3682719866434733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,1,64,128,1,float16,float16,0,0.2735520005226135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,1,64,0,1,float16,float16,0,0.2744106650352478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,1,64,128,1,float16,fp8,0,0.2744586666425069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,1,64,128,1,fp8,fp8,0,0.33238933483759564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,1,64,0,1,float16,fp8,0,0.2741493384043376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,1,64,0,1,fp8,fp8,0,0.3338826497395833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,2,64,128,1,float16,float16,0,0.2742240031560262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,2,64,0,1,float16,float16,0,0.2736053268114726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,2,64,128,1,float16,fp8,0,0.2742240031560262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,2,64,128,1,fp8,fp8,0,0.3349333206812541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,2,64,0,1,float16,fp8,0,0.2751573324203491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,2,64,0,1,fp8,fp8,0,0.33345599969228107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,4,64,128,1,float16,float16,0,0.2757706642150879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,4,64,0,1,float16,float16,0,0.27669332424799603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,4,64,128,1,float16,fp8,0,0.2757546703020732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,4,64,128,1,fp8,fp8,0,0.33455999692281085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,4,64,0,1,float16,fp8,0,0.2770506739616394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,4,64,0,1,fp8,fp8,0,0.33640531698862713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,8,64,128,1,float16,float16,0,0.2811573346455892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,8,64,0,1,float16,float16,0,0.28012265761693317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,8,64,128,1,float16,fp8,0,0.2813013394673665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,8,64,128,1,fp8,fp8,0,0.34167468547821045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,8,64,0,1,float16,fp8,0,0.28019734223683673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,8,64,0,1,fp8,fp8,0,0.34055999914805096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,96,64,128,1,float16,float16,0,0.17691200971603394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,96,64,0,1,float16,float16,0,0.17861332496007284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,96,64,128,1,float16,fp8,0,0.17352533340454102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,96,64,128,1,fp8,fp8,0,0.19719467560450235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,96,64,0,1,float16,fp8,0,0.17435733477274576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,96,64,0,1,fp8,fp8,0,0.19708800315856934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,1,64,128,1,float16,float16,0,0.1455626686414083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,1,64,0,1,float16,float16,0,0.14484799901644388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,1,64,128,1,float16,fp8,0,0.14477333426475525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,1,64,128,1,fp8,fp8,0,0.17949867248535156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,1,64,0,1,float16,fp8,0,0.14470932881037393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,1,64,0,1,fp8,fp8,0,0.17950934171676636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,2,64,128,1,float16,float16,0,0.14484799901644388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,2,64,0,1,float16,float16,0,0.14459199706713358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,2,64,128,1,float16,fp8,0,0.14523200194040933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,2,64,128,1,fp8,fp8,0,0.17899733781814575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,2,64,0,1,float16,fp8,0,0.14512532949447632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,2,64,0,1,fp8,fp8,0,0.18014399210611978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,4,64,128,1,float16,float16,0,0.14532267053922018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,4,64,0,1,float16,float16,0,0.14511467019716898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,4,64,128,1,float16,fp8,0,0.14566399653752646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,4,64,128,1,fp8,fp8,0,0.17990932861963907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,4,64,0,1,float16,fp8,0,0.1488586664199829
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,4,64,0,1,fp8,fp8,0,0.17947733402252197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,8,64,128,1,float16,float16,0,0.14758933583895364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,8,64,0,1,float16,float16,0,0.14782399932543436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,8,64,128,1,float16,fp8,0,0.14736533164978027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,8,64,128,1,fp8,fp8,0,0.1818079948425293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,8,64,0,1,float16,fp8,0,0.14718400438626608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,8,64,0,1,fp8,fp8,0,0.18096532424290976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,96,64,128,1,float16,float16,0,0.10062932968139648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,96,64,0,1,float16,float16,0,0.09966400265693665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,96,64,128,1,float16,fp8,0,0.09745599826176961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,96,64,128,1,fp8,fp8,0,0.11197333534558614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,96,64,0,1,float16,fp8,0,0.09790399670600891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,96,64,0,1,fp8,fp8,0,0.11204266548156738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,1,64,128,1,float16,float16,0,0.07915199796358745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,1,64,0,1,float16,float16,0,0.07871466875076294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,1,64,128,1,float16,fp8,0,0.0803413341442744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,1,64,128,1,fp8,fp8,0,0.10099732875823975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,1,64,0,1,float16,fp8,0,0.08046400050322215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,1,64,0,1,fp8,fp8,0,0.10175466537475586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,2,64,128,1,float16,float16,0,0.07969066500663757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,2,64,0,1,float16,float16,0,0.08010133107503255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,2,64,128,1,float16,fp8,0,0.07959466675917308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,2,64,128,1,fp8,fp8,0,0.1011786659558614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,2,64,0,1,float16,fp8,0,0.07958399752775829
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,2,64,0,1,fp8,fp8,0,0.10058666268984477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,4,64,128,1,float16,float16,0,0.08044266700744629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,4,64,0,1,float16,float16,0,0.07991466422875722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,8,64,128,1,float16,fp8,0,0.08090666433175404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,4,64,128,1,float16,fp8,0,0.08042666812737782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,4,64,128,1,fp8,fp8,0,0.10158933202425639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,4,64,0,1,float16,fp8,0,0.08019733428955078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,4,64,0,1,fp8,fp8,0,0.10176533460617065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,8,64,128,1,float16,float16,0,0.08140799899895985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,8,64,0,1,float16,float16,0,0.08133333424727122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,8,64,128,1,fp8,fp8,0,0.10311466455459595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,8,64,0,1,float16,fp8,0,0.08051733175913493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,8,64,0,1,fp8,fp8,0,0.10288000106811523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,96,64,128,1,float16,float16,0,0.05641599992911021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,96,64,0,1,float16,float16,0,0.056128000219662987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,96,64,128,1,float16,fp8,0,0.05587733288606008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,96,64,128,1,fp8,fp8,0,0.06577066580454509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,96,64,0,1,float16,fp8,0,0.05589333176612854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,96,64,0,1,fp8,fp8,0,0.06553600231806438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,1,64,128,1,float16,float16,0,0.04619733492533366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,1,64,0,1,float16,float16,0,0.04656533400217692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,1,64,128,1,float16,fp8,0,0.04680533210436503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,1,64,128,1,fp8,fp8,0,0.058042665322621666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,1,64,0,1,float16,fp8,0,0.04701866706212362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,1,64,0,1,fp8,fp8,0,0.058176000912984215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,2,64,128,1,float16,float16,0,0.04632000128428141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,2,64,0,1,float16,float16,0,0.04668800036112467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,2,64,128,1,float16,fp8,0,0.04642133414745331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,2,64,128,1,fp8,fp8,0,0.05773333211739858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,2,64,0,1,float16,fp8,0,0.04606399933497111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,2,64,0,1,fp8,fp8,0,0.05712533493836721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,4,64,128,1,float16,float16,0,0.04637333254019419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,4,64,0,1,float16,float16,0,0.04665066798528036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,4,64,128,1,float16,fp8,0,0.046575998266537987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,4,64,128,1,fp8,fp8,0,0.058261334896087646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,4,64,0,1,float16,fp8,0,0.046762665112813316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,4,64,0,1,fp8,fp8,0,0.057904000083605446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,8,64,128,1,float16,float16,0,0.046666666865348816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,8,64,0,1,float16,float16,0,0.047509332497914634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,8,64,128,1,float16,fp8,0,0.04762133459250132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,8,64,128,1,fp8,fp8,0,0.0584799995024999
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,8,64,0,1,float16,fp8,0,0.047269334395726524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,8,64,0,1,fp8,fp8,0,0.05916266640027364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,96,64,128,1,float16,float16,0,0.032885332902272545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,96,64,0,1,float16,float16,0,0.03265066693226496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,96,64,128,1,float16,fp8,0,0.032325332363446556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,96,64,128,1,fp8,fp8,0,0.041146665811538696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,96,64,0,1,float16,fp8,0,0.032831999162832894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,96,64,0,1,fp8,fp8,0,0.04155199974775314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,1,64,128,1,float16,float16,0,0.030266667405764263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,1,64,0,1,float16,float16,0,0.030389333764712017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,1,64,128,1,float16,fp8,0,0.030410667260487873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,1,64,128,1,fp8,fp8,0,0.03798400113979975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,1,64,0,1,float16,fp8,0,0.029663999875386555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,1,64,0,1,fp8,fp8,0,0.03860266755024592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,2,64,0,1,float16,fp8,0,0.0303413321574529
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,2,64,128,1,float16,float16,0,0.03046933313210805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,2,64,0,1,float16,float16,0,0.030165334542592365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,2,64,128,1,float16,fp8,0,0.030826665461063385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,2,64,128,1,fp8,fp8,0,0.03789866715669632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,2,64,0,1,fp8,fp8,0,0.03782399992148081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,4,64,128,1,float16,float16,0,0.02975466599067052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,4,64,0,1,float16,float16,0,0.030447999636332195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,4,64,128,1,float16,fp8,0,0.030165334542592365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,4,64,128,1,fp8,fp8,0,0.038245332737763725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,4,64,0,1,float16,fp8,0,0.030458666384220123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,4,64,0,1,fp8,fp8,0,0.03782399992148081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,8,64,128,1,float16,float16,0,0.030378667016824085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,8,64,0,1,float16,float16,0,0.030282666285832722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,8,64,128,1,float16,fp8,0,0.030069333811601002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,8,64,128,1,fp8,fp8,0,0.0388373335202535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,8,64,0,1,float16,fp8,0,0.030346666773160298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,8,64,0,1,fp8,fp8,0,0.03815466662247976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,96,64,128,1,float16,float16,0,0.0239680012067159
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,96,64,0,1,float16,float16,0,0.023919999599456787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,96,64,128,1,float16,fp8,0,0.024319998919963837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,96,64,128,1,fp8,fp8,0,0.029887999097506206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,96,64,0,1,float16,fp8,0,0.024175999065240223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,96,64,0,1,fp8,fp8,0,0.030106666187445324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,1,64,128,1,float16,float16,0,0.022357332209746044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,1,64,0,1,float16,float16,0,0.022389332453409832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,1,64,128,1,float16,fp8,0,0.022986667851607006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,1,64,128,1,fp8,fp8,0,0.028437333802382152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,1,64,0,1,float16,fp8,0,0.02295999974012375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,1,64,0,1,fp8,fp8,0,0.02829866607983907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,2,64,128,1,float16,float16,0,0.02233600119749705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,2,64,0,1,float16,float16,0,0.022810667753219604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,2,64,128,1,float16,fp8,0,0.022863999009132385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,2,64,128,1,fp8,fp8,0,0.028160000840822857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,2,64,0,1,float16,fp8,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,2,64,0,1,fp8,fp8,0,0.028522667785485584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,4,64,128,1,float16,float16,0,0.022687998910744984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,4,64,0,1,float16,float16,0,0.02317333221435547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,4,64,128,1,float16,fp8,0,0.022709332406520844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,4,64,128,1,fp8,fp8,0,0.028602667152881622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,4,64,0,1,float16,fp8,0,0.023285334308942158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,4,64,0,1,fp8,fp8,0,0.027994667490323383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,8,64,128,1,float16,float16,0,0.022789334257443745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,8,64,0,1,float16,float16,0,0.022954667607943218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,8,64,128,1,float16,fp8,0,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,8,64,128,1,fp8,fp8,0,0.028901333610216778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,8,64,0,1,float16,fp8,0,0.022469334304332733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,8,64,0,1,fp8,fp8,0,0.02863999952872594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,96,64,128,1,float16,float16,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,96,64,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,96,64,128,1,float16,fp8,0,0.017450666675964992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,96,64,128,1,fp8,fp8,0,0.021349333226680756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,96,64,0,1,float16,fp8,0,0.017504000415404636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,96,64,0,1,fp8,fp8,0,0.02145066608985265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,1,64,128,1,float16,float16,0,0.016037333756685257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,1,64,0,1,float16,float16,0,0.016597333053747814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,1,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,1,64,128,1,fp8,fp8,0,0.02081599955757459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,1,64,0,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,1,64,0,1,fp8,fp8,0,0.020266667008399963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,2,64,128,1,float16,float16,0,0.016522667060295742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,2,64,0,1,float16,float16,0,0.016762666404247284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,2,64,128,1,float16,fp8,0,0.0164533331990242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,2,64,128,1,fp8,fp8,0,0.02090666691462199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,2,64,0,1,float16,fp8,0,0.016586666305859882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,2,64,0,1,fp8,fp8,0,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,4,64,128,1,float16,float16,0,0.016688000410795212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,4,64,0,1,float16,float16,0,0.01670933390657107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,4,64,128,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,4,64,128,1,fp8,fp8,0,0.020597333709398907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,4,64,0,1,float16,fp8,0,0.016528000434239704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,4,64,0,1,fp8,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,8,64,128,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,8,64,0,1,float16,float16,0,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,8,64,128,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,8,64,128,1,fp8,fp8,0,0.019786667078733444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,8,64,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,8,64,0,1,fp8,fp8,0,0.020165332903464634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,1,64,128,1,float16,float16,0,0.48956799507141113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,1,64,0,1,float16,float16,0,0.48899734020233154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,1,64,128,1,float16,fp8,0,0.4882133404413859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,1,64,0,1,float16,fp8,0,0.4884479840596517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,1,64,128,1,fp8,fp8,0,0.8834506670633951
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,1,64,0,1,fp8,fp8,0,0.8776799837748209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,2,64,128,1,float16,float16,0,0.4968159993489583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,2,64,0,1,float16,float16,0,0.493557333946228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,2,64,128,1,float16,fp8,0,0.49186134338378906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,2,64,0,1,float16,fp8,0,0.49508265654246014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,2,64,128,1,fp8,fp8,0,0.8803466955820719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,4,64,128,1,float16,float16,0,0.4989706675211589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,2,64,0,1,fp8,fp8,0,0.878602663675944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,4,64,0,1,float16,float16,0,0.49698134263356525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,4,64,128,1,float16,fp8,0,0.4920213222503662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,4,64,0,1,float16,fp8,0,0.4909333388010661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,4,64,128,1,fp8,fp8,0,0.881498654683431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,8,64,128,1,float16,float16,0,0.5107306639353434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,4,64,0,1,fp8,fp8,0,0.8834346930185953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,8,64,0,1,float16,float16,0,0.5079360008239746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,8,64,128,1,float16,fp8,0,0.5025706688563029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,8,64,128,1,fp8,fp8,0,0.906165361404419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,8,64,0,1,float16,fp8,0,0.5034186840057373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,8,64,0,1,fp8,fp8,0,0.9097440242767334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,96,64,128,1,float16,float16,0,0.3167733351389567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,96,64,0,1,float16,float16,0,0.3173706730206807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,96,64,128,1,float16,fp8,0,0.30820266405741376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,96,64,128,1,fp8,fp8,0,0.4891786575317383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,96,64,0,1,float16,fp8,0,0.30929599205652875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,96,64,0,1,fp8,fp8,0,0.48986132939656574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,1,64,128,1,float16,float16,0,0.25302932659784955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,1,64,0,1,float16,float16,0,0.25276799996693927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,1,64,128,1,float16,fp8,0,0.2532266577084859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,1,64,128,1,fp8,fp8,0,0.45026667912801105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,1,64,0,1,float16,fp8,0,0.25278933842976886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,1,64,0,1,fp8,fp8,0,0.45001598199208576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,2,64,128,1,float16,float16,0,0.2550453344980876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,2,64,0,1,float16,float16,0,0.2542506655057271
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,2,64,128,1,float16,fp8,0,0.25464532772699994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,2,64,128,1,fp8,fp8,0,0.45264001687367755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,2,64,0,1,float16,fp8,0,0.2553439935048421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,2,64,0,1,fp8,fp8,0,0.45183467864990234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,4,64,128,1,float16,float16,0,0.2553600072860718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,4,64,0,1,float16,float16,0,0.2551199992497762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,4,64,128,1,float16,fp8,0,0.25432000557581586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,4,64,128,1,fp8,fp8,0,0.4538880189259847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,4,64,0,1,float16,fp8,0,0.25421865781148273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,4,64,0,1,fp8,fp8,0,0.45289067427317303
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,8,64,128,1,float16,float16,0,0.25989333788553876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,8,64,0,1,float16,float16,0,0.2616159915924072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,8,64,128,1,float16,fp8,0,0.2590240041414897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,8,64,128,1,fp8,fp8,0,0.46268800894419354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,8,64,0,1,float16,fp8,0,0.2607733408610026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,96,64,128,1,float16,float16,0,0.1669173240661621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,8,64,0,1,fp8,fp8,0,0.46182934443155926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,96,64,0,1,float16,float16,0,0.16657066345214844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,96,64,128,1,float16,fp8,0,0.1630506714185079
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,96,64,128,1,fp8,fp8,0,0.25569067398707074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,96,64,0,1,float16,fp8,0,0.16302399833997092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,96,64,0,1,fp8,fp8,0,0.26316267251968384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,1,64,128,1,float16,float16,0,0.13338667154312134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,1,64,0,1,float16,float16,0,0.13455999890963236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,1,64,128,1,float16,fp8,0,0.1338879962762197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,1,64,128,1,fp8,fp8,0,0.23873066902160645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,1,64,0,1,float16,fp8,0,0.1344373325506846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,1,64,0,1,fp8,fp8,0,0.23809067408243814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,2,64,128,1,float16,float16,0,0.1337440013885498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,2,64,0,1,float16,float16,0,0.13352533181508383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,2,64,128,1,float16,fp8,0,0.1337440013885498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,2,64,128,1,fp8,fp8,0,0.23861332734425864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,2,64,0,1,float16,fp8,0,0.13333866993586221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,2,64,0,1,fp8,fp8,0,0.23810132344563803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,4,64,128,1,float16,float16,0,0.1350879967212677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,4,64,0,1,float16,float16,0,0.13401066263516745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,4,64,128,1,float16,fp8,0,0.13479999701182047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,4,64,128,1,fp8,fp8,0,0.23935467004776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,4,64,0,1,float16,fp8,0,0.1339413324991862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,4,64,0,1,fp8,fp8,0,0.23803732792536417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,8,64,128,1,float16,float16,0,0.13749866684277853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,8,64,0,1,float16,float16,0,0.1367733379205068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,8,64,128,1,float16,fp8,0,0.13724799950917563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,8,64,128,1,fp8,fp8,0,0.24133867025375366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,8,64,0,1,float16,fp8,0,0.13734400272369385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,8,64,0,1,fp8,fp8,0,0.2430773377418518
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,96,64,128,1,float16,float16,0,0.0928000013033549
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,96,64,0,1,float16,float16,0,0.09231467048327129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,96,64,128,1,float16,fp8,0,0.08939733107884724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,96,64,128,1,fp8,fp8,0,0.13889066378275552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,96,64,0,1,float16,fp8,0,0.08942932883898418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,96,64,0,1,fp8,fp8,0,0.14032000303268433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,1,64,128,1,float16,float16,0,0.07294400036334991
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,1,64,0,1,float16,float16,0,0.07344533503055573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,1,64,128,1,float16,fp8,0,0.07388799885908763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,1,64,128,1,fp8,fp8,0,0.13055466612180075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,1,64,0,1,float16,fp8,0,0.07383466760317485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,1,64,0,1,fp8,fp8,0,0.1291146675745646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,2,64,128,1,float16,float16,0,0.07215466598669688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,2,64,0,1,float16,float16,0,0.07236266632874806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,2,64,128,1,float16,fp8,0,0.07287466526031494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,2,64,128,1,fp8,fp8,0,0.13105600078900656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,2,64,0,1,float16,fp8,0,0.0738613357146581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,2,64,0,1,fp8,fp8,0,0.12904000282287598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,4,64,128,1,float16,float16,0,0.07403199871381123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,4,64,0,1,float16,float16,0,0.07334400216738383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,4,64,128,1,float16,fp8,0,0.07317333420117696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,4,64,128,1,fp8,fp8,0,0.1297920048236847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,4,64,0,1,float16,fp8,0,0.0734559992949168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,4,64,0,1,fp8,fp8,0,0.13034666577974954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,8,64,128,1,float16,float16,0,0.07567466795444489
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,8,64,0,1,float16,float16,0,0.07519466678301494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,8,64,128,1,float16,fp8,0,0.07518933216730754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,8,64,128,1,fp8,fp8,0,0.13155200084050497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,8,64,0,1,float16,fp8,0,0.07412266731262207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,8,64,0,1,fp8,fp8,0,0.1311840017636617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,96,64,128,1,float16,float16,0,0.05178666611512502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,96,64,0,1,float16,float16,0,0.05172266562779745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,96,64,128,1,float16,fp8,0,0.05026666820049286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,96,64,128,1,fp8,fp8,0,0.08016533156236012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,96,64,0,1,float16,fp8,0,0.050527999798456825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,96,64,0,1,fp8,fp8,0,0.07975466549396515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,1,64,128,1,float16,float16,0,0.042506664991378784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,1,64,0,1,float16,float16,0,0.04308266441027323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,1,64,128,1,float16,fp8,0,0.042730664213498436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,1,64,128,1,fp8,fp8,0,0.07337066531181335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,1,64,0,1,float16,fp8,0,0.04273599882920583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,1,64,0,1,fp8,fp8,0,0.07357333103815715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,2,64,128,1,float16,float16,0,0.042447999119758606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,2,64,0,1,float16,float16,0,0.042778665820757546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,2,64,128,1,float16,fp8,0,0.042693331837654114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,2,64,128,1,fp8,fp8,0,0.07213866710662842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,2,64,0,1,float16,fp8,0,0.04257600009441376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,2,64,0,1,fp8,fp8,0,0.07271466652552287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,4,64,128,1,float16,float16,0,0.04307200014591217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,4,64,0,1,float16,float16,0,0.0428959975639979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,4,64,128,1,float16,fp8,0,0.042624001701672874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,4,64,128,1,fp8,fp8,0,0.07220800220966339
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,4,64,0,1,float16,fp8,0,0.042778665820757546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,4,64,0,1,fp8,fp8,0,0.07291199763615926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,8,64,128,1,float16,float16,0,0.043477331598599754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,8,64,0,1,float16,float16,0,0.04364799956480662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,8,64,128,1,float16,fp8,0,0.044010668992996216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,8,64,128,1,fp8,fp8,0,0.07390933235486348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,8,64,0,1,float16,fp8,0,0.04351999859015147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,8,64,0,1,fp8,fp8,0,0.0730560024579366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,96,64,128,1,float16,float16,0,0.030031998952229817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,96,64,0,1,float16,float16,0,0.030949334303538006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,96,64,128,1,float16,fp8,0,0.03125333289305369
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,96,64,128,1,fp8,fp8,0,0.04976533353328705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,96,64,0,1,float16,fp8,0,0.031189332405726116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,96,64,0,1,fp8,fp8,0,0.04909333089987437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,1,64,128,1,float16,float16,0,0.028143999477227528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,1,64,0,1,float16,float16,0,0.028736000259717304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,1,64,128,1,float16,fp8,0,0.02792533238728841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,1,64,128,1,fp8,fp8,0,0.0458186666170756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,1,64,0,1,float16,fp8,0,0.02865600089232127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,1,64,0,1,fp8,fp8,0,0.04614933331807455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,2,64,128,1,float16,float16,0,0.02826133370399475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,2,64,0,1,float16,float16,0,0.028618666032950085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,2,64,128,1,float16,fp8,0,0.02850666642189026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,2,64,128,1,fp8,fp8,0,0.04584533472855886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,2,64,0,1,float16,fp8,0,0.028149334092934925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,2,64,0,1,fp8,fp8,0,0.046015997727712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,4,64,128,1,float16,float16,0,0.028938665986061096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,4,64,0,1,float16,float16,0,0.028912000358104706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,4,64,128,1,float16,fp8,0,0.028704000016053517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,4,64,128,1,fp8,fp8,0,0.04610666632652283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,4,64,0,1,float16,fp8,0,0.029135999580224354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,4,64,0,1,fp8,fp8,0,0.04570133487383524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,8,64,128,1,float16,float16,0,0.028410665690898895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,8,64,0,1,float16,float16,0,0.02938133229811986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,8,64,128,1,float16,fp8,0,0.02938666691382726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,8,64,128,1,fp8,fp8,0,0.04621333380540212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,8,64,0,1,float16,fp8,0,0.029002666473388672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,8,64,0,1,fp8,fp8,0,0.04650666813055674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,96,64,128,1,float16,float16,0,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,96,64,0,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,96,64,128,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,96,64,128,1,fp8,fp8,0,0.03236266722281774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,96,64,0,1,float16,fp8,0,0.021701333423455555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,96,64,0,1,fp8,fp8,0,0.032629333436489105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,1,64,128,1,float16,float16,0,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,1,64,0,1,float16,float16,0,0.02063999945918719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,1,64,128,1,float16,fp8,0,0.020842666427294414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,1,64,128,1,fp8,fp8,0,0.03134933362404505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,1,64,0,1,float16,fp8,0,0.019930666933457058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,1,64,0,1,fp8,fp8,0,0.030693332354227703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,2,64,128,1,float16,float16,0,0.020394666741291683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,2,64,0,1,float16,float16,0,0.020874666670958202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,2,64,128,1,float16,fp8,0,0.020842666427294414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,2,64,128,1,fp8,fp8,0,0.03125333289305369
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,2,64,0,1,float16,fp8,0,0.0205226664741834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,2,64,0,1,fp8,fp8,0,0.031104000906149547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,4,64,128,1,float16,float16,0,0.02035733312368393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,4,64,0,1,float16,float16,0,0.020351999749739964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,4,64,128,1,float16,fp8,0,0.02024000013868014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,4,64,128,1,fp8,fp8,0,0.031248000760873158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,4,64,0,1,float16,fp8,0,0.02070933332045873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,4,64,0,1,fp8,fp8,0,0.030565333863099415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,8,64,128,1,float16,float16,0,0.020400000115235645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,8,64,0,1,float16,float16,0,0.020224000016848247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,8,64,128,1,float16,fp8,0,0.02075733368595441
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,8,64,128,1,fp8,fp8,0,0.030597334106763203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,8,64,0,1,float16,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,8,64,0,1,fp8,fp8,0,0.03091199944416682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,96,64,128,1,float16,float16,0,0.015578666081031164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,96,64,0,1,float16,float16,0,0.01591466615597407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,96,64,128,1,float16,fp8,0,0.01603200038274129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,96,64,128,1,fp8,fp8,0,0.02489600082238515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,96,64,0,1,float16,fp8,0,0.01629866659641266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,96,64,0,1,fp8,fp8,0,0.02459733436505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,1,64,128,1,float16,float16,0,0.015583999454975128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,1,64,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,1,64,128,1,float16,fp8,0,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,1,64,128,1,fp8,fp8,0,0.023999998966852825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,1,64,0,1,float16,fp8,0,0.01562133307258288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,1,64,0,1,fp8,fp8,0,0.02405333270629247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,2,64,128,1,float16,float16,0,0.015626666446526844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,2,64,0,1,float16,float16,0,0.015423999478419622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,2,64,128,1,float16,fp8,0,0.015541333705186844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,2,64,128,1,fp8,fp8,0,0.024165332317352295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,2,64,0,1,float16,fp8,0,0.015941333025693893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,2,64,0,1,fp8,fp8,0,0.023951999843120575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,4,64,128,1,float16,float16,0,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,4,64,0,1,float16,float16,0,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,4,64,128,1,float16,fp8,0,0.016085332880417507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,8,64,128,1,float16,fp8,0,0.015861333658297855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,4,64,128,1,fp8,fp8,0,0.024069334069887798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,4,64,0,1,float16,fp8,0,0.015685333559910457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,4,64,0,1,fp8,fp8,0,0.023685333629449207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,8,64,128,1,float16,float16,0,0.015439999600251516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,8,64,0,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,8,64,128,1,fp8,fp8,0,0.02403733382622401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,8,64,0,1,float16,fp8,0,0.0161013330022494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,8,64,0,1,fp8,fp8,0,0.024773334463437397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,96,64,128,1,float16,float16,0,0.013823999712864557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,96,64,0,1,float16,float16,0,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,96,64,128,1,float16,fp8,0,0.014015999933083853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,96,64,128,1,fp8,fp8,0,0.01985599969824155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,96,64,0,1,float16,fp8,0,0.014015999933083853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,96,64,0,1,fp8,fp8,0,0.020346666375796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,1,64,128,1,float16,float16,0,0.013872000078360239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,1,64,0,1,float16,float16,0,0.013882666826248169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,1,64,128,1,float16,fp8,0,0.013829333086808523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,1,64,128,1,fp8,fp8,0,0.019727999965349834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,1,64,0,1,float16,fp8,0,0.0138026662170887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,1,64,0,1,fp8,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,2,64,128,1,float16,float16,0,0.013503999759753546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,2,64,0,1,float16,float16,0,0.013487999637921652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,2,64,128,1,float16,fp8,0,0.013962666193644205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,2,64,128,1,fp8,fp8,0,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,2,64,0,1,float16,fp8,0,0.013776000589132309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,2,64,0,1,fp8,fp8,0,0.01985599969824155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,4,64,128,1,float16,float16,0,0.01360000049074491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,4,64,0,1,float16,float16,0,0.01349866638580958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,4,64,128,1,float16,fp8,0,0.01403733342885971
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,4,64,128,1,fp8,fp8,0,0.019813333948453266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,4,64,0,1,float16,fp8,0,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,4,64,0,1,fp8,fp8,0,0.02057066683967908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,8,64,128,1,float16,float16,0,0.013904000322024027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,8,64,0,1,float16,float16,0,0.01392000044385592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,8,64,128,1,float16,fp8,0,0.014287999520699183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,8,64,128,1,fp8,fp8,0,0.019733333339293797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,8,64,0,1,float16,fp8,0,0.014373333503802618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,8,64,0,1,fp8,fp8,0,0.019765333582957584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,1,64,128,1,float16,float16,0,0.2876853346824646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,1,64,0,1,float16,float16,0,0.28833067417144775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,1,64,128,1,float16,fp8,0,0.2890239953994751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,1,64,128,1,fp8,fp8,0,0.7303946812947592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,1,64,0,1,float16,fp8,0,0.28855466842651367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,1,64,0,1,fp8,fp8,0,0.7277812957763672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,2,64,128,1,float16,float16,0,0.29135467608769733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,2,64,0,1,float16,float16,0,0.2908906737963359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,2,64,128,1,float16,fp8,0,0.2905120054880778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,2,64,128,1,fp8,fp8,0,0.7346453666687012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,2,64,0,1,float16,fp8,0,0.290175994237264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,2,64,0,1,fp8,fp8,0,0.7290240128835043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,4,64,128,1,float16,float16,0,0.29068267345428467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,4,64,0,1,float16,float16,0,0.29012266794840497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,4,64,128,1,float16,fp8,0,0.28991466760635376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,4,64,128,1,fp8,fp8,0,0.7338506380716959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,4,64,0,1,float16,fp8,0,0.2893120050430298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,8,64,128,1,float16,float16,0,0.2964479923248291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,4,64,0,1,fp8,fp8,0,0.7344906330108643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,8,64,0,1,float16,float16,0,0.29526933034261066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,8,64,128,1,float16,fp8,0,0.29395200808842975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,8,64,128,1,fp8,fp8,0,0.7381493250528971
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,8,64,0,1,float16,fp8,0,0.294650673866272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,96,64,128,1,float16,float16,0,0.17838400602340698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,8,64,0,1,fp8,fp8,0,0.7418560187021891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,96,64,0,1,float16,float16,0,0.17782400051752725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,96,64,128,1,float16,fp8,0,0.17239999771118164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,96,64,128,1,fp8,fp8,0,0.39700265725453693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,96,64,0,1,float16,fp8,0,0.17366933822631836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,1,64,128,1,float16,float16,0,0.15099199612935385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,96,64,0,1,fp8,fp8,0,0.39552533626556396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,1,64,0,1,float16,float16,0,0.1509066621462504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,1,64,128,1,float16,fp8,0,0.15081600348154703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,1,64,128,1,fp8,fp8,0,0.37861867745717365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,1,64,0,1,float16,fp8,0,0.15149333079655966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,1,64,0,1,fp8,fp8,0,0.3770933151245117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,2,64,128,1,float16,float16,0,0.15154133240381876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,2,64,0,1,float16,float16,0,0.15199466546376547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,2,64,128,1,float16,fp8,0,0.15100266536076865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,2,64,128,1,fp8,fp8,0,0.38005868593851727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,2,64,0,1,float16,fp8,0,0.1511573294798533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,2,64,0,1,fp8,fp8,0,0.37911999225616455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,4,64,128,1,float16,float16,0,0.1509119967619578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,4,64,0,1,float16,float16,0,0.15173332889874777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,4,64,128,1,float16,fp8,0,0.1518880029519399
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,4,64,128,1,fp8,fp8,0,0.3790026505788167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,4,64,0,1,float16,fp8,0,0.1514133314291636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,4,64,0,1,fp8,fp8,0,0.3803199927012126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,8,64,128,1,float16,float16,0,0.15333333611488342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,8,64,0,1,float16,float16,0,0.15389866630236307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,8,64,128,1,float16,fp8,0,0.15402133266131082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,8,64,128,1,fp8,fp8,0,0.37964268525441486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,8,64,0,1,float16,fp8,0,0.15361066659291586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,96,64,128,1,float16,float16,0,0.09692266583442688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,8,64,0,1,fp8,fp8,0,0.3816693226496379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,96,64,0,1,float16,float16,0,0.0962666670481364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,96,64,128,1,float16,fp8,0,0.09503466884295146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,96,64,128,1,fp8,fp8,0,0.2144320011138916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,96,64,0,1,float16,fp8,0,0.09443733096122742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,96,64,0,1,fp8,fp8,0,0.21013865868250528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,1,64,128,1,float16,float16,0,0.08126399914423625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,1,64,0,1,float16,float16,0,0.08110400040944417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,1,64,128,1,float16,fp8,0,0.08171199758847554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,1,64,128,1,fp8,fp8,0,0.20015466213226318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,1,64,0,1,float16,fp8,0,0.08190933366616567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,1,64,0,1,fp8,fp8,0,0.20003734032313028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,2,64,128,1,float16,float16,0,0.08228800197442372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,2,64,0,1,float16,float16,0,0.08167999982833862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,2,64,128,1,float16,fp8,0,0.0817333310842514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,2,64,128,1,fp8,fp8,0,0.1990613341331482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,2,64,0,1,float16,fp8,0,0.08242666721343994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,2,64,0,1,fp8,fp8,0,0.1997013290723165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,4,64,128,1,float16,float16,0,0.0823359986146291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,4,64,0,1,float16,float16,0,0.08157866696516673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,4,64,128,1,float16,fp8,0,0.08210133512814839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,4,64,128,1,fp8,fp8,0,0.19881600141525269
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,4,64,0,1,float16,fp8,0,0.08247466882069905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,4,64,0,1,fp8,fp8,0,0.19904534022013345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,8,64,128,1,float16,float16,0,0.08303999900817871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,8,64,0,1,float16,float16,0,0.08364799618721008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,8,64,128,1,float16,fp8,0,0.0828959991534551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,8,64,128,1,fp8,fp8,0,0.20141865809758505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,8,64,0,1,float16,fp8,0,0.08358933528264363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,8,64,0,1,fp8,fp8,0,0.20055999358495077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,96,64,128,1,float16,float16,0,0.053541332483291626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,96,64,0,1,float16,float16,0,0.05322133501370748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,96,64,128,1,float16,fp8,0,0.05195199946562449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,96,64,128,1,fp8,fp8,0,0.11600533127784729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,96,64,0,1,float16,fp8,0,0.05178666611512502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,96,64,0,1,fp8,fp8,0,0.1160640021165212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,1,64,128,1,float16,float16,0,0.04524266719818115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,1,64,0,1,float16,float16,0,0.045168002446492515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,1,64,128,1,float16,fp8,0,0.04571733375390371
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,1,64,128,1,fp8,fp8,0,0.10803733269373576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,1,64,0,1,float16,fp8,0,0.04550399879614512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,1,64,0,1,fp8,fp8,0,0.10839999715487163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,2,64,128,1,float16,float16,0,0.045168002446492515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,2,64,0,1,float16,float16,0,0.04572266836961111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,2,64,128,1,float16,fp8,0,0.04548799991607666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,2,64,128,1,fp8,fp8,0,0.10711466272672017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,2,64,0,1,float16,fp8,0,0.04557333389918009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,2,64,0,1,fp8,fp8,0,0.10671466588973999
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,4,64,128,1,float16,float16,0,0.045653333266576133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,4,64,0,1,float16,float16,0,0.045328001181284584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,4,64,128,1,float16,fp8,0,0.04571199913819631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,4,64,128,1,fp8,fp8,0,0.10803733269373576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,4,64,0,1,float16,fp8,0,0.045509333411852516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,4,64,0,1,fp8,fp8,0,0.10731732845306396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,8,64,128,1,float16,float16,0,0.04590400060017904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,8,64,0,1,float16,float16,0,0.048672000567118325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,8,64,128,1,float16,fp8,0,0.046944002310434975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,8,64,128,1,fp8,fp8,0,0.1081119974454244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,8,64,0,1,float16,fp8,0,0.04678399860858917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,8,64,0,1,fp8,fp8,0,0.10874133308728536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,96,64,128,1,float16,float16,0,0.03156800071398417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,96,64,0,1,float16,float16,0,0.03177600105603536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,96,64,128,1,float16,fp8,0,0.031311998764673867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,96,64,128,1,fp8,fp8,0,0.06794133285681407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,96,64,0,1,float16,fp8,0,0.03468266626199087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,96,64,0,1,fp8,fp8,0,0.06779199838638306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,1,64,128,1,float16,float16,0,0.02959466725587845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,1,64,0,1,float16,float16,0,0.029706666866938274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,1,64,128,1,float16,fp8,0,0.030608000854651134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,1,64,128,1,fp8,fp8,0,0.06499733527501424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,1,64,0,1,float16,fp8,0,0.029802667597929638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,1,64,0,1,fp8,fp8,0,0.06788800160090129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,2,64,128,1,float16,float16,0,0.030005333324273426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,2,64,0,1,float16,float16,0,0.02975466599067052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,2,64,128,1,float16,fp8,0,0.02972800036271413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,2,64,128,1,fp8,fp8,0,0.06504533191521962
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,2,64,0,1,float16,fp8,0,0.03018666555484136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,2,64,0,1,fp8,fp8,0,0.06458133459091187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,4,64,128,1,float16,float16,0,0.030192000170548756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,4,64,0,1,float16,float16,0,0.029866665601730347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,4,64,128,1,float16,fp8,0,0.030608000854651134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,4,64,128,1,fp8,fp8,0,0.06372266511122386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,4,64,0,1,float16,fp8,0,0.03070933371782303
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,4,64,0,1,fp8,fp8,0,0.06453333298365276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,8,64,128,1,float16,float16,0,0.030693332354227703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,8,64,0,1,float16,float16,0,0.030447999636332195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,8,64,128,1,float16,fp8,0,0.03044266750415166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,8,64,128,1,fp8,fp8,0,0.06418133278687795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,8,64,0,1,float16,fp8,0,0.030261332790056866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,8,64,0,1,fp8,fp8,0,0.06387199958165486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,96,64,128,1,float16,float16,0,0.02089600016673406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,96,64,0,1,float16,float16,0,0.020773333807786305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,96,64,128,1,float16,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,96,64,128,1,fp8,fp8,0,0.04244266450405121
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,96,64,0,1,float16,fp8,0,0.021221332252025604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,96,64,0,1,fp8,fp8,0,0.041349334021409355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,1,64,128,1,float16,float16,0,0.020261333634455998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,1,64,0,1,float16,float16,0,0.020175999651352566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,1,64,128,1,float16,fp8,0,0.020154666155576706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,1,64,128,1,fp8,fp8,0,0.04081599911053976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,2,64,128,1,fp8,fp8,0,0.04049599915742874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,1,64,0,1,float16,fp8,0,0.02040533348917961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,1,64,0,1,fp8,fp8,0,0.040847999354203544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,2,64,128,1,float16,float16,0,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,2,64,0,1,float16,float16,0,0.020234666764736176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,2,64,128,1,float16,fp8,0,0.02077866718173027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,2,64,0,1,float16,fp8,0,0.02067199970285098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,2,64,0,1,fp8,fp8,0,0.04057066639264425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,4,64,128,1,float16,float16,0,0.020319999506076176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,4,64,0,1,float16,float16,0,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,4,64,128,1,float16,fp8,0,0.02067733307679494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,4,64,128,1,fp8,fp8,0,0.03962666789690653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,4,64,0,1,float16,fp8,0,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,4,64,0,1,fp8,fp8,0,0.0402399996916453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,8,64,128,1,float16,float16,0,0.020618667205174763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,8,64,0,1,float16,float16,0,0.02060266708334287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,8,64,128,1,float16,fp8,0,0.020773333807786305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,8,64,128,1,fp8,fp8,0,0.04099733382463455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,8,64,0,1,float16,fp8,0,0.020351999749739964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,8,64,0,1,fp8,fp8,0,0.03992533435424169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,96,64,128,1,float16,float16,0,0.015482666591803232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,96,64,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,96,64,128,1,float16,fp8,0,0.015861333658297855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,96,64,128,1,fp8,fp8,0,0.028304000695546467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,96,64,0,1,float16,fp8,0,0.015658666690190632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,96,64,0,1,fp8,fp8,0,0.028330666323502857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,1,64,128,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,1,64,0,1,float16,float16,0,0.014661333213249842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,1,64,128,1,float16,fp8,0,0.015429332852363586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,1,64,128,1,fp8,fp8,0,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,1,64,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,1,64,0,1,fp8,fp8,0,0.027493332823117573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,2,64,128,1,float16,float16,0,0.014554666976133982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,2,64,0,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,2,64,128,1,float16,fp8,0,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,2,64,128,1,fp8,fp8,0,0.027818667391935985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,2,64,0,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,2,64,0,1,fp8,fp8,0,0.027136000494162243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,4,64,128,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,4,64,0,1,float16,float16,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,4,64,128,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,4,64,128,1,fp8,fp8,0,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,4,64,0,1,float16,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,4,64,0,1,fp8,fp8,0,0.02755733331044515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,8,64,128,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,8,64,0,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,8,64,128,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,8,64,128,1,fp8,fp8,0,0.027952000498771667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,8,64,0,1,float16,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,8,64,0,1,fp8,fp8,0,0.02775999903678894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,96,64,128,1,float16,float16,0,0.013770667215188345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,96,64,0,1,float16,float16,0,0.013898666948080063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,96,64,128,1,float16,fp8,0,0.014293332894643148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,96,64,128,1,fp8,fp8,0,0.02348266790310542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,96,64,0,1,float16,fp8,0,0.014458666245142618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,96,64,0,1,fp8,fp8,0,0.023872000475724537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,1,64,128,1,float16,float16,0,0.013744000345468521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,1,64,0,1,float16,float16,0,0.013637332866589228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,1,64,128,1,float16,fp8,0,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,1,64,128,1,fp8,fp8,0,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,1,64,0,1,float16,fp8,0,0.0143306665122509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,1,64,0,1,fp8,fp8,0,0.02293866624434789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,2,64,128,1,float16,float16,0,0.013872000078360239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,2,64,0,1,float16,float16,0,0.013946666071812311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,2,64,128,1,float16,fp8,0,0.01434133326013883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,2,64,128,1,fp8,fp8,0,0.02334933231274287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,2,64,0,1,float16,fp8,0,0.013733333597580591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,2,64,0,1,fp8,fp8,0,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,4,64,128,1,float16,float16,0,0.013877333452304205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,4,64,0,1,float16,float16,0,0.013616000612576803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,4,64,128,1,float16,fp8,0,0.014426667243242264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,4,64,128,1,fp8,fp8,0,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,4,64,0,1,float16,fp8,0,0.014245333770910898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,4,64,0,1,fp8,fp8,0,0.02342933416366577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,8,64,128,1,float16,float16,0,0.013749333719412485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,8,64,0,1,float16,float16,0,0.013754667093356451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,8,64,128,1,float16,fp8,0,0.013994666437307993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,8,64,128,1,fp8,fp8,0,0.023311999936898548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,8,64,0,1,float16,fp8,0,0.013637332866589228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,8,64,0,1,fp8,fp8,0,0.02250133454799652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,96,64,128,1,float16,float16,0,0.01209066684047381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,96,64,0,1,float16,float16,0,0.012186666329701742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,96,64,128,1,float16,fp8,0,0.011994666109482447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,96,64,128,1,fp8,fp8,0,0.0184906671444575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,96,64,0,1,float16,fp8,0,0.01137599969903628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,96,64,0,1,fp8,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,1,64,128,1,float16,float16,0,0.011850666254758835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,1,64,0,1,float16,float16,0,0.011829332758982977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,1,64,128,1,float16,fp8,0,0.01249066616098086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,1,64,128,1,fp8,fp8,0,0.018565333137909572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,1,64,0,1,float16,fp8,0,0.012522666404644648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,1,64,0,1,fp8,fp8,0,0.018709332992633183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,2,64,0,1,fp8,fp8,0,0.018640000373125076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,2,64,128,1,float16,float16,0,0.011594666788975397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,2,64,0,1,float16,float16,0,0.01191466674208641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,2,64,128,1,float16,fp8,0,0.01227733368674914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,2,64,128,1,fp8,fp8,0,0.018789333601792652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,2,64,0,1,float16,fp8,0,0.01250133290886879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,4,64,128,1,float16,float16,0,0.012181332955757776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,4,64,0,1,float16,float16,0,0.012042666474978128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,4,64,128,1,float16,fp8,0,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,4,64,128,1,fp8,fp8,0,0.01836799954374631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,4,64,0,1,float16,fp8,0,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,4,64,0,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,8,64,128,1,float16,float16,0,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,8,64,0,1,float16,float16,0,0.012485332787036896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,8,64,128,1,float16,fp8,0,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,1,64,128,1,fp8,fp8,0,0.657696008682251
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,8,64,128,1,fp8,fp8,0,0.019674666225910187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,8,64,0,1,float16,fp8,0,0.012533333152532578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,8,64,0,1,fp8,fp8,0,0.01868266612291336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,1,64,128,1,float16,float16,0,0.20895999670028687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,1,64,0,1,float16,float16,0,0.21014400323232016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,1,64,128,1,float16,fp8,0,0.21009065707524618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,1,64,0,1,float16,fp8,0,0.20856000979741415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,1,64,0,1,fp8,fp8,0,0.6593493223190308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,2,64,128,1,float16,float16,0,0.21045867602030435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,2,64,0,1,float16,float16,0,0.21045867602030435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,2,64,128,1,float16,fp8,0,0.20985066890716553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,2,64,0,1,float16,fp8,0,0.21107200781504312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,2,64,128,1,fp8,fp8,0,0.6568160057067871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,4,64,128,1,float16,float16,0,0.20938666661580405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,2,64,0,1,fp8,fp8,0,0.6611306667327881
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,4,64,0,1,float16,float16,0,0.21150400241216025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,4,64,128,1,float16,fp8,0,0.21024000644683838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,4,64,128,1,fp8,fp8,0,0.6623146533966064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,4,64,0,1,float16,fp8,0,0.2100106676419576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,8,64,128,1,float16,float16,0,0.2132479945818583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,4,64,0,1,fp8,fp8,0,0.6617546478907267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,8,64,0,1,float16,float16,0,0.2125599980354309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,8,64,128,1,float16,fp8,0,0.21226133902867636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,8,64,128,1,fp8,fp8,0,0.6607466538747152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,8,64,0,1,float16,fp8,0,0.21277334292729697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,8,64,0,1,fp8,fp8,0,0.6647786696751913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,96,64,128,1,float16,float16,0,0.11972266435623169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,96,64,0,1,float16,float16,0,0.1193386713663737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,96,64,128,1,float16,fp8,0,0.11796266833941142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,96,64,128,1,fp8,fp8,0,0.3491520086924235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,96,64,0,1,float16,fp8,0,0.11694399515787761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,96,64,0,1,fp8,fp8,0,0.3503146568934123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,1,64,128,1,float16,float16,0,0.1106719970703125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,1,64,0,1,float16,float16,0,0.11064533392588298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,1,64,128,1,float16,fp8,0,0.11095466216405232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,1,64,128,1,fp8,fp8,0,0.33847999572753906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,1,64,0,1,float16,fp8,0,0.11054399609565735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,1,64,0,1,fp8,fp8,0,0.33947734038035077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,2,64,128,1,float16,float16,0,0.1107360025246938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,2,64,0,1,float16,float16,0,0.11125333110491435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,2,64,128,1,float16,fp8,0,0.11081066727638245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,2,64,128,1,fp8,fp8,0,0.34173866113026935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,2,64,0,1,float16,fp8,0,0.11106666922569275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,2,64,0,1,fp8,fp8,0,0.3387519915898641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,4,64,128,1,float16,float16,0,0.11135466893513997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,4,64,0,1,float16,float16,0,0.11102400223414104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,4,64,128,1,float16,fp8,0,0.11115200320879619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,4,64,128,1,fp8,fp8,0,0.34030401706695557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,4,64,0,1,float16,fp8,0,0.11157332857449849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,4,64,0,1,fp8,fp8,0,0.3421013355255127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,8,64,128,1,float16,float16,0,0.11317867040634155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,8,64,0,1,float16,float16,0,0.11195199688275655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,8,64,128,1,float16,fp8,0,0.11178666353225708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,8,64,128,1,fp8,fp8,0,0.3408373196919759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,8,64,0,1,float16,fp8,0,0.11184533437093098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,8,64,0,1,fp8,fp8,0,0.34244799613952637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,96,64,128,1,float16,float16,0,0.06390400230884552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,96,64,0,1,float16,float16,0,0.06431999802589417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,96,64,128,1,float16,fp8,0,0.0631520003080368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,96,64,128,1,fp8,fp8,0,0.18602667252222696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,96,64,0,1,float16,fp8,0,0.06285866598288219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,96,64,0,1,fp8,fp8,0,0.18555200099945068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,1,64,128,1,float16,float16,0,0.059802666306495667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,1,64,0,1,float16,float16,0,0.059717332323392235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,1,64,128,1,float16,fp8,0,0.05981333553791046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,1,64,128,1,fp8,fp8,0,0.17907200256983438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,1,64,0,1,float16,fp8,0,0.059952000776926674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,1,64,0,1,fp8,fp8,0,0.1777013341585795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,2,64,128,1,float16,float16,0,0.05991466840108236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,2,64,0,1,float16,float16,0,0.05972266693909963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,2,64,128,1,float16,fp8,0,0.05997333427270254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,2,64,128,1,fp8,fp8,0,0.17778132359186807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,2,64,0,1,float16,fp8,0,0.05997333427270254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,2,64,0,1,fp8,fp8,0,0.17798399925231934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,4,64,128,1,float16,float16,0,0.05981333553791046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,4,64,0,1,float16,float16,0,0.06028266747792562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,4,64,128,1,float16,fp8,0,0.05991466840108236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,4,64,128,1,fp8,fp8,0,0.17757866779963175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,4,64,0,1,float16,fp8,0,0.06005866825580597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,4,64,0,1,fp8,fp8,0,0.17859200636545816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,8,64,128,1,float16,float16,0,0.060421332716941833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,8,64,0,1,float16,float16,0,0.05993066728115082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,8,64,128,1,float16,fp8,0,0.06076799829800924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,8,64,128,1,fp8,fp8,0,0.1801919937133789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,8,64,0,1,float16,fp8,0,0.06043733159701029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,8,64,0,1,fp8,fp8,0,0.17897067467371622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,96,64,128,1,float16,float16,0,0.037258667250474296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,96,64,0,1,float16,float16,0,0.03766400118668874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,96,64,128,1,float16,fp8,0,0.03748266647259394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,96,64,128,1,fp8,fp8,0,0.1018399993578593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,96,64,0,1,float16,fp8,0,0.037317333122094475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,96,64,0,1,fp8,fp8,0,0.10316266616185506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,1,64,128,1,float16,float16,0,0.037077332536379494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,1,64,0,1,float16,float16,0,0.037151999771595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,1,64,128,1,float16,fp8,0,0.037077332536379494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,1,64,128,1,fp8,fp8,0,0.10036266843477885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,1,64,0,1,float16,fp8,0,0.03752533346414566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,1,64,0,1,fp8,fp8,0,0.0993333359559377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,2,64,128,1,float16,float16,0,0.036933332681655884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,2,64,0,1,float16,float16,0,0.03711999952793121
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,2,64,128,1,float16,fp8,0,0.037263999382654824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,2,64,128,1,fp8,fp8,0,0.10019200046857198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,2,64,0,1,float16,fp8,0,0.037658666570981346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,2,64,0,1,fp8,fp8,0,0.10094400246938069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,4,64,128,1,float16,float16,0,0.03751999884843826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,4,64,0,1,float16,float16,0,0.03702933341264725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,4,64,128,1,float16,fp8,0,0.03751999884843826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,4,64,128,1,fp8,fp8,0,0.10038933157920837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,4,64,0,1,float16,fp8,0,0.03696533292531967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,4,64,0,1,fp8,fp8,0,0.09982400139172871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,8,64,128,1,float16,float16,0,0.03766400118668874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,8,64,0,1,float16,float16,0,0.03756800045569738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,8,64,128,1,float16,fp8,0,0.03775466730197271
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,8,64,128,1,fp8,fp8,0,0.09917333722114563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,8,64,0,1,float16,fp8,0,0.03734933336575826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,8,64,0,1,fp8,fp8,0,0.10106666882832845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,96,64,128,1,float16,float16,0,0.024330665667851765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,96,64,0,1,float16,float16,0,0.023984000086784363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,96,64,128,1,float16,fp8,0,0.02481066683928172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,96,64,128,1,fp8,fp8,0,0.060133333007494606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,96,64,0,1,float16,fp8,0,0.024490666886170704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,96,64,0,1,fp8,fp8,0,0.06090133388837179
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,1,64,128,1,float16,float16,0,0.024336000283559162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,1,64,0,1,float16,float16,0,0.02446399877468745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,1,64,128,1,float16,fp8,0,0.024485332270463307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,1,64,128,1,fp8,fp8,0,0.05941333373387655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,1,64,0,1,float16,fp8,0,0.024405332903067272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,1,64,0,1,fp8,fp8,0,0.058789332707722984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,2,64,128,1,float16,float16,0,0.024101334313551586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,2,64,0,1,float16,float16,0,0.024432001014550526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,2,64,128,1,float16,fp8,0,0.024618667860825855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,2,64,128,1,fp8,fp8,0,0.05959466596444448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,2,64,0,1,float16,fp8,0,0.024559999505678814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,2,64,0,1,fp8,fp8,0,0.05938133100668589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,4,64,128,1,float16,float16,0,0.024085332949956257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,4,64,0,1,float16,float16,0,0.024197332561016083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,4,64,128,1,float16,fp8,0,0.024277334411938984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,4,64,128,1,fp8,fp8,0,0.05826666454474131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,4,64,0,1,float16,fp8,0,0.024832000335057575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,4,64,0,1,fp8,fp8,0,0.05958400170008341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,8,64,128,1,float16,float16,0,0.02465066562096278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,8,64,0,1,float16,float16,0,0.024522667129834492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,8,64,128,1,float16,fp8,0,0.024336000283559162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,8,64,128,1,fp8,fp8,0,0.059008002281188965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,8,64,0,1,float16,fp8,0,0.02441066751877467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,8,64,0,1,fp8,fp8,0,0.05867200096448263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,96,64,128,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,96,64,0,1,float16,float16,0,0.016469333320856094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,96,64,128,1,float16,fp8,0,0.01669866715868314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,96,64,128,1,fp8,fp8,0,0.03788800040880839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,96,64,0,1,float16,fp8,0,0.016623999923467636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,96,64,0,1,fp8,fp8,0,0.03745066622893015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,1,64,128,1,float16,float16,0,0.01632533346613248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,1,64,0,1,float16,float16,0,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,1,64,128,1,float16,fp8,0,0.017866666118303936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,1,64,128,1,fp8,fp8,0,0.03666666646798452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,1,64,0,1,float16,fp8,0,0.016208000481128693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,1,64,0,1,fp8,fp8,0,0.03669866671164831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,2,64,128,1,float16,float16,0,0.016389333953460056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,2,64,0,1,float16,float16,0,0.016597333053747814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,2,64,128,1,float16,fp8,0,0.016645333419243496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,2,64,128,1,fp8,fp8,0,0.036517334481080375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,2,64,0,1,float16,fp8,0,0.016373333831628162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,2,64,0,1,fp8,fp8,0,0.03645866612593333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,4,64,128,1,float16,float16,0,0.016352000335852306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,4,64,0,1,float16,float16,0,0.016634666671355564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,4,64,128,1,float16,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,4,64,128,1,fp8,fp8,0,0.03736000011364619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,4,64,0,1,float16,fp8,0,0.01669866715868314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,4,64,0,1,fp8,fp8,0,0.03633599976698557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,8,64,128,1,float16,float16,0,0.01632533346613248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,8,64,0,1,float16,float16,0,0.016106666376193363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,8,64,128,1,float16,fp8,0,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,8,64,128,1,fp8,fp8,0,0.036837334434191384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,8,64,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,8,64,0,1,fp8,fp8,0,0.0364479993780454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,96,64,128,1,float16,float16,0,0.013898666948080063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,96,64,0,1,float16,float16,0,0.013765333841244379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,96,64,128,1,float16,fp8,0,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,96,64,128,1,fp8,fp8,0,0.027290667096773785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,96,64,0,1,float16,fp8,0,0.014106666048367819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,96,64,0,1,fp8,fp8,0,0.02741333345572154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,1,64,128,1,float16,float16,0,0.014069333672523499
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,1,64,0,1,float16,float16,0,0.014202666779359182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,1,64,128,1,float16,fp8,0,0.013951999445756277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,1,64,128,1,fp8,fp8,0,0.027285332481066387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,1,64,0,1,float16,fp8,0,0.014373333503802618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,1,64,0,1,fp8,fp8,0,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,2,64,128,1,float16,float16,0,0.0138026662170887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,2,64,0,1,float16,float16,0,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,2,64,128,1,float16,fp8,0,0.01394133393963178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,2,64,128,1,fp8,fp8,0,0.026629333694775898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,2,64,0,1,float16,fp8,0,0.014106666048367819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,2,64,0,1,fp8,fp8,0,0.026554666459560394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,4,64,128,1,float16,float16,0,0.01413333291808764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,4,64,0,1,float16,float16,0,0.014229333649079004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,4,64,128,1,float16,fp8,0,0.014229333649079004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,4,64,128,1,fp8,fp8,0,0.02683199942111969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,4,64,0,1,float16,fp8,0,0.014149333039919535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,4,64,0,1,fp8,fp8,0,0.026309333741664886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,8,64,128,1,float16,float16,0,0.013733333597580591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,8,64,0,1,float16,float16,0,0.014042666802803675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,8,64,128,1,float16,fp8,0,0.015530666957298914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,8,64,128,1,fp8,fp8,0,0.027104000250498455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,8,64,0,1,float16,fp8,0,0.014479999740918478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,8,64,0,1,fp8,fp8,0,0.026554666459560394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,1,64,128,1,float16,float16,0,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,96,64,128,1,float16,float16,0,0.012576000144084295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,96,64,0,1,float16,float16,0,0.012304000556468964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,96,64,128,1,float16,fp8,0,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,96,64,128,1,fp8,fp8,0,0.023311999936898548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,96,64,0,1,float16,fp8,0,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,96,64,0,1,fp8,fp8,0,0.022682666778564453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,1,64,0,1,float16,float16,0,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,1,64,128,1,float16,fp8,0,0.013434667140245438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,1,64,128,1,fp8,fp8,0,0.022111999491850536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,1,64,0,1,float16,fp8,0,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,1,64,0,1,fp8,fp8,0,0.022789334257443745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,2,64,128,1,float16,float16,0,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,2,64,0,1,float16,float16,0,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,2,64,128,1,float16,fp8,0,0.013237333546082178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,2,64,128,1,fp8,fp8,0,0.022319999833901722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,2,64,0,1,float16,fp8,0,0.013466666142145792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,2,64,0,1,fp8,fp8,0,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,4,64,128,1,float16,float16,0,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,4,64,0,1,float16,float16,0,0.01292266696691513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,4,64,128,1,float16,fp8,0,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,4,64,128,1,fp8,fp8,0,0.022469334304332733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,4,64,0,1,float16,fp8,0,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,4,64,0,1,fp8,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,8,64,128,1,float16,float16,0,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,8,64,0,1,float16,float16,0,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,8,64,128,1,float16,fp8,0,0.013306666165590286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,8,64,128,1,fp8,fp8,0,0.025839999318122864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,8,64,0,1,float16,fp8,0,0.013269333789745966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,8,64,0,1,fp8,fp8,0,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,96,64,128,1,float16,float16,0,0.011887999872366587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,96,64,0,1,float16,float16,0,0.012042666474978128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,96,64,128,1,float16,fp8,0,0.01209066684047381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,96,64,128,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,96,64,0,1,float16,fp8,0,0.012442667037248611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,96,64,0,1,fp8,fp8,0,0.020448000480731327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,1,64,128,1,float16,float16,0,0.011941333611806234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,1,64,0,1,float16,float16,0,0.01210133358836174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,1,64,128,1,float16,fp8,0,0.012597333639860153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,1,64,128,1,fp8,fp8,0,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,1,64,0,1,float16,fp8,0,0.012410666793584824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,1,64,0,1,fp8,fp8,0,0.019509332875410717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,2,64,128,1,float16,float16,0,0.012432000289360682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,2,64,0,1,float16,float16,0,0.01211200033624967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,2,64,128,1,float16,fp8,0,0.012367999802033106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,2,64,128,1,fp8,fp8,0,0.019823999454577763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,2,64,0,1,float16,fp8,0,0.01259200026591619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,2,64,0,1,fp8,fp8,0,0.019765333582957584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,4,64,128,1,float16,float16,0,0.012186666329701742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,4,64,0,1,float16,float16,0,0.012117333710193634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,4,64,128,1,float16,fp8,0,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,4,64,128,1,fp8,fp8,0,0.018709332992633183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,8,64,128,1,fp8,fp8,0,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,4,64,0,1,float16,fp8,0,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,4,64,0,1,fp8,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,8,64,128,1,float16,float16,0,0.012159999459981918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,8,64,0,1,float16,float16,0,0.012335999558369318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,8,64,128,1,float16,fp8,0,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,8,64,0,1,float16,fp8,0,0.012618667135636011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,8,64,0,1,fp8,fp8,0,0.018698666244745255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,1,64,128,1,float16,float16,0,5.615386962890625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,1,64,128,1,float16,fp8,0,5.511589050292969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,1,64,128,1,fp8,fp8,0,7.324687957763672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,2,64,128,1,float16,float16,0,5.681264241536458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,2,64,128,1,float16,fp8,0,5.603247960408528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,2,64,128,1,fp8,fp8,0,7.392927805582683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,4,64,128,1,float16,float16,0,5.698421478271484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,1,64,0,1,float16,float16,0,40.4476064046224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,4,64,128,1,float16,fp8,0,5.630282719930013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,1,64,0,1,fp8,fp8,0,34.338452657063804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,1,64,0,1,float16,fp8,0,39.81773885091146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,4,64,128,1,fp8,fp8,0,7.434282938639323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,8,64,128,1,float16,float16,0,5.747077306111653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,2,64,0,1,fp8,fp8,0,34.38403828938802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,8,64,128,1,float16,fp8,0,5.626031875610352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,2,64,0,1,float16,float16,0,40.430651346842446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,2,64,0,1,float16,fp8,0,41.08167012532552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,8,64,128,1,fp8,fp8,0,7.422522862752278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,64,64,128,1,float16,float16,0,3.1716480255126953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,4,64,0,1,float16,float16,0,40.57068379720052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,64,64,128,1,float16,fp8,0,3.1589279174804688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,64,64,128,1,fp8,fp8,0,4.138869285583496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,4,64,0,1,fp8,fp8,0,34.43798319498698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,4,64,0,1,float16,fp8,0,39.64279429117838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,1,64,128,1,float16,float16,0,2.8344640731811523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,1,64,128,1,float16,fp8,0,2.7849388122558594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,64,64,0,1,float16,float16,0,19.56055450439453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,8,64,0,1,float16,float16,0,40.864784240722656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,1,64,128,1,fp8,fp8,0,3.7086187998453775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,64,64,0,1,float16,fp8,0,19.7415033976237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,8,64,0,1,fp8,fp8,0,34.70800018310547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,64,64,0,1,fp8,fp8,0,17.70465087890625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,2,64,128,1,float16,float16,0,2.869413375854492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,2,64,128,1,float16,fp8,0,2.839178721110026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,8,64,0,1,float16,fp8,0,39.996665954589844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,2,64,128,1,fp8,fp8,0,3.759205182393392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,1,64,0,1,float16,float16,0,19.204911549886067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,4,64,128,1,float16,float16,0,2.897045453389486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,1,64,0,1,fp8,fp8,0,17.269568125406902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,1,64,0,1,float16,fp8,0,19.30714162190755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,4,64,128,1,float16,fp8,0,2.85150941212972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,4,64,128,1,fp8,fp8,0,3.7943948109944663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,8,64,128,1,float16,float16,0,2.9041547775268555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,2,64,0,1,float16,float16,0,19.456746419270832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,8,64,128,1,float16,fp8,0,2.8676799138387046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,2,64,0,1,fp8,fp8,0,17.464069366455078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,8,64,128,1,fp8,fp8,0,3.82586669921875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,2,64,0,1,float16,fp8,0,19.24303944905599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,4,64,0,1,float16,float16,0,19.164122263590496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,64,64,128,1,float16,float16,0,1.6560427347819011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,4,64,0,1,float16,fp8,0,19.726954142252605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,64,64,128,1,float16,fp8,0,1.6710666020711262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,4,64,0,1,fp8,fp8,0,17.328020731608074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,64,64,128,1,fp8,fp8,0,2.153706709543864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,1,64,128,1,float16,float16,0,1.5308373769124348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,1,64,128,1,float16,fp8,0,1.5074399312337239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,8,64,0,1,float16,float16,0,18.987642923990887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,64,64,0,1,float16,float16,0,9.882309595743815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,1,64,128,1,fp8,fp8,0,1.9777386983235676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,64,64,0,1,fp8,fp8,0,8.980688095092773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,64,64,0,1,float16,fp8,0,10.045087814331055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,8,64,0,1,fp8,fp8,0,17.350245157877605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,8,64,0,1,float16,fp8,0,19.09222412109375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,1,64,0,1,float16,float16,0,9.712778727213541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,2,64,128,1,float16,float16,0,1.537999947865804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,1,64,0,1,float16,fp8,0,9.549877166748047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,1,64,0,1,fp8,fp8,0,8.776325225830078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,2,64,128,1,float16,fp8,0,1.5122507413228352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,2,64,128,1,fp8,fp8,0,1.9830560684204102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,2,64,0,1,float16,float16,0,9.638821283976236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,4,64,128,1,float16,float16,0,1.5427306493123372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,2,64,0,1,fp8,fp8,0,8.79742431640625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,2,64,0,1,float16,fp8,0,9.685951868693033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,4,64,128,1,float16,fp8,0,1.5208373069763184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,4,64,128,1,fp8,fp8,0,1.9771893819173176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,4,64,0,1,float16,float16,0,9.575125376383463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,8,64,128,1,float16,float16,0,1.548144022623698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,8,64,128,1,float16,fp8,0,1.5301119486490886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,4,64,0,1,float16,fp8,0,9.63096555074056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,8,64,128,1,fp8,fp8,0,1.9981279373168945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,4,64,0,1,fp8,fp8,0,8.775221506754557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,8,64,0,1,float16,float16,0,9.833157221476236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,64,64,128,1,float16,float16,0,1.0776586532592773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,64,64,128,1,float16,fp8,0,1.0765173435211182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,64,64,128,1,fp8,fp8,0,1.2988959948221843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,1,64,128,1,float16,float16,0,1.076522668202718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,8,64,0,1,float16,fp8,0,9.7215944925944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,8,64,0,1,fp8,fp8,0,8.799269358317057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,1,64,128,1,float16,fp8,0,1.0779679616292317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,1,64,128,1,fp8,fp8,0,1.2971573670705159
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,64,64,0,1,float16,float16,0,5.208448092142741
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,64,64,0,1,fp8,fp8,0,4.7788747151692705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,64,64,0,1,float16,fp8,0,5.169280052185059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,2,64,128,1,float16,float16,0,1.0780586401621501
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,2,64,128,1,float16,fp8,0,1.0827840169270833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,1,64,0,1,float16,float16,0,5.174458821614583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,2,64,128,1,fp8,fp8,0,1.292416016260783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,1,64,0,1,float16,fp8,0,5.187456130981445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,1,64,0,1,fp8,fp8,0,4.724768002827962
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,2,64,0,1,float16,float16,0,5.213813463846843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,2,64,0,1,float16,fp8,0,5.1691999435424805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,4,64,128,1,float16,float16,0,1.0767412980397542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,2,64,0,1,fp8,fp8,0,4.762234687805176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,4,64,128,1,float16,fp8,0,1.0734346707661946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,4,64,128,1,fp8,fp8,0,1.2934239705403645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,4,64,0,1,float16,float16,0,5.169599850972493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,8,64,128,1,float16,float16,0,1.0735627015431721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,4,64,0,1,float16,fp8,0,5.222720146179199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,4,64,0,1,fp8,fp8,0,4.736426671346028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,8,64,128,1,float16,fp8,0,1.0780586401621501
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,8,64,128,1,fp8,fp8,0,1.2923466364542644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,8,64,0,1,float16,float16,0,5.209280014038086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,8,64,0,1,float16,fp8,0,5.140042622884114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,8,64,0,1,fp8,fp8,0,4.769040107727051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,1,64,128,1,float16,float16,0,4.183157285054524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,1,64,128,1,float16,fp8,0,4.101301193237305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,2,64,128,1,float16,float16,0,4.235712051391602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,1,64,128,1,fp8,fp8,0,5.472725550333659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,2,64,128,1,float16,fp8,0,4.205856005350749
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,2,64,128,1,fp8,fp8,0,5.544122695922852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,4,64,128,1,float16,float16,0,4.247445424397786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,1,64,0,1,float16,float16,0,21.92333984375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,1,64,0,1,fp8,fp8,0,20.16484832763672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,4,64,128,1,float16,fp8,0,4.20195738474528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,1,64,0,1,float16,fp8,0,22.16137186686198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,4,64,128,1,fp8,fp8,0,5.571125030517578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,8,64,128,1,float16,float16,0,4.304368019104004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,2,64,0,1,float16,float16,0,22.521067301432293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,2,64,0,1,float16,fp8,0,21.899370829264324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,2,64,0,1,fp8,fp8,0,20.080992380777996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,8,64,128,1,float16,fp8,0,4.2590131759643555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,8,64,128,1,fp8,fp8,0,5.616463979085286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,64,64,128,1,float16,float16,0,2.3379467328389487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,4,64,0,1,float16,float16,0,22.169764200846355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,64,64,128,1,float16,fp8,0,2.337392012278239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,64,64,128,1,fp8,fp8,0,3.133082707722982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,4,64,0,1,fp8,fp8,0,20.078698476155598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,4,64,0,1,float16,fp8,0,22.27423350016276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,1,64,128,1,float16,float16,0,2.1236960093180337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,8,64,0,1,float16,float16,0,22.21050771077474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,1,64,128,1,float16,fp8,0,2.0847039222717285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,64,64,0,1,float16,float16,0,11.52243169148763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,64,64,0,1,float16,fp8,0,11.411994934082031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,1,64,128,1,fp8,fp8,0,2.789482752482096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,8,64,0,1,fp8,fp8,0,20.145579020182293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,64,64,0,1,fp8,fp8,0,10.475311915079752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,2,64,128,1,float16,float16,0,2.138026714324951
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,8,64,0,1,float16,fp8,0,22.38117218017578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,1,64,0,1,float16,float16,0,11.527792612711588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,2,64,128,1,float16,fp8,0,2.096394697825114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,2,64,128,1,fp8,fp8,0,2.803797403971354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,1,64,0,1,float16,fp8,0,11.07855478922526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,1,64,0,1,fp8,fp8,0,10.093493143717447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,2,64,0,1,float16,float16,0,11.08254877726237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,4,64,128,1,float16,float16,0,2.147461255391439
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,4,64,128,1,float16,fp8,0,2.109615961710612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,2,64,0,1,fp8,fp8,0,10.080490748087565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,2,64,0,1,float16,fp8,0,11.181434631347656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,4,64,128,1,fp8,fp8,0,2.7958027521769204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,8,64,128,1,float16,float16,0,2.1563307444254556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,8,64,128,1,float16,fp8,0,2.118565400441488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,8,64,128,1,fp8,fp8,0,2.8349440892537436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,64,64,128,1,float16,float16,0,1.2324533462524414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,64,64,128,1,float16,fp8,0,1.2503999869028728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,4,64,0,1,float16,float16,0,11.115071614583334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,4,64,0,1,fp8,fp8,0,10.104031880696615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,64,64,128,1,fp8,fp8,0,1.6207466125488281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,4,64,0,1,float16,fp8,0,11.184378306070963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,64,64,0,1,float16,float16,0,5.817045211791992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,8,64,0,1,float16,float16,0,11.137866973876953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,8,64,0,1,fp8,fp8,0,10.114410400390625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,1,64,128,1,float16,float16,0,1.1547306378682454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,8,64,0,1,float16,fp8,0,11.028010050455729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,1,64,128,1,float16,fp8,0,1.1429920196533203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,1,64,128,1,fp8,fp8,0,1.5086453755696614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,2,64,128,1,float16,float16,0,1.1634079615275066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,64,64,0,1,float16,fp8,0,5.742848078409831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,64,64,0,1,fp8,fp8,0,5.298298517862956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,2,64,128,1,float16,fp8,0,1.152997334798177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,2,64,128,1,fp8,fp8,0,1.4969065984090169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,1,64,0,1,float16,float16,0,5.80734380086263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,4,64,128,1,float16,float16,0,1.1732640266418457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,1,64,0,1,fp8,fp8,0,5.150981267293294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,1,64,0,1,float16,fp8,0,5.662298838297526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,4,64,128,1,float16,fp8,0,1.1510186990102131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,2,64,0,1,float16,float16,0,5.760288238525391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,4,64,128,1,fp8,fp8,0,1.5097972551981609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,8,64,128,1,float16,float16,0,1.1717759768168132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,2,64,0,1,fp8,fp8,0,5.15773868560791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,2,64,0,1,float16,fp8,0,5.656880060831706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,8,64,128,1,float16,fp8,0,1.1492693424224854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,8,64,128,1,fp8,fp8,0,1.516149361928304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,4,64,0,1,float16,float16,0,5.669514973958333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,64,64,128,1,float16,float16,0,0.812730630238851
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,64,64,128,1,float16,fp8,0,0.8156373500823975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,4,64,0,1,float16,fp8,0,5.634506861368815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,4,64,0,1,fp8,fp8,0,5.242821375528972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,64,64,128,1,fp8,fp8,0,0.9844000339508057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,8,64,0,1,float16,float16,0,5.751855850219727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,64,64,0,1,float16,float16,0,3.1169281005859375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,8,64,0,1,fp8,fp8,0,5.218565305074056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,8,64,0,1,float16,fp8,0,5.641477584838867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,64,64,0,1,float16,fp8,0,3.0856479008992515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,64,64,0,1,fp8,fp8,0,2.8424533208211265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,1,64,128,1,float16,float16,0,0.8121066888173422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,1,64,128,1,float16,fp8,0,0.8154293696085612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,1,64,128,1,fp8,fp8,0,0.9808746973673502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,1,64,0,1,float16,float16,0,3.096325238545736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,1,64,0,1,float16,fp8,0,3.0889174143473306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,2,64,128,1,float16,float16,0,0.8091626962025961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,1,64,0,1,fp8,fp8,0,2.8383893966674805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,2,64,128,1,float16,fp8,0,0.8125973542531332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,2,64,0,1,float16,float16,0,3.0845600763956704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,4,64,128,1,fp8,fp8,0,0.9815466403961182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,2,64,128,1,fp8,fp8,0,0.9809919993082682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,2,64,0,1,float16,fp8,0,3.085322697957357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,2,64,0,1,fp8,fp8,0,2.828362782796224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,4,64,128,1,float16,float16,0,0.8139146963755289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,4,64,128,1,float16,fp8,0,0.8176533381144205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,4,64,0,1,float16,float16,0,3.048426628112793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,8,64,128,1,float16,float16,0,0.8143413066864014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,4,64,0,1,fp8,fp8,0,2.8463732401529946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,4,64,0,1,float16,fp8,0,3.0791200002034507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,8,64,128,1,float16,fp8,0,0.8129386901855469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,8,64,128,1,fp8,fp8,0,0.9811200300852457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,8,64,0,1,float16,float16,0,3.068256060282389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,8,64,0,1,float16,fp8,0,3.0745973587036133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,8,64,0,1,fp8,fp8,0,2.8229548136393228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,1,64,128,1,float16,float16,0,3.4637120564778647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,1,64,128,1,float16,fp8,0,3.3954292933146157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,1,64,128,1,fp8,fp8,0,4.520330746968587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,2,64,128,1,float16,float16,0,3.5284318923950195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,2,64,128,1,float16,fp8,0,3.482250531514486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,1,64,0,1,float16,float16,0,15.681317647298178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,2,64,128,1,fp8,fp8,0,4.617184003194173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,1,64,0,1,fp8,fp8,0,14.247659047444662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,4,64,128,1,float16,float16,0,3.529936154683431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,1,64,0,1,float16,fp8,0,15.575055440266928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,4,64,128,1,float16,fp8,0,3.4909280141194663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,2,64,0,1,float16,float16,0,15.58196767171224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,4,64,128,1,fp8,fp8,0,4.6062774658203125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,2,64,0,1,fp8,fp8,0,14.313482920328775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,2,64,0,1,float16,fp8,0,15.857322692871094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,8,64,128,1,float16,float16,0,3.5577920277913413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,4,64,0,1,float16,float16,0,16.275349934895832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,8,64,128,1,float16,fp8,0,3.514794667561849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,8,64,128,1,fp8,fp8,0,4.6383412679036455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,4,64,0,1,float16,fp8,0,15.863919576009115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,4,64,0,1,fp8,fp8,0,14.347381591796875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,64,64,128,1,float16,float16,0,1.9499306678771973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,64,64,128,1,float16,fp8,0,1.9408480326334636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,8,64,0,1,float16,float16,0,15.666645050048828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,64,64,128,1,fp8,fp8,0,2.576848030090332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,64,64,0,1,float16,float16,0,8.139776229858398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,8,64,0,1,float16,fp8,0,17.0033442179362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,8,64,0,1,fp8,fp8,0,14.365061442057291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,1,64,128,1,float16,float16,0,1.770410696665446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,64,64,0,1,float16,fp8,0,8.092266718546549
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,64,64,0,1,fp8,fp8,0,7.505738576253255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,1,64,128,1,float16,fp8,0,1.7489013671875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,1,64,128,1,fp8,fp8,0,2.337205410003662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,1,64,0,1,float16,float16,0,7.864351908365886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,2,64,128,1,float16,float16,0,1.776917298634847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,2,64,128,1,float16,fp8,0,1.7460907300313313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,1,64,0,1,float16,fp8,0,7.922373453776042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,1,64,0,1,fp8,fp8,0,7.196383794148763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,2,64,128,1,fp8,fp8,0,2.3303306897481284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,4,64,128,1,float16,float16,0,1.779861291249593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,2,64,0,1,float16,float16,0,7.893594741821289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,4,64,128,1,float16,fp8,0,1.7651573816935222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,4,64,128,1,fp8,fp8,0,2.3270187377929688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,8,64,128,1,float16,float16,0,1.8006879488627117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,8,64,128,1,float16,fp8,0,1.7633973757425945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,2,64,0,1,fp8,fp8,0,7.19761594136556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,2,64,0,1,float16,fp8,0,7.901797612508138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,8,64,128,1,fp8,fp8,0,2.35750404993693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,4,64,0,1,float16,float16,0,7.87548828125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,64,64,128,1,float16,float16,0,1.0360533396402996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,4,64,0,1,fp8,fp8,0,7.213813145955403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,4,64,0,1,float16,fp8,0,8.006160100301107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,64,64,128,1,float16,fp8,0,1.0565280119578044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,64,64,128,1,fp8,fp8,0,1.363317330678304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,8,64,0,1,float16,float16,0,7.946559906005859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,1,64,128,1,float16,float16,0,0.9752853711446127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,64,64,0,1,float16,float16,0,4.210176150004069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,1,64,128,1,float16,fp8,0,0.971839984258016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,64,64,0,1,float16,fp8,0,4.150725364685059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,1,64,128,1,fp8,fp8,0,1.2574559847513835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,64,64,0,1,fp8,fp8,0,3.8502985636393228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,8,64,0,1,fp8,fp8,0,7.236650466918945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,8,64,0,1,float16,fp8,0,7.918848037719727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,1,64,0,1,float16,float16,0,4.115824063618978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,2,64,128,1,float16,float16,0,0.9753386974334717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,1,64,0,1,float16,fp8,0,4.114389419555664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,1,64,0,1,fp8,fp8,0,3.7294187545776367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,2,64,0,1,float16,float16,0,4.099797248840332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,2,64,128,1,float16,fp8,0,0.9528480370839437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,2,64,128,1,fp8,fp8,0,1.2378506660461426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,2,64,0,1,float16,fp8,0,4.027701377868652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,4,64,128,1,float16,float16,0,0.9742720127105713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,4,64,128,1,float16,fp8,0,0.9549919764200846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,2,64,0,1,fp8,fp8,0,3.7293599446614585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,4,64,0,1,float16,float16,0,4.141605377197266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,4,64,128,1,fp8,fp8,0,1.2503413359324138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,8,64,128,1,float16,float16,0,0.9769759972890218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,4,64,0,1,float16,fp8,0,4.030511856079102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,4,64,0,1,fp8,fp8,0,3.733130772908529
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,8,64,128,1,float16,fp8,0,0.9606880346934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,8,64,128,1,fp8,fp8,0,1.2600959936777751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,8,64,0,1,float16,float16,0,4.052874565124512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,64,64,128,1,float16,float16,0,0.6837173302968343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,8,64,0,1,float16,fp8,0,4.076234817504883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,64,64,128,1,float16,fp8,0,0.6829973061879476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,8,64,0,1,fp8,fp8,0,3.7348000208536782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,64,64,0,1,float16,float16,0,2.211631933848063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,64,64,128,1,fp8,fp8,0,0.8222400347391764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,64,64,0,1,float16,fp8,0,2.242213408152262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,64,64,0,1,fp8,fp8,0,2.071983973185221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,1,64,128,1,float16,float16,0,0.6829866568247477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,1,64,128,1,float16,fp8,0,0.6832373142242432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,1,64,0,1,float16,float16,0,2.2445759773254395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,1,64,128,1,fp8,fp8,0,0.8246826330820719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,1,64,0,1,float16,fp8,0,2.230192025502523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,1,64,0,1,fp8,fp8,0,2.068496068318685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,2,64,128,1,float16,float16,0,0.682965358098348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,2,64,128,1,float16,fp8,0,0.6834399700164795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,2,64,128,1,fp8,fp8,0,0.8205599784851074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,2,64,0,1,float16,float16,0,2.2274133364359536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,4,64,128,1,float16,float16,0,0.6817493438720703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,4,64,128,1,float16,fp8,0,0.6836746533711752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,2,64,0,1,float16,fp8,0,2.2457547187805176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,4,64,0,1,fp8,fp8,0,2.071397304534912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,2,64,0,1,fp8,fp8,0,2.082042694091797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,4,64,128,1,fp8,fp8,0,0.8237120310465494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,4,64,0,1,float16,float16,0,2.226794719696045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,4,64,0,1,float16,fp8,0,2.2428852717081704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,8,64,128,1,float16,float16,0,0.681546688079834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,8,64,128,1,float16,fp8,0,0.6823946634928385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,8,64,128,1,fp8,fp8,0,0.8248799641927084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,1,64,128,1,float16,fp8,0,5.396218617757161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,8,64,0,1,float16,float16,0,2.2451839447021484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,8,64,0,1,float16,fp8,0,2.24398406346639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,8,64,0,1,fp8,fp8,0,2.0826667149861655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,1,64,128,1,float16,float16,0,5.476661046346028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,1,64,128,1,fp8,fp8,0,7.179178873697917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,1,64,0,1,float16,float16,0,20.626789093017578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,2,64,128,1,float16,float16,0,5.5977223714192705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,2,64,128,1,float16,fp8,0,5.539407730102539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,1,64,0,1,float16,fp8,0,20.790677388509113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,1,64,0,1,fp8,fp8,0,18.85641098022461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,2,64,128,1,fp8,fp8,0,7.235157648722331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,2,64,0,1,float16,float16,0,21.07742436726888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,4,64,128,1,float16,float16,0,5.607754389444987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,4,64,128,1,float16,fp8,0,5.547082901000977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,4,64,128,1,fp8,fp8,0,7.231599807739258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,8,64,128,1,float16,float16,0,5.6411787668863935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,2,64,0,1,float16,fp8,0,20.643573760986328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,2,64,0,1,fp8,fp8,0,18.960015614827473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,8,64,128,1,float16,fp8,0,5.5536855061848955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,8,64,128,1,fp8,fp8,0,7.316314697265625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,64,64,128,1,float16,float16,0,3.093722661336263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,4,64,0,1,float16,float16,0,20.898975372314453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,4,64,0,1,fp8,fp8,0,19.007829030354817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,4,64,0,1,float16,fp8,0,20.993595123291016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,64,64,128,1,float16,fp8,0,3.048954645792643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,64,64,128,1,fp8,fp8,0,4.043183962504069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,1,64,128,1,float16,float16,0,2.719109217325846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,8,64,0,1,float16,float16,0,20.939076741536457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,64,64,0,1,float16,float16,0,10.75375493367513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,1,64,128,1,float16,fp8,0,2.683749198913574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,64,64,0,1,float16,fp8,0,10.961488087972006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,64,64,0,1,fp8,fp8,0,10.144314448038736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,1,64,128,1,fp8,fp8,0,3.657461484273275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,8,64,0,1,fp8,fp8,0,19.035717010498047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,8,64,0,1,float16,fp8,0,21.01566441853841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,2,64,128,1,float16,float16,0,2.7879308064778647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,1,64,0,1,float16,float16,0,10.315701166788736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,2,64,128,1,float16,fp8,0,2.7435731887817383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,1,64,0,1,fp8,fp8,0,9.474981307983398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,1,64,0,1,float16,fp8,0,10.371503829956055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,2,64,0,1,float16,float16,0,10.600586573282877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,2,64,128,1,fp8,fp8,0,3.6661758422851562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,4,64,128,1,float16,float16,0,2.7778612772623696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,2,64,0,1,float16,fp8,0,10.393173217773438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,4,64,128,1,float16,fp8,0,2.742997487386068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,2,64,0,1,fp8,fp8,0,9.54316775004069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,4,64,128,1,fp8,fp8,0,3.6841119130452475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,4,64,0,1,float16,float16,0,10.438447952270508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,4,64,0,1,float16,fp8,0,10.493845621744791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,8,64,128,1,float16,float16,0,2.8103040059407554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,8,64,128,1,float16,fp8,0,2.76584529876709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,4,64,0,1,fp8,fp8,0,9.531882603963217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,8,64,128,1,fp8,fp8,0,3.6943947474161782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,8,64,0,1,float16,float16,0,10.404160181681315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,64,64,128,1,float16,float16,0,1.5435627301534016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,8,64,0,1,float16,fp8,0,10.425829569498697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,64,64,128,1,float16,fp8,0,1.5589547157287598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,8,64,0,1,fp8,fp8,0,9.631711959838867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,64,64,0,1,float16,float16,0,5.390848159790039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,64,64,128,1,fp8,fp8,0,2.0486879348754883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,1,64,128,1,float16,float16,0,1.4210186004638672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,1,64,128,1,float16,fp8,0,1.3956640561421711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,64,64,0,1,fp8,fp8,0,4.990261395772298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,64,64,0,1,float16,fp8,0,5.494533538818359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,1,64,0,1,float16,float16,0,5.195311864217122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,1,64,128,1,fp8,fp8,0,1.862837314605713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,2,64,128,1,float16,float16,0,1.43230406443278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,1,64,0,1,float16,fp8,0,5.26801077524821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,1,64,0,1,fp8,fp8,0,4.786586761474609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,2,64,0,1,float16,float16,0,5.280336062113444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,2,64,128,1,float16,fp8,0,1.405610720316569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,2,64,128,1,fp8,fp8,0,1.857653299967448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,4,64,128,1,float16,float16,0,1.4343573252360027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,2,64,0,1,float16,fp8,0,5.1708478927612305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,2,64,0,1,fp8,fp8,0,4.806714693705241
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,4,64,128,1,float16,fp8,0,1.4050505956013997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,4,64,0,1,float16,float16,0,5.287455876668294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,4,64,128,1,fp8,fp8,0,1.871008078257243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,4,64,0,1,float16,fp8,0,5.22377077738444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,8,64,128,1,float16,float16,0,1.441813309987386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,4,64,0,1,fp8,fp8,0,4.815717379252116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,8,64,128,1,float16,fp8,0,1.4143519401550293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,8,64,128,1,fp8,fp8,0,1.871840000152588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,64,64,128,1,float16,float16,0,0.8371413548787435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,8,64,0,1,float16,float16,0,5.220693270365397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,8,64,0,1,float16,fp8,0,5.304693222045898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,8,64,0,1,fp8,fp8,0,4.854042689005534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,64,64,128,1,float16,fp8,0,0.8434933026631674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,64,64,0,1,float16,float16,0,2.7564640045166016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,64,64,128,1,fp8,fp8,0,1.0891786416371663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,1,64,128,1,float16,float16,0,0.7849067052205404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,1,64,0,1,fp8,fp8,0,2.4855039914449057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,64,64,0,1,float16,fp8,0,2.7312746047973633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,1,64,128,1,float16,fp8,0,0.7696213722229004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,64,64,0,1,fp8,fp8,0,2.5934133529663086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,1,64,0,1,float16,float16,0,2.7261759440104165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,1,64,128,1,fp8,fp8,0,1.0019893646240234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,1,64,0,1,float16,fp8,0,2.690757433573405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,2,64,128,1,float16,float16,0,0.7761653264363607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,2,64,128,1,float16,fp8,0,0.7723146279652914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,2,64,128,1,fp8,fp8,0,1.0071199735005696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,2,64,0,1,float16,float16,0,2.6880534489949546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,2,64,0,1,float16,fp8,0,2.6941919326782227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,4,64,128,1,float16,float16,0,0.7814506689707438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,2,64,0,1,fp8,fp8,0,2.504432042439779
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,4,64,128,1,float16,fp8,0,0.7663679917653402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,4,64,128,1,fp8,fp8,0,1.0042346318562825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,4,64,0,1,float16,float16,0,2.7070134480794272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,4,64,0,1,float16,fp8,0,2.6834987004597983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,8,64,128,1,float16,float16,0,0.7866079807281494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,4,64,0,1,fp8,fp8,0,2.491722742716471
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,8,64,0,1,fp8,fp8,0,2.512885411580404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,8,64,0,1,float16,float16,0,2.7380800247192383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,8,64,128,1,float16,fp8,0,0.7777439753214518
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,8,64,128,1,fp8,fp8,0,1.0116426944732666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,8,64,0,1,float16,fp8,0,2.705018679300944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,64,64,128,1,float16,float16,0,0.5508480072021484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,64,64,0,1,float16,float16,0,1.531765302022298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,64,64,128,1,float16,fp8,0,0.5509759982426962
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,64,64,128,1,fp8,fp8,0,0.6675093173980713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,64,64,0,1,float16,fp8,0,1.5206133524576824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,64,64,0,1,fp8,fp8,0,1.4321120580037434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,1,64,128,1,float16,float16,0,0.5501279830932617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,1,64,0,1,float16,float16,0,1.5215946833292644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,1,64,128,1,float16,fp8,0,0.5502453247706095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,1,64,128,1,fp8,fp8,0,0.666213313738505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,1,64,0,1,float16,fp8,0,1.5236852963765461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,1,64,0,1,fp8,fp8,0,1.4303199450174968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,2,64,128,1,float16,float16,0,0.5503040154774984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,2,64,0,1,float16,float16,0,1.5317546526590984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,2,64,128,1,float16,fp8,0,0.5506026744842529
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,2,64,128,1,fp8,fp8,0,0.662773331006368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,2,64,0,1,float16,fp8,0,1.5248053868611653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,4,64,128,1,float16,float16,0,0.5487893422444662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,2,64,0,1,fp8,fp8,0,1.42902406056722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,4,64,0,1,float16,float16,0,1.5314666430155437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,4,64,128,1,float16,fp8,0,0.5506506760915121
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,4,64,128,1,fp8,fp8,0,0.6663573185602824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,4,64,0,1,float16,fp8,0,1.5256106058756511
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,4,64,0,1,fp8,fp8,0,1.4201866785685222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,8,64,128,1,float16,float16,0,0.549946665763855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,8,64,0,1,float16,float16,0,1.5219465891520183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,8,64,128,1,float16,fp8,0,0.5510400136311849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,8,64,128,1,fp8,fp8,0,0.6661866505940756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,8,64,0,1,float16,fp8,0,1.5255093574523926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,8,64,0,1,fp8,fp8,0,1.4285546938578289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,1,64,128,1,float16,float16,0,4.084389368693034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,1,64,128,1,float16,fp8,0,4.023221333821614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,1,64,128,1,fp8,fp8,0,5.341712315877278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,1,64,0,1,float16,float16,0,12.419530232747396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,2,64,128,1,float16,float16,0,4.175279935201009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,2,64,128,1,float16,fp8,0,4.131994565327962
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,1,64,0,1,float16,fp8,0,12.200885772705078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,1,64,0,1,fp8,fp8,0,11.283072153727213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,2,64,128,1,fp8,fp8,0,5.468122482299805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,4,64,128,1,float16,float16,0,4.193903923034668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,2,64,0,1,float16,float16,0,12.418805440266928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,4,64,128,1,float16,fp8,0,4.165317217508952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,4,64,128,1,fp8,fp8,0,5.4542185465494795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,2,64,0,1,fp8,fp8,0,11.351487477620443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,2,64,0,1,float16,fp8,0,12.375381469726562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,8,64,128,1,float16,float16,0,4.245008150736491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,4,64,0,1,float16,float16,0,12.349669138590494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,8,64,128,1,float16,fp8,0,4.178991953531901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,4,64,0,1,float16,fp8,0,12.403301239013672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,8,64,128,1,fp8,fp8,0,5.519936243693034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,4,64,0,1,fp8,fp8,0,11.405364990234375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,64,64,128,1,float16,float16,0,2.2996959686279297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,64,64,128,1,float16,fp8,0,2.2950132687886557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,64,64,128,1,fp8,fp8,0,3.0233920415242515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,8,64,0,1,float16,float16,0,12.504277547200521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,64,64,0,1,float16,float16,0,6.480517069498698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,1,64,128,1,float16,float16,0,2.072106679280599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,64,64,0,1,fp8,fp8,0,5.9780426025390625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,8,64,0,1,float16,fp8,0,12.345322926839193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,64,64,0,1,float16,fp8,0,6.457418441772461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,1,64,128,1,float16,fp8,0,2.021120071411133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,1,64,128,1,fp8,fp8,0,2.6937440236409507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,8,64,0,1,fp8,fp8,0,11.442298889160156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,2,64,128,1,float16,float16,0,2.0678933461507163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,1,64,0,1,float16,float16,0,6.165242513020833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,2,64,128,1,float16,fp8,0,2.048703988393148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,2,64,128,1,fp8,fp8,0,2.7148958841959634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,1,64,0,1,float16,fp8,0,6.156442642211914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,4,64,128,1,float16,float16,0,2.076848030090332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,1,64,0,1,fp8,fp8,0,5.651552200317383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,2,64,0,1,float16,float16,0,6.146111806233724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,4,64,128,1,float16,fp8,0,2.044117291768392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,4,64,128,1,fp8,fp8,0,2.7580480575561523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,2,64,0,1,fp8,fp8,0,5.707551956176758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,2,64,0,1,float16,fp8,0,6.1183522542317705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,8,64,128,1,float16,float16,0,2.084965387980143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,4,64,0,1,float16,float16,0,6.180949529012044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,8,64,128,1,float16,fp8,0,2.0623253186543784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,4,64,0,1,fp8,fp8,0,5.690357208251953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,64,64,128,1,float16,float16,0,1.1685760021209717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,8,64,128,1,fp8,fp8,0,2.728405316670736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,4,64,0,1,float16,fp8,0,6.132389068603516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,64,64,128,1,float16,fp8,0,1.1729919910430908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,8,64,0,1,float16,float16,0,6.223461151123047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,64,64,128,1,fp8,fp8,0,1.5413600603739421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,64,64,0,1,float16,float16,0,3.2459147771199546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,1,64,128,1,float16,float16,0,1.0732693672180176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,1,64,128,1,float16,fp8,0,1.0601173241933186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,8,64,0,1,fp8,fp8,0,5.744394938151042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,8,64,0,1,float16,fp8,0,6.141130447387695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,64,64,0,1,float16,fp8,0,3.262415885925293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,64,64,0,1,fp8,fp8,0,3.0476051966349282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,1,64,0,1,float16,float16,0,3.1382506688435874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,1,64,128,1,fp8,fp8,0,1.4127039909362793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,1,64,0,1,float16,fp8,0,3.14794127146403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,2,64,128,1,float16,float16,0,1.0782453219095867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,1,64,0,1,fp8,fp8,0,2.8907413482666016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,2,64,0,1,float16,float16,0,3.1298933029174805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,2,64,128,1,float16,fp8,0,1.0676373640696208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,2,64,128,1,fp8,fp8,0,1.3963680267333984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,2,64,0,1,float16,fp8,0,3.1153761545817056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,2,64,0,1,fp8,fp8,0,2.908074696858724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,4,64,128,1,float16,float16,0,1.077834685643514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,4,64,128,1,float16,fp8,0,1.0632479985555012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,4,64,0,1,float16,float16,0,3.1419092814127603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,4,64,128,1,fp8,fp8,0,1.3959306081136067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,4,64,0,1,float16,fp8,0,3.1536213556925454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,4,64,0,1,fp8,fp8,0,2.891018549601237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,8,64,128,1,float16,float16,0,1.0811573664347331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,8,64,128,1,float16,fp8,0,1.0685546398162842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,8,64,0,1,float16,float16,0,3.1477333704630532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,8,64,128,1,fp8,fp8,0,1.41701873143514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,8,64,0,1,float16,fp8,0,3.130885442097982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,8,64,0,1,fp8,fp8,0,2.8846613566080728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,64,64,128,1,float16,float16,0,0.6273013353347778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,64,64,0,1,float16,float16,0,1.6596320470174153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,64,64,128,1,float16,fp8,0,0.6389653285344442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,64,64,128,1,fp8,fp8,0,0.8280479907989502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,64,64,0,1,float16,fp8,0,1.6756480534871419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,1,64,128,1,float16,float16,0,0.5921440124511719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,64,64,0,1,fp8,fp8,0,1.5894880294799805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,1,64,0,1,float16,float16,0,1.6163573265075684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,1,64,128,1,float16,fp8,0,0.5793706576029459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,1,64,128,1,fp8,fp8,0,0.7579253514607748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,1,64,0,1,float16,fp8,0,1.6157174110412598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,2,64,128,1,float16,float16,0,0.5901013215382894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,1,64,0,1,fp8,fp8,0,1.5125759442647297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,2,64,128,1,float16,fp8,0,0.5835893154144287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,2,64,0,1,float16,float16,0,1.63591464360555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,2,64,128,1,fp8,fp8,0,0.756330649058024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,2,64,0,1,float16,fp8,0,1.6177066167195637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,4,64,128,1,float16,float16,0,0.5920799970626831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,2,64,0,1,fp8,fp8,0,1.511722723642985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,4,64,128,1,float16,fp8,0,0.5843253135681152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,4,64,128,1,fp8,fp8,0,0.7656586964925131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,4,64,0,1,float16,float16,0,1.6295733451843262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,4,64,0,1,float16,fp8,0,1.6290826797485352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,4,64,0,1,fp8,fp8,0,1.5075146357218425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,8,64,128,1,float16,float16,0,0.59388800462087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,8,64,128,1,float16,fp8,0,0.587551991144816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,8,64,0,1,float16,float16,0,1.6293813387552898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,8,64,128,1,fp8,fp8,0,0.7680319945017496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,64,64,128,1,float16,float16,0,0.4201333522796631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,8,64,0,1,float16,fp8,0,1.6330827077229817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,8,64,0,1,fp8,fp8,0,1.5245226224263508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,64,64,0,1,float16,float16,0,0.9454879760742188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,64,64,128,1,float16,fp8,0,0.41941332817077637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,64,64,128,1,fp8,fp8,0,0.5082399845123291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,64,64,0,1,float16,fp8,0,0.9501866499582926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,1,64,128,1,float16,float16,0,0.42049066225687665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,64,64,0,1,fp8,fp8,0,0.863696018854777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,1,64,0,1,float16,float16,0,0.9504853089650472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,1,64,128,1,float16,fp8,0,0.4185546636581421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,1,64,128,1,fp8,fp8,0,0.5063466628392538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,1,64,0,1,float16,fp8,0,0.9509867032368978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,2,64,128,1,float16,float16,0,0.4198826551437378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,1,64,0,1,fp8,fp8,0,0.859440008799235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,2,64,0,1,float16,float16,0,0.9549226760864258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,2,64,128,1,float16,fp8,0,0.41992000738779706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,2,64,128,1,fp8,fp8,0,0.5062400102615356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,2,64,0,1,float16,fp8,0,0.9534880320231119
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,2,64,0,1,fp8,fp8,0,0.8611199855804443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,4,64,128,1,float16,float16,0,0.4183573325475057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,4,64,128,1,float16,fp8,0,0.41833066940307617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,4,64,0,1,float16,float16,0,0.9569706916809082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,4,64,128,1,fp8,fp8,0,0.5092693169911703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,4,64,0,1,float16,fp8,0,0.9541227022806803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,8,64,128,1,float16,float16,0,0.41766401131947833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,4,64,0,1,fp8,fp8,0,0.8598453203837076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,8,64,128,1,float16,fp8,0,0.41936532656351727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,8,64,0,1,float16,float16,0,0.9536906878153483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,8,64,128,1,fp8,fp8,0,0.5065973202387491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,8,64,0,1,float16,fp8,0,0.9536960124969482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,8,64,0,1,fp8,fp8,0,0.8637866973876953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,1,64,128,1,float16,float16,0,5.441488265991211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,1,64,128,1,float16,fp8,0,5.370693206787109
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,1,64,128,1,fp8,fp8,0,7.053957621256511
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,2,64,128,1,float16,float16,0,5.538703918457031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,1,64,0,1,float16,float16,0,12.070426940917969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,2,64,128,1,float16,fp8,0,5.502874374389648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,1,64,0,1,float16,fp8,0,11.969114939371744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,1,64,0,1,fp8,fp8,0,11.129231770833334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,2,64,128,1,fp8,fp8,0,7.2367197672526045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,4,64,128,1,float16,float16,0,5.590250651041667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,2,64,0,1,float16,float16,0,12.206826527913412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,4,64,128,1,float16,fp8,0,5.515626907348633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,2,64,0,1,float16,fp8,0,12.109306335449219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,2,64,0,1,fp8,fp8,0,11.21930185953776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,4,64,128,1,fp8,fp8,0,7.18394152323405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,8,64,128,1,float16,float16,0,5.6359202067057295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,4,64,0,1,float16,float16,0,12.188725789388021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,8,64,128,1,float16,fp8,0,5.575578689575195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,4,64,0,1,float16,fp8,0,12.226661682128906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,4,64,0,1,fp8,fp8,0,11.244277954101562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,64,64,128,1,float16,float16,0,3.0807679494222007
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,8,64,128,1,fp8,fp8,0,7.249488194783528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,8,64,0,1,float16,float16,0,12.236282348632812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,64,64,128,1,float16,fp8,0,3.0584853490193686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,64,64,128,1,fp8,fp8,0,3.9637600580851235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,64,64,0,1,float16,float16,0,6.445583979288737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,8,64,0,1,fp8,fp8,0,11.29739761352539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,1,64,128,1,float16,float16,0,2.6921278635660806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,8,64,0,1,float16,fp8,0,12.308607737223307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,64,64,0,1,float16,fp8,0,6.368431727091472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,64,64,0,1,fp8,fp8,0,6.000384012858073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,1,64,128,1,float16,fp8,0,2.6288426717122397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,1,64,128,1,fp8,fp8,0,3.5376853942871094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,1,64,0,1,float16,float16,0,6.042634963989258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,2,64,128,1,float16,float16,0,2.734208106994629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,2,64,128,1,float16,fp8,0,2.6745548248291016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,1,64,0,1,fp8,fp8,0,5.5893815358479815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,1,64,0,1,float16,fp8,0,5.954794565836589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,2,64,128,1,fp8,fp8,0,3.5727628072102866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,4,64,128,1,float16,float16,0,2.7481066385904946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,2,64,0,1,float16,float16,0,6.05833625793457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,4,64,128,1,float16,fp8,0,2.691653251647949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,2,64,0,1,float16,fp8,0,6.034661610921224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,2,64,0,1,fp8,fp8,0,5.692736307779948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,4,64,128,1,fp8,fp8,0,3.610896110534668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,4,64,0,1,float16,float16,0,6.07916259765625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,8,64,128,1,float16,float16,0,2.774847984313965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,4,64,0,1,float16,fp8,0,6.013989130655925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,4,64,0,1,fp8,fp8,0,5.620719909667969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,8,64,128,1,float16,fp8,0,2.725914637247721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,64,64,0,1,float16,float16,0,3.183552106221517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,64,64,128,1,float16,float16,0,1.524602731068929
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,64,64,128,1,float16,fp8,0,1.508405367533366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,8,64,0,1,float16,float16,0,6.0667463938395185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,8,64,128,1,fp8,fp8,0,3.6420160929361978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,8,64,0,1,fp8,fp8,0,5.697935740152995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,8,64,0,1,float16,fp8,0,6.075146357218425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,64,64,128,1,fp8,fp8,0,1.9754346211751301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,1,64,128,1,float16,float16,0,1.3681440353393555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,64,64,0,1,float16,fp8,0,3.1799465815226235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,64,64,0,1,fp8,fp8,0,3.016175905863444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,1,64,128,1,float16,fp8,0,1.345029354095459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,1,64,0,1,float16,float16,0,3.0223306020100913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,1,64,128,1,fp8,fp8,0,1.785919984181722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,1,64,0,1,float16,fp8,0,3.034367879231771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,1,64,0,1,fp8,fp8,0,2.8038934071858725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,2,64,128,1,float16,float16,0,1.3722507158915203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,2,64,128,1,fp8,fp8,0,1.80077330271403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,2,64,128,1,float16,fp8,0,1.3535092671712239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,2,64,0,1,float16,float16,0,3.0455573399861655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,4,64,128,1,float16,float16,0,1.3925546010335286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,2,64,0,1,float16,fp8,0,2.995290756225586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,2,64,0,1,fp8,fp8,0,2.8131465911865234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,4,64,128,1,float16,fp8,0,1.355178674062093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,4,64,128,1,fp8,fp8,0,1.8078719774882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,4,64,0,1,float16,float16,0,3.011317253112793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,8,64,128,1,float16,float16,0,1.3820533752441406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,8,64,128,1,float16,fp8,0,1.363866647084554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,4,64,0,1,float16,fp8,0,3.001994768778483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,4,64,0,1,fp8,fp8,0,2.8559306462605796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,8,64,128,1,fp8,fp8,0,1.8176320393880208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,8,64,0,1,float16,float16,0,3.0339574813842773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,64,64,128,1,float16,float16,0,0.783578634262085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,8,64,0,1,float16,fp8,0,3.0366134643554688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,8,64,0,1,fp8,fp8,0,2.828197479248047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,64,64,0,1,float16,float16,0,1.6141014099121094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,64,64,128,1,float16,fp8,0,0.7909119923909506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,64,64,128,1,fp8,fp8,0,1.036197344462077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,64,64,0,1,float16,fp8,0,1.6307679812113445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,64,64,0,1,fp8,fp8,0,1.5420212745666504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,1,64,128,1,float16,float16,0,0.7234826882680258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,1,64,128,1,float16,fp8,0,0.7074399789174398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,1,64,0,1,float16,float16,0,1.5582186381022136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,1,64,128,1,fp8,fp8,0,0.9385173320770264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,1,64,0,1,float16,fp8,0,1.5439947446187336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,1,64,0,1,fp8,fp8,0,1.459397315979004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,2,64,128,1,float16,float16,0,0.7277279694875082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,2,64,0,1,float16,float16,0,1.560746669769287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,2,64,128,1,float16,fp8,0,0.7122560342152914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,2,64,128,1,fp8,fp8,0,0.9365812937418619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,2,64,0,1,float16,fp8,0,1.5465599695841472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,4,64,128,1,float16,float16,0,0.7264106273651123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,2,64,0,1,fp8,fp8,0,1.4519680341084797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,4,64,0,1,float16,float16,0,1.5716373125712078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,4,64,128,1,float16,fp8,0,0.7180053393046061
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,4,64,128,1,fp8,fp8,0,0.9453972975413004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,4,64,0,1,float16,fp8,0,1.5499253273010254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,8,64,128,1,float16,float16,0,0.7303307056427002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,4,64,0,1,fp8,fp8,0,1.4469119707743328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,8,64,0,1,float16,float16,0,1.5665334065755208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,8,64,128,1,float16,fp8,0,0.7200106779734293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,8,64,128,1,fp8,fp8,0,0.9544320106506348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,8,64,0,1,float16,fp8,0,1.5636746088663738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,8,64,0,1,fp8,fp8,0,1.463391939798991
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,64,64,128,1,float16,float16,0,0.4278186559677124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,64,64,0,1,float16,float16,0,0.8519573211669922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,64,64,128,1,float16,fp8,0,0.43303998311360675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,64,64,128,1,fp8,fp8,0,0.5619893471399943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,64,64,0,1,float16,fp8,0,0.8607626756032308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,64,64,0,1,fp8,fp8,0,0.8250453472137451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,1,64,128,1,float16,float16,0,0.4033546845118205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,1,64,0,1,float16,float16,0,0.8270613352457682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,1,64,128,1,float16,fp8,0,0.39510401089986164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,1,64,128,1,fp8,fp8,0,0.5145066579182943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,1,64,0,1,float16,fp8,0,0.8186079661051432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,2,64,128,1,float16,float16,0,0.40166401863098145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,1,64,0,1,fp8,fp8,0,0.775434652964274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,2,64,0,1,float16,float16,0,0.8324693044026693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,2,64,128,1,float16,fp8,0,0.39843201637268066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,2,64,128,1,fp8,fp8,0,0.5185120105743408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,2,64,0,1,float16,fp8,0,0.8222506841023763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,2,64,0,1,fp8,fp8,0,0.7723306814829508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,4,64,128,1,float16,float16,0,0.4012960195541382
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,4,64,0,1,float16,float16,0,0.8283147017161051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,4,64,128,1,float16,fp8,0,0.39764265219370526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,4,64,128,1,fp8,fp8,0,0.5219039916992188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,4,64,0,1,float16,fp8,0,0.8276906808217367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,4,64,0,1,fp8,fp8,0,0.7782133420308431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,8,64,128,1,float16,float16,0,0.4049973487854004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,8,64,0,1,float16,float16,0,0.8251787026723226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,8,64,128,1,float16,fp8,0,0.3980533281962077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,8,64,128,1,fp8,fp8,0,0.5213599999745687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,8,64,0,1,float16,fp8,0,0.831770658493042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,8,64,0,1,fp8,fp8,0,0.7853120168050131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,64,64,128,1,float16,float16,0,0.28922667105992633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,64,64,0,1,float16,float16,0,0.5081493457158407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,1,64,128,1,float16,fp8,0,0.28857600688934326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,64,64,128,1,float16,fp8,0,0.28867199023564655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,64,64,128,1,fp8,fp8,0,0.3489546775817871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,64,64,0,1,float16,fp8,0,0.5065866708755493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,64,64,0,1,fp8,fp8,0,0.4601866801579793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,1,64,128,1,float16,float16,0,0.2884213328361511
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,1,64,0,1,float16,float16,0,0.5109440088272095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,1,64,128,1,fp8,fp8,0,0.3492480119069417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,1,64,0,1,float16,fp8,0,0.5124693314234415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,1,64,0,1,fp8,fp8,0,0.4612640142440796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,2,64,128,1,float16,float16,0,0.2863893310228984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,2,64,0,1,float16,float16,0,0.508352001508077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,2,64,128,1,float16,fp8,0,0.28749332825342816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,2,64,128,1,fp8,fp8,0,0.350816011428833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,2,64,0,1,float16,fp8,0,0.5125600099563599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,2,64,0,1,fp8,fp8,0,0.4612746636072795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,4,64,128,1,float16,float16,0,0.28964267174402875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,4,64,0,1,float16,float16,0,0.5088053146998087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,4,64,128,1,float16,fp8,0,0.28728000322977704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,4,64,128,1,fp8,fp8,0,0.34815998872121173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,4,64,0,1,float16,fp8,0,0.509173313776652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,4,64,0,1,fp8,fp8,0,0.4627306858698527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,8,64,128,1,float16,float16,0,0.2885493238766988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,8,64,0,1,float16,float16,0,0.5097546577453613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,8,64,128,1,float16,fp8,0,0.288592000802358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,8,64,128,1,fp8,fp8,0,0.35045333703358966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,8,64,0,1,float16,fp8,0,0.5075573523839315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,8,64,0,1,fp8,fp8,0,0.46001601219177246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,1,64,128,1,float16,float16,0,4.031002680460612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,1,64,128,1,float16,fp8,0,3.948997179667155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,1,64,128,1,fp8,fp8,0,5.280511856079102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,2,64,128,1,float16,float16,0,4.104917208353679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,1,64,0,1,float16,float16,0,7.428234736124675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,1,64,0,1,fp8,fp8,0,6.88050651550293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,1,64,0,1,float16,fp8,0,7.371445337931315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,2,64,128,1,float16,fp8,0,4.04315185546875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,2,64,128,1,fp8,fp8,0,5.325743993123372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,2,64,0,1,float16,float16,0,7.55731201171875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,4,64,128,1,float16,float16,0,4.157375971476237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,4,64,128,1,float16,fp8,0,4.085573196411133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,2,64,0,1,float16,fp8,0,7.502794901529948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,2,64,0,1,fp8,fp8,0,6.962544123331706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,4,64,128,1,fp8,fp8,0,5.375759760538737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,4,64,0,1,float16,float16,0,7.627824147542317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,8,64,128,1,float16,float16,0,4.146005312601726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,8,64,128,1,float16,fp8,0,4.139381408691406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,4,64,0,1,fp8,fp8,0,7.0469919840494795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,4,64,0,1,float16,fp8,0,7.421674728393555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,8,64,128,1,fp8,fp8,0,5.400549570719401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,64,64,128,1,float16,float16,0,2.264533360799154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,8,64,0,1,float16,float16,0,7.618122736612956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,64,64,128,1,float16,fp8,0,2.2288640340169272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,64,64,0,1,float16,float16,0,3.9973653157552085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,8,64,0,1,float16,fp8,0,7.600053151448567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,64,64,128,1,fp8,fp8,0,2.9594345092773438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,8,64,0,1,fp8,fp8,0,7.0731627146403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,64,64,0,1,float16,fp8,0,3.9421119689941406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,1,64,128,1,float16,float16,0,2.0238720575968423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,64,64,0,1,fp8,fp8,0,3.7666826248168945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,1,64,128,1,float16,fp8,0,1.985472043355306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,1,64,0,1,float16,float16,0,3.712922732035319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,1,64,128,1,fp8,fp8,0,2.6342293421427407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,1,64,0,1,float16,fp8,0,3.651887893676758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,2,64,128,1,float16,float16,0,2.023909409840902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,2,64,128,1,float16,fp8,0,1.9746079444885254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,1,64,0,1,fp8,fp8,0,3.425882657368978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,2,64,0,1,float16,float16,0,3.724789301554362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,2,64,128,1,fp8,fp8,0,2.646725336710612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,4,64,128,1,float16,float16,0,2.020085334777832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,4,64,128,1,float16,fp8,0,1.9849707285563152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,2,64,0,1,float16,fp8,0,3.6893491744995117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,2,64,0,1,fp8,fp8,0,3.449258804321289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,4,64,128,1,fp8,fp8,0,2.6444640159606934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,4,64,0,1,float16,float16,0,3.728800137837728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,8,64,128,1,float16,float16,0,2.0306933720906577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,4,64,0,1,float16,fp8,0,3.7255465189615884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,4,64,0,1,fp8,fp8,0,3.4816853205362954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,8,64,128,1,float16,fp8,0,2.013173262278239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,8,64,128,1,fp8,fp8,0,2.6708319981892905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,64,64,128,1,float16,float16,0,1.1309599876403809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,8,64,0,1,float16,float16,0,3.7669385274251304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,64,64,128,1,float16,fp8,0,1.135263999303182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,8,64,0,1,float16,fp8,0,3.711008071899414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,64,64,0,1,float16,float16,0,2.006714661916097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,8,64,0,1,fp8,fp8,0,3.5087947845458984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,64,64,128,1,fp8,fp8,0,1.4941280682881672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,64,64,0,1,float16,fp8,0,1.9892212549845378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,1,64,128,1,float16,float16,0,1.035754680633545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,64,64,0,1,fp8,fp8,0,1.8865706125895183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,1,64,128,1,float16,fp8,0,1.0189332962036133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,1,64,0,1,float16,float16,0,1.8785813649495442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,1,64,128,1,fp8,fp8,0,1.3672266006469727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,2,64,128,1,float16,float16,0,1.039743979771932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,1,64,0,1,float16,fp8,0,1.8740639686584473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,1,64,0,1,fp8,fp8,0,1.7532639503479004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,2,64,128,1,float16,fp8,0,1.014624039332072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,2,64,0,1,float16,float16,0,1.894437313079834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,2,64,128,1,fp8,fp8,0,1.3559519449869792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,2,64,0,1,float16,fp8,0,1.874245325724284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,2,64,0,1,fp8,fp8,0,1.7536907196044922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,4,64,128,1,float16,float16,0,1.0346773465474446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,4,64,0,1,float16,float16,0,1.8834452629089355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,4,64,128,1,float16,fp8,0,1.017365296681722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,4,64,128,1,fp8,fp8,0,1.3433653513590496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,4,64,0,1,float16,fp8,0,1.8674880663553874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,4,64,0,1,fp8,fp8,0,1.7565120061238606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,8,64,128,1,float16,float16,0,1.0439626375834148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,8,64,128,1,float16,fp8,0,1.0263573328653972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,8,64,0,1,float16,float16,0,1.8934772809346516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,8,64,128,1,fp8,fp8,0,1.3589332898457844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,8,64,0,1,float16,fp8,0,1.8798933029174805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,64,64,128,1,fp8,fp8,0,0.7814239660898844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,64,64,128,1,float16,float16,0,0.592522660891215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,64,64,0,1,float16,float16,0,1.0213066736857097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,8,64,0,1,fp8,fp8,0,1.756106694539388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,64,64,128,1,float16,fp8,0,0.6005973418553671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,64,64,0,1,float16,fp8,0,1.0317493279774983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,1,64,128,1,float16,float16,0,0.5474880139032999
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,64,64,0,1,fp8,fp8,0,0.9803466796875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,1,64,128,1,float16,fp8,0,0.5369866689046224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,1,64,0,1,float16,float16,0,0.9804373582204183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,1,64,128,1,fp8,fp8,0,0.7114400068918864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,1,64,0,1,float16,fp8,0,0.9667253494262695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,2,64,128,1,float16,float16,0,0.550384004910787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,1,64,0,1,fp8,fp8,0,0.9182720184326172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,2,64,128,1,float16,fp8,0,0.5393120050430298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,2,64,0,1,float16,float16,0,0.9818293253580729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,2,64,128,1,fp8,fp8,0,0.714026689529419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,2,64,0,1,float16,fp8,0,0.96998397509257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,2,64,0,1,fp8,fp8,0,0.9135626951853434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,4,64,128,1,float16,float16,0,0.5497013330459595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,4,64,128,1,float16,fp8,0,0.5414986610412598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,4,64,0,1,float16,float16,0,0.9858720302581787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,4,64,128,1,fp8,fp8,0,0.7152480284372965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,4,64,0,1,float16,fp8,0,0.973365306854248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,4,64,0,1,fp8,fp8,0,0.9188960393269857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,8,64,128,1,float16,float16,0,0.5521440108617147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,8,64,128,1,float16,fp8,0,0.5451840162277222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,8,64,0,1,float16,float16,0,0.9783306916554769
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,8,64,128,1,fp8,fp8,0,0.7188639640808105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,8,64,0,1,float16,fp8,0,0.9818933010101318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,8,64,0,1,fp8,fp8,0,0.9217387040456136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,64,64,128,1,float16,float16,0,0.32701865832010907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,64,64,0,1,float16,float16,0,0.5475786526997884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,64,64,128,1,float16,fp8,0,0.33191466331481934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,64,64,128,1,fp8,fp8,0,0.4318399826685588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,64,64,0,1,float16,fp8,0,0.5477333466211954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,64,64,0,1,fp8,fp8,0,0.5134026606877645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,1,64,128,1,float16,float16,0,0.30800533294677734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,1,64,0,1,float16,float16,0,0.5293226639429728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,1,64,128,1,float16,fp8,0,0.30405332644780475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,1,64,128,1,fp8,fp8,0,0.39680532614390057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,1,64,0,1,float16,fp8,0,0.5237600008646647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,1,64,0,1,fp8,fp8,0,0.48068801561991376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,2,64,128,1,float16,float16,0,0.306058665116628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,2,64,0,1,float16,float16,0,0.5275946855545044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,2,64,128,1,float16,fp8,0,0.3048853278160095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,2,64,128,1,fp8,fp8,0,0.3972533146540324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,2,64,0,1,float16,fp8,0,0.5273333390553793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,2,64,0,1,fp8,fp8,0,0.4819733301798503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,4,64,128,1,float16,float16,0,0.30898133913675946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,4,64,0,1,float16,float16,0,0.5297760168711344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,4,64,128,1,float16,fp8,0,0.30320000648498535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,4,64,128,1,fp8,fp8,0,0.397487998008728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,4,64,0,1,float16,fp8,0,0.5271466573079427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,4,64,0,1,fp8,fp8,0,0.4826879898707072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,8,64,128,1,float16,float16,0,0.310805340607961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,8,64,0,1,float16,float16,0,0.5319573481877645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,8,64,128,1,float16,fp8,0,0.30563199520111084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,8,64,128,1,fp8,fp8,0,0.401093324025472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,8,64,0,1,float16,fp8,0,0.5249866644541422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,8,64,0,1,fp8,fp8,0,0.4843573172887166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,64,64,128,1,float16,float16,0,0.22344533602396646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,64,64,0,1,float16,float16,0,0.32602133353551227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,64,64,128,1,float16,fp8,0,0.22392533222834268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,64,64,128,1,fp8,fp8,0,0.27210666735967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,64,64,0,1,float16,fp8,0,0.3261653383572896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,64,64,0,1,fp8,fp8,0,0.30508800347646076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,1,64,128,1,float16,float16,0,0.22073600689570108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,1,64,0,1,float16,float16,0,0.32044800122578937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,1,64,128,1,float16,fp8,0,0.22198933362960815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,1,64,128,1,fp8,fp8,0,0.26959999402364093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,1,64,0,1,float16,fp8,0,0.32222400108973187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,1,64,0,1,fp8,fp8,0,0.30567999680836994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,4,64,128,1,float16,float16,0,0.2221119999885559
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,2,64,128,1,float16,float16,0,0.2214133342107137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,2,64,0,1,float16,float16,0,0.32046933968861896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,2,64,128,1,float16,fp8,0,0.2206559975941976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,2,64,128,1,fp8,fp8,0,0.271013339360555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,2,64,0,1,float16,fp8,0,0.32156266768773395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,2,64,0,1,fp8,fp8,0,0.3051786621411641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,4,64,0,1,float16,float16,0,0.3216853340466817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,4,64,128,1,float16,fp8,0,0.22263999780019125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,4,64,128,1,fp8,fp8,0,0.2704319953918457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,4,64,0,1,float16,fp8,0,0.3208266695340474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,4,64,0,1,fp8,fp8,0,0.3035786747932434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,8,64,128,1,float16,float16,0,0.2213653326034546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,8,64,0,1,float16,float16,0,0.32172266642252606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,8,64,128,1,float16,fp8,0,0.2219840089480082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,8,64,128,1,fp8,fp8,0,0.2708959976832072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,8,64,0,1,float16,fp8,0,0.32225600878397626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,8,64,0,1,fp8,fp8,0,0.30483200152715045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,1,64,128,1,float16,float16,0,5.334394454956055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,1,64,128,1,float16,fp8,0,5.261887868245442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,1,64,128,1,fp8,fp8,0,6.940341313680013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,1,64,0,1,float16,float16,0,7.7784163157145185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,2,64,128,1,float16,float16,0,5.336618423461914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,1,64,0,1,float16,fp8,0,7.780469258626302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,2,64,128,1,float16,fp8,0,5.268735885620117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,1,64,0,1,fp8,fp8,0,7.2321014404296875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,2,64,0,1,float16,float16,0,7.845855712890625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,2,64,128,1,fp8,fp8,0,7.022954940795898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,4,64,128,1,float16,float16,0,5.3507944742838545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,2,64,0,1,fp8,fp8,0,7.331600189208984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,4,64,128,1,float16,fp8,0,5.306677182515462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,2,64,0,1,float16,fp8,0,7.754655838012695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,4,64,0,1,float16,float16,0,7.865333557128906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,4,64,128,1,fp8,fp8,0,7.068560282389323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,4,64,0,1,float16,fp8,0,7.7864424387613935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,8,64,128,1,float16,float16,0,5.388671875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,4,64,0,1,fp8,fp8,0,7.3646189371744795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,8,64,128,1,float16,fp8,0,5.3619944254557295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,8,64,128,1,fp8,fp8,0,7.100890477498372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,8,64,0,1,float16,float16,0,7.889071782430013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,64,64,128,1,float16,float16,0,3.014789263407389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,64,64,128,1,float16,fp8,0,2.9750773111979165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,8,64,0,1,float16,fp8,0,7.841322580973308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,8,64,0,1,fp8,fp8,0,7.413322448730469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,64,64,0,1,float16,float16,0,4.279813448588054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,64,64,128,1,fp8,fp8,0,3.8823893864949546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,1,64,128,1,float16,float16,0,2.6348586082458496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,64,64,0,1,float16,fp8,0,4.20033073425293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,64,64,0,1,fp8,fp8,0,4.040165265401204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,1,64,128,1,float16,fp8,0,2.5728480021158853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,1,64,0,1,float16,float16,0,3.875839869181315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,1,64,128,1,fp8,fp8,0,3.4544054667154946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,2,64,128,1,float16,float16,0,2.653125286102295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,1,64,0,1,float16,fp8,0,3.821957270304362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,1,64,0,1,fp8,fp8,0,3.605269432067871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,2,64,128,1,float16,fp8,0,2.6121973991394043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,2,64,0,1,float16,float16,0,3.9048051834106445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,2,64,128,1,fp8,fp8,0,3.4892800649007163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,2,64,0,1,fp8,fp8,0,3.6475038528442383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,2,64,0,1,float16,fp8,0,3.8782774607340493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,4,64,128,1,float16,float16,0,2.665034612019857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,4,64,128,1,float16,fp8,0,2.6355573336283364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,4,64,0,1,float16,float16,0,3.9126720428466797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,4,64,128,1,fp8,fp8,0,3.484853426615397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,4,64,0,1,float16,fp8,0,3.8774665196736655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,8,64,0,1,float16,fp8,0,3.8607571919759116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,8,64,128,1,float16,float16,0,2.6694294611612954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,4,64,0,1,fp8,fp8,0,3.6264479955037436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,8,64,0,1,float16,float16,0,3.94866148630778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,8,64,128,1,float16,fp8,0,2.626800060272217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,8,64,128,1,fp8,fp8,0,3.521765391031901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,64,64,128,1,float16,float16,0,1.481760025024414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,8,64,0,1,fp8,fp8,0,3.654149373372396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,64,64,0,1,float16,float16,0,2.111135959625244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,64,64,128,1,float16,fp8,0,1.4724052747090657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,64,64,128,1,fp8,fp8,0,1.9260106086730957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,64,64,0,1,float16,fp8,0,2.09279998143514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,64,64,0,1,fp8,fp8,0,1.992106596628825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,1,64,128,1,float16,float16,0,1.3267573515574138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,1,64,0,1,float16,float16,0,1.9444533983866374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,1,64,128,1,float16,fp8,0,1.3056106567382812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,1,64,128,1,fp8,fp8,0,1.7439039548238118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,1,64,0,1,float16,fp8,0,1.9206933975219727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,1,64,0,1,fp8,fp8,0,1.8073867162068684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,2,64,128,1,float16,float16,0,1.3337066968282063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,2,64,0,1,float16,float16,0,1.9375786781311035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,2,64,0,1,fp8,fp8,0,1.8223573366800945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,2,64,128,1,float16,fp8,0,1.3087626298268635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,2,64,128,1,fp8,fp8,0,1.7408266067504883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,4,64,128,1,float16,float16,0,1.336330731709798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,2,64,0,1,float16,fp8,0,1.9299519856770833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,4,64,128,1,float16,fp8,0,1.3137599627176921
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,4,64,0,1,float16,float16,0,1.9503733317057292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,4,64,128,1,fp8,fp8,0,1.7350400288899739
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,4,64,0,1,float16,fp8,0,1.9398773511250813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,8,64,128,1,float16,float16,0,1.344037373860677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,4,64,0,1,fp8,fp8,0,1.8211679458618164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,8,64,0,1,float16,float16,0,1.9614559809366863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,8,64,128,1,float16,fp8,0,1.3262933095296223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,8,64,128,1,fp8,fp8,0,1.7593065897623699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,8,64,0,1,float16,fp8,0,1.9519200325012207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,8,64,0,1,fp8,fp8,0,1.8235893249511719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,64,64,128,1,float16,float16,0,0.7614293098449707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,64,64,0,1,float16,float16,0,1.0697759787241619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,64,64,128,1,float16,fp8,0,0.7623786926269531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,64,64,128,1,fp8,fp8,0,0.991434653600057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,64,64,0,1,float16,fp8,0,1.0766987005869548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,64,64,0,1,fp8,fp8,0,1.0303946336110432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,1,64,128,1,float16,float16,0,0.693498690923055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,1,64,0,1,float16,float16,0,0.9991146723429362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,1,64,128,1,float16,fp8,0,0.6815093358357748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,1,64,128,1,fp8,fp8,0,0.8987572987874349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,1,64,0,1,float16,fp8,0,0.9929920037587484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,1,64,0,1,fp8,fp8,0,0.9332746664683024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,2,64,128,1,float16,float16,0,0.6930293242136637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,2,64,0,1,float16,float16,0,1.007525364557902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,2,64,128,1,float16,fp8,0,0.6836640040079752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,2,64,128,1,fp8,fp8,0,0.9000426928202311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,2,64,0,1,float16,fp8,0,0.9991679986317953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,2,64,0,1,fp8,fp8,0,0.935920000076294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,4,64,128,1,float16,float16,0,0.6941280364990234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,4,64,0,1,float16,float16,0,1.0045973459879558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,4,64,128,1,float16,fp8,0,0.6842133204142252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,4,64,128,1,fp8,fp8,0,0.9102506637573242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,4,64,0,1,float16,fp8,0,1.001535971959432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,4,64,0,1,fp8,fp8,0,0.9343893527984619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,8,64,128,1,float16,float16,0,0.702122688293457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,8,64,0,1,float16,float16,0,1.0090933640797932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,8,64,128,1,float16,fp8,0,0.6899786790211996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,8,64,128,1,fp8,fp8,0,0.9115253289540609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,8,64,0,1,float16,fp8,0,1.0022772947947185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,64,64,128,1,float16,float16,0,0.4042186737060547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,8,64,0,1,fp8,fp8,0,0.9505706628163656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,64,64,0,1,float16,float16,0,0.5572106838226318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,64,64,128,1,float16,fp8,0,0.4088746706644694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,64,64,128,1,fp8,fp8,0,0.5305600166320801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,64,64,0,1,float16,fp8,0,0.5670080184936523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,64,64,0,1,fp8,fp8,0,0.5417333443959554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,1,64,128,1,float16,float16,0,0.37190401554107666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,1,64,0,1,float16,float16,0,0.5346719821294149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,1,64,128,1,float16,fp8,0,0.3667093515396118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,1,64,128,1,fp8,fp8,0,0.4810346762339274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,1,64,0,1,float16,fp8,0,0.5271626710891724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,1,64,0,1,fp8,fp8,0,0.4978613456090291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,2,64,128,1,float16,float16,0,0.37087468306223553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,2,64,0,1,float16,float16,0,0.5328533252080282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,2,64,128,1,float16,fp8,0,0.36801600456237793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,2,64,128,1,fp8,fp8,0,0.48655466238657635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,2,64,0,1,float16,fp8,0,0.5241066614786783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,4,64,128,1,float16,float16,0,0.3752586841583252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,2,64,0,1,fp8,fp8,0,0.4980959892272949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,4,64,0,1,float16,float16,0,0.5349653164545695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,4,64,128,1,float16,fp8,0,0.36683201789855957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,4,64,128,1,fp8,fp8,0,0.48722132047017414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,4,64,0,1,float16,fp8,0,0.5316160122553507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,4,64,0,1,fp8,fp8,0,0.5023893515268961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,8,64,128,1,float16,float16,0,0.3740533192952474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,64,64,128,1,float16,float16,0,0.2267786661783854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,8,64,0,1,float16,float16,0,0.5357439915339152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,8,64,128,1,float16,fp8,0,0.3707573413848877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,8,64,128,1,fp8,fp8,0,0.49008532365163165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,8,64,0,1,float16,fp8,0,0.5292533238728842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,8,64,0,1,fp8,fp8,0,0.5027466615041097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,64,64,0,1,float16,float16,0,0.3088266650835673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,64,64,128,1,float16,fp8,0,0.22830933332443237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,64,64,128,1,fp8,fp8,0,0.2996373375256856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,64,64,0,1,float16,fp8,0,0.3118240038553874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,64,64,0,1,fp8,fp8,0,0.2904106577237447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,1,64,128,1,float16,float16,0,0.21077332894007364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,1,64,0,1,float16,float16,0,0.2929653326670329
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,1,64,128,1,float16,fp8,0,0.20922134319941202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,1,64,128,1,fp8,fp8,0,0.27516265710194904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,1,64,0,1,float16,fp8,0,0.28939733902613324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,1,64,0,1,fp8,fp8,0,0.26952000459035236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,2,64,128,1,float16,float16,0,0.21237866083780924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,2,64,0,1,float16,float16,0,0.2943733334541321
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,2,64,128,1,float16,fp8,0,0.20920532941818237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,2,64,128,1,fp8,fp8,0,0.2752586603164673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,2,64,0,1,float16,fp8,0,0.29292800029118854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,2,64,0,1,fp8,fp8,0,0.26919466257095337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,4,64,128,1,float16,float16,0,0.21197332938512167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,4,64,0,1,float16,float16,0,0.2977706591288249
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,4,64,128,1,float16,fp8,0,0.21044800678888956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,4,64,128,1,fp8,fp8,0,0.27577600876490277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,4,64,0,1,float16,fp8,0,0.2930453419685364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,4,64,0,1,fp8,fp8,0,0.26958932479222614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,8,64,128,1,float16,float16,0,0.2135093410809835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,8,64,0,1,float16,float16,0,0.29708800713221234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,8,64,128,1,float16,fp8,0,0.21100799242655435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,8,64,128,1,fp8,fp8,0,0.2792533238728841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,8,64,0,1,float16,fp8,0,0.2943626642227173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,8,64,0,1,fp8,fp8,0,0.2707039912541707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,64,64,128,1,float16,float16,0,0.1569973329703013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,64,64,0,1,float16,float16,0,0.19036799669265747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,64,64,128,1,float16,fp8,0,0.15822933117548624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,64,64,128,1,fp8,fp8,0,0.19220799207687378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,64,64,0,1,float16,fp8,0,0.1904639999071757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,64,64,0,1,fp8,fp8,0,0.18109333515167236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,1,64,128,1,float16,float16,0,0.15432533621788025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,1,64,0,1,float16,float16,0,0.18615466356277466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,1,64,128,1,float16,fp8,0,0.1529759963353475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,1,64,128,1,fp8,fp8,0,0.18476267655690512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,1,64,0,1,float16,fp8,0,0.1876479983329773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,1,64,0,1,fp8,fp8,0,0.17850667238235474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,2,64,128,1,float16,float16,0,0.1540426711241404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,2,64,0,1,float16,float16,0,0.1870186726252238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,2,64,128,1,float16,fp8,0,0.1550826629002889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,2,64,128,1,fp8,fp8,0,0.18498667081197104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,2,64,0,1,float16,fp8,0,0.18673600753148398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,2,64,0,1,fp8,fp8,0,0.17866667111714682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,4,64,128,1,float16,float16,0,0.15553067127863565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,4,64,0,1,float16,float16,0,0.1873813271522522
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,4,64,128,1,float16,fp8,0,0.15448000033696493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,4,64,128,1,fp8,fp8,0,0.18738667170206705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,4,64,0,1,float16,fp8,0,0.18742932875951132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,4,64,0,1,fp8,fp8,0,0.17795199155807495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,8,64,128,1,float16,float16,0,0.15430399775505066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,8,64,0,1,float16,float16,0,0.18714666366577148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,8,64,128,1,float16,fp8,0,0.15611199537913004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,8,64,128,1,fp8,fp8,0,0.18993600209554037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,8,64,0,1,float16,fp8,0,0.1874986688296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,8,64,0,1,fp8,fp8,0,0.18065067132314047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,1,64,128,1,float16,float16,0,3.9415359497070312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,1,64,128,1,float16,fp8,0,3.9192641576131186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,1,64,0,1,float16,float16,0,5.080922762552897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,1,64,128,1,fp8,fp8,0,5.0822188059488935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,1,64,0,1,float16,fp8,0,5.095269203186035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,2,64,128,1,float16,float16,0,3.9393812815348306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,1,64,0,1,fp8,fp8,0,4.672767957051595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,2,64,0,1,float16,float16,0,5.104576110839844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,2,64,128,1,float16,fp8,0,3.8810240427652993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,2,64,128,1,fp8,fp8,0,5.145285288492839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,2,64,0,1,fp8,fp8,0,4.684325218200684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,2,64,0,1,float16,fp8,0,5.043264071146647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,4,64,128,1,float16,float16,0,3.9463841120402017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,4,64,0,1,float16,float16,0,5.091471989949544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,4,64,128,1,float16,fp8,0,3.8986132939656577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,4,64,128,1,fp8,fp8,0,5.173573176066081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,4,64,0,1,float16,fp8,0,5.049519856770833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,8,64,128,1,float16,float16,0,3.9741598765055337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,4,64,0,1,fp8,fp8,0,4.708469390869141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,8,64,0,1,float16,float16,0,5.120095888773601
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,8,64,128,1,float16,fp8,0,3.9113279978434243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,64,64,128,1,float16,float16,0,2.219264030456543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,8,64,128,1,fp8,fp8,0,5.196858723958333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,8,64,0,1,float16,fp8,0,5.054448127746582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,8,64,0,1,fp8,fp8,0,4.756383895874023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,64,64,128,1,float16,fp8,0,2.1855947176615396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,64,64,0,1,float16,float16,0,2.8116480509440103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,64,64,128,1,fp8,fp8,0,2.885221481323242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,64,64,0,1,float16,fp8,0,2.7586825688680015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,1,64,128,1,float16,float16,0,1.9474612871805828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,64,64,0,1,fp8,fp8,0,2.66977596282959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,1,64,0,1,float16,float16,0,2.518949349721273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,1,64,128,1,float16,fp8,0,1.8985600471496582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,1,64,128,1,fp8,fp8,0,2.5432960192362466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,1,64,0,1,float16,fp8,0,2.487717310587565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,1,64,0,1,fp8,fp8,0,2.3259199460347495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,2,64,128,1,float16,float16,0,1.9588425954182942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,2,64,128,1,float16,fp8,0,1.9348640441894531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,2,64,0,1,float16,float16,0,2.5295093854268393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,2,64,128,1,fp8,fp8,0,2.576810677846273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,2,64,0,1,float16,fp8,0,2.4845226605733237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,2,64,0,1,fp8,fp8,0,2.3394079208374023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,4,64,128,1,float16,float16,0,1.9704480171203613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,4,64,128,1,float16,fp8,0,1.934234619140625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,4,64,0,1,float16,float16,0,2.5434133211771646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,4,64,128,1,fp8,fp8,0,2.5864480336507163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,4,64,0,1,float16,fp8,0,2.504586696624756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,8,64,128,1,float16,float16,0,1.9671467145284016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,4,64,0,1,fp8,fp8,0,2.361839930216471
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,8,64,128,1,float16,fp8,0,1.955407937367757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,8,64,0,1,float16,float16,0,2.547290643056234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,8,64,128,1,fp8,fp8,0,2.588559945424398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,8,64,0,1,float16,fp8,0,2.5264426867167153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,8,64,0,1,fp8,fp8,0,2.3831787109375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,64,64,128,1,float16,float16,0,1.105679988861084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,64,64,0,1,float16,float16,0,1.3882346153259277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,64,64,128,1,float16,fp8,0,1.1067306995391846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,64,64,128,1,fp8,fp8,0,1.4444905916849773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,64,64,0,1,float16,fp8,0,1.3933706283569336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,64,64,0,1,fp8,fp8,0,1.3213226795196533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,1,64,128,1,float16,float16,0,0.9957760175069174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,1,64,0,1,float16,float16,0,1.2894240220387776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,1,64,128,1,float16,fp8,0,0.9736800193786621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,1,64,128,1,fp8,fp8,0,1.2919092973073323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,1,64,0,1,float16,fp8,0,1.269210656483968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,1,64,0,1,fp8,fp8,0,1.1841440200805664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,2,64,128,1,float16,float16,0,0.9977227052052816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,2,64,0,1,float16,float16,0,1.2856746514638264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,2,64,128,1,float16,fp8,0,0.9812479813893636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,2,64,128,1,fp8,fp8,0,1.3086346785227458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,2,64,0,1,float16,fp8,0,1.26474134127299
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,2,64,0,1,fp8,fp8,0,1.1856746673583984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,4,64,128,1,float16,float16,0,1.0052106380462646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,4,64,0,1,float16,float16,0,1.29312531153361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,4,64,128,1,float16,fp8,0,0.9836320082346598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,4,64,128,1,fp8,fp8,0,1.3053653240203857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,4,64,0,1,float16,fp8,0,1.2747946580251057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,4,64,0,1,fp8,fp8,0,1.1999573707580566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,8,64,128,1,float16,float16,0,1.0046292940775554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,8,64,0,1,float16,float16,0,1.2864480018615723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,8,64,128,1,float16,fp8,0,0.9967733224232992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,8,64,128,1,fp8,fp8,0,1.3232853412628174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,8,64,0,1,float16,fp8,0,1.2791732947031658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,64,64,128,1,float16,float16,0,0.5725493431091309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,8,64,0,1,fp8,fp8,0,1.200266679128011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,64,64,0,1,float16,float16,0,0.7220213413238525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,64,64,128,1,float16,fp8,0,0.5735306739807129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,64,64,128,1,fp8,fp8,0,0.7493706544240316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,1,64,0,1,float16,float16,0,0.6696586608886719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,64,64,0,1,float16,fp8,0,0.722927967707316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,64,64,0,1,fp8,fp8,0,0.6868053277333578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,1,64,128,1,float16,float16,0,0.5206400156021118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,1,64,128,1,float16,fp8,0,0.511082649230957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,1,64,128,1,fp8,fp8,0,0.6820159753163656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,1,64,0,1,float16,fp8,0,0.6566400130589803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,1,64,0,1,fp8,fp8,0,0.6132853428522745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,2,64,128,1,float16,float16,0,0.5239306688308716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,2,64,0,1,float16,float16,0,0.6716319719950358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,2,64,128,1,float16,fp8,0,0.5130293369293213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,2,64,128,1,fp8,fp8,0,0.6801866690317789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,2,64,0,1,float16,fp8,0,0.6593013207117716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,2,64,0,1,fp8,fp8,0,0.6225653489430746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,4,64,128,1,float16,float16,0,0.5226613283157349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,4,64,0,1,float16,float16,0,0.667365312576294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,4,64,128,1,float16,fp8,0,0.5165493488311768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,4,64,128,1,fp8,fp8,0,0.6882293224334717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,4,64,0,1,float16,fp8,0,0.660314679145813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,4,64,0,1,fp8,fp8,0,0.6203733285268148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,8,64,128,1,float16,float16,0,0.5264053344726562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,8,64,0,1,float16,float16,0,0.6753439903259277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,8,64,128,1,float16,fp8,0,0.5190186500549316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,8,64,128,1,fp8,fp8,0,0.6874079704284668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,8,64,0,1,float16,fp8,0,0.6676106452941895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,64,64,128,1,float16,float16,0,0.30611199140548706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,8,64,0,1,fp8,fp8,0,0.6264853477478027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,64,64,0,1,float16,float16,0,0.3805600007375081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,64,64,128,1,float16,fp8,0,0.30932267506917316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,64,64,128,1,fp8,fp8,0,0.4052266677220662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,64,64,0,1,float16,fp8,0,0.38180267810821533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,64,64,0,1,fp8,fp8,0,0.35900266965230304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,1,64,128,1,float16,float16,0,0.28305600086847943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,1,64,0,1,float16,float16,0,0.3577920198440552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,1,64,128,1,float16,fp8,0,0.2778773307800293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,1,64,128,1,fp8,fp8,0,0.3678826491038005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,1,64,0,1,float16,fp8,0,0.3550613323847453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,1,64,0,1,fp8,fp8,0,0.32612266143163043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,2,64,128,1,float16,float16,0,0.2813546657562256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,2,64,0,1,float16,float16,0,0.3594346841176351
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,2,64,128,1,float16,fp8,0,0.2795199950536092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,2,64,128,1,fp8,fp8,0,0.3702293237050374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,2,64,0,1,float16,fp8,0,0.3526773452758789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,2,64,0,1,fp8,fp8,0,0.32833067576090497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,4,64,0,1,fp8,fp8,0,0.3277706702550252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,4,64,128,1,float16,float16,0,0.2832319935162862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,4,64,0,1,float16,float16,0,0.35861865679423016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,4,64,128,1,float16,fp8,0,0.2805333336194356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,4,64,128,1,fp8,fp8,0,0.37193600336710614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,4,64,0,1,float16,fp8,0,0.35517334938049316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,8,64,128,1,float16,float16,0,0.2857439915339152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,8,64,0,1,float16,float16,0,0.36231998602549237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,8,64,128,1,float16,fp8,0,0.2815306584040324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,8,64,128,1,fp8,fp8,0,0.3742239872614543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,8,64,0,1,float16,fp8,0,0.3550826708475749
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,8,64,0,1,fp8,fp8,0,0.33022934198379517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,64,64,128,1,float16,float16,0,0.1762453317642212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,64,64,0,1,float16,float16,0,0.20864532391230264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,64,64,128,1,float16,fp8,0,0.17758933703104654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,64,64,128,1,fp8,fp8,0,0.23231999079386392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,64,64,0,1,float16,fp8,0,0.21075199047724405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,64,64,0,1,fp8,fp8,0,0.2003999948501587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,1,64,0,1,fp8,fp8,0,0.18383467197418213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,2,64,128,1,float16,float16,0,0.16268799702326456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,1,64,128,1,float16,float16,0,0.16266133387883505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,1,64,0,1,float16,float16,0,0.1918720006942749
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,1,64,128,1,float16,fp8,0,0.16182933251063028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,1,64,128,1,fp8,fp8,0,0.21292267243067423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,1,64,0,1,float16,fp8,0,0.18825066089630127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,2,64,0,1,float16,float16,0,0.19158933560053507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,2,64,128,1,float16,fp8,0,0.1627893348534902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,2,64,128,1,fp8,fp8,0,0.2137706677118937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,2,64,0,1,float16,fp8,0,0.18862400452295938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,2,64,0,1,fp8,fp8,0,0.1835199991861979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,4,64,128,1,float16,float16,0,0.1641866664091746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,4,64,0,1,float16,float16,0,0.19158399105072021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,4,64,128,1,float16,fp8,0,0.16198933124542236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,4,64,128,1,fp8,fp8,0,0.21512534221013388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,4,64,0,1,float16,fp8,0,0.19155732790629068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,4,64,0,1,fp8,fp8,0,0.18414932489395142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,8,64,128,1,float16,float16,0,0.16452800234158835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,8,64,0,1,float16,float16,0,0.19417067368825278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,8,64,128,1,float16,fp8,0,0.16409066319465637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,8,64,128,1,fp8,fp8,0,0.2157599925994873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,8,64,0,1,float16,fp8,0,0.1906773249308268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,8,64,0,1,fp8,fp8,0,0.18600533405939737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,64,64,128,1,float16,float16,0,0.1242133378982544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,64,64,0,1,float16,float16,0,0.13595733046531677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,64,64,128,1,float16,fp8,0,0.12331733107566833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,64,64,128,1,fp8,fp8,0,0.1518239974975586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,64,64,0,1,float16,fp8,0,0.13638933499654135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,64,64,0,1,fp8,fp8,0,0.12902933359146118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,1,64,128,1,float16,float16,0,0.11890666683514912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,1,64,0,1,float16,float16,0,0.132314662138621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,1,64,128,1,float16,fp8,0,0.1195093293984731
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,1,64,128,1,fp8,fp8,0,0.142767995595932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,1,64,0,1,float16,fp8,0,0.1314826707045237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,1,64,0,1,fp8,fp8,0,0.12667733430862427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,2,64,128,1,float16,float16,0,0.11987732847531636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,2,64,0,1,float16,float16,0,0.13146133224169412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,2,64,128,1,float16,fp8,0,0.1188800036907196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,2,64,128,1,fp8,fp8,0,0.14421332875887552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,2,64,0,1,float16,fp8,0,0.13246400157610574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,2,64,0,1,fp8,fp8,0,0.12658666570981345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,4,64,128,1,float16,float16,0,0.11987200379371643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,4,64,0,1,float16,float16,0,0.1318986713886261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,4,64,128,1,float16,fp8,0,0.12082133690516154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,8,64,0,1,float16,float16,0,0.13174933195114136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,4,64,128,1,fp8,fp8,0,0.14458666245142618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,4,64,0,1,float16,fp8,0,0.13173866271972656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,4,64,0,1,fp8,fp8,0,0.1269493301709493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,8,64,128,1,float16,float16,0,0.11948266625404358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,8,64,128,1,float16,fp8,0,0.11999467015266418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,8,64,128,1,fp8,fp8,0,0.14410133163134256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,8,64,0,1,float16,fp8,0,0.13296533624331155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,8,64,0,1,fp8,fp8,0,0.12658133109410605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,1,64,128,1,float16,float16,0,4.732357343037923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,1,64,0,1,float16,float16,0,5.4645334879557295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,1,64,128,1,float16,fp8,0,4.738522529602051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,1,64,0,1,fp8,fp8,0,4.939104080200195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,1,64,0,1,float16,fp8,0,5.432352066040039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,2,64,128,1,float16,float16,0,4.79641596476237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,1,64,128,1,fp8,fp8,0,6.341162363688151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,2,64,0,1,float16,float16,0,5.513434727986653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,2,64,128,1,float16,fp8,0,4.789930661519368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,2,64,128,1,fp8,fp8,0,6.434991836547852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,2,64,0,1,float16,fp8,0,5.486693064371745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,4,64,128,1,float16,float16,0,4.80130672454834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,2,64,0,1,fp8,fp8,0,4.992543856302897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,4,64,128,1,float16,fp8,0,4.851194699605306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,4,64,0,1,float16,float16,0,5.571706771850586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,4,64,128,1,fp8,fp8,0,6.365338643391927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,4,64,0,1,float16,fp8,0,5.552064259847005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,4,64,0,1,fp8,fp8,0,4.986213366190593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,8,64,128,1,float16,float16,0,4.879472096761067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,8,64,128,1,float16,fp8,0,4.8377227783203125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,8,64,0,1,float16,float16,0,5.621344248453776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,64,64,128,1,float16,float16,0,2.740960121154785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,8,64,0,1,fp8,fp8,0,5.034570693969727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,8,64,0,1,float16,fp8,0,5.553994496663411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,8,64,128,1,fp8,fp8,0,6.451754887898763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,64,64,0,1,float16,float16,0,3.1065546671549478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,64,64,128,1,float16,fp8,0,2.67627747853597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,64,64,128,1,fp8,fp8,0,3.432762781778971
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,64,64,0,1,float16,fp8,0,3.045765240987142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,1,64,128,1,float16,float16,0,2.3716479937235513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,64,64,0,1,fp8,fp8,0,2.7212371826171875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,1,64,0,1,float16,float16,0,2.722853342692057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,1,64,128,1,float16,fp8,0,2.383146603902181
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,1,64,128,1,fp8,fp8,0,3.154394785563151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,1,64,0,1,float16,fp8,0,2.7235094706217446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,1,64,0,1,fp8,fp8,0,2.4633386929829917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,2,64,128,1,float16,float16,0,2.380629380544027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,2,64,0,1,float16,float16,0,2.721247990926107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,2,64,128,1,float16,fp8,0,2.3790027300516763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,4,64,128,1,float16,fp8,0,2.386021296183268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,2,64,128,1,fp8,fp8,0,3.169194539388021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,2,64,0,1,float16,fp8,0,2.7451839447021484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,2,64,0,1,fp8,fp8,0,2.470794677734375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,4,64,128,1,float16,float16,0,2.4094667434692383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,4,64,0,1,float16,float16,0,2.775871912638346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,4,64,128,1,fp8,fp8,0,3.1830666859944663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,4,64,0,1,float16,fp8,0,2.7410081227620444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,4,64,0,1,fp8,fp8,0,2.4875893592834473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,8,64,128,1,float16,float16,0,2.415583928426107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,8,64,0,1,float16,float16,0,2.7725067138671875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,8,64,128,1,float16,fp8,0,2.392181396484375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,64,64,128,1,float16,float16,0,1.365887959798177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,64,64,0,1,float16,float16,0,1.5579946835835774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,8,64,0,1,float16,fp8,0,2.759552001953125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,8,64,128,1,fp8,fp8,0,3.2225332260131836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,8,64,0,1,fp8,fp8,0,2.505669275919596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,64,64,128,1,float16,fp8,0,1.3404426574707031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,64,64,128,1,fp8,fp8,0,1.7242239316304524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,64,64,0,1,float16,fp8,0,1.517562707265218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,64,64,0,1,fp8,fp8,0,1.3599999745686848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,1,64,128,1,float16,float16,0,1.1999733448028564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,1,64,0,1,float16,float16,0,1.3771680196126301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,1,64,128,1,float16,fp8,0,1.2048319975535076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,1,64,128,1,fp8,fp8,0,1.5895147323608398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,1,64,0,1,float16,fp8,0,1.374959945678711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,1,64,0,1,fp8,fp8,0,1.2409813404083252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,2,64,128,1,float16,float16,0,1.2080960273742676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,2,64,0,1,float16,float16,0,1.378159999847412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,2,64,128,1,float16,fp8,0,1.2050399780273438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,2,64,128,1,fp8,fp8,0,1.5992800394694011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,2,64,0,1,float16,fp8,0,1.3896479606628418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,2,64,0,1,fp8,fp8,0,1.2369386355082195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,4,64,128,1,float16,float16,0,1.204954703648885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,4,64,0,1,float16,float16,0,1.388821283976237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,4,64,128,1,float16,fp8,0,1.2063519954681396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,4,64,128,1,fp8,fp8,0,1.5991679827372234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,4,64,0,1,float16,fp8,0,1.3827840487162273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,4,64,0,1,fp8,fp8,0,1.2463359832763672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,8,64,128,1,float16,float16,0,1.2153759797414143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,8,64,0,1,float16,float16,0,1.3874613444010417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,8,64,128,1,float16,fp8,0,1.209269364674886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,8,64,128,1,fp8,fp8,0,1.614144007364909
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,8,64,0,1,float16,fp8,0,1.3855306307474773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,8,64,0,1,fp8,fp8,0,1.249834696451823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,64,64,128,1,float16,float16,0,0.6956799825032552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,64,64,0,1,float16,float16,0,0.7876479625701904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,64,64,128,1,float16,fp8,0,0.6846559842427572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,64,64,128,1,fp8,fp8,0,0.8698826630910238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,64,64,0,1,float16,fp8,0,0.7715093294779459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,64,64,0,1,fp8,fp8,0,0.6896106402079264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,1,64,128,1,float16,float16,0,0.6148373285929362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,1,64,0,1,float16,float16,0,0.7036800384521484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,1,64,128,1,float16,fp8,0,0.617248018582662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,1,64,128,1,fp8,fp8,0,0.8174239794413248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,1,64,0,1,float16,fp8,0,0.706991990407308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,1,64,0,1,fp8,fp8,0,0.6305919885635376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,2,64,128,1,float16,float16,0,0.6167840162913004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,2,64,0,1,float16,float16,0,0.710640033086141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,2,64,128,1,float16,fp8,0,0.6182560125986735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,2,64,128,1,fp8,fp8,0,0.8151040077209473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,2,64,0,1,float16,fp8,0,0.7085119883219401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,2,64,0,1,fp8,fp8,0,0.6345013380050659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,4,64,128,1,float16,float16,0,0.6188106536865234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,4,64,0,1,float16,float16,0,0.7036960124969482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,4,64,128,1,float16,fp8,0,0.6170986493428549
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,4,64,128,1,fp8,fp8,0,0.8210720221201578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,4,64,0,1,float16,fp8,0,0.7051839828491211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,4,64,0,1,fp8,fp8,0,0.6324799855550131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,8,64,128,1,float16,float16,0,0.621018648147583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,8,64,0,1,float16,float16,0,0.7092693646748861
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,8,64,128,1,float16,fp8,0,0.6215253273646036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,8,64,128,1,fp8,fp8,0,0.8177653153737386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,8,64,0,1,float16,fp8,0,0.7080480257670084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,8,64,0,1,fp8,fp8,0,0.6390560070673624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,64,64,128,1,float16,float16,0,0.36208534240722656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,64,64,0,1,float16,float16,0,0.40858133633931476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,64,64,128,1,float16,fp8,0,0.35763732592264813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,64,64,128,1,fp8,fp8,0,0.45645864804585773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,64,64,0,1,float16,fp8,0,0.40064533551534015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,64,64,0,1,fp8,fp8,0,0.35387198130289715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,1,64,128,1,float16,float16,0,0.3227306604385376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,1,64,0,1,float16,float16,0,0.36907732486724854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,1,64,128,1,float16,fp8,0,0.3214240074157715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,1,64,128,1,fp8,fp8,0,0.42814934253692627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,1,64,0,1,float16,fp8,0,0.3689440091451009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,1,64,0,1,fp8,fp8,0,0.3309013247489929
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,2,64,128,1,float16,float16,0,0.32262933254241943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,2,64,0,1,float16,float16,0,0.36745067437489826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,2,64,128,1,float16,fp8,0,0.3258026639620463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,2,64,128,1,fp8,fp8,0,0.42714667320251465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,2,64,0,1,float16,fp8,0,0.37042665481567383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,2,64,0,1,fp8,fp8,0,0.33243733644485474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,4,64,128,1,float16,float16,0,0.3248053391774495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,4,64,0,1,float16,float16,0,0.37011198202768963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,4,64,128,1,float16,fp8,0,0.3225546677907308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,4,64,128,1,fp8,fp8,0,0.4261653423309326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,4,64,0,1,float16,fp8,0,0.37291733423868817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,4,64,0,1,fp8,fp8,0,0.3312693238258362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,8,64,128,1,float16,float16,0,0.32517866293589276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,8,64,0,1,float16,float16,0,0.37258132298787433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,8,64,128,1,float16,fp8,0,0.3261973261833191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,8,64,128,1,fp8,fp8,0,0.43137065569559735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,8,64,0,1,float16,fp8,0,0.37036800384521484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,8,64,0,1,fp8,fp8,0,0.3314773241678874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,64,64,128,1,float16,float16,0,0.1965493361155192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,64,64,0,1,float16,float16,0,0.2202720046043396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,1,64,0,1,float16,float16,0,0.19724800189336142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,1,64,128,1,float16,fp8,0,0.17550400892893472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,64,64,128,1,float16,fp8,0,0.19156267245610556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,64,64,128,1,fp8,fp8,0,0.24758932987848917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,64,64,0,1,float16,fp8,0,0.2174453337987264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,64,64,0,1,fp8,fp8,0,0.1882986625035604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,1,64,128,1,float16,float16,0,0.17361066738764444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,1,64,128,1,fp8,fp8,0,0.2344640096028646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,1,64,0,1,float16,fp8,0,0.19792000452677408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,1,64,0,1,fp8,fp8,0,0.1750133236249288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,2,64,128,1,float16,float16,0,0.1749653418858846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,2,64,0,1,float16,float16,0,0.1997119983037313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,2,64,128,1,float16,fp8,0,0.17610132694244385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,2,64,128,1,fp8,fp8,0,0.23385600248972574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,2,64,0,1,float16,fp8,0,0.1997386614481608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,2,64,0,1,fp8,fp8,0,0.1739786664644877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,4,64,128,1,float16,float16,0,0.17491199572881064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,4,64,0,1,float16,float16,0,0.1999359925587972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,4,64,128,1,float16,fp8,0,0.1755733291308085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,4,64,128,1,fp8,fp8,0,0.23483200867970785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,8,64,128,1,fp8,fp8,0,0.23517332474390665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,4,64,0,1,float16,fp8,0,0.2017973264058431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,4,64,0,1,fp8,fp8,0,0.1744746764500936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,8,64,128,1,float16,float16,0,0.17617066701253256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,8,64,0,1,float16,float16,0,0.2023573319117228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,8,64,128,1,float16,fp8,0,0.17628800868988037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,8,64,0,1,float16,fp8,0,0.20084800322850546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,8,64,0,1,fp8,fp8,0,0.17585599422454834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,64,64,128,1,float16,float16,0,0.11076800028483073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,64,64,0,1,float16,float16,0,0.11636267105738322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,64,64,128,1,float16,fp8,0,0.11168000102043152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,64,64,128,1,fp8,fp8,0,0.14057067036628723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,64,64,0,1,float16,fp8,0,0.11610666910807292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,64,64,0,1,fp8,fp8,0,0.10434133807818095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,1,64,128,1,float16,float16,0,0.09687466422716777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,1,64,0,1,float16,float16,0,0.10461866855621338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,1,64,128,1,float16,fp8,0,0.09869866569836934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,1,64,128,1,fp8,fp8,0,0.12795733412106833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,1,64,0,1,float16,fp8,0,0.10419733325640361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,1,64,0,1,fp8,fp8,0,0.09359467029571533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,2,64,128,1,float16,float16,0,0.09827733039855957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,2,64,0,1,float16,float16,0,0.10451199611028035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,2,64,128,1,float16,fp8,0,0.09796266754468282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,2,64,128,1,fp8,fp8,0,0.13219199577967325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,4,64,128,1,fp8,fp8,0,0.1304213305314382
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,2,64,0,1,float16,fp8,0,0.10504000385602315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,2,64,0,1,fp8,fp8,0,0.09437867005666097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,4,64,128,1,float16,float16,0,0.09753599762916565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,4,64,0,1,float16,float16,0,0.10413866241772969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,4,64,128,1,float16,fp8,0,0.09945066769917806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,4,64,0,1,float16,fp8,0,0.10461333394050598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,4,64,0,1,fp8,fp8,0,0.09540266791979472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,8,64,128,1,float16,float16,0,0.09968533118565877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,8,64,0,1,float16,float16,0,0.10718400279680888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,8,64,128,1,float16,fp8,0,0.0995306670665741
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,8,64,128,1,fp8,fp8,0,0.13306666413942972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,8,64,0,1,float16,fp8,0,0.10525332887967427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,8,64,0,1,fp8,fp8,0,0.09702933828035991
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,64,64,128,1,float16,float16,0,0.06427733103434245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,64,64,0,1,float16,float16,0,0.06534933547178905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,64,64,128,1,float16,fp8,0,0.06374933322270711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,64,64,128,1,fp8,fp8,0,0.08455999692281087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,64,64,0,1,float16,fp8,0,0.06471999982992808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,64,64,0,1,fp8,fp8,0,0.060677334666252136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,1,64,128,1,float16,float16,0,0.05835733314355215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,1,64,0,1,float16,float16,0,0.05971199770768484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,1,64,128,1,float16,fp8,0,0.05889600018660227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,1,64,128,1,fp8,fp8,0,0.07630933324495952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,1,64,0,1,float16,fp8,0,0.06017066538333893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,1,64,0,1,fp8,fp8,0,0.05611200133959452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,2,64,128,1,float16,float16,0,0.058117335041364036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,2,64,0,1,float16,float16,0,0.06036800146102905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,2,64,128,1,float16,fp8,0,0.05885866781075796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,2,64,128,1,fp8,fp8,0,0.07514133552710216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,2,64,0,1,float16,fp8,0,0.06053866446018219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,2,64,0,1,fp8,fp8,0,0.05597866574923197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,4,64,128,1,float16,float16,0,0.05973866581916809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,4,64,0,1,float16,float16,0,0.06050133208433787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,4,64,128,1,float16,fp8,0,0.059445331494013466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,4,64,128,1,fp8,fp8,0,0.07624533275763194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,4,64,0,1,float16,fp8,0,0.06073066592216492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,4,64,0,1,fp8,fp8,0,0.05630399783452352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,8,64,128,1,float16,float16,0,0.06025066475073496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,8,64,0,1,float16,float16,0,0.06061333417892456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,8,64,128,1,float16,fp8,0,0.06029866635799408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,8,64,128,1,fp8,fp8,0,0.0765119989713033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,8,64,0,1,float16,fp8,0,0.060453335444132485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,8,64,0,1,fp8,fp8,0,0.056176001826922096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,1,64,128,1,float16,float16,0,4.463552157084147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,1,64,0,1,float16,float16,0,4.459072113037109
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,1,64,128,1,float16,fp8,0,4.443210601806641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,1,64,128,1,fp8,fp8,0,5.939738591512044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,1,64,0,1,float16,fp8,0,4.471466700236003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,1,64,0,1,fp8,fp8,0,3.9834826787312827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,2,64,128,1,float16,float16,0,4.539344151814778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,2,64,0,1,float16,float16,0,4.5324907302856445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,2,64,128,1,float16,fp8,0,4.5394026438395185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,2,64,0,1,float16,fp8,0,4.535701433817546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,2,64,128,1,fp8,fp8,0,5.993232091267903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,2,64,0,1,fp8,fp8,0,4.003461201985677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,4,64,0,1,float16,float16,0,4.5789438883463545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,4,64,128,1,float16,float16,0,4.5599625905354815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,4,64,128,1,float16,fp8,0,4.538506507873535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,4,64,0,1,float16,fp8,0,4.565077463785808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,4,64,0,1,fp8,fp8,0,4.035562515258789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,8,64,128,1,float16,float16,0,4.579018592834473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,8,64,0,1,float16,float16,0,4.604170799255371
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,4,64,128,1,fp8,fp8,0,6.024431864420573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,8,64,128,1,float16,fp8,0,4.575519879659017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,64,64,128,1,float16,float16,0,2.612874666849772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,64,64,0,1,float16,float16,0,2.610479990641276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,64,64,128,1,float16,fp8,0,2.554314613342285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,8,64,128,1,fp8,fp8,0,6.078986485799153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,8,64,0,1,float16,fp8,0,4.545914649963379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,8,64,0,1,fp8,fp8,0,4.05947748819987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,64,64,128,1,fp8,fp8,0,3.2488533655802407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,64,64,0,1,float16,fp8,0,2.5357492764790854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,64,64,0,1,fp8,fp8,0,2.2571093241373696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,1,64,128,1,float16,float16,0,2.2324533462524414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,1,64,0,1,float16,float16,0,2.225525379180908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,1,64,128,1,float16,fp8,0,2.241205374399821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,1,64,0,1,float16,fp8,0,2.2222506205240884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,1,64,0,1,fp8,fp8,0,1.9875574111938477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,1,64,128,1,fp8,fp8,0,2.968085289001465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,2,64,128,1,float16,float16,0,2.244037310282389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,2,64,0,1,float16,float16,0,2.238405386606852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,2,64,128,1,fp8,fp8,0,2.9838132858276367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,2,64,128,1,float16,fp8,0,2.244501272837321
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,2,64,0,1,float16,fp8,0,2.2483946482340493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,2,64,0,1,fp8,fp8,0,1.99453337987264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,4,64,128,1,float16,float16,0,2.269301255544027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,4,64,0,1,float16,float16,0,2.251296043395996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,4,64,128,1,float16,fp8,0,2.281178633371989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,4,64,0,1,float16,fp8,0,2.2477280298868814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,4,64,0,1,fp8,fp8,0,2.0007519721984863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,4,64,128,1,fp8,fp8,0,3.0089333852132163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,8,64,128,1,float16,float16,0,2.3043413162231445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,8,64,0,1,float16,float16,0,2.2885653177897134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,8,64,128,1,float16,fp8,0,2.264853318532308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,64,64,128,1,float16,float16,0,1.3059093157450359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,8,64,0,1,float16,fp8,0,2.257317384084066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,8,64,0,1,fp8,fp8,0,2.017866611480713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,8,64,128,1,fp8,fp8,0,3.012981414794922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,64,64,0,1,float16,float16,0,1.3092426458994548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,64,64,128,1,float16,fp8,0,1.2831733226776123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,64,64,128,1,fp8,fp8,0,1.6206132570902507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,64,64,0,1,float16,fp8,0,1.2732693354288738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,64,64,0,1,fp8,fp8,0,1.118181308110555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,1,64,128,1,float16,float16,0,1.1385707060496013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,1,64,0,1,float16,float16,0,1.1200053691864014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,1,64,128,1,float16,fp8,0,1.1331733067830403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,1,64,128,1,fp8,fp8,0,1.5022026697794597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,1,64,0,1,float16,fp8,0,1.1267786820729573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,1,64,0,1,fp8,fp8,0,0.995189348856608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,2,64,128,1,float16,float16,0,1.1395359834035237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,2,64,0,1,float16,float16,0,1.1240320205688477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,2,64,128,1,float16,fp8,0,1.142896016438802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,2,64,0,1,float16,fp8,0,1.12608536084493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,2,64,128,1,fp8,fp8,0,1.496986707051595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,2,64,0,1,fp8,fp8,0,0.9987839857737223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,4,64,128,1,float16,float16,0,1.141856034596761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,4,64,0,1,float16,float16,0,1.1283679803212483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,4,64,128,1,float16,fp8,0,1.1401653289794922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,4,64,128,1,fp8,fp8,0,1.5039787292480469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,4,64,0,1,float16,fp8,0,1.1322773297627766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,4,64,0,1,fp8,fp8,0,1.002351999282837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,8,64,128,1,float16,float16,0,1.1427040100097656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,8,64,0,1,float16,float16,0,1.1341760158538818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,8,64,128,1,float16,fp8,0,1.1467999617258708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,8,64,128,1,fp8,fp8,0,1.5144426027933757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,8,64,0,1,float16,fp8,0,1.1310239632924397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,64,64,128,1,float16,float16,0,0.6616693337758383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,8,64,0,1,fp8,fp8,0,1.0136640071868896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,64,64,0,1,float16,float16,0,0.6526240110397339
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,64,64,0,1,fp8,fp8,0,0.5663306713104248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,64,64,128,1,float16,fp8,0,0.6509973208109537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,64,64,128,1,fp8,fp8,0,0.8289386431376139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,64,64,0,1,float16,fp8,0,0.6426719824473063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,1,64,128,1,float16,float16,0,0.5809066692988077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,1,64,0,1,float16,fp8,0,0.5756586790084839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,1,64,0,1,float16,float16,0,0.5756586790084839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,1,64,128,1,float16,fp8,0,0.5812693436940511
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,1,64,128,1,fp8,fp8,0,0.7693066596984863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,1,64,0,1,fp8,fp8,0,0.5112533171971639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,2,64,128,1,float16,float16,0,0.5850666761398315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,2,64,0,1,float16,float16,0,0.5742133458455404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,2,64,128,1,float16,fp8,0,0.5869333346684774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,2,64,128,1,fp8,fp8,0,0.7727306683858236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,2,64,0,1,float16,fp8,0,0.5749866565068563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,2,64,0,1,fp8,fp8,0,0.5101813475290934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,4,64,128,1,float16,float16,0,0.5835146506627401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,4,64,0,1,float16,float16,0,0.5795040130615234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,4,64,128,1,float16,fp8,0,0.5844693183898926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,4,64,128,1,fp8,fp8,0,0.7662613391876221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,4,64,0,1,float16,fp8,0,0.5772853295008341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,4,64,0,1,fp8,fp8,0,0.5120213429133097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,8,64,128,1,float16,float16,0,0.5862933397293091
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,8,64,0,1,float16,float16,0,0.5790186723073324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,8,64,128,1,float16,fp8,0,0.5864160060882568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,8,64,128,1,fp8,fp8,0,0.7760106722513834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,8,64,0,1,float16,fp8,0,0.5786506732304891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,8,64,0,1,fp8,fp8,0,0.5125600099563599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,64,64,128,1,float16,float16,0,0.3471786578496297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,64,64,0,1,float16,float16,0,0.3398666779200236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,64,64,128,1,float16,fp8,0,0.3389493227005005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,64,64,128,1,fp8,fp8,0,0.4260480006535848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,64,64,0,1,float16,fp8,0,0.33501867453257245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,64,64,0,1,fp8,fp8,0,0.29234133164087933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,1,64,128,1,float16,float16,0,0.30457067489624023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,1,64,0,1,float16,float16,0,0.2988586624463399
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,1,64,128,1,float16,fp8,0,0.3073813319206238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,1,64,128,1,fp8,fp8,0,0.40326400597890216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,1,64,0,1,float16,fp8,0,0.29948266347249347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,1,64,0,1,fp8,fp8,0,0.26738133033116657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,2,64,128,1,float16,float16,0,0.30588799715042114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,2,64,0,1,float16,float16,0,0.3019946614901225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,2,64,128,1,float16,fp8,0,0.30773333708445233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,2,64,128,1,fp8,fp8,0,0.40238932768503827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,2,64,0,1,float16,fp8,0,0.3009546597798665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,2,64,0,1,fp8,fp8,0,0.26898666222890216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,4,64,128,1,float16,float16,0,0.30537599325180054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,4,64,0,1,float16,float16,0,0.3004586696624756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,4,64,128,1,float16,fp8,0,0.30795733133951825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,4,64,128,1,fp8,fp8,0,0.40585601329803467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,4,64,0,1,float16,fp8,0,0.30081067482630414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,4,64,0,1,fp8,fp8,0,0.26759467522303265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,8,64,128,1,float16,float16,0,0.30908799171447754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,8,64,0,1,float16,float16,0,0.3023306727409363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,8,64,128,1,float16,fp8,0,0.30755199988683063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,8,64,128,1,fp8,fp8,0,0.40513598918914795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,8,64,0,1,float16,fp8,0,0.3031093279520671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,8,64,0,1,fp8,fp8,0,0.27183467149734497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,64,64,128,1,float16,float16,0,0.18774400154749551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,64,64,0,1,float16,float16,0,0.1819253365198771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,64,64,128,1,float16,fp8,0,0.18521066506703696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,64,64,128,1,fp8,fp8,0,0.23355199893315634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,64,64,0,1,float16,fp8,0,0.1797813375790914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,64,64,0,1,fp8,fp8,0,0.15729066729545593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,1,64,128,1,float16,float16,0,0.16497600078582764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,1,64,0,1,float16,float16,0,0.16099733114242554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,1,64,128,1,float16,fp8,0,0.166485329469045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,1,64,128,1,fp8,fp8,0,0.21840532620747885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,1,64,0,1,float16,fp8,0,0.16172800461451212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,1,64,0,1,fp8,fp8,0,0.14381333192189535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,2,64,128,1,float16,float16,0,0.16632533073425293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,2,64,0,1,float16,float16,0,0.16108266512552896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,2,64,128,1,float16,fp8,0,0.16662399967511496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,2,64,128,1,fp8,fp8,0,0.22173333168029785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,2,64,0,1,float16,fp8,0,0.16167466839154562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,2,64,0,1,fp8,fp8,0,0.14321066935857138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,4,64,128,1,float16,float16,0,0.16665599743525186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,4,64,0,1,float16,float16,0,0.16286399960517883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,4,64,128,1,float16,fp8,0,0.16697599490483603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,4,64,128,1,fp8,fp8,0,0.2194826602935791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,4,64,0,1,float16,fp8,0,0.16157866517702738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,4,64,0,1,fp8,fp8,0,0.14499732851982117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,8,64,128,1,float16,float16,0,0.16764267285664877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,8,64,0,1,float16,float16,0,0.1641973356405894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,8,64,128,1,float16,fp8,0,0.16859734058380127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,8,64,128,1,fp8,fp8,0,0.22103999058405557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,8,64,0,1,float16,fp8,0,0.16387200355529785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,8,64,0,1,fp8,fp8,0,0.14573333660761514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,64,64,128,1,float16,float16,0,0.10553600390752156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,64,64,0,1,float16,float16,0,0.10203199585278828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,64,64,128,1,float16,fp8,0,0.10591466228167216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,64,64,128,1,fp8,fp8,0,0.13329066832860312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,64,64,0,1,float16,fp8,0,0.09954133629798889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,64,64,0,1,fp8,fp8,0,0.0901759962240855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,1,64,128,1,float16,float16,0,0.09298666318257649
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,1,64,0,1,float16,float16,0,0.08806932965914409
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,1,64,128,1,float16,fp8,0,0.09251200159390767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,1,64,128,1,fp8,fp8,0,0.12131733695665996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,1,64,0,1,float16,fp8,0,0.08805867036183675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,1,64,0,1,fp8,fp8,0,0.07962133487065633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,2,64,128,1,float16,float16,0,0.09358400106430054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,2,64,0,1,float16,float16,0,0.08808533350626628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,2,64,128,1,float16,fp8,0,0.09410132964452107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,2,64,128,1,fp8,fp8,0,0.1239306628704071
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,2,64,0,1,float16,fp8,0,0.08890666564305623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,2,64,0,1,fp8,fp8,0,0.07992533346017201
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,4,64,0,1,float16,fp8,0,0.08874666690826416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,4,64,128,1,float16,float16,0,0.09470933675765991
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,4,64,0,1,float16,float16,0,0.0890933374563853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,4,64,128,1,float16,fp8,0,0.09393067161242168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,4,64,128,1,fp8,fp8,0,0.12441066900889079
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,4,64,0,1,fp8,fp8,0,0.08038933575153351
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,8,64,128,1,float16,float16,0,0.09483200311660767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,8,64,0,1,float16,float16,0,0.08973333239555359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,8,64,128,1,float16,fp8,0,0.09610133369763692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,8,64,128,1,fp8,fp8,0,0.12665067116419473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,8,64,0,1,float16,fp8,0,0.0899679958820343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,8,64,0,1,fp8,fp8,0,0.08089066545168559
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,64,64,128,1,float16,float16,0,0.06156266729036967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,64,64,0,1,float16,float16,0,0.05633600056171417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,64,64,128,1,float16,fp8,0,0.06160533428192139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,64,64,128,1,fp8,fp8,0,0.08166933556397755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,64,64,0,1,float16,fp8,0,0.056314667065938316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,64,64,0,1,fp8,fp8,0,0.05332266787687937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,1,64,128,1,float16,float16,0,0.055546666185061135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,2,64,128,1,float16,float16,0,0.05645333230495453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,1,64,0,1,float16,float16,0,0.05046399931112925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,1,64,128,1,float16,fp8,0,0.05621333420276642
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,1,64,128,1,fp8,fp8,0,0.07121066749095917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,1,64,0,1,float16,fp8,0,0.051269332567850746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,1,64,0,1,fp8,fp8,0,0.04741866886615753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,2,64,0,1,float16,float16,0,0.052095999320348106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,2,64,128,1,float16,fp8,0,0.05622933308283488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,2,64,128,1,fp8,fp8,0,0.07210666437943776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,2,64,0,1,float16,fp8,0,0.05100800096988678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,2,64,0,1,fp8,fp8,0,0.047557334105173744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,4,64,128,1,float16,float16,0,0.05633600056171417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,4,64,0,1,float16,float16,0,0.051674668987592064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,4,64,128,1,float16,fp8,0,0.056799997886021934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,4,64,128,1,fp8,fp8,0,0.07357333103815715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,4,64,0,1,float16,fp8,0,0.05221866567929586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,4,64,0,1,fp8,fp8,0,0.04789333542188009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,8,64,128,1,float16,float16,0,0.056287998954455055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,8,64,0,1,float16,float16,0,0.052986666560173035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,8,64,128,1,float16,fp8,0,0.05738133192062378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,8,64,128,1,fp8,fp8,0,0.0728000005086263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,8,64,0,1,float16,fp8,0,0.05189333359400431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,8,64,0,1,fp8,fp8,0,0.0491946687301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,64,64,128,1,float16,float16,0,0.04131733377774557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,64,64,0,1,float16,float16,0,0.035391998787721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,64,64,128,1,float16,fp8,0,0.040591999888420105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,64,64,128,1,fp8,fp8,0,0.05123733480771383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,64,64,0,1,float16,fp8,0,0.03484266748030981
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,64,64,0,1,fp8,fp8,0,0.032069332897663116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,1,64,128,1,float16,float16,0,0.03873066604137421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,1,64,0,1,float16,float16,0,0.032933334509531655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,1,64,128,1,float16,fp8,0,0.03908266623814901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,1,64,128,1,fp8,fp8,0,0.05060266455014547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,1,64,0,1,float16,fp8,0,0.03307733436425527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,1,64,0,1,fp8,fp8,0,0.030581332743167877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,2,64,128,1,float16,float16,0,0.0391893337170283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,2,64,0,1,float16,float16,0,0.033045334120591484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,2,64,128,1,float16,fp8,0,0.03965866565704346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,2,64,128,1,fp8,fp8,0,0.05072000126043955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,2,64,0,1,float16,fp8,0,0.03268266717592875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,2,64,0,1,fp8,fp8,0,0.03125333289305369
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,4,64,128,1,float16,float16,0,0.03968533376852671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,4,64,0,1,float16,float16,0,0.03292266776164373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,4,64,128,1,float16,fp8,0,0.03993066648642222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,8,64,128,1,fp8,fp8,0,0.05026666820049286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,4,64,128,1,fp8,fp8,0,0.04991999765237173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,4,64,0,1,float16,fp8,0,0.0337119996547699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,4,64,0,1,fp8,fp8,0,0.03151999910672506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,8,64,128,1,float16,float16,0,0.03982933362325033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,8,64,0,1,float16,float16,0,0.03399466723203659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,8,64,128,1,float16,fp8,0,0.04008000095685323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,8,64,0,1,float16,fp8,0,0.033530667424201965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,8,64,0,1,fp8,fp8,0,0.03120533376932144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,1,64,128,1,float16,float16,0,1.895957310994466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,1,64,0,1,float16,float16,0,1.8420000076293945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,1,64,128,1,float16,fp8,0,1.904688040415446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,1,64,0,1,fp8,fp8,0,1.7030773162841797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,1,64,0,1,float16,fp8,0,1.8475786844889324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,1,64,128,1,fp8,fp8,0,2.603328069051107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,2,64,128,1,float16,float16,0,1.912218729654948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,2,64,0,1,float16,float16,0,1.8651092847188313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,2,64,128,1,float16,fp8,0,1.907434622446696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,2,64,128,1,fp8,fp8,0,2.6037279764811196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,2,64,0,1,fp8,fp8,0,1.7181493441263835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,2,64,0,1,float16,fp8,0,1.8637812932332356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,4,64,128,1,float16,float16,0,1.9407307306925456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,4,64,0,1,float16,float16,0,1.8900160789489746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,4,64,128,1,float16,fp8,0,1.9382774035135906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,4,64,0,1,float16,fp8,0,1.8763465881347656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,4,64,0,1,fp8,fp8,0,1.7222933769226074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,4,64,128,1,fp8,fp8,0,2.6227307319641113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,8,64,128,1,float16,float16,0,1.9494400024414062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,8,64,0,1,float16,float16,0,1.8936853408813477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,8,64,128,1,float16,fp8,0,1.9578347206115723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,8,64,128,1,fp8,fp8,0,2.63372802734375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,64,64,128,1,float16,float16,0,1.1292266845703125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,8,64,0,1,float16,fp8,0,1.8906985918680828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,1,64,128,1,float16,float16,0,0.9548106988271078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,8,64,0,1,fp8,fp8,0,1.7369813919067383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,64,64,0,1,float16,float16,0,1.1110666592915852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,64,64,128,1,float16,fp8,0,1.098639965057373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,64,64,0,1,float16,fp8,0,1.0782240231831868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,64,64,128,1,fp8,fp8,0,1.4242293039957683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,64,64,0,1,fp8,fp8,0,0.9695680141448975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,1,64,0,1,float16,float16,0,0.9285226662953695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,1,64,128,1,float16,fp8,0,0.9552799860636393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,1,64,128,1,fp8,fp8,0,1.3017866611480713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,1,64,0,1,float16,fp8,0,0.9276320139567057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,1,64,0,1,fp8,fp8,0,0.8547946612040201
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,2,64,128,1,float16,float16,0,0.96124267578125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,2,64,0,1,float16,float16,0,0.9333333174387614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,2,64,128,1,float16,fp8,0,0.9583360354105631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,2,64,0,1,float16,fp8,0,0.9299840132395426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,2,64,128,1,fp8,fp8,0,1.302245299021403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,2,64,0,1,fp8,fp8,0,0.8582239945729574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,4,64,128,1,float16,float16,0,0.9631093343098959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,4,64,0,1,float16,float16,0,0.9396426677703857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,4,64,128,1,float16,fp8,0,0.9671680132548014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,4,64,128,1,fp8,fp8,0,1.3126453558603923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,4,64,0,1,float16,fp8,0,0.9398132960001627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,4,64,0,1,fp8,fp8,0,0.8588533401489258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,8,64,128,1,float16,float16,0,0.9649759928385416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,8,64,0,1,float16,float16,0,0.9432319800059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,8,64,128,1,float16,fp8,0,0.965989351272583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,8,64,0,1,float16,fp8,0,0.9454507033030192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,8,64,128,1,fp8,fp8,0,1.3126933574676514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,8,64,0,1,fp8,fp8,0,0.8715360164642334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,64,64,128,1,float16,float16,0,0.5719413359959921
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,64,64,0,1,float16,float16,0,0.5638879934946696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,64,64,128,1,float16,fp8,0,0.5593066612879435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,64,64,128,1,fp8,fp8,0,0.7112053235371908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,64,64,0,1,float16,fp8,0,0.5480639934539795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,64,64,0,1,fp8,fp8,0,0.4854026635487874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,1,64,128,1,float16,float16,0,0.48959465821584064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,1,64,0,1,float16,float16,0,0.4782079855600993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,1,64,128,1,float16,fp8,0,0.48970667521158856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,1,64,128,1,fp8,fp8,0,0.6651573181152344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,1,64,0,1,float16,fp8,0,0.47656532128651935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,1,64,0,1,fp8,fp8,0,0.4383680025736491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,2,64,128,1,float16,float16,0,0.4898453156153361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,2,64,0,1,float16,float16,0,0.4798933267593384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,2,64,128,1,float16,fp8,0,0.49105600516001385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,2,64,128,1,fp8,fp8,0,0.6659466822942098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,2,64,0,1,float16,fp8,0,0.47812267144521076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,2,64,0,1,fp8,fp8,0,0.4412800073623657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,4,64,128,1,float16,float16,0,0.4914720058441162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,4,64,0,1,float16,float16,0,0.4803040027618408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,4,64,128,1,float16,fp8,0,0.49141331513722736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,4,64,128,1,fp8,fp8,0,0.6653386751810709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,4,64,0,1,float16,fp8,0,0.48074134190877277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,4,64,0,1,fp8,fp8,0,0.4431626796722412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,8,64,128,1,float16,float16,0,0.49508798122406006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,8,64,0,1,float16,float16,0,0.4830186764399211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,8,64,128,1,float16,fp8,0,0.4939253330230713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,8,64,128,1,fp8,fp8,0,0.6675199667612711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,8,64,0,1,float16,fp8,0,0.4821919997533162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,8,64,0,1,fp8,fp8,0,0.4432906707127889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,64,64,128,1,float16,float16,0,0.30114134152730304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,64,64,0,1,float16,float16,0,0.29764799276987713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,64,64,128,1,float16,fp8,0,0.2947840094566345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,64,64,128,1,fp8,fp8,0,0.36640000343322754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,64,64,0,1,float16,fp8,0,0.28988800446192425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,64,64,0,1,fp8,fp8,0,0.25494933128356934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,1,64,128,1,float16,float16,0,0.2545439998308818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,1,64,0,1,float16,float16,0,0.24963732560475668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,1,64,128,1,float16,fp8,0,0.25619733333587646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,1,64,128,1,fp8,fp8,0,0.34419198830922443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,1,64,0,1,float16,fp8,0,0.24890132745107016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,1,64,0,1,fp8,fp8,0,0.23130667209625244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,2,64,128,1,float16,float16,0,0.25487999121348065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,2,64,0,1,float16,float16,0,0.24968532721201578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,2,64,128,1,float16,fp8,0,0.25599465767542523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,2,64,128,1,fp8,fp8,0,0.3426560163497925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,2,64,0,1,float16,fp8,0,0.2508693337440491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,2,64,0,1,fp8,fp8,0,0.2320853273073832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,4,64,128,1,float16,float16,0,0.257146676381429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,4,64,0,1,float16,float16,0,0.25076266129811603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,4,64,128,1,float16,fp8,0,0.25642667214075726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,4,64,128,1,fp8,fp8,0,0.34301332632700604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,4,64,0,1,float16,fp8,0,0.2507733305295308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,4,64,0,1,fp8,fp8,0,0.2320906718571981
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,8,64,128,1,float16,float16,0,0.2590720057487488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,8,64,0,1,float16,float16,0,0.2534826596577962
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,8,64,128,1,float16,fp8,0,0.2579200069109599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,8,64,128,1,fp8,fp8,0,0.3463360071182251
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,8,64,0,1,float16,fp8,0,0.2518186569213867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,8,64,0,1,fp8,fp8,0,0.23428799708684286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,64,64,128,1,float16,float16,0,0.16176533699035645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,64,64,0,1,float16,float16,0,0.1600213348865509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,64,64,128,1,float16,fp8,0,0.16035733620325723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,64,64,128,1,fp8,fp8,0,0.1951626737912496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,64,64,0,1,float16,fp8,0,0.15762133399645487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,64,64,0,1,fp8,fp8,0,0.14101333419481912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,1,64,128,1,float16,float16,0,0.13577600320180258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,1,64,0,1,float16,float16,0,0.13160000244776407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,1,64,128,1,float16,fp8,0,0.13571733236312866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,1,64,128,1,fp8,fp8,0,0.18140800793965658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,1,64,0,1,float16,fp8,0,0.13328533371289572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,1,64,0,1,fp8,fp8,0,0.12771733601888022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,2,64,128,1,float16,float16,0,0.13642666737238565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,4,64,0,1,float16,float16,0,0.1330880026022593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,2,64,0,1,float16,float16,0,0.13239999612172446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,2,64,128,1,float16,fp8,0,0.13637866576512656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,2,64,128,1,fp8,fp8,0,0.18125333388646445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,2,64,0,1,float16,fp8,0,0.13361600041389465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,2,64,0,1,fp8,fp8,0,0.12830400466918945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,4,64,128,1,float16,float16,0,0.1374666690826416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,4,64,128,1,float16,fp8,0,0.13709867000579834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,4,64,128,1,fp8,fp8,0,0.18171199162801108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,4,64,0,1,float16,fp8,0,0.13301866253217062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,4,64,0,1,fp8,fp8,0,0.12969066699345908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,8,64,128,1,float16,float16,0,0.13717866937319437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,8,64,0,1,float16,float16,0,0.1351573367913564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,8,64,128,1,float16,fp8,0,0.1376533309618632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,8,64,128,1,fp8,fp8,0,0.18452266852060953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,8,64,0,1,float16,fp8,0,0.13570666313171387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,8,64,0,1,fp8,fp8,0,0.12963733077049255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,64,64,128,1,float16,float16,0,0.08828266461690266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,64,64,0,1,float16,float16,0,0.08807999889055888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,64,64,128,1,float16,fp8,0,0.08745599786440532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,64,64,128,1,fp8,fp8,0,0.10782933235168457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,64,64,0,1,float16,fp8,0,0.08725333213806152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,64,64,0,1,fp8,fp8,0,0.08182933429876964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,1,64,128,1,float16,float16,0,0.07238399982452393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,1,64,0,1,float16,float16,0,0.07124799986680348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,1,64,128,1,float16,fp8,0,0.07252799967924754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,1,64,128,1,fp8,fp8,0,0.09809600313504536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,1,64,0,1,float16,fp8,0,0.07088000078996022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,1,64,0,1,fp8,fp8,0,0.07073066631952922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,2,64,128,1,float16,float16,0,0.07322666545708974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,2,64,0,1,float16,float16,0,0.07181333502133687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,2,64,128,1,float16,fp8,0,0.07342933118343353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,2,64,128,1,fp8,fp8,0,0.09771200021107991
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,2,64,0,1,float16,fp8,0,0.0719413310289383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,2,64,0,1,fp8,fp8,0,0.07156800230344136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,4,64,128,1,float16,float16,0,0.07323733468850453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,4,64,0,1,float16,float16,0,0.07202133536338806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,4,64,128,1,float16,fp8,0,0.07412800192832947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,4,64,128,1,fp8,fp8,0,0.0983786682287852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,4,64,0,1,float16,fp8,0,0.07173333565394084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,4,64,0,1,fp8,fp8,0,0.07144533097743988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,8,64,128,1,float16,float16,0,0.07377066711584727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,8,64,0,1,float16,float16,0,0.07259200016657512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,8,64,128,1,float16,fp8,0,0.07398933172225952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,8,64,128,1,fp8,fp8,0,0.09947733084360759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,8,64,0,1,float16,fp8,0,0.0734986662864685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,8,64,0,1,fp8,fp8,0,0.07411733269691467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,64,64,128,1,float16,float16,0,0.04566933214664459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,64,64,0,1,float16,float16,0,0.045509333411852516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,64,64,128,1,float16,fp8,0,0.04538666705290476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,64,64,128,1,fp8,fp8,0,0.062090665102005005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,64,64,0,1,float16,fp8,0,0.044682666659355164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,64,64,0,1,fp8,fp8,0,0.0473280002673467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,1,64,128,1,float16,float16,0,0.04127999891837438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,1,64,0,1,float16,float16,0,0.04099733382463455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,1,64,128,1,float16,fp8,0,0.04135466615358988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,1,64,128,1,fp8,fp8,0,0.055386667450269066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,1,64,0,1,float16,fp8,0,0.040463998913764954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,1,64,0,1,fp8,fp8,0,0.04265599946180979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,2,64,128,1,float16,float16,0,0.04098666707674662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,2,64,0,1,float16,float16,0,0.04068800061941147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,2,64,128,1,float16,fp8,0,0.041759997606277466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,2,64,128,1,fp8,fp8,0,0.055760001142819725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,2,64,0,1,float16,fp8,0,0.04057066639264425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,2,64,0,1,fp8,fp8,0,0.041690667470296226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,4,64,128,1,float16,float16,0,0.041738669077555336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,4,64,0,1,float16,float16,0,0.04158399999141693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,4,64,128,1,float16,fp8,0,0.041797334949175514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,4,64,128,1,fp8,fp8,0,0.05602133274078369
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,4,64,0,1,float16,fp8,0,0.041562666495641075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,8,64,0,1,fp8,fp8,0,0.04309333364168803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,4,64,0,1,fp8,fp8,0,0.043237333496411644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,8,64,128,1,float16,float16,0,0.04176533222198486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,8,64,0,1,float16,float16,0,0.04070399949947993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,8,64,128,1,float16,fp8,0,0.041759997606277466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,8,64,128,1,fp8,fp8,0,0.05685866872469584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,8,64,0,1,float16,fp8,0,0.041562666495641075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,64,64,128,1,float16,float16,0,0.03307733436425527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,64,64,0,1,float16,float16,0,0.03186133255561193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,64,64,128,1,float16,fp8,0,0.03289599965016047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,64,64,128,1,fp8,fp8,0,0.03614933292071024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,64,64,0,1,float16,fp8,0,0.0328053335348765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,64,64,0,1,fp8,fp8,0,0.029103999336560566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,1,64,128,1,float16,float16,0,0.029978667696317036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,1,64,0,1,float16,float16,0,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,1,64,128,1,float16,fp8,0,0.03030933439731598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,1,64,128,1,fp8,fp8,0,0.03449599941571554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,1,64,0,1,float16,fp8,0,0.030095999439557392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,1,64,0,1,fp8,fp8,0,0.02790933350721995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,2,64,128,1,float16,float16,0,0.03071466585000356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,2,64,0,1,float16,float16,0,0.02942399928967158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,2,64,128,1,float16,fp8,0,0.030085332691669464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,2,64,128,1,fp8,fp8,0,0.03510933369398117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,2,64,0,1,float16,fp8,0,0.029487999776999157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,2,64,0,1,fp8,fp8,0,0.028501334289709728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,4,64,128,1,float16,float16,0,0.03014400104681651
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,4,64,0,1,float16,float16,0,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,4,64,128,1,float16,fp8,0,0.030799999833106995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,4,64,128,1,fp8,fp8,0,0.03522133330504099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,4,64,0,1,float16,fp8,0,0.03071466585000356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,4,64,0,1,fp8,fp8,0,0.02794133375088374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,8,64,128,1,float16,float16,0,0.03065066784620285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,8,64,0,1,float16,float16,0,0.02998399982849757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,8,64,128,1,float16,fp8,0,0.03089066594839096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,8,64,128,1,fp8,fp8,0,0.03480000048875809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,8,64,0,1,float16,fp8,0,0.030245333909988403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,8,64,0,1,fp8,fp8,0,0.028543998797734577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,64,64,128,1,float16,float16,0,0.02165333429972331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,64,64,0,1,float16,float16,0,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,64,64,128,1,float16,fp8,0,0.02195200075705846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,64,64,128,1,fp8,fp8,0,0.02554133286078771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,64,64,0,1,float16,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,64,64,0,1,fp8,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,1,64,128,1,float16,float16,0,0.020026666422684986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,1,64,0,1,float16,float16,0,0.019823999454577763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,1,64,128,1,float16,fp8,0,0.0206986665725708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,2,64,128,1,fp8,fp8,0,0.024357333779335022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,1,64,128,1,fp8,fp8,0,0.02404800057411194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,1,64,0,1,float16,fp8,0,0.020506666352351505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,1,64,0,1,fp8,fp8,0,0.02053333322207133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,2,64,128,1,float16,float16,0,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,2,64,0,1,float16,float16,0,0.0195573332409064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,2,64,128,1,float16,fp8,0,0.020245333512624104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,2,64,0,1,float16,fp8,0,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,2,64,0,1,fp8,fp8,0,0.020469332734743755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,4,64,128,1,float16,float16,0,0.020432000358899433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,4,64,0,1,float16,float16,0,0.0206133338312308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,4,64,128,1,float16,fp8,0,0.020165332903464634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,4,64,128,1,fp8,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,4,64,0,1,float16,fp8,0,0.01995733380317688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,4,64,0,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,8,64,128,1,float16,float16,0,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,8,64,0,1,float16,float16,0,0.019973333925008774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,8,64,128,1,float16,fp8,0,0.020421333611011505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,8,64,128,1,fp8,fp8,0,0.02502399931351344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,8,64,0,1,float16,fp8,0,0.02013333390156428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,8,64,0,1,fp8,fp8,0,0.021664001047611237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,1,64,128,1,float16,float16,0,0.6907520294189453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,1,64,0,1,float16,float16,0,0.6890239715576172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,1,64,128,1,float16,fp8,0,0.689903974533081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,1,64,128,1,fp8,fp8,0,0.8485493659973145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,1,64,0,1,float16,fp8,0,0.6902506351470947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,1,64,0,1,fp8,fp8,0,0.8506293296813965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,2,64,128,1,float16,float16,0,0.6932213306427002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,2,64,0,1,float16,float16,0,0.6941706339518229
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,2,64,128,1,float16,fp8,0,0.6925919850667318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,2,64,128,1,fp8,fp8,0,0.8536427021026611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,2,64,0,1,float16,fp8,0,0.6929972966512045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,2,64,0,1,fp8,fp8,0,0.8544267018636068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,4,64,128,1,float16,float16,0,0.698911984761556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,4,64,0,1,float16,float16,0,0.7020746866861979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,4,64,128,1,float16,fp8,0,0.6997280120849609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,4,64,128,1,fp8,fp8,0,0.8548959891001383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,4,64,0,1,float16,fp8,0,0.6991466681162516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,4,64,0,1,fp8,fp8,0,0.8545013268788656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,8,64,128,1,float16,float16,0,0.7005333105723063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,8,64,0,1,float16,float16,0,0.7033066749572754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,8,64,128,1,float16,fp8,0,0.7021439870198568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,8,64,128,1,fp8,fp8,0,0.8639466762542725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,8,64,0,1,float16,fp8,0,0.701744000116984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,64,64,128,1,float16,float16,0,0.4373279809951782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,8,64,0,1,fp8,fp8,0,0.8634080092112223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,64,64,0,1,float16,float16,0,0.4368266661961873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,64,64,128,1,float16,fp8,0,0.4241226514180501
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,64,64,128,1,fp8,fp8,0,0.4829813241958618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,64,64,0,1,float16,fp8,0,0.4247359832127889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,64,64,0,1,fp8,fp8,0,0.485482652982076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,1,64,128,1,float16,float16,0,0.3539359966913859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,1,64,0,1,float16,float16,0,0.3521440029144287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,1,64,128,1,float16,fp8,0,0.35790932178497314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,1,64,128,1,fp8,fp8,0,0.43480531374613446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,1,64,0,1,float16,fp8,0,0.3536800146102905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,1,64,0,1,fp8,fp8,0,0.4361120065053304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,2,64,128,1,float16,float16,0,0.3550560077031453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,2,64,0,1,float16,float16,0,0.3557120164235433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,2,64,128,1,float16,fp8,0,0.3545066515604655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,2,64,128,1,fp8,fp8,0,0.43564267953236896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,2,64,0,1,float16,fp8,0,0.3547786474227905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,2,64,0,1,fp8,fp8,0,0.4350399971008301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,4,64,128,1,float16,float16,0,0.3589973449707031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,4,64,0,1,float16,float16,0,0.3566720088322957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,4,64,128,1,float16,fp8,0,0.3565280040105184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,4,64,128,1,fp8,fp8,0,0.43828801314036053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,4,64,0,1,float16,fp8,0,0.35815465450286865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,4,64,0,1,fp8,fp8,0,0.4360373417536418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,8,64,128,1,float16,float16,0,0.360096017519633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,8,64,0,1,float16,float16,0,0.3579253355662028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,8,64,128,1,float16,fp8,0,0.35917333761850995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,8,64,128,1,fp8,fp8,0,0.43808531761169434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,8,64,0,1,float16,fp8,0,0.3589013417561849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,64,64,128,1,float16,float16,0,0.22904000679651895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,8,64,0,1,fp8,fp8,0,0.4395466645558675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,64,64,0,1,float16,float16,0,0.2308853268623352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,64,64,128,1,float16,fp8,0,0.2232960065205892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,64,64,128,1,fp8,fp8,0,0.25146132707595825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,64,64,0,1,float16,fp8,0,0.22292800744374594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,64,64,0,1,fp8,fp8,0,0.25217066208521527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,1,64,128,1,float16,float16,0,0.1845866640408834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,1,64,0,1,float16,float16,0,0.18381865819295248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,1,64,128,1,float16,fp8,0,0.185754656791687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,1,64,128,1,fp8,fp8,0,0.23029333353042603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,1,64,0,1,float16,fp8,0,0.1839146614074707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,1,64,0,1,fp8,fp8,0,0.2295466661453247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,2,64,128,1,float16,float16,0,0.18437866369883218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,2,64,0,1,float16,float16,0,0.18573866287867227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,2,64,128,1,float16,fp8,0,0.1848533352216085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,2,64,128,1,fp8,fp8,0,0.23058666785558066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,2,64,0,1,float16,fp8,0,0.18639467159907022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,2,64,0,1,fp8,fp8,0,0.23176000515619913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,4,64,128,1,float16,float16,0,0.18636266390482584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,4,64,0,1,float16,float16,0,0.1869759956995646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,4,64,128,1,float16,fp8,0,0.18684266010920206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,4,64,128,1,fp8,fp8,0,0.2304640014966329
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,4,64,0,1,float16,fp8,0,0.18678933382034302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,4,64,0,1,fp8,fp8,0,0.23042666912078857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,8,64,128,1,float16,float16,0,0.18853867053985596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,8,64,0,1,float16,float16,0,0.18837867180506387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,8,64,128,1,float16,fp8,0,0.1882773240407308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,8,64,128,1,fp8,fp8,0,0.23190933465957642
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,8,64,0,1,float16,fp8,0,0.18766399224599203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,8,64,0,1,fp8,fp8,0,0.23177599906921387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,64,64,128,1,float16,float16,0,0.12371200323104858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,64,64,0,1,float16,float16,0,0.1244053343931834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,64,64,128,1,float16,fp8,0,0.12152533729871114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,64,64,128,1,fp8,fp8,0,0.13949867089589438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,64,64,0,1,float16,fp8,0,0.12040000160535176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,64,64,0,1,fp8,fp8,0,0.13937066992123923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,1,64,0,1,fp8,fp8,0,0.127210666735967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,1,64,128,1,float16,float16,0,0.09854933619499207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,1,64,0,1,float16,float16,0,0.09890133142471313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,1,64,128,1,float16,fp8,0,0.09865599870681763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,1,64,128,1,fp8,fp8,0,0.12711999813715616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,1,64,0,1,float16,fp8,0,0.0999840001265208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,2,64,128,1,float16,float16,0,0.09896533687909444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,2,64,0,1,float16,float16,0,0.10016533732414246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,2,64,128,1,float16,fp8,0,0.09963732957839966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,2,64,128,1,fp8,fp8,0,0.1267573336760203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,2,64,0,1,float16,fp8,0,0.10001066327095032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,2,64,0,1,fp8,fp8,0,0.1253706713517507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,4,64,128,1,float16,float16,0,0.09950400392214458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,4,64,0,1,float16,float16,0,0.10055999954541524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,4,64,128,1,float16,fp8,0,0.09961600104967754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,4,64,128,1,fp8,fp8,0,0.12771733601888022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,4,64,0,1,float16,fp8,0,0.10168533523877461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,4,64,0,1,fp8,fp8,0,0.12795733412106833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,8,64,128,1,float16,float16,0,0.10079999764760335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,8,64,0,1,float16,float16,0,0.10099200407663982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,8,64,128,1,float16,fp8,0,0.10134399930636089
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,8,64,128,1,fp8,fp8,0,0.12838932871818542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,8,64,0,1,float16,fp8,0,0.10037333766619365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,8,64,0,1,fp8,fp8,0,0.12802132964134216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,64,64,128,1,float16,float16,0,0.06969599922498067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,64,64,0,1,float16,float16,0,0.06923733154932658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,64,64,128,1,float16,fp8,0,0.06794666747252147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,64,64,128,1,fp8,fp8,0,0.08052266637484233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,64,64,0,1,float16,fp8,0,0.06845866640408833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,64,64,0,1,fp8,fp8,0,0.08088533580303192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,1,64,128,1,float16,float16,0,0.05433600147565206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,1,64,0,1,float16,float16,0,0.055120001236597695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,1,64,128,1,float16,fp8,0,0.055311997731526695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,1,64,128,1,fp8,fp8,0,0.07158933579921722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,1,64,0,1,float16,fp8,0,0.05451733370621999
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,1,64,0,1,fp8,fp8,0,0.07136000196139018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,2,64,128,1,float16,float16,0,0.05505600074927012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,2,64,0,1,float16,float16,0,0.054986665646235146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,2,64,128,1,float16,fp8,0,0.054383998115857445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,2,64,128,1,fp8,fp8,0,0.07155733307202657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,2,64,0,1,float16,fp8,0,0.0553653339544932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,2,64,0,1,fp8,fp8,0,0.07217066486676534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,4,64,128,1,float16,float16,0,0.054986665646235146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,4,64,0,1,float16,float16,0,0.054917335510253906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,4,64,128,1,float16,fp8,0,0.05542399982611338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,4,64,128,1,fp8,fp8,0,0.07251200079917908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,4,64,0,1,float16,fp8,0,0.05514666438102722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,4,64,0,1,fp8,fp8,0,0.07275199890136719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,8,64,128,1,float16,float16,0,0.05645333230495453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,8,64,0,1,float16,float16,0,0.05648533503214518
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,8,64,128,1,float16,fp8,0,0.056101332108179726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,8,64,128,1,fp8,fp8,0,0.07243200143178304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,8,64,0,1,float16,fp8,0,0.05598933498064677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,8,64,0,1,fp8,fp8,0,0.07327466706434886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,64,64,128,1,float16,float16,0,0.03658666710058848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,64,64,0,1,float16,float16,0,0.036357333262761436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,64,64,128,1,float16,fp8,0,0.036277333895365395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,64,64,128,1,fp8,fp8,0,0.047338664531707764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,64,64,0,1,float16,fp8,0,0.03576533248027166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,64,64,0,1,fp8,fp8,0,0.046069333950678505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,1,64,128,1,float16,float16,0,0.03189333279927572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,1,64,0,1,float16,float16,0,0.03218133250872294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,1,64,128,1,float16,fp8,0,0.031856000423431396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,1,64,128,1,fp8,fp8,0,0.04308266441027323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,1,64,0,1,float16,fp8,0,0.03232000023126602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,1,64,0,1,fp8,fp8,0,0.04159999887148539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,2,64,128,1,float16,float16,0,0.03146666785081228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,2,64,0,1,float16,float16,0,0.03133866687615713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,2,64,128,1,float16,fp8,0,0.032272001107533775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,2,64,128,1,fp8,fp8,0,0.0422026664018631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,2,64,0,1,float16,fp8,0,0.031856000423431396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,2,64,0,1,fp8,fp8,0,0.04260266820589701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,4,64,128,1,float16,float16,0,0.032816000282764435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,4,64,0,1,float16,float16,0,0.03199466566244761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,4,64,128,1,float16,fp8,0,0.031930667658646904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,4,64,128,1,fp8,fp8,0,0.0420959989229838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,4,64,0,1,float16,fp8,0,0.032842665910720825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,4,64,0,1,fp8,fp8,0,0.042949333786964417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,8,64,128,1,float16,float16,0,0.03258133431275686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,8,64,0,1,float16,float16,0,0.03311466674009959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,8,64,128,1,float16,fp8,0,0.03316266586383184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,8,64,128,1,fp8,fp8,0,0.04329599936803182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,8,64,0,1,float16,fp8,0,0.032144000132878624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,8,64,0,1,fp8,fp8,0,0.043391997615496315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,64,64,128,1,float16,float16,0,0.02442666639884313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,64,64,0,1,float16,float16,0,0.024298667907714844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,64,64,128,1,float16,fp8,0,0.023904000719388325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,64,64,128,1,fp8,fp8,0,0.029648000995318096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,64,64,0,1,float16,fp8,0,0.02516266703605652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,64,64,0,1,fp8,fp8,0,0.0288426677385966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,1,64,128,1,float16,float16,0,0.02221333235502243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,1,64,0,1,float16,float16,0,0.022005334496498108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,1,64,128,1,float16,fp8,0,0.02288000037272771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,1,64,128,1,fp8,fp8,0,0.028005334238211315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,1,64,0,1,float16,fp8,0,0.022437334060668945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,1,64,0,1,fp8,fp8,0,0.028688001135985058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,2,64,128,1,float16,float16,0,0.023168000082174938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,2,64,0,1,float16,float16,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,2,64,128,1,float16,fp8,0,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,2,64,128,1,fp8,fp8,0,0.027957332630952198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,2,64,0,1,float16,fp8,0,0.023200000325838726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,2,64,0,1,fp8,fp8,0,0.028058665494124096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,4,64,128,1,float16,float16,0,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,4,64,0,1,float16,float16,0,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,4,64,128,1,float16,fp8,0,0.023530667026837666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,4,64,128,1,fp8,fp8,0,0.02826666583617528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,4,64,0,1,float16,fp8,0,0.02293866624434789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,4,64,0,1,fp8,fp8,0,0.028463999430338543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,8,64,128,1,float16,float16,0,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,8,64,0,1,float16,float16,0,0.022805333137512207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,8,64,128,1,float16,fp8,0,0.023242667317390442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,8,64,128,1,fp8,fp8,0,0.028650666276613872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,8,64,0,1,float16,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,8,64,0,1,fp8,fp8,0,0.02861333390076955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,64,64,128,1,float16,float16,0,0.018186666071414948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,64,64,0,1,float16,float16,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,64,64,128,1,float16,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,64,64,128,1,fp8,fp8,0,0.021695998807748158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,64,64,0,1,float16,fp8,0,0.018618666877349217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,64,64,0,1,fp8,fp8,0,0.022085333863894146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,1,64,128,1,float16,float16,0,0.01794133335351944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,1,64,0,1,float16,float16,0,0.017759999881188076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,1,64,128,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,1,64,128,1,fp8,fp8,0,0.021520001192887623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,1,64,0,1,float16,fp8,0,0.01820266619324684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,1,64,0,1,fp8,fp8,0,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,2,64,128,1,float16,float16,0,0.017898666361967724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,2,64,0,1,float16,float16,0,0.017925333231687546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,2,64,128,1,float16,fp8,0,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,2,64,128,1,fp8,fp8,0,0.020853333175182343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,2,64,0,1,float16,fp8,0,0.01749333366751671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,2,64,0,1,fp8,fp8,0,0.021695998807748158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,4,64,128,1,float16,float16,0,0.018138666947682697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,4,64,0,1,float16,float16,0,0.01812800019979477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,4,64,128,1,float16,fp8,0,0.018378666291634243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,4,64,128,1,fp8,fp8,0,0.021776000658671062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,4,64,0,1,float16,fp8,0,0.018453333526849747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,4,64,0,1,fp8,fp8,0,0.02149333308140437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,8,64,128,1,float16,float16,0,0.017994667092959087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,8,64,0,1,float16,float16,0,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,8,64,128,1,float16,fp8,0,0.01871466636657715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,8,64,128,1,fp8,fp8,0,0.021840001145998638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,8,64,0,1,float16,fp8,0,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,8,64,0,1,fp8,fp8,0,0.021984001000722248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,64,64,128,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,64,64,0,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,64,64,128,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,64,64,128,1,fp8,fp8,0,0.020560000091791153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,64,64,0,1,float16,fp8,0,0.015386667102575302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,64,64,0,1,fp8,fp8,0,0.020106667031844456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,1,64,128,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,1,64,0,1,float16,float16,0,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,1,64,128,1,float16,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,1,64,128,1,fp8,fp8,0,0.019589333484570186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,1,64,0,1,float16,fp8,0,0.015418666104475657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,1,64,0,1,fp8,fp8,0,0.020101333657900494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,2,64,128,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,2,64,0,1,float16,float16,0,0.01441066712141037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,2,64,128,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,2,64,128,1,fp8,fp8,0,0.019786667078733444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,2,64,0,1,float16,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,2,64,0,1,fp8,fp8,0,0.02004266654451688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,4,64,128,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,4,64,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,4,64,128,1,float16,fp8,0,0.01565333331624667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,4,64,128,1,fp8,fp8,0,0.020346666375796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,4,64,0,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,4,64,0,1,fp8,fp8,0,0.01995733380317688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,8,64,128,1,float16,float16,0,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,8,64,0,1,float16,float16,0,0.014682666709025701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,8,64,128,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,8,64,128,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,8,64,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,8,64,0,1,fp8,fp8,0,0.01987733319401741
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,1,64,128,1,float16,float16,0,0.3256426652272542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,1,64,0,1,float16,float16,0,0.3264373342196147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,1,64,128,1,float16,fp8,0,0.32523733377456665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,1,64,128,1,fp8,fp8,0,0.59443199634552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,1,64,0,1,float16,fp8,0,0.32597867647806805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,1,64,0,1,fp8,fp8,0,0.5955040057500204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,2,64,128,1,float16,float16,0,0.3259146610895793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,2,64,0,1,float16,float16,0,0.3269173304239909
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,2,64,128,1,float16,fp8,0,0.32664533456166583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,2,64,128,1,fp8,fp8,0,0.5927626689275106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,2,64,0,1,float16,fp8,0,0.3256373405456543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,2,64,0,1,fp8,fp8,0,0.5947626829147339
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,4,64,128,1,float16,float16,0,0.331770658493042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,4,64,0,1,float16,float16,0,0.3316426674524943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,4,64,128,1,float16,fp8,0,0.3300586740175883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,4,64,128,1,fp8,fp8,0,0.591973344484965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,4,64,0,1,float16,fp8,0,0.33000532786051434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,4,64,0,1,fp8,fp8,0,0.5946079889933268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,64,64,128,1,float16,float16,0,0.21854400634765625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,8,64,128,1,float16,float16,0,0.331498662630717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,8,64,0,1,float16,float16,0,0.33243733644485474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,8,64,128,1,float16,fp8,0,0.33000532786051434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,8,64,128,1,fp8,fp8,0,0.6014933188756307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,8,64,0,1,float16,fp8,0,0.3306879997253418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,8,64,0,1,fp8,fp8,0,0.5948479970296224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,64,64,0,1,float16,float16,0,0.21896533171335855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,64,64,128,1,float16,fp8,0,0.21207465728123984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,64,64,128,1,fp8,fp8,0,0.3328533371289571
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,64,64,0,1,float16,fp8,0,0.21136534214019775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,64,64,0,1,fp8,fp8,0,0.3338666756947835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,1,64,128,1,float16,float16,0,0.17144532998402914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,1,64,0,1,float16,float16,0,0.17061867316563925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,1,64,128,1,float16,fp8,0,0.17098132769266763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,1,64,128,1,fp8,fp8,0,0.3076053261756897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,1,64,0,1,float16,fp8,0,0.17043199141820273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,1,64,0,1,fp8,fp8,0,0.3088853359222412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,2,64,128,1,float16,float16,0,0.1707306702931722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,2,64,0,1,float16,float16,0,0.1709866722424825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,2,64,128,1,float16,fp8,0,0.1702400048573812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,2,64,128,1,fp8,fp8,0,0.3099946578343709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,2,64,0,1,float16,fp8,0,0.17164800564448038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,2,64,0,1,fp8,fp8,0,0.3083840012550354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,4,64,128,1,float16,float16,0,0.1725119948387146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,4,64,0,1,float16,float16,0,0.17249600092569986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,4,64,128,1,float16,fp8,0,0.17254932721455893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,4,64,128,1,fp8,fp8,0,0.3105813264846802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,4,64,0,1,float16,fp8,0,0.17220266660054526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,4,64,0,1,fp8,fp8,0,0.30962133407592773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,8,64,128,1,float16,float16,0,0.17421332995096842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,8,64,0,1,float16,float16,0,0.17443732420603433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,8,64,128,1,float16,fp8,0,0.17392534017562866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,8,64,128,1,fp8,fp8,0,0.31203200419743854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,8,64,0,1,float16,fp8,0,0.17430933316548666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,64,64,128,1,float16,float16,0,0.1176533301671346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,8,64,0,1,fp8,fp8,0,0.3120800058046977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,64,64,0,1,float16,float16,0,0.11741866668065389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,64,64,128,1,float16,fp8,0,0.11530133088429768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,64,64,128,1,fp8,fp8,0,0.17920533816019693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,64,64,0,1,float16,fp8,0,0.11411199967066447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,64,64,0,1,fp8,fp8,0,0.17948265870412192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,1,64,128,1,float16,float16,0,0.09213866790135701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,1,64,0,1,float16,float16,0,0.0913813312848409
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,1,64,128,1,float16,fp8,0,0.0925600032011668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,1,64,128,1,fp8,fp8,0,0.1670666734377543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,1,64,0,1,float16,fp8,0,0.09269332885742188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,1,64,0,1,fp8,fp8,0,0.16662399967511496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,2,64,128,1,float16,float16,0,0.09240532914797465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,2,64,0,1,float16,float16,0,0.09202133615811665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,2,64,128,1,float16,fp8,0,0.09245333075523376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,2,64,128,1,fp8,fp8,0,0.16754666964213052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,2,64,0,1,float16,fp8,0,0.09215466181437175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,2,64,0,1,fp8,fp8,0,0.16660267114639282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,4,64,128,1,float16,float16,0,0.09378666679064433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,4,64,0,1,float16,float16,0,0.0935093363126119
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,4,64,128,1,float16,fp8,0,0.09338666995366414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,4,64,128,1,fp8,fp8,0,0.16715200742085776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,4,64,0,1,float16,fp8,0,0.09293867150942485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,4,64,0,1,fp8,fp8,0,0.16775999466578165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,8,64,128,1,float16,float16,0,0.09444266557693481
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,8,64,0,1,float16,float16,0,0.0942080020904541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,8,64,128,1,float16,fp8,0,0.0941493312517802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,8,64,128,1,fp8,fp8,0,0.16795732577641806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,8,64,0,1,float16,fp8,0,0.09492266178131104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,8,64,0,1,fp8,fp8,0,0.16736533244450888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,64,64,128,1,float16,float16,0,0.0647573322057724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,64,64,0,1,float16,float16,0,0.06528000036875407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,64,64,128,1,float16,fp8,0,0.06333333253860474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,64,64,128,1,fp8,fp8,0,0.10173333684603374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,64,64,0,1,float16,fp8,0,0.06366933385531108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,64,64,0,1,fp8,fp8,0,0.10068266590436299
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,1,64,0,1,float16,fp8,0,0.05184000233809153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,1,64,128,1,float16,float16,0,0.051632001996040344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,1,64,0,1,float16,float16,0,0.05124799907207489
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,1,64,128,1,float16,fp8,0,0.05106133222579956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,1,64,128,1,fp8,fp8,0,0.0922933320204417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,1,64,0,1,fp8,fp8,0,0.09105066458384196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,2,64,128,1,float16,float16,0,0.05241066714127859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,2,64,0,1,float16,float16,0,0.05179733534653982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,2,64,128,1,float16,fp8,0,0.051455999414126076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,2,64,128,1,fp8,fp8,0,0.09486400087674458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,2,64,0,1,float16,fp8,0,0.0510506679614385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,2,64,0,1,fp8,fp8,0,0.09153599540392558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,4,64,128,1,float16,float16,0,0.05216533442338308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,4,64,0,1,float16,float16,0,0.05186133086681366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,4,64,128,1,float16,fp8,0,0.05220800141493479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,4,64,128,1,fp8,fp8,0,0.09331732988357544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,4,64,0,1,float16,fp8,0,0.051967998345692955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,4,64,0,1,fp8,fp8,0,0.0925439993540446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,8,64,128,1,float16,float16,0,0.052426666021347046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,8,64,0,1,float16,float16,0,0.052933335304260254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,8,64,128,1,float16,fp8,0,0.053488001227378845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,8,64,128,1,fp8,fp8,0,0.09338133533795674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,8,64,0,1,float16,fp8,0,0.05328533550103506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,8,64,0,1,fp8,fp8,0,0.0937653382619222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,64,64,128,1,float16,float16,0,0.035530666510264076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,64,64,0,1,float16,float16,0,0.03530666728814443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,64,64,128,1,float16,fp8,0,0.03432533393303553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,64,64,128,1,fp8,fp8,0,0.05898133416970571
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,64,64,0,1,float16,fp8,0,0.03562666724125544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,64,64,0,1,fp8,fp8,0,0.058330665032068886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,1,64,128,1,float16,float16,0,0.03123733401298523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,1,64,0,1,float16,float16,0,0.03140799949566523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,1,64,128,1,float16,fp8,0,0.03166933357715607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,1,64,128,1,fp8,fp8,0,0.05362666646639506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,1,64,0,1,float16,fp8,0,0.0312266672650973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,1,64,0,1,fp8,fp8,0,0.054773335655530296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,2,64,128,1,float16,float16,0,0.031871999303499855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,2,64,0,1,float16,float16,0,0.03173333406448364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,2,64,128,1,float16,fp8,0,0.031557333966096245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,2,64,128,1,fp8,fp8,0,0.05409066875775655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,2,64,0,1,float16,fp8,0,0.031685332457224526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,2,64,0,1,fp8,fp8,0,0.05342400074005127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,4,64,128,1,float16,float16,0,0.032218667368094124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,4,64,0,1,float16,float16,0,0.032655999064445496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,4,64,128,1,float16,fp8,0,0.032842665910720825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,4,64,128,1,fp8,fp8,0,0.05425600210825602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,4,64,0,1,float16,fp8,0,0.03265066693226496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,4,64,0,1,fp8,fp8,0,0.0544106662273407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,8,64,128,1,float16,float16,0,0.03259200106064478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,8,64,0,1,float16,float16,0,0.03218133250872294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,8,64,128,1,float16,fp8,0,0.032831999162832894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,8,64,128,1,fp8,fp8,0,0.05482666691144308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,8,64,0,1,float16,fp8,0,0.032399999598662056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,8,64,0,1,fp8,fp8,0,0.055104002356529236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,64,64,128,1,float16,float16,0,0.022778667509555817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,64,64,0,1,float16,float16,0,0.022437334060668945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,64,64,128,1,float16,fp8,0,0.022848000129063923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,64,64,128,1,fp8,fp8,0,0.03549866626660029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,64,64,0,1,float16,fp8,0,0.02292266736427943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,64,64,0,1,fp8,fp8,0,0.03540800015131632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,1,64,128,1,float16,float16,0,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,1,64,0,1,float16,float16,0,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,1,64,128,1,float16,fp8,0,0.02164799968401591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,1,64,128,1,fp8,fp8,0,0.03458133339881897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,1,64,0,1,float16,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,1,64,0,1,fp8,fp8,0,0.0344106654326121
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,2,64,128,1,float16,float16,0,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,2,64,0,1,float16,float16,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,2,64,128,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,2,64,128,1,fp8,fp8,0,0.03454400102297465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,2,64,0,1,float16,fp8,0,0.021882665654023487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,2,64,0,1,fp8,fp8,0,0.03468266626199087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,4,64,128,1,float16,float16,0,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,4,64,0,1,float16,float16,0,0.02161066730817159
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,4,64,128,1,float16,fp8,0,0.021802666286627453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,4,64,128,1,fp8,fp8,0,0.03487999985615412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,4,64,0,1,float16,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,4,64,0,1,fp8,fp8,0,0.03482133398453394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,8,64,128,1,float16,float16,0,0.021541332205136616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,8,64,0,1,float16,float16,0,0.021744000415007275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,8,64,128,1,float16,fp8,0,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,8,64,128,1,fp8,fp8,0,0.034976000587145485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,8,64,0,1,float16,fp8,0,0.021935999393463135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,8,64,0,1,fp8,fp8,0,0.03473600000143051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,64,64,128,1,float16,float16,0,0.017685333887736004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,64,64,0,1,float16,float16,0,0.017397332936525345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,64,64,128,1,float16,fp8,0,0.017466666797796886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,64,64,128,1,fp8,fp8,0,0.02626666675011317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,64,64,0,1,float16,fp8,0,0.017749333133300144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,64,64,0,1,fp8,fp8,0,0.025562666356563568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,1,64,128,1,float16,float16,0,0.01659199967980385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,1,64,0,1,float16,float16,0,0.016629333297411602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,1,64,128,1,float16,fp8,0,0.021664001047611237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,1,64,128,1,fp8,fp8,0,0.02462933212518692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,1,64,0,1,float16,fp8,0,0.016741332908471424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,1,64,0,1,fp8,fp8,0,0.024735999604066212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,2,64,128,1,float16,float16,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,2,64,0,1,float16,float16,0,0.016437333077192307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,2,64,128,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,2,64,128,1,fp8,fp8,0,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,2,64,0,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,2,64,0,1,fp8,fp8,0,0.024714666108290356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,4,64,128,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,4,64,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,4,64,128,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,4,64,128,1,fp8,fp8,0,0.02604266752799352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,4,64,0,1,float16,fp8,0,0.01752000053723653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,8,64,128,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,4,64,0,1,fp8,fp8,0,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,8,64,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,8,64,128,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,8,64,128,1,fp8,fp8,0,0.025674665967623394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,8,64,0,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,8,64,0,1,fp8,fp8,0,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,64,64,128,1,float16,float16,0,0.014266667266686758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,64,64,0,1,float16,float16,0,0.014170666535695394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,64,64,128,1,float16,fp8,0,0.014576000471909841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,64,64,128,1,fp8,fp8,0,0.0205226664741834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,64,64,0,1,float16,fp8,0,0.014202666779359182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,64,64,0,1,fp8,fp8,0,0.020421333611011505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,1,64,128,1,float16,float16,0,0.013967999567588171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,1,64,0,1,float16,float16,0,0.014170666535695394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,1,64,128,1,float16,fp8,0,0.014474666366974512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,1,64,128,1,fp8,fp8,0,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,1,64,0,1,float16,fp8,0,0.01423466702302297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,1,64,0,1,fp8,fp8,0,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,2,64,128,1,float16,float16,0,0.013909333695967993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,4,64,0,1,float16,float16,0,0.013877333452304205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,2,64,0,1,float16,float16,0,0.014192000031471252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,2,64,128,1,float16,fp8,0,0.0141546664138635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,2,64,128,1,fp8,fp8,0,0.02038399999340375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,2,64,0,1,float16,fp8,0,0.0141546664138635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,2,64,0,1,fp8,fp8,0,0.019632000476121902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,4,64,128,1,float16,float16,0,0.013872000078360239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,4,64,128,1,float16,fp8,0,0.014202666779359182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,4,64,128,1,fp8,fp8,0,0.020245333512624104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,4,64,0,1,float16,fp8,0,0.014682666709025701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,4,64,0,1,fp8,fp8,0,0.01978133370478948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,8,64,128,1,float16,float16,0,0.014335999886194864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,8,64,0,1,float16,float16,0,0.013994666437307993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,8,64,128,1,float16,fp8,0,0.014357333381970724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,8,64,128,1,fp8,fp8,0,0.020538666596015293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,8,64,0,1,float16,fp8,0,0.01458666721979777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,8,64,0,1,fp8,fp8,0,0.020330666253964107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,64,64,128,1,float16,float16,0,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,64,64,0,1,float16,float16,0,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,64,64,128,1,float16,fp8,0,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,64,64,128,1,fp8,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,64,64,0,1,float16,fp8,0,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,64,64,0,1,fp8,fp8,0,0.01942933350801468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,1,64,128,1,float16,float16,0,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,1,64,0,1,float16,float16,0,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,1,64,128,1,float16,fp8,0,0.013280000537633896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,1,64,128,1,fp8,fp8,0,0.01883200059334437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,1,64,0,1,float16,fp8,0,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,1,64,0,1,fp8,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,2,64,128,1,float16,float16,0,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,2,64,0,1,float16,float16,0,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,2,64,128,1,float16,fp8,0,0.01351999988158544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,2,64,128,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,2,64,0,1,float16,fp8,0,0.013493333011865616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,2,64,0,1,fp8,fp8,0,0.01876266673207283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,4,64,128,1,float16,float16,0,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,4,64,0,1,float16,float16,0,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,4,64,128,1,float16,fp8,0,0.013658666362365087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,4,64,128,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,4,64,0,1,float16,fp8,0,0.013525333255529404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,4,64,0,1,fp8,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,8,64,128,1,float16,float16,0,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,8,64,0,1,float16,float16,0,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,8,64,128,1,float16,fp8,0,0.013397333522637686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,8,64,128,1,fp8,fp8,0,0.019466667125622433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,8,64,0,1,float16,fp8,0,0.013418667018413544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,8,64,0,1,fp8,fp8,0,0.019941333681344986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,1,64,128,1,float16,float16,0,0.1971199909845988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,1,64,0,1,float16,float16,0,0.19685333967208862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,1,64,128,1,float16,fp8,0,0.19618133703867593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,1,64,128,1,fp8,fp8,0,0.4957706530888875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,1,64,0,1,float16,fp8,0,0.19628800948460898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,1,64,0,1,fp8,fp8,0,0.4951253334681193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,2,64,128,1,float16,float16,0,0.1962613264719645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,2,64,0,1,float16,float16,0,0.19613866011301676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,2,64,128,1,float16,fp8,0,0.19589332739512125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,2,64,128,1,fp8,fp8,0,0.4939200083414714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,2,64,0,1,float16,fp8,0,0.1967839996019999
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,4,64,128,1,float16,float16,0,0.1982133388519287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,2,64,0,1,fp8,fp8,0,0.49643198649088544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,4,64,0,1,float16,float16,0,0.19777067502339682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,4,64,128,1,float16,fp8,0,0.19709332784016928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,4,64,128,1,fp8,fp8,0,0.4925706783930461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,4,64,0,1,float16,fp8,0,0.19808000326156616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,4,64,0,1,fp8,fp8,0,0.493285338083903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,8,64,128,1,float16,float16,0,0.19909334182739258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,8,64,0,1,float16,float16,0,0.2005066672960917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,8,64,128,1,float16,fp8,0,0.19900266329447427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,8,64,0,1,float16,fp8,0,0.19776000579198202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,8,64,128,1,fp8,fp8,0,0.49741331736246747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,8,64,0,1,fp8,fp8,0,0.4936853249867757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,64,64,128,1,float16,float16,0,0.1239466667175293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,64,64,0,1,float16,float16,0,0.1243893305460612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,64,64,128,1,float16,fp8,0,0.12106133500734965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,64,64,128,1,fp8,fp8,0,0.2748960057894389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,64,64,0,1,float16,fp8,0,0.12059199810028076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,64,64,0,1,fp8,fp8,0,0.27565866708755493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,1,64,128,1,float16,float16,0,0.10425600409507751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,1,64,0,1,float16,float16,0,0.10496532917022705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,1,64,128,1,float16,fp8,0,0.1049173374970754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,1,64,128,1,fp8,fp8,0,0.26242132981618244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,1,64,0,1,float16,fp8,0,0.10548800230026245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,1,64,0,1,fp8,fp8,0,0.2605813344319661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,2,64,128,1,float16,float16,0,0.1048479974269867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,2,64,0,1,float16,float16,0,0.10824533303578694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,2,64,128,1,float16,fp8,0,0.10504532853762309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,2,64,128,1,fp8,fp8,0,0.2606079975763957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,2,64,0,1,float16,fp8,0,0.10519466797510783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,2,64,0,1,fp8,fp8,0,0.26152000824610394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,4,64,128,1,float16,float16,0,0.10641599694887798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,4,64,0,1,float16,float16,0,0.10570133725802104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,4,64,128,1,float16,fp8,0,0.10526933272679646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,4,64,128,1,fp8,fp8,0,0.26307199398676556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,4,64,0,1,float16,fp8,0,0.10533866286277771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,4,64,0,1,fp8,fp8,0,0.26096532742182416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,8,64,128,1,float16,float16,0,0.10668266812960307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,8,64,0,1,float16,float16,0,0.10659733414649963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,8,64,128,1,float16,fp8,0,0.10641066233317058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,8,64,128,1,fp8,fp8,0,0.26290667057037354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,8,64,0,1,float16,fp8,0,0.10593600074450175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,8,64,0,1,fp8,fp8,0,0.2627786596616109
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,64,64,128,1,float16,float16,0,0.06744533280531566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,64,64,0,1,float16,float16,0,0.06796800096829732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,64,64,128,1,float16,fp8,0,0.06691200037797292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,64,64,128,1,fp8,fp8,0,0.1504533290863037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,64,64,0,1,float16,fp8,0,0.0666720022757848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,64,64,0,1,fp8,fp8,0,0.14913599689801535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,1,64,128,1,float16,float16,0,0.05641599992911021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,1,64,0,1,float16,float16,0,0.05657066901524862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,1,64,128,1,float16,fp8,0,0.056421334544817604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,1,64,128,1,fp8,fp8,0,0.13945066928863525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,1,64,0,1,float16,fp8,0,0.05694933235645294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,1,64,0,1,fp8,fp8,0,0.13926933209101358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,2,64,128,1,float16,float16,0,0.05699733396371206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,2,64,0,1,float16,float16,0,0.05691199998060862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,2,64,128,1,float16,fp8,0,0.0568800022204717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,2,64,128,1,fp8,fp8,0,0.13937066992123923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,2,64,0,1,float16,fp8,0,0.056736002365748085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,2,64,0,1,fp8,fp8,0,0.13959466417630514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,4,64,128,1,float16,float16,0,0.057760000228881836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,4,64,0,1,float16,float16,0,0.057349334160486855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,4,64,128,1,float16,fp8,0,0.05750933289527893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,4,64,128,1,fp8,fp8,0,0.13928000132242838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,4,64,0,1,float16,fp8,0,0.05719466507434845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,4,64,0,1,fp8,fp8,0,0.14005333185195923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,8,64,128,1,float16,float16,0,0.057946667075157166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,8,64,0,1,float16,float16,0,0.05820799867312113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,8,64,128,1,float16,fp8,0,0.05787200232346853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,8,64,128,1,fp8,fp8,0,0.14102400342623392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,8,64,0,1,float16,fp8,0,0.05857066810131073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,8,64,0,1,fp8,fp8,0,0.14105066657066345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,64,64,128,1,float16,float16,0,0.03719999889532725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,64,64,0,1,float16,float16,0,0.0365280012289683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,64,64,128,1,float16,fp8,0,0.0359946663180987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,64,64,128,1,fp8,fp8,0,0.08227733274300893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,64,64,0,1,float16,fp8,0,0.03629866739114126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,64,64,0,1,fp8,fp8,0,0.08318933347860973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,1,64,128,1,float16,float16,0,0.036362667878468834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,1,64,0,1,float16,float16,0,0.033359999457995095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,1,64,128,1,float16,fp8,0,0.033914667864640556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,1,64,128,1,fp8,fp8,0,0.07833066582679749
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,1,64,0,1,float16,fp8,0,0.03404266635576884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,1,64,0,1,fp8,fp8,0,0.07897066573301952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,2,64,128,1,float16,float16,0,0.03401600072781245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,2,64,0,1,float16,float16,0,0.03349866718053818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,2,64,128,1,float16,fp8,0,0.03381866713364919
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,2,64,128,1,fp8,fp8,0,0.07790933549404144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,2,64,0,1,float16,fp8,0,0.033813332517941795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,2,64,0,1,fp8,fp8,0,0.07827199995517731
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,4,64,128,1,float16,float16,0,0.03475199888149897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,4,64,0,1,float16,float16,0,0.03480000048875809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,4,64,128,1,float16,fp8,0,0.03498133271932602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,4,64,128,1,fp8,fp8,0,0.07861333092053731
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,4,64,0,1,float16,fp8,0,0.03477333237727483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,4,64,0,1,fp8,fp8,0,0.07840533554553986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,8,64,128,1,float16,float16,0,0.03440533330043157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,8,64,0,1,float16,float16,0,0.03480533262093862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,8,64,128,1,float16,fp8,0,0.03468266626199087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,8,64,128,1,fp8,fp8,0,0.07877866427103679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,8,64,0,1,float16,fp8,0,0.0347626656293869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,8,64,0,1,fp8,fp8,0,0.07934933404127757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,64,64,128,1,float16,float16,0,0.023285334308942158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,64,64,0,1,float16,float16,0,0.023503998915354412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,64,64,128,1,float16,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,64,64,128,1,fp8,fp8,0,0.04926399886608124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,64,64,0,1,float16,fp8,0,0.023120000958442688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,64,64,0,1,fp8,fp8,0,0.048469334840774536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,1,64,128,1,float16,float16,0,0.022074667116006214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,1,64,0,1,float16,float16,0,0.022357332209746044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,1,64,128,1,float16,fp8,0,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,1,64,128,1,fp8,fp8,0,0.047135998805363975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,1,64,0,1,float16,fp8,0,0.022074667116006214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,1,64,0,1,fp8,fp8,0,0.047877331574757896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,2,64,128,1,float16,float16,0,0.022218666970729828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,2,64,0,1,float16,float16,0,0.02213866760333379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,2,64,128,1,float16,fp8,0,0.02266666789849599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,2,64,128,1,fp8,fp8,0,0.047610665361086525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,2,64,0,1,float16,fp8,0,0.02256533255179723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,2,64,0,1,fp8,fp8,0,0.04692799846331278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,4,64,128,1,float16,float16,0,0.022869333624839783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,4,64,0,1,float16,float16,0,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,4,64,128,1,float16,fp8,0,0.022858666876951855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,4,64,128,1,fp8,fp8,0,0.048010667165120445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,4,64,0,1,float16,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,4,64,0,1,fp8,fp8,0,0.048138668139775596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,8,64,128,1,float16,float16,0,0.022837333381175995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,8,64,0,1,float16,float16,0,0.022970666488011677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,8,64,128,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,8,64,128,1,fp8,fp8,0,0.047557334105173744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,8,64,0,1,float16,fp8,0,0.022810667753219604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,8,64,0,1,fp8,fp8,0,0.047744000951449074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,64,64,128,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,64,64,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,64,64,128,1,float16,fp8,0,0.01648533344268799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,64,64,128,1,fp8,fp8,0,0.03209600100914637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,64,64,0,1,float16,fp8,0,0.016757333030303318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,64,64,0,1,fp8,fp8,0,0.032416000962257385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,1,64,128,1,float16,float16,0,0.016202667107184727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,1,64,0,1,float16,float16,0,0.01621866722901662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,1,64,128,1,float16,fp8,0,0.016176000237464905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,1,64,128,1,fp8,fp8,0,0.030943999687830608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,1,64,0,1,float16,fp8,0,0.015967999895413715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,1,64,0,1,fp8,fp8,0,0.03136533250411352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,2,64,128,1,float16,float16,0,0.016261332978804905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,2,64,0,1,float16,float16,0,0.01624533285697301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,2,64,128,1,float16,fp8,0,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,2,64,128,1,fp8,fp8,0,0.031354665756225586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,2,64,0,1,float16,fp8,0,0.016389333953460056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,2,64,0,1,fp8,fp8,0,0.03073066721359889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,4,64,128,1,float16,float16,0,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,4,64,0,1,float16,float16,0,0.016341333587964375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,4,64,128,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,4,64,128,1,fp8,fp8,0,0.031504000226656594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,4,64,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,4,64,0,1,fp8,fp8,0,0.03143466760714849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,8,64,128,1,float16,float16,0,0.01657066618402799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,8,64,0,1,float16,float16,0,0.01613866661985715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,8,64,128,1,float16,fp8,0,0.01642666632930438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,8,64,128,1,fp8,fp8,0,0.031221332649389904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,8,64,0,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,8,64,0,1,fp8,fp8,0,0.03149333347876867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,64,64,128,1,float16,float16,0,0.014218666901191076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,64,64,0,1,float16,float16,0,0.014090667168299357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,64,64,128,1,float16,fp8,0,0.014362666755914688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,64,64,128,1,fp8,fp8,0,0.023978665471076965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,64,64,0,1,float16,fp8,0,0.01458666721979777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,64,64,0,1,fp8,fp8,0,0.023344000180562336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,1,64,128,1,float16,float16,0,0.013621332744757334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,1,64,0,1,float16,float16,0,0.013850666582584381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,1,64,128,1,float16,fp8,0,0.014064000298579534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,1,64,128,1,fp8,fp8,0,0.023610666394233704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,1,64,0,1,float16,fp8,0,0.014197333405415217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,1,64,0,1,fp8,fp8,0,0.02366400013367335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,2,64,128,1,float16,float16,0,0.014074667046467463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,2,64,0,1,float16,float16,0,0.013754667093356451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,2,64,128,1,float16,fp8,0,0.0141546664138635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,2,64,128,1,fp8,fp8,0,0.02327999969323476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,2,64,0,1,float16,fp8,0,0.014250667144854864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,2,64,0,1,fp8,fp8,0,0.023498666783173878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,4,64,128,1,float16,float16,0,0.013786666095256805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,4,64,0,1,float16,float16,0,0.014127999544143677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,4,64,128,1,float16,fp8,0,0.0143306665122509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,4,64,128,1,fp8,fp8,0,0.02350933353106181
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,4,64,0,1,float16,fp8,0,0.014197333405415217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,4,64,0,1,fp8,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,8,64,128,1,float16,float16,0,0.01434133326013883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,8,64,0,1,float16,float16,0,0.013770667215188345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,8,64,128,1,float16,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,8,64,128,1,fp8,fp8,0,0.02383466561635335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,8,64,0,1,float16,fp8,0,0.014453332871198654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,8,64,0,1,fp8,fp8,0,0.023573334018389385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,64,64,128,1,float16,float16,0,0.012597333639860153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,64,64,0,1,float16,float16,0,0.012442667037248611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,64,64,128,1,float16,fp8,0,0.013280000537633896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,64,64,128,1,fp8,fp8,0,0.019952000429232914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,64,64,0,1,float16,fp8,0,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,64,64,0,1,fp8,fp8,0,0.01982933282852173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,1,64,128,1,float16,float16,0,0.01211200033624967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,1,64,0,1,float16,float16,0,0.012421333541472753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,1,64,128,1,float16,fp8,0,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,1,64,128,1,fp8,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,1,64,0,1,float16,fp8,0,0.013013333082199097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,1,64,0,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,2,64,128,1,float16,float16,0,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,2,64,0,1,float16,float16,0,0.012479999413092932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,2,64,128,1,float16,fp8,0,0.012645332763592402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,2,64,128,1,fp8,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,2,64,0,1,float16,fp8,0,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,2,64,0,1,fp8,fp8,0,0.019509332875410717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,4,64,128,1,float16,float16,0,0.012698666503032049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,4,64,0,1,float16,float16,0,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,4,64,128,1,float16,fp8,0,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,4,64,128,1,fp8,fp8,0,0.019717333217461903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,4,64,0,1,float16,fp8,0,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,4,64,0,1,fp8,fp8,0,0.019733333339293797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,8,64,128,1,float16,float16,0,0.012613333761692047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,8,64,0,1,float16,float16,0,0.012378666549921036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,8,64,128,1,float16,fp8,0,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,8,64,128,1,fp8,fp8,0,0.019466667125622433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,8,64,0,1,float16,fp8,0,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,8,64,0,1,fp8,fp8,0,0.0201706662774086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,64,64,128,1,float16,float16,0,0.011541333049535751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,64,64,0,1,float16,float16,0,0.011978667229413986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,64,64,128,1,float16,fp8,0,0.012261333564917246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,64,64,128,1,fp8,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,64,64,0,1,float16,fp8,0,0.012202666451533636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,64,64,0,1,fp8,fp8,0,0.018757333358128864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,1,64,128,1,float16,float16,0,0.012037333101034164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,1,64,0,1,float16,float16,0,0.012304000556468964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,1,64,128,1,float16,fp8,0,0.012602667013804117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,1,64,128,1,fp8,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,1,64,0,1,float16,fp8,0,0.01240533341964086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,1,64,0,1,fp8,fp8,0,0.01848000039656957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,2,64,128,1,float16,float16,0,0.01240533341964086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,2,64,0,1,float16,float16,0,0.011802667131026586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,2,64,128,1,float16,fp8,0,0.012298667182525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,2,64,128,1,fp8,fp8,0,0.02233600119749705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,2,64,0,1,float16,fp8,0,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,2,64,0,1,fp8,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,4,64,128,1,float16,float16,0,0.012282667060693106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,4,64,0,1,float16,float16,0,0.01232533281048139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,4,64,128,1,float16,fp8,0,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,4,64,128,1,fp8,fp8,0,0.018778666853904724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,4,64,0,1,float16,fp8,0,0.012373333175977072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,4,64,0,1,fp8,fp8,0,0.0204373337328434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,8,64,128,1,float16,float16,0,0.01232533281048139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,8,64,0,1,float16,float16,0,0.0120319997270902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,8,64,128,1,float16,fp8,0,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,8,64,128,1,fp8,fp8,0,0.018816000471512478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,8,64,0,1,float16,fp8,0,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,8,64,0,1,fp8,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,2,64,128,1,float16,float16,0,0.14512532949447632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,1,64,128,1,float16,float16,0,0.14405333002408346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,1,64,0,1,float16,float16,0,0.14497066537539163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,1,64,128,1,float16,fp8,0,0.14458133776982626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,1,64,128,1,fp8,fp8,0,0.44861332575480145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,1,64,0,1,float16,fp8,0,0.14410133163134256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,1,64,0,1,fp8,fp8,0,0.44553065299987793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,2,64,0,1,float16,float16,0,0.14485333363215128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,2,64,128,1,float16,fp8,0,0.14478400349617004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,2,64,128,1,fp8,fp8,0,0.44994668165842694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,2,64,0,1,float16,fp8,0,0.14450666308403015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,2,64,0,1,fp8,fp8,0,0.4485599994659424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,4,64,128,1,float16,float16,0,0.14596800009409586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,4,64,0,1,float16,float16,0,0.14493866761525473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,4,64,128,1,float16,fp8,0,0.14478400349617004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,4,64,128,1,fp8,fp8,0,0.4466506640116374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,4,64,0,1,float16,fp8,0,0.1453386644522349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,4,64,0,1,fp8,fp8,0,0.44990400473276776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,8,64,128,1,float16,float16,0,0.14623467127482095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,8,64,0,1,float16,float16,0,0.14629866679509482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,8,64,128,1,float16,fp8,0,0.14572266737620035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,8,64,128,1,fp8,fp8,0,0.4460373322168986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,8,64,0,1,float16,fp8,0,0.14586666226387024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,8,64,0,1,fp8,fp8,0,0.4471893310546875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,64,64,128,1,float16,float16,0,0.08250666658083598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,64,64,0,1,float16,float16,0,0.08380267024040222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,64,64,128,1,float16,fp8,0,0.08142933249473572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,64,64,128,1,fp8,fp8,0,0.24198400974273682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,64,64,0,1,float16,fp8,0,0.08182399968306224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,64,64,0,1,fp8,fp8,0,0.23997332652409872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,1,64,128,1,float16,float16,0,0.07587733368078868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,1,64,0,1,float16,float16,0,0.07603199779987335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,1,64,128,1,float16,fp8,0,0.07707733412583669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,1,64,128,1,fp8,fp8,0,0.2327573299407959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,1,64,0,1,float16,fp8,0,0.07706133524576823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,1,64,0,1,fp8,fp8,0,0.2323466738065084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,2,64,128,1,float16,float16,0,0.07667199770609538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,2,64,0,1,float16,float16,0,0.07598400115966797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,2,64,128,1,float16,fp8,0,0.0764213353395462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,2,64,128,1,fp8,fp8,0,0.2323360045750936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,2,64,0,1,float16,fp8,0,0.07748800019423167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,2,64,0,1,fp8,fp8,0,0.2336533268292745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,4,64,128,1,float16,float16,0,0.07805866499741872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,4,64,0,1,float16,float16,0,0.07717333237330119
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,4,64,128,1,float16,fp8,0,0.07714133461316426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,4,64,128,1,fp8,fp8,0,0.23188799619674683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,4,64,0,1,float16,fp8,0,0.07673066854476929
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,4,64,0,1,fp8,fp8,0,0.23321600755055746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,8,64,128,1,float16,float16,0,0.07855466504891713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,8,64,0,1,float16,float16,0,0.07788266738255818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,8,64,128,1,float16,fp8,0,0.07863466441631317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,8,64,128,1,fp8,fp8,0,0.23490132888158163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,8,64,0,1,float16,fp8,0,0.07868266602357228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,8,64,0,1,fp8,fp8,0,0.23292267322540283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,64,64,128,1,float16,float16,0,0.04457066456476847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,64,64,0,1,float16,float16,0,0.04444799820582072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,64,64,128,1,float16,fp8,0,0.043477331598599754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,64,64,128,1,fp8,fp8,0,0.1309866706530253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,64,64,0,1,float16,fp8,0,0.044405331214269005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,64,64,0,1,fp8,fp8,0,0.13190933068593344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,1,64,128,1,float16,float16,0,0.043653334180514015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,1,64,0,1,float16,float16,0,0.043712000052134194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,1,64,128,1,float16,fp8,0,0.04364266494909922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,1,64,128,1,fp8,fp8,0,0.12558399637540182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,1,64,0,1,float16,fp8,0,0.04386133452256521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,1,64,0,1,fp8,fp8,0,0.1253653367360433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,2,64,128,1,float16,float16,0,0.04379733403523763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,2,64,0,1,float16,float16,0,0.04369066655635834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,2,64,128,1,float16,fp8,0,0.043653334180514015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,2,64,128,1,fp8,fp8,0,0.12500799695650736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,2,64,0,1,float16,fp8,0,0.04347200194994608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,2,64,0,1,fp8,fp8,0,0.12568533420562744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,4,64,128,1,float16,float16,0,0.04487466812133789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,4,64,0,1,float16,float16,0,0.04461866617202759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,4,64,128,1,float16,fp8,0,0.044863998889923096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,4,64,128,1,fp8,fp8,0,0.12687466541926065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,4,64,0,1,float16,fp8,0,0.044938668608665466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,4,64,0,1,fp8,fp8,0,0.12552533547083536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,8,64,128,1,float16,float16,0,0.044112001856168113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,8,64,0,1,float16,float16,0,0.04471466441949209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,8,64,128,1,float16,fp8,0,0.044624000787734985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,8,64,128,1,fp8,fp8,0,0.1267680029074351
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,8,64,0,1,float16,fp8,0,0.0444213350613912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,8,64,0,1,fp8,fp8,0,0.12644267082214355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,64,64,128,1,float16,float16,0,0.02757866680622101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,64,64,0,1,float16,float16,0,0.027552001178264618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,64,64,128,1,float16,fp8,0,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,64,64,128,1,fp8,fp8,0,0.07299200197060902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,64,64,0,1,float16,fp8,0,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,64,64,0,1,fp8,fp8,0,0.07218666871388753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,1,64,128,1,float16,float16,0,0.026965332527955372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,1,64,0,1,float16,float16,0,0.02681066592534383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,1,64,128,1,float16,fp8,0,0.02756800005833308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,1,64,128,1,fp8,fp8,0,0.07089599967002869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,1,64,0,1,float16,fp8,0,0.027386667827765148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,1,64,0,1,fp8,fp8,0,0.07167466481526692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,2,64,128,1,float16,float16,0,0.026928000152111053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,2,64,0,1,float16,float16,0,0.026848000784715016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,2,64,128,1,float16,fp8,0,0.027466667195161183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,2,64,128,1,fp8,fp8,0,0.07119999825954437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,2,64,0,1,float16,fp8,0,0.02739733209212621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,2,64,0,1,fp8,fp8,0,0.07121600210666656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,4,64,128,1,float16,float16,0,0.027610667049884796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,4,64,0,1,float16,float16,0,0.027813332776228588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,4,64,128,1,float16,fp8,0,0.027802666028340656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,4,64,128,1,fp8,fp8,0,0.0711413323879242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,4,64,0,1,float16,fp8,0,0.028079998989899952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,4,64,0,1,fp8,fp8,0,0.07208533088366191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,8,64,128,1,float16,float16,0,0.027514666318893433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,8,64,0,1,float16,float16,0,0.02743999908367793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,8,64,128,1,float16,fp8,0,0.027866666515668232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,8,64,128,1,fp8,fp8,0,0.07189866900444031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,8,64,0,1,float16,fp8,0,0.027749332288901012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,8,64,0,1,fp8,fp8,0,0.0716480016708374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,64,64,128,1,float16,float16,0,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,64,64,0,1,float16,float16,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,64,64,128,1,float16,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,64,64,128,1,fp8,fp8,0,0.044490665197372437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,64,64,0,1,float16,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,64,64,0,1,fp8,fp8,0,0.044549331068992615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,1,64,128,1,float16,float16,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,1,64,0,1,float16,float16,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,1,64,128,1,float16,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,1,64,128,1,fp8,fp8,0,0.0440586656332016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,1,64,0,1,float16,fp8,0,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,1,64,0,1,fp8,fp8,0,0.04346133271853129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,2,64,128,1,float16,float16,0,0.024879999458789825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,2,64,0,1,float16,float16,0,0.018810667097568512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,2,64,128,1,float16,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,2,64,128,1,fp8,fp8,0,0.04385066529115041
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,2,64,0,1,float16,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,2,64,0,1,fp8,fp8,0,0.0444213350613912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,4,64,128,1,float16,float16,0,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,4,64,0,1,float16,float16,0,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,4,64,128,1,float16,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,4,64,128,1,fp8,fp8,0,0.04390400151411692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,4,64,0,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,4,64,0,1,fp8,fp8,0,0.04409599800904592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,8,64,128,1,float16,float16,0,0.018746666610240936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,8,64,0,1,float16,float16,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,8,64,128,1,float16,fp8,0,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,8,64,128,1,fp8,fp8,0,0.04445866743723551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,8,64,0,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,8,64,0,1,fp8,fp8,0,0.043978666265805565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,64,64,128,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,64,64,0,1,float16,float16,0,0.014581333845853806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,64,64,128,1,float16,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,64,64,128,1,fp8,fp8,0,0.03091199944416682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,64,64,0,1,float16,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,1,64,0,1,float16,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,64,64,0,1,fp8,fp8,0,0.03038399914900462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,1,64,128,1,float16,float16,0,0.014469332993030548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,1,64,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,1,64,128,1,float16,fp8,0,0.014783999572197596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,1,64,128,1,fp8,fp8,0,0.029824001093705494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,1,64,0,1,fp8,fp8,0,0.029813334345817566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,2,64,128,1,float16,float16,0,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,2,64,0,1,float16,float16,0,0.014783999572197596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,2,64,128,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,2,64,128,1,fp8,fp8,0,0.029839999973773956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,2,64,0,1,float16,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,2,64,0,1,fp8,fp8,0,0.02976000060637792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,4,64,128,1,float16,float16,0,0.01452800010641416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,4,64,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,4,64,128,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,4,64,128,1,fp8,fp8,0,0.029946667452653248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,4,64,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,4,64,0,1,fp8,fp8,0,0.03025600065787633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,8,64,128,1,float16,float16,0,0.01431999976436297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,8,64,0,1,float16,float16,0,0.01422400027513504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,8,64,128,1,float16,fp8,0,0.0141546664138635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,8,64,128,1,fp8,fp8,0,0.02938666691382726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,8,64,0,1,float16,fp8,0,0.014645333091417948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,8,64,0,1,fp8,fp8,0,0.02932800104220708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,64,64,128,1,float16,float16,0,0.012639999389648438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,64,64,0,1,float16,float16,0,0.01268799975514412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,64,64,128,1,float16,fp8,0,0.012634667257467905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,64,64,128,1,fp8,fp8,0,0.02207999924818675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,64,64,0,1,float16,fp8,0,0.012037333101034164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,64,64,0,1,fp8,fp8,0,0.022277332842350006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,1,64,128,1,float16,float16,0,0.012560000022252401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,1,64,0,1,float16,float16,0,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,1,64,128,1,float16,fp8,0,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,1,64,128,1,fp8,fp8,0,0.022485333184401195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,1,64,0,1,float16,fp8,0,0.013386666774749756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,1,64,0,1,fp8,fp8,0,0.02250133454799652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,2,64,128,1,float16,float16,0,0.012261333564917246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,2,64,0,1,float16,float16,0,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,2,64,128,1,float16,fp8,0,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,2,64,128,1,fp8,fp8,0,0.02275199939807256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,2,64,0,1,float16,fp8,0,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,4,64,0,1,float16,fp8,0,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,2,64,0,1,fp8,fp8,0,0.022618666291236877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,4,64,128,1,float16,float16,0,0.013269333789745966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,4,64,0,1,float16,float16,0,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,4,64,128,1,float16,fp8,0,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,4,64,128,1,fp8,fp8,0,0.022618666291236877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,4,64,0,1,fp8,fp8,0,0.02271466702222824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,8,64,128,1,float16,float16,0,0.012629333883523941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,8,64,0,1,float16,float16,0,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,64,64,0,1,float16,float16,0,0.011909333368142446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,8,64,128,1,float16,fp8,0,0.013189333180586496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,8,64,128,1,fp8,fp8,0,0.022821334501107533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,8,64,0,1,float16,fp8,0,0.013562666873137156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,8,64,0,1,fp8,fp8,0,0.022469334304332733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,64,64,128,1,float16,float16,0,0.01202133297920227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,64,64,128,1,float16,fp8,0,0.0122079998254776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,64,64,128,1,fp8,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,64,64,0,1,float16,fp8,0,0.012714666624863943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,64,64,0,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,1,64,128,1,float16,float16,0,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,1,64,0,1,float16,float16,0,0.012378666549921036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,1,64,128,1,float16,fp8,0,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,1,64,128,1,fp8,fp8,0,0.019472000499566395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,1,64,0,1,float16,fp8,0,0.0124746672809124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,1,64,0,1,fp8,fp8,0,0.020010666300853092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,2,64,128,1,float16,float16,0,0.012362666428089142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,2,64,0,1,float16,float16,0,0.012053333222866058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,2,64,128,1,float16,fp8,0,0.012266666938861212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,2,64,128,1,fp8,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,2,64,0,1,float16,fp8,0,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,2,64,0,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,4,64,128,1,float16,float16,0,0.012341332932313284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,4,64,0,1,float16,float16,0,0.012085333466529846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,4,64,128,1,float16,fp8,0,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,4,64,128,1,fp8,fp8,0,0.019493332753578823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,4,64,0,1,float16,fp8,0,0.016544000556071598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,4,64,0,1,fp8,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,8,64,128,1,float16,float16,0,0.012149333953857422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,8,64,0,1,float16,float16,0,0.012613333761692047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,8,64,128,1,float16,fp8,0,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,8,64,128,1,fp8,fp8,0,0.019717333217461903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,8,64,0,1,float16,fp8,0,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,8,64,0,1,fp8,fp8,0,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,64,64,128,1,float16,float16,0,0.011402666568756104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,64,64,0,1,float16,float16,0,0.011567999919255575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,64,64,128,1,float16,fp8,0,0.011733333269755045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,64,64,128,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,64,64,0,1,float16,fp8,0,0.012213333199421564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,64,64,0,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,1,64,128,1,float16,float16,0,0.014607999473810196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,1,64,0,1,float16,float16,0,0.012128000458081564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,1,64,128,1,float16,fp8,0,0.011802667131026586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,1,64,128,1,fp8,fp8,0,0.018629333625237148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,1,64,0,1,float16,fp8,0,0.0120319997270902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,1,64,0,1,fp8,fp8,0,0.018618666877349217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,2,64,128,1,float16,float16,0,0.011829332758982977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,2,64,0,1,float16,float16,0,0.01209066684047381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,2,64,128,1,float16,fp8,0,0.01231466606259346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,2,64,128,1,fp8,fp8,0,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,2,64,0,1,float16,fp8,0,0.012133333832025528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,2,64,0,1,fp8,fp8,0,0.018650667121013004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,4,64,128,1,float16,float16,0,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,4,64,0,1,float16,float16,0,0.011850666254758835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,4,64,128,1,float16,fp8,0,0.01221866657336553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,4,64,128,1,fp8,fp8,0,0.018735999862353008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,4,64,0,1,float16,fp8,0,0.012442667037248611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,4,64,0,1,fp8,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,8,64,128,1,float16,float16,0,0.011999999483426413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,8,64,0,1,float16,float16,0,0.011786667009194693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,8,64,128,1,float16,fp8,0,0.012362666428089142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,8,64,128,1,fp8,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,8,64,0,1,float16,fp8,0,0.012213333199421564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,8,64,0,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,1,64,128,1,float16,float16,0,4.275498708089192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,1,64,128,1,float16,fp8,0,4.2081654866536455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,1,64,128,1,fp8,fp8,0,5.505919774373372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,2,64,128,1,float16,float16,0,4.315093358357747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,2,64,128,1,float16,fp8,0,4.281493186950684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,2,64,128,1,fp8,fp8,0,5.573050816853841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,4,64,128,1,float16,float16,0,4.357125282287598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,1,64,0,1,float16,float16,0,28.626612345377605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,1,64,0,1,fp8,fp8,0,25.772069295247395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,4,64,128,1,float16,fp8,0,4.305045445760091
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,1,64,0,1,float16,fp8,0,28.148096720377605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,4,64,128,1,fp8,fp8,0,5.628053029378255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,2,64,0,1,float16,float16,0,28.62818654378255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,8,64,128,1,float16,float16,0,4.3840586344401045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,2,64,0,1,fp8,fp8,0,25.805498758951824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,2,64,0,1,float16,fp8,0,28.666768391927082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,8,64,128,1,float16,fp8,0,4.350314776102702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,4,64,0,1,float16,float16,0,29.22447967529297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,8,64,128,1,fp8,fp8,0,5.607541402180989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,48,64,128,1,float16,float16,0,2.39900795618693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,48,64,128,1,float16,fp8,0,2.3914880752563477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,4,64,0,1,fp8,fp8,0,26.022288004557293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,48,64,128,1,fp8,fp8,0,3.0994399388631186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,4,64,0,1,float16,fp8,0,28.976287841796875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,8,64,0,1,float16,float16,0,28.79522705078125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,48,64,0,1,float16,float16,0,14.897263844807943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,1,64,128,1,float16,float16,0,2.1668373743693032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,1,64,128,1,float16,fp8,0,2.1450506846110025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,1,64,128,1,fp8,fp8,0,2.823941230773926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,48,64,0,1,float16,fp8,0,14.558549245198568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,8,64,0,1,float16,fp8,0,28.943382263183594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,8,64,0,1,fp8,fp8,0,25.891499837239582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,2,64,128,1,float16,float16,0,2.218623956044515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,48,64,0,1,fp8,fp8,0,13.377403259277344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,2,64,128,1,float16,fp8,0,2.174389362335205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,1,64,0,1,float16,float16,0,14.466687520345053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,2,64,128,1,fp8,fp8,0,2.8526881535847983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,1,64,0,1,fp8,fp8,0,13.083508809407553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,1,64,0,1,float16,fp8,0,14.377515157063803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,4,64,128,1,float16,float16,0,2.2159573237101235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,4,64,128,1,float16,fp8,0,2.1774293581644693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,2,64,0,1,float16,float16,0,14.21517817179362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,4,64,128,1,fp8,fp8,0,2.8690293629964194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,2,64,0,1,float16,fp8,0,14.506037394205729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,2,64,0,1,fp8,fp8,0,13.006319681803385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,4,64,0,1,float16,float16,0,14.491989135742188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,8,64,128,1,float16,float16,0,2.2235466639200845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,8,64,128,1,float16,fp8,0,2.1917972564697266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,48,64,128,1,float16,float16,0,1.252079963684082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,8,64,128,1,fp8,fp8,0,2.9136692682902017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,48,64,128,1,float16,fp8,0,1.2712000211079915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,48,64,128,1,fp8,fp8,0,1.6294293403625488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,4,64,0,1,fp8,fp8,0,13.033808390299479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,4,64,0,1,float16,fp8,0,14.304058074951172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,48,64,0,1,float16,float16,0,7.348965326944987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,1,64,128,1,float16,float16,0,1.1720586617787678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,8,64,0,1,float16,float16,0,14.218560536702475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,1,64,128,1,float16,fp8,0,1.1613813241322835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,1,64,128,1,fp8,fp8,0,1.4887839953104656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,8,64,0,1,fp8,fp8,0,13.05081049601237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,8,64,0,1,float16,fp8,0,14.428805033365885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,48,64,0,1,fp8,fp8,0,6.768400192260742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,48,64,0,1,float16,fp8,0,7.348074595133464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,2,64,128,1,float16,float16,0,1.1730026404062908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,2,64,128,1,float16,fp8,0,1.1680320103963215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,1,64,0,1,float16,float16,0,7.3059946695963545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,2,64,128,1,fp8,fp8,0,1.4962399800618489
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,1,64,0,1,float16,fp8,0,7.240730921427409
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,1,64,0,1,fp8,fp8,0,6.6536000569661455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,4,64,128,1,float16,float16,0,1.1897599697113037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,2,64,0,1,float16,float16,0,7.307157516479492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,2,64,0,1,fp8,fp8,0,6.616495768229167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,2,64,0,1,float16,fp8,0,7.26852289835612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,4,64,128,1,float16,fp8,0,1.1606293519337971
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,4,64,128,1,fp8,fp8,0,1.5142879486083984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,4,64,0,1,float16,float16,0,7.291274388631185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,4,64,0,1,float16,fp8,0,7.276650746663411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,8,64,128,1,float16,float16,0,1.1882293224334717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,4,64,0,1,fp8,fp8,0,6.670970916748047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,48,64,0,1,float16,float16,0,3.9391520818074546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,8,64,128,1,float16,fp8,0,1.1738613446553547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,8,64,0,1,float16,float16,0,7.374474843343099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,8,64,128,1,fp8,fp8,0,1.5218507448832195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,8,64,0,1,float16,fp8,0,7.224917093912761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,48,64,128,1,float16,float16,0,0.8234612941741943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,8,64,0,1,fp8,fp8,0,6.624501546223958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,48,64,128,1,float16,fp8,0,0.8200906912485758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,48,64,128,1,fp8,fp8,0,0.9909866650899252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,48,64,0,1,float16,fp8,0,3.9354985555013022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,48,64,0,1,fp8,fp8,0,3.5798559188842773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,1,64,128,1,float16,float16,0,0.8224799633026123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,1,64,128,1,float16,fp8,0,0.8230613072713217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,1,64,0,1,float16,float16,0,3.8841867446899414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,1,64,128,1,fp8,fp8,0,0.9838346640268961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,1,64,0,1,float16,fp8,0,3.9169174830118814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,2,64,128,1,float16,float16,0,0.8218719959259033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,1,64,0,1,fp8,fp8,0,3.597983996073405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,2,64,128,1,float16,fp8,0,0.8233653704325358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,2,64,0,1,float16,float16,0,3.9290825525919595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,2,64,128,1,fp8,fp8,0,0.9827626546223959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,4,64,128,1,float16,float16,0,0.8221279780069987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,2,64,0,1,float16,fp8,0,3.8841705322265625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,2,64,0,1,fp8,fp8,0,3.5910186767578125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,4,64,128,1,float16,fp8,0,0.8228053251902262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,4,64,128,1,fp8,fp8,0,0.98526930809021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,4,64,0,1,float16,float16,0,3.9196160634358725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,4,64,0,1,fp8,fp8,0,3.586693445841471
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,8,64,128,1,float16,float16,0,0.8222613334655762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,4,64,0,1,float16,fp8,0,3.9192641576131186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,8,64,128,1,float16,fp8,0,0.8225599924723307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,8,64,128,1,fp8,fp8,0,0.985322634379069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,8,64,0,1,float16,float16,0,3.910559972127279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,8,64,0,1,float16,fp8,0,3.9217707316080728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,8,64,0,1,fp8,fp8,0,3.5808693567911782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,1,64,128,1,float16,float16,0,3.1796798706054688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,1,64,128,1,float16,fp8,0,3.1242825190226235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,1,64,128,1,fp8,fp8,0,4.127034823099772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,2,64,128,1,float16,float16,0,3.24837334950765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,1,64,0,1,float16,float16,0,16.73641586303711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,1,64,0,1,fp8,fp8,0,15.01629384358724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,1,64,0,1,float16,fp8,0,16.675904591878254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,2,64,128,1,float16,fp8,0,3.197765350341797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,2,64,128,1,fp8,fp8,0,4.155424118041992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,2,64,0,1,float16,float16,0,16.70581817626953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,4,64,128,1,float16,float16,0,3.2597173055013022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,4,64,128,1,float16,fp8,0,3.2253545125325522
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,2,64,0,1,float16,fp8,0,16.75202178955078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,2,64,0,1,fp8,fp8,0,15.084779103597006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,4,64,128,1,fp8,fp8,0,4.177674611409505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,4,64,0,1,float16,float16,0,16.7161865234375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,8,64,128,1,float16,float16,0,3.2887627283732095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,8,64,128,1,float16,fp8,0,3.2581920623779297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,48,64,128,1,float16,float16,0,1.7860107421875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,8,64,128,1,fp8,fp8,0,4.217151959737142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,4,64,0,1,float16,fp8,0,16.826544443766277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,48,64,128,1,float16,fp8,0,1.788474718729655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,48,64,128,1,fp8,fp8,0,2.328224023183187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,4,64,0,1,fp8,fp8,0,15.18240483601888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,48,64,0,1,float16,float16,0,8.529050827026367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,1,64,128,1,float16,float16,0,1.624058723449707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,1,64,128,1,float16,fp8,0,1.6127947171529133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,8,64,0,1,float16,float16,0,16.87769063313802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,48,64,0,1,float16,fp8,0,8.630474726359049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,48,64,0,1,fp8,fp8,0,7.823984146118164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,8,64,0,1,fp8,fp8,0,15.14260228474935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,8,64,0,1,float16,fp8,0,16.47823969523112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,1,64,128,1,fp8,fp8,0,2.0933547019958496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,2,64,128,1,float16,float16,0,1.6424214045206706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,1,64,0,1,float16,float16,0,8.392506917317709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,2,64,128,1,float16,fp8,0,1.6032427151997883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,2,64,128,1,fp8,fp8,0,2.1223947207132974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,4,64,128,1,float16,float16,0,1.65175994237264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,1,64,0,1,fp8,fp8,0,7.638720194498698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,4,64,128,1,float16,fp8,0,1.6250933011372883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,1,64,0,1,float16,fp8,0,8.362335840861002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,2,64,0,1,float16,float16,0,8.332773208618164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,4,64,128,1,fp8,fp8,0,2.145557403564453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,8,64,128,1,float16,float16,0,1.657434622446696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,2,64,0,1,fp8,fp8,0,7.602362950642903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,8,64,128,1,float16,fp8,0,1.6408960024515789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,2,64,0,1,float16,fp8,0,8.39905039469401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,4,64,0,1,float16,float16,0,8.350559870402018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,8,64,128,1,fp8,fp8,0,2.1652639706929526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,48,64,128,1,float16,float16,0,0.9463520050048828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,4,64,0,1,fp8,fp8,0,7.595408121744792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,48,64,128,1,float16,fp8,0,0.9634613196055094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,4,64,0,1,float16,fp8,0,8.385882695515951
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,48,64,128,1,fp8,fp8,0,1.2383519808451335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,8,64,0,1,float16,float16,0,8.349711736043295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,48,64,0,1,float16,float16,0,4.333898544311523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,1,64,128,1,float16,float16,0,0.8930933475494385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,48,64,0,1,float16,fp8,0,4.315845489501953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,8,64,0,1,fp8,fp8,0,7.640170415242513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,48,64,0,1,fp8,fp8,0,4.026026725769043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,8,64,0,1,float16,fp8,0,8.448111852010092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,1,64,0,1,float16,float16,0,4.373023986816406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,1,64,128,1,float16,fp8,0,0.8790667057037354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,1,64,128,1,fp8,fp8,0,1.132581313451131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,2,64,128,1,float16,float16,0,0.8865973154703776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,1,64,0,1,float16,fp8,0,4.363386789957683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,1,64,0,1,fp8,fp8,0,3.876938819885254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,2,64,0,1,float16,float16,0,4.308874766031901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,2,64,128,1,float16,fp8,0,0.8721333344777426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,2,64,128,1,fp8,fp8,0,1.1291893323262532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,4,64,128,1,float16,float16,0,0.889514684677124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,4,64,128,1,float16,fp8,0,0.876367966334025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,2,64,0,1,float16,fp8,0,4.295743942260742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,2,64,0,1,fp8,fp8,0,3.9208854039510093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,4,64,128,1,fp8,fp8,0,1.1412053108215332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,4,64,0,1,float16,float16,0,4.3207041422526045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,8,64,128,1,float16,float16,0,0.8930293718973795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,4,64,0,1,float16,fp8,0,4.313082695007324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,8,64,128,1,float16,fp8,0,0.8857386906941732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,8,64,128,1,fp8,fp8,0,1.1416320006052654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,4,64,0,1,fp8,fp8,0,3.9072265625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,8,64,0,1,float16,float16,0,4.267098744710286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,48,64,128,1,float16,float16,0,0.624016006787618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,8,64,0,1,float16,fp8,0,4.306111971537272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,48,64,128,1,float16,fp8,0,0.6218986511230469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,8,64,0,1,fp8,fp8,0,3.930906613667806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,48,64,128,1,fp8,fp8,0,0.7487200101216634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,48,64,0,1,float16,float16,0,2.3461333910624185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,48,64,0,1,float16,fp8,0,2.3449546496073403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,48,64,0,1,fp8,fp8,0,2.1678454081217446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,1,64,128,1,float16,float16,0,0.6212533315022787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,1,64,0,1,float16,float16,0,2.3426507314046225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,1,64,128,1,float16,fp8,0,0.6210986773173014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,1,64,128,1,fp8,fp8,0,0.7474453449249268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,1,64,0,1,float16,fp8,0,2.347594738006592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,1,64,0,1,fp8,fp8,0,2.164672056833903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,2,64,128,1,float16,float16,0,0.62117866675059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,2,64,0,1,float16,float16,0,2.343957265218099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,2,64,128,1,float16,fp8,0,0.6211573282877604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,2,64,128,1,fp8,fp8,0,0.7470773061116537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,2,64,0,1,float16,fp8,0,2.328106721242269
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,4,64,128,1,float16,fp8,0,0.6219946543375651
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,4,64,128,1,float16,float16,0,0.6232426563898722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,2,64,0,1,fp8,fp8,0,2.1611413955688477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,4,64,128,1,fp8,fp8,0,0.7465226650238037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,4,64,0,1,float16,float16,0,2.345797379811605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,8,64,128,1,float16,float16,0,0.6209760109583536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,4,64,0,1,float16,fp8,0,2.329066594441732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,8,64,128,1,float16,fp8,0,0.6228213310241699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,4,64,0,1,fp8,fp8,0,2.1615360577901206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,8,64,128,1,fp8,fp8,0,0.7473386923472086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,8,64,0,1,float16,float16,0,2.3387893040974936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,8,64,0,1,float16,fp8,0,2.3320107460021973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,8,64,0,1,fp8,fp8,0,2.1534239451090493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,1,64,128,1,float16,float16,0,2.6291839281717935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,1,64,128,1,float16,fp8,0,2.5979572931925454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,1,64,128,1,fp8,fp8,0,3.4294827779134116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,2,64,128,1,float16,float16,0,2.680645306905111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,1,64,0,1,float16,float16,0,11.945701599121094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,1,64,0,1,fp8,fp8,0,10.726757049560547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,1,64,0,1,float16,fp8,0,11.891770680745443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,2,64,128,1,float16,fp8,0,2.64412260055542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,2,64,128,1,fp8,fp8,0,3.4917707443237305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,4,64,128,1,float16,float16,0,2.6872641245524087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,2,64,0,1,float16,float16,0,11.800655364990234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,2,64,0,1,float16,fp8,0,11.847408294677734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,2,64,0,1,fp8,fp8,0,10.836063385009766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,4,64,128,1,float16,fp8,0,2.659050623575846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,4,64,128,1,fp8,fp8,0,3.4792000452677407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,8,64,128,1,float16,float16,0,2.743765195210775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,4,64,0,1,float16,float16,0,11.72433598836263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,8,64,128,1,float16,fp8,0,2.7014134724934897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,8,64,128,1,fp8,fp8,0,3.510997454325358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,48,64,128,1,float16,float16,0,1.4775892893473308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,48,64,128,1,float16,fp8,0,1.4828054110209148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,4,64,0,1,float16,fp8,0,11.951530456542969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,48,64,128,1,fp8,fp8,0,1.9366505940755208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,4,64,0,1,fp8,fp8,0,10.790091196695963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,48,64,0,1,float16,float16,0,6.07809575398763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,8,64,0,1,float16,float16,0,11.922927856445312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,8,64,0,1,fp8,fp8,0,10.804826100667318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,8,64,0,1,float16,fp8,0,11.809466044108072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,48,64,0,1,float16,fp8,0,6.085941314697266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,1,64,128,1,float16,float16,0,1.3562506039937336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,1,64,128,1,float16,fp8,0,1.342101256052653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,48,64,0,1,fp8,fp8,0,5.648831685384114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,1,64,0,1,float16,float16,0,6.027119954427083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,1,64,128,1,fp8,fp8,0,1.7535200119018555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,1,64,0,1,float16,fp8,0,5.934752146402995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,2,64,128,1,float16,float16,0,1.3650879859924316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,1,64,0,1,fp8,fp8,0,5.404186884562175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,2,64,128,1,float16,fp8,0,1.3380799293518066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,2,64,0,1,float16,float16,0,5.968874613444011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,2,64,128,1,fp8,fp8,0,1.7530080477396648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,4,64,128,1,float16,float16,0,1.3634506861368816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,4,64,128,1,float16,fp8,0,1.3434185981750488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,2,64,0,1,fp8,fp8,0,5.453413645426433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,2,64,0,1,float16,fp8,0,6.052789052327474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,4,64,128,1,fp8,fp8,0,1.770426591237386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,4,64,0,1,float16,float16,0,6.007354736328125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,8,64,128,1,float16,float16,0,1.3717759450276692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,8,64,128,1,float16,fp8,0,1.3604213396708171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,48,64,128,1,float16,float16,0,0.7879412968953451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,8,64,128,1,fp8,fp8,0,1.789903958638509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,4,64,0,1,fp8,fp8,0,5.434378941853841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,4,64,0,1,float16,fp8,0,5.898207982381185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,48,64,128,1,float16,fp8,0,0.7997333208719889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,48,64,128,1,fp8,fp8,0,1.0346399943033855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,48,64,0,1,float16,float16,0,3.112880071004232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,1,64,128,1,float16,float16,0,0.7396106719970703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,8,64,0,1,float16,float16,0,5.964544296264648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,8,64,0,1,fp8,fp8,0,5.487024307250977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,8,64,0,1,float16,fp8,0,5.953727722167969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,1,64,128,1,float16,fp8,0,0.7382826805114746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,1,64,128,1,fp8,fp8,0,0.9439093271891276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,48,64,0,1,float16,fp8,0,3.1544907887776694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,48,64,0,1,fp8,fp8,0,2.884415944417318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,1,64,0,1,float16,float16,0,3.0622453689575195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,2,64,128,1,float16,float16,0,0.7476533253987631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,2,64,128,1,float16,fp8,0,0.731765349706014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,2,64,128,1,fp8,fp8,0,0.9535413583119711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,1,64,0,1,float16,fp8,0,3.052271842956543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,1,64,0,1,fp8,fp8,0,2.817983945210775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,2,64,0,1,float16,float16,0,3.0899359385172525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,2,64,0,1,float16,fp8,0,3.0342400868733725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,2,64,0,1,fp8,fp8,0,2.7962719599405923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,4,64,128,1,float16,float16,0,0.7446346282958984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,4,64,128,1,float16,fp8,0,0.7351360321044922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,4,64,0,1,float16,float16,0,3.0556853612264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,4,64,128,1,fp8,fp8,0,0.9551466306050619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,4,64,0,1,float16,fp8,0,3.059306780497233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,4,64,0,1,fp8,fp8,0,2.804154713948568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,8,64,128,1,float16,float16,0,0.744698683420817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,8,64,128,1,float16,fp8,0,0.7409706910451254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,8,64,0,1,float16,float16,0,3.1044692993164062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,8,64,128,1,fp8,fp8,0,0.9617173671722412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,8,64,0,1,float16,fp8,0,3.049663861592611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,8,64,0,1,fp8,fp8,0,2.8305654525756836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,48,64,128,1,float16,float16,0,0.5241920153299967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,48,64,128,1,float16,fp8,0,0.5230293273925781
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,48,64,0,1,float16,float16,0,1.7049813270568848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,48,64,128,1,fp8,fp8,0,0.6268693208694458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,48,64,0,1,float16,fp8,0,1.6942240397135417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,48,64,0,1,fp8,fp8,0,1.527407964070638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,1,64,128,1,float16,float16,0,0.5236159960428873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,2,64,128,1,float16,float16,0,0.5221279859542847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,1,64,128,1,float16,fp8,0,0.5239200194676717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,1,64,0,1,float16,float16,0,1.7090880076090496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,1,64,128,1,fp8,fp8,0,0.626858671506246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,1,64,0,1,float16,fp8,0,1.7093332608540852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,1,64,0,1,fp8,fp8,0,1.5159039497375488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,2,64,128,1,float16,fp8,0,0.524346669514974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,4,64,128,1,float16,float16,0,0.5214506785074869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,2,64,128,1,fp8,fp8,0,0.6297706762949625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,2,64,0,1,float16,float16,0,1.7075626055399578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,2,64,0,1,float16,fp8,0,1.7075732549031575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,2,64,0,1,fp8,fp8,0,1.5290506680806477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,4,64,128,1,float16,fp8,0,0.5219466686248779
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,4,64,0,1,float16,float16,0,1.7086399396260579
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,4,64,128,1,fp8,fp8,0,0.6308159828186035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,4,64,0,1,float16,fp8,0,1.7041920026143391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,4,64,0,1,fp8,fp8,0,1.5167892773946126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,8,64,128,1,float16,float16,0,0.5210933287938436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,8,64,128,1,float16,fp8,0,0.5242240031560262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,8,64,0,1,float16,float16,0,1.7092852592468262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,8,64,128,1,fp8,fp8,0,0.6259573300679525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,8,64,0,1,float16,fp8,0,1.7088106473286946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,8,64,0,1,fp8,fp8,0,1.527018706003825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,1,64,128,1,float16,float16,0,4.190229415893555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,1,64,128,1,float16,fp8,0,4.140656153361003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,1,64,128,1,fp8,fp8,0,5.410794576009114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,2,64,128,1,float16,float16,0,4.271642684936523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,1,64,0,1,float16,float16,0,15.678960164388021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,2,64,128,1,float16,fp8,0,4.212826728820801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,2,64,128,1,fp8,fp8,0,5.490479787190755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,1,64,0,1,fp8,fp8,0,14.168079376220703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,4,64,128,1,float16,float16,0,4.327306747436523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,1,64,0,1,float16,fp8,0,15.613780975341797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,2,64,0,1,float16,float16,0,15.658004760742188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,4,64,128,1,float16,fp8,0,4.2542985280354815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,2,64,0,1,fp8,fp8,0,14.235893249511719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,2,64,0,1,float16,fp8,0,15.653765360514322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,4,64,128,1,fp8,fp8,0,5.47929573059082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,8,64,128,1,float16,float16,0,4.336336135864258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,8,64,128,1,float16,fp8,0,4.273898760477702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,8,64,128,1,fp8,fp8,0,5.5627091725667315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,4,64,0,1,float16,float16,0,15.532581329345703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,48,64,128,1,float16,float16,0,2.360367933909098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,48,64,128,1,float16,fp8,0,2.346277395884196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,4,64,0,1,fp8,fp8,0,14.27673594156901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,4,64,0,1,float16,fp8,0,15.72204844156901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,48,64,128,1,fp8,fp8,0,3.042384147644043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,8,64,0,1,float16,float16,0,15.964591979980469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,48,64,0,1,float16,float16,0,8.076789220174154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,1,64,128,1,float16,float16,0,2.113584041595459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,8,64,0,1,float16,fp8,0,15.917252858479818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,1,64,128,1,float16,fp8,0,2.0642453829447427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,8,64,0,1,fp8,fp8,0,14.30123774210612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,1,64,128,1,fp8,fp8,0,2.730031967163086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,48,64,0,1,fp8,fp8,0,7.442933400472005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,48,64,0,1,float16,fp8,0,8.036346435546875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,2,64,128,1,float16,float16,0,2.1318880716959634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,2,64,128,1,float16,fp8,0,2.1013760566711426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,1,64,0,1,float16,float16,0,7.858959833780925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,2,64,128,1,fp8,fp8,0,2.7527147928873696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,1,64,0,1,fp8,fp8,0,7.165733337402344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,1,64,0,1,float16,fp8,0,7.816191991170247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,4,64,128,1,float16,float16,0,2.1401119232177734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,4,64,128,1,float16,fp8,0,2.1012214024861655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,2,64,0,1,float16,float16,0,7.843461354573567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,4,64,128,1,fp8,fp8,0,2.7876052856445312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,2,64,0,1,fp8,fp8,0,7.1593278249104815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,2,64,0,1,float16,fp8,0,7.796517054239909
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,4,64,0,1,float16,float16,0,7.8348642985026045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,4,64,0,1,float16,fp8,0,7.882901509602864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,8,64,128,1,float16,float16,0,2.1430187225341797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,8,64,128,1,float16,fp8,0,2.116485277811686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,48,64,128,1,float16,float16,0,1.1924373308817546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,8,64,128,1,fp8,fp8,0,2.797226587931315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,4,64,0,1,fp8,fp8,0,7.172320048014323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,48,64,128,1,float16,fp8,0,1.1885546843210857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,48,64,128,1,fp8,fp8,0,1.5440799395243328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,48,64,0,1,float16,float16,0,4.069504102071126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,1,64,128,1,float16,float16,0,1.087936004002889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,8,64,0,1,float16,float16,0,7.849285125732422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,8,64,0,1,fp8,fp8,0,7.205637613932292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,48,64,0,1,float16,fp8,0,4.091775894165039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,8,64,0,1,float16,fp8,0,7.822890599568685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,48,64,0,1,fp8,fp8,0,3.762842814127604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,1,64,128,1,float16,fp8,0,1.0701813697814941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,1,64,128,1,fp8,fp8,0,1.4113972981770833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,1,64,0,1,float16,float16,0,4.007952054341634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,2,64,128,1,float16,float16,0,1.0983893076578777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,2,64,128,1,float16,fp8,0,1.073520024617513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,2,64,128,1,fp8,fp8,0,1.4164533615112305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,1,64,0,1,float16,fp8,0,3.9757280349731445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,1,64,0,1,fp8,fp8,0,3.6430559158325195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,4,64,128,1,float16,float16,0,1.0956532955169678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,4,64,128,1,float16,fp8,0,1.0857493082682292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,2,64,0,1,float16,float16,0,4.044373194376628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,4,64,128,1,fp8,fp8,0,1.4300533930460613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,2,64,0,1,fp8,fp8,0,3.6364320119222007
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,2,64,0,1,float16,fp8,0,4.041562716166179
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,8,64,128,1,float16,float16,0,1.1089920202891033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,4,64,0,1,float16,float16,0,4.075989405314128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,8,64,128,1,float16,fp8,0,1.1001653671264648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,8,64,128,1,fp8,fp8,0,1.4334774017333984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,4,64,0,1,fp8,fp8,0,3.635039965311686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,4,64,0,1,float16,fp8,0,3.9838987986246743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,48,64,128,1,float16,float16,0,0.6424959897994995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,48,64,128,1,float16,fp8,0,0.6463040113449097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,8,64,0,1,float16,float16,0,4.035600026448567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,8,64,0,1,fp8,fp8,0,3.6382773717244468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,8,64,0,1,float16,fp8,0,4.040885289510091
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,48,64,128,1,fp8,fp8,0,0.8310986359914144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,48,64,0,1,float16,float16,0,2.1067840258280435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,1,64,128,1,float16,float16,0,0.5946346521377563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,48,64,0,1,float16,fp8,0,2.1070079803466797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,48,64,0,1,fp8,fp8,0,1.9668800036112468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,1,64,0,1,float16,float16,0,2.0569653511047363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,1,64,128,1,float16,fp8,0,0.591754674911499
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,1,64,128,1,fp8,fp8,0,0.7710026899973551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,1,64,0,1,float16,fp8,0,2.034063975016276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,2,64,128,1,float16,float16,0,0.5957066615422567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,1,64,0,1,fp8,fp8,0,1.8928799629211426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,2,64,0,1,float16,float16,0,2.0416320164998374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,2,64,128,1,float16,fp8,0,0.5899306535720825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,2,64,128,1,fp8,fp8,0,0.7622826894124349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,2,64,0,1,float16,fp8,0,2.0507359504699707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,4,64,128,1,float16,float16,0,0.6003040075302124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,2,64,0,1,fp8,fp8,0,1.8840853373209636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,4,64,128,1,float16,fp8,0,0.594869335492452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,4,64,128,1,fp8,fp8,0,0.7656533718109131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,4,64,0,1,float16,float16,0,2.046757380167643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,4,64,0,1,float16,fp8,0,2.0444533030192056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,8,64,128,1,float16,float16,0,0.6059093475341797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,4,64,0,1,fp8,fp8,0,1.8860054016113281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,8,64,128,1,float16,fp8,0,0.5961013237635294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,8,64,128,1,fp8,fp8,0,0.7756160100301107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,8,64,0,1,float16,float16,0,2.0492213567097983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,48,64,128,1,float16,float16,0,0.423802653948466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,8,64,0,1,float16,fp8,0,2.044053395589193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,8,64,0,1,fp8,fp8,0,1.8925065994262695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,48,64,128,1,float16,fp8,0,0.4230293432871501
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,48,64,0,1,float16,float16,0,1.1696106592814128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,48,64,128,1,fp8,fp8,0,0.5099360148111979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,48,64,0,1,float16,fp8,0,1.1696266333262126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,48,64,0,1,fp8,fp8,0,1.038592020670573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,1,64,128,1,float16,float16,0,0.4226826826731364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,1,64,0,1,float16,float16,0,1.1635200182596843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,1,64,128,1,float16,fp8,0,0.42264000574747723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,1,64,128,1,fp8,fp8,0,0.5079466501871744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,1,64,0,1,float16,fp8,0,1.173141320546468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,1,64,0,1,fp8,fp8,0,1.0447999636332195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,2,64,128,1,float16,float16,0,0.42323732376098633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,2,64,128,1,float16,fp8,0,0.4230293432871501
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,2,64,0,1,float16,float16,0,1.1637226740519206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,2,64,128,1,fp8,fp8,0,0.5095306634902954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,2,64,0,1,float16,fp8,0,1.1636053721110027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,2,64,0,1,fp8,fp8,0,1.0385226408640544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,4,64,128,1,float16,float16,0,0.42479467391967773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,4,64,0,1,float16,float16,0,1.1718613306681316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,4,64,128,1,float16,fp8,0,0.42390398184458417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,4,64,128,1,fp8,fp8,0,0.5097493330637614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,8,64,128,1,float16,float16,0,0.42235199610392254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,4,64,0,1,float16,fp8,0,1.167413314183553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,4,64,0,1,fp8,fp8,0,1.0407360394795735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,8,64,128,1,float16,fp8,0,0.42475732167561847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,8,64,0,1,float16,float16,0,1.164394696553548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,8,64,128,1,fp8,fp8,0,0.5102933247884115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,8,64,0,1,float16,fp8,0,1.1720373630523682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,8,64,0,1,fp8,fp8,0,1.0399999618530273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,1,64,128,1,float16,float16,0,3.1123040517171225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,1,64,128,1,float16,fp8,0,3.065781275431315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,1,64,128,1,fp8,fp8,0,4.052506764729817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,2,64,128,1,float16,float16,0,3.1975040435791016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,2,64,128,1,float16,fp8,0,3.1567360560099282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,2,64,128,1,fp8,fp8,0,4.093263943990071
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,1,64,0,1,float16,float16,0,9.393824259440104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,1,64,0,1,fp8,fp8,0,8.464399973551432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,1,64,0,1,float16,fp8,0,9.329914728800455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,4,64,128,1,float16,float16,0,3.2027308146158853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,2,64,0,1,float16,float16,0,9.397141138712565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,4,64,128,1,float16,fp8,0,3.170421282450358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,2,64,0,1,fp8,fp8,0,8.583749135335287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,4,64,128,1,fp8,fp8,0,4.143701235453288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,2,64,0,1,float16,fp8,0,9.289418538411459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,8,64,128,1,float16,float16,0,3.2305386861165366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,8,64,128,1,float16,fp8,0,3.2230612436930337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,4,64,0,1,float16,float16,0,9.493888219197592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,8,64,128,1,fp8,fp8,0,4.1738026936848955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,4,64,0,1,float16,fp8,0,9.311034520467123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,4,64,0,1,fp8,fp8,0,8.63972282409668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,48,64,128,1,float16,float16,0,1.7444052696228027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,48,64,128,1,float16,fp8,0,1.7535252571105957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,48,64,128,1,fp8,fp8,0,2.2815732955932617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,8,64,0,1,float16,float16,0,9.382565180460611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,48,64,0,1,float16,float16,0,4.907077471415202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,1,64,128,1,float16,float16,0,1.5781653722127278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,48,64,0,1,fp8,fp8,0,4.498501459757487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,48,64,0,1,float16,fp8,0,4.863914807637532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,1,64,128,1,float16,fp8,0,1.5459200541178386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,8,64,0,1,float16,fp8,0,9.3612429300944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,8,64,0,1,fp8,fp8,0,8.574607849121094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,1,64,128,1,fp8,fp8,0,2.0361599922180176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,1,64,0,1,float16,float16,0,4.7445065180460615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,2,64,128,1,float16,float16,0,1.591002623240153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,1,64,0,1,fp8,fp8,0,4.249370574951172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,1,64,0,1,float16,fp8,0,4.68452803293864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,2,64,128,1,float16,fp8,0,1.5577120780944824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,2,64,0,1,float16,float16,0,4.657247861226399
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,2,64,128,1,fp8,fp8,0,2.0387627283732095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,4,64,128,1,float16,float16,0,1.5910986264546711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,4,64,128,1,float16,fp8,0,1.5735999743143718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,2,64,0,1,fp8,fp8,0,4.296266555786133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,2,64,0,1,float16,fp8,0,4.713856061299642
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,4,64,0,1,float16,float16,0,4.733925183614095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,4,64,128,1,fp8,fp8,0,2.0606239636739097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,4,64,0,1,float16,fp8,0,4.675301233927409
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,8,64,128,1,float16,float16,0,1.590394655863444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,4,64,0,1,fp8,fp8,0,4.276149431864421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,8,64,0,1,float16,float16,0,4.709408124287923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,8,64,128,1,float16,fp8,0,1.5663946469624836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,8,64,128,1,fp8,fp8,0,2.086575984954834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,48,64,128,1,float16,float16,0,0.8931787014007568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,8,64,0,1,float16,fp8,0,4.724512100219727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,8,64,0,1,fp8,fp8,0,4.324202537536621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,48,64,128,1,float16,fp8,0,0.897216002146403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,48,64,128,1,fp8,fp8,0,1.1796106497446697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,48,64,0,1,float16,float16,0,2.4507573445638022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,1,64,128,1,float16,float16,0,0.8247679869333903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,48,64,0,1,float16,fp8,0,2.4227360089619956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,48,64,0,1,fp8,fp8,0,2.283408006032308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,1,64,0,1,float16,float16,0,2.364463965098063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,1,64,128,1,float16,fp8,0,0.8053973515828451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,1,64,128,1,fp8,fp8,0,1.0579040050506592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,2,64,128,1,float16,float16,0,0.8274400234222412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,1,64,0,1,float16,fp8,0,2.368234634399414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,1,64,0,1,fp8,fp8,0,2.1819893519083657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,2,64,128,1,float16,fp8,0,0.8146453698476156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,2,64,0,1,float16,float16,0,2.351701259613037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,2,64,128,1,fp8,fp8,0,1.0624000231424968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,2,64,0,1,float16,fp8,0,2.369706630706787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,4,64,128,1,float16,float16,0,0.830293337504069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,2,64,0,1,fp8,fp8,0,2.179375966389974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,4,64,128,1,float16,fp8,0,0.8145173390706381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,4,64,0,1,float16,float16,0,2.386799971262614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,4,64,128,1,fp8,fp8,0,1.0709973176320393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,8,64,128,1,float16,float16,0,0.831834634145101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,4,64,0,1,float16,fp8,0,2.3423147201538086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,8,64,128,1,float16,fp8,0,0.822645346323649
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,4,64,0,1,fp8,fp8,0,2.1856533686319985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,8,64,128,1,fp8,fp8,0,1.0794133345286052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,8,64,0,1,float16,float16,0,2.390607992808024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,48,64,128,1,float16,float16,0,0.4848959843317668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,48,64,128,1,float16,fp8,0,0.4905279874801636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,48,64,0,1,float16,float16,0,1.264469305674235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,8,64,0,1,float16,fp8,0,2.385493278503418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,8,64,0,1,fp8,fp8,0,2.2125066121419272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,48,64,128,1,fp8,fp8,0,0.632213314374288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,1,64,128,1,float16,float16,0,0.4543413321177165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,48,64,0,1,float16,fp8,0,1.2743626435597737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,48,64,0,1,fp8,fp8,0,1.2065866788228352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,1,64,128,1,float16,fp8,0,0.4468640089035034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,1,64,0,1,float16,float16,0,1.2405866781870525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,1,64,128,1,fp8,fp8,0,0.5834773381551107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,1,64,0,1,float16,fp8,0,1.233237346013387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,1,64,0,1,fp8,fp8,0,1.1388373374938965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,2,64,128,1,float16,float16,0,0.45653335253397626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,2,64,128,1,float16,fp8,0,0.4510026772816976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,2,64,0,1,float16,float16,0,1.2426186402638753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,2,64,128,1,fp8,fp8,0,0.5792800188064575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,2,64,0,1,float16,fp8,0,1.2345653374989827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,2,64,0,1,fp8,fp8,0,1.156224012374878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,4,64,128,1,float16,float16,0,0.4586506684621175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,4,64,128,1,float16,fp8,0,0.4521919886271159
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,4,64,0,1,float16,float16,0,1.23526930809021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,4,64,128,1,fp8,fp8,0,0.5869866609573364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,4,64,0,1,float16,fp8,0,1.2365919748942058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,8,64,128,1,float16,float16,0,0.45757333437601727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,4,64,0,1,fp8,fp8,0,1.1441173553466797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,8,64,128,1,float16,fp8,0,0.45685867468516034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,8,64,128,1,fp8,fp8,0,0.5893439849217733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,8,64,0,1,float16,float16,0,1.2550346851348877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,8,64,0,1,float16,fp8,0,1.2335093021392822
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,48,64,128,1,float16,float16,0,0.32342400153477985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,8,64,0,1,fp8,fp8,0,1.1537493069966633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,48,64,0,1,float16,float16,0,0.725546677907308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,48,64,128,1,float16,fp8,0,0.32516799370447796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,48,64,128,1,fp8,fp8,0,0.39327998956044513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,48,64,0,1,float16,fp8,0,0.7327626546223959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,48,64,0,1,fp8,fp8,0,0.6475520133972168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,1,64,128,1,float16,float16,0,0.32293333609898883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,1,64,128,1,float16,fp8,0,0.3222986658414205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,1,64,0,1,float16,float16,0,0.7310826778411865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,1,64,128,1,fp8,fp8,0,0.39141865571339923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,2,64,128,1,float16,fp8,0,0.32310400406519574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,1,64,0,1,float16,fp8,0,0.7348106702168783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,1,64,0,1,fp8,fp8,0,0.653493324915568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,2,64,128,1,float16,float16,0,0.32466665903727215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,2,64,0,1,float16,float16,0,0.7274026870727539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,2,64,128,1,fp8,fp8,0,0.38923199971516925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,4,64,128,1,float16,float16,0,0.32293333609898883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,2,64,0,1,float16,fp8,0,0.7301386992136637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,2,64,0,1,fp8,fp8,0,0.6534186601638794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,4,64,0,1,float16,float16,0,0.7351360321044922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,4,64,128,1,float16,fp8,0,0.3244746724764506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,4,64,128,1,fp8,fp8,0,0.3931359847386678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,4,64,0,1,float16,fp8,0,0.7254026730855306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,8,64,128,1,float16,float16,0,0.3237066666285197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,4,64,0,1,fp8,fp8,0,0.6490293343861898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,8,64,0,1,float16,float16,0,0.7308853467305502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,8,64,128,1,float16,fp8,0,0.32416532437006634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,8,64,128,1,fp8,fp8,0,0.3922186692555745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,8,64,0,1,float16,fp8,0,0.7348746458689371
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,8,64,0,1,fp8,fp8,0,0.6548106670379639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,1,64,128,1,float16,float16,0,4.117616017659505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,1,64,128,1,float16,fp8,0,4.0642134348551435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,1,64,128,1,fp8,fp8,0,5.346666971842448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,2,64,128,1,float16,float16,0,4.233882586161296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,2,64,128,1,float16,fp8,0,4.177541414896647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,1,64,0,1,float16,float16,0,9.13596280415853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,2,64,128,1,fp8,fp8,0,5.380197525024414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,1,64,0,1,fp8,fp8,0,8.350128173828125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,1,64,0,1,float16,fp8,0,9.280319849650065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,2,64,0,1,float16,float16,0,9.211109161376953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,4,64,128,1,float16,float16,0,4.24181334177653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,4,64,128,1,float16,fp8,0,4.194229443868001
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,2,64,0,1,float16,fp8,0,9.191493352254232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,4,64,128,1,fp8,fp8,0,5.419557571411133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,2,64,0,1,fp8,fp8,0,8.441776275634766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,8,64,128,1,float16,float16,0,4.282037417093913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,4,64,0,1,float16,float16,0,9.238719940185547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,8,64,128,1,float16,fp8,0,4.291103998819987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,4,64,0,1,float16,fp8,0,9.273141225179037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,48,64,128,1,float16,float16,0,2.350015958150228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,4,64,0,1,fp8,fp8,0,8.48629887898763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,8,64,128,1,fp8,fp8,0,5.468720118204753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,8,64,0,1,float16,float16,0,9.275423685709635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,48,64,128,1,float16,fp8,0,2.311914602915446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,48,64,0,1,float16,float16,0,4.884304046630859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,48,64,128,1,fp8,fp8,0,3.0072266260782876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,8,64,0,1,fp8,fp8,0,8.511066436767578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,8,64,0,1,float16,fp8,0,9.228096008300781
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,1,64,128,1,float16,float16,0,2.052464008331299
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,48,64,0,1,float16,fp8,0,4.806912104288737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,1,64,128,1,float16,fp8,0,2.0207626024881997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,48,64,0,1,fp8,fp8,0,4.540261268615723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,1,64,128,1,fp8,fp8,0,2.649376074473063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,1,64,0,1,float16,float16,0,4.630037307739258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,1,64,0,1,float16,fp8,0,4.5526078542073565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,1,64,0,1,fp8,fp8,0,4.19981320699056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,2,64,0,1,float16,fp8,0,4.556495984395345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,2,64,128,1,float16,float16,0,2.073904037475586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,2,64,128,1,float16,fp8,0,2.042954603830973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,2,64,128,1,fp8,fp8,0,2.715002695719401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,2,64,0,1,float16,float16,0,4.578543980916341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,2,64,0,1,fp8,fp8,0,4.236821174621582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,4,64,128,1,float16,fp8,0,2.069279988606771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,4,64,128,1,float16,float16,0,2.0993653933207193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,4,64,0,1,float16,float16,0,4.616394678751628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,4,64,128,1,fp8,fp8,0,2.7148427963256836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,8,64,128,1,float16,float16,0,2.1009440422058105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,4,64,0,1,float16,fp8,0,4.5892534255981445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,4,64,0,1,fp8,fp8,0,4.243354797363281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,8,64,128,1,float16,fp8,0,2.0758934020996094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,8,64,0,1,float16,float16,0,4.651887893676758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,48,64,128,1,float16,float16,0,1.1497920354207356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,8,64,128,1,fp8,fp8,0,2.7390613555908203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,48,64,128,1,float16,fp8,0,1.1513439814249675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,48,64,0,1,float16,float16,0,2.4121920267740884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,48,64,128,1,fp8,fp8,0,1.4977653821309407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,8,64,0,1,fp8,fp8,0,4.278805414835612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,8,64,0,1,float16,fp8,0,4.614272117614746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,1,64,128,1,float16,float16,0,1.0472746690114338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,48,64,0,1,float16,fp8,0,2.471120039621989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,48,64,0,1,fp8,fp8,0,2.2609920501708984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,1,64,128,1,float16,fp8,0,1.0369653701782227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,1,64,0,1,float16,float16,0,2.2958292961120605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,1,64,0,1,float16,fp8,0,2.27509339650472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,1,64,128,1,fp8,fp8,0,1.3514933586120605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,2,64,128,1,float16,float16,0,1.0512746969858806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,1,64,0,1,fp8,fp8,0,2.1352747281392417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,2,64,128,1,float16,fp8,0,1.032650629679362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,2,64,0,1,float16,float16,0,2.3176159858703613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,2,64,128,1,fp8,fp8,0,1.3575894037882488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,2,64,0,1,float16,fp8,0,2.2793760299682617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,4,64,128,1,float16,float16,0,1.055834690729777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,2,64,0,1,fp8,fp8,0,2.1214027404785156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,4,64,128,1,float16,fp8,0,1.0378719965616863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,4,64,128,1,fp8,fp8,0,1.375040054321289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,4,64,0,1,float16,float16,0,2.3091519673665366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,8,64,128,1,float16,float16,0,1.0611733595530193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,4,64,0,1,fp8,fp8,0,2.1251519521077475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,4,64,0,1,float16,fp8,0,2.2891359329223633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,8,64,0,1,float16,float16,0,2.307509263356527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,8,64,128,1,float16,fp8,0,1.0471999645233154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,8,64,128,1,fp8,fp8,0,1.3855306307474773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,8,64,0,1,float16,fp8,0,2.308922608693441
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,48,64,128,1,float16,float16,0,0.6017280022303263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,8,64,0,1,fp8,fp8,0,2.1443146069844565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,48,64,0,1,float16,float16,0,1.2235626379648845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,48,64,128,1,float16,fp8,0,0.6099626620610555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,48,64,128,1,fp8,fp8,0,0.7828799883524576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,1,64,128,1,float16,float16,0,0.555727998415629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,48,64,0,1,float16,fp8,0,1.240336020787557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,48,64,0,1,fp8,fp8,0,1.1701707045237224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,1,64,0,1,float16,float16,0,1.189578692118327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,1,64,128,1,float16,fp8,0,0.5456586678822836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,1,64,128,1,fp8,fp8,0,0.7106773058573405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,1,64,0,1,float16,fp8,0,1.186570644378662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,2,64,128,1,float16,float16,0,0.5601280132929484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,1,64,0,1,fp8,fp8,0,1.0989387035369873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,2,64,128,1,float16,fp8,0,0.5490346749623617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,2,64,0,1,float16,float16,0,1.1920106410980225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,2,64,128,1,fp8,fp8,0,0.7180480162302653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,4,64,128,1,float16,float16,0,0.5614293416341146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,2,64,0,1,float16,fp8,0,1.182965358098348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,2,64,0,1,fp8,fp8,0,1.0955519676208496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,4,64,128,1,float16,fp8,0,0.5504853328069051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,4,64,0,1,float16,float16,0,1.2010613282521565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,4,64,128,1,fp8,fp8,0,0.7215572992960612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,8,64,128,1,float16,float16,0,0.565285325050354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,4,64,0,1,float16,fp8,0,1.1852533022562664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,4,64,0,1,fp8,fp8,0,1.10316268603007
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,8,64,128,1,float16,fp8,0,0.5528373320897421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,8,64,0,1,float16,float16,0,1.1983946959177654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,8,64,128,1,fp8,fp8,0,0.7264853318532308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,8,64,0,1,float16,fp8,0,1.1974613666534424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,8,64,0,1,fp8,fp8,0,1.1197386582692463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,48,64,0,1,float16,fp8,0,0.6600053310394287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,48,64,128,1,float16,float16,0,0.3313279946645101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,48,64,0,1,float16,float16,0,0.6545600096384684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,48,64,128,1,float16,fp8,0,0.3364266554514567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,48,64,128,1,fp8,fp8,0,0.43253334363301593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,48,64,0,1,fp8,fp8,0,0.6019946734110514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,1,64,128,1,float16,float16,0,0.31190399328867596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,1,64,0,1,float16,float16,0,0.6406613190968832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,1,64,128,1,float16,fp8,0,0.3068693280220032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,1,64,128,1,fp8,fp8,0,0.40064001083374023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,1,64,0,1,float16,fp8,0,0.6341973145802816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,1,64,0,1,fp8,fp8,0,0.5702986717224121
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,2,64,128,1,float16,float16,0,0.3096426725387573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,2,64,0,1,float16,float16,0,0.6372106472651163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,2,64,128,1,float16,fp8,0,0.30829866727193195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,2,64,128,1,fp8,fp8,0,0.4013813336690267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,2,64,0,1,float16,fp8,0,0.6324426730473837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,2,64,0,1,fp8,fp8,0,0.5733333428700765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,4,64,128,1,float16,float16,0,0.31355732679367065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,4,64,0,1,float16,float16,0,0.638479987780253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,4,64,128,1,float16,fp8,0,0.3090719978014628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,8,64,128,1,float16,fp8,0,0.3120959997177124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,4,64,128,1,fp8,fp8,0,0.40034135182698566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,4,64,0,1,float16,fp8,0,0.6373386780420939
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,4,64,0,1,fp8,fp8,0,0.5750293334325155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,8,64,128,1,float16,float16,0,0.31410133838653564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,8,64,0,1,float16,float16,0,0.6429493427276611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,8,64,128,1,fp8,fp8,0,0.4028746684392293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,8,64,0,1,float16,fp8,0,0.6373333136240641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,8,64,0,1,fp8,fp8,0,0.5757120052973429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,48,64,128,1,float16,float16,0,0.2246613303820292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,48,64,0,1,float16,float16,0,0.37829331556955975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,48,64,128,1,float16,fp8,0,0.2254400054613749
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,48,64,128,1,fp8,fp8,0,0.27353066205978394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,48,64,0,1,float16,fp8,0,0.3771093289057414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,48,64,0,1,fp8,fp8,0,0.3509813149770101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,1,64,128,1,float16,float16,0,0.22346667448679605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,1,64,0,1,float16,float16,0,0.3725653489430745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,1,64,128,1,float16,fp8,0,0.22334933280944824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,1,64,128,1,fp8,fp8,0,0.2720853288968404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,1,64,0,1,float16,fp8,0,0.3744586706161499
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,1,64,0,1,fp8,fp8,0,0.35091201464335126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,2,64,128,1,float16,float16,0,0.22431999444961548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,2,64,0,1,float16,float16,0,0.3729066848754883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,2,64,128,1,float16,fp8,0,0.2241013248761495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,2,64,128,1,fp8,fp8,0,0.27244265874226886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,2,64,0,1,float16,fp8,0,0.37269333998362225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,2,64,0,1,fp8,fp8,0,0.35183465480804443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,4,64,128,1,float16,float16,0,0.22406933705012003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,4,64,0,1,float16,float16,0,0.3728586832682292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,4,64,128,1,float16,fp8,0,0.22457599639892578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,4,64,128,1,fp8,fp8,0,0.27236799399058026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,4,64,0,1,float16,fp8,0,0.3742719888687134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,4,64,0,1,fp8,fp8,0,0.35074134667714435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,8,64,128,1,float16,float16,0,0.2233333388964335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,8,64,0,1,float16,float16,0,0.37481598059336346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,8,64,128,1,float16,fp8,0,0.2230506738026937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,8,64,128,1,fp8,fp8,0,0.27286932865778607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,8,64,0,1,float16,fp8,0,0.37407465775807697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,8,64,0,1,fp8,fp8,0,0.3513439893722534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,1,64,128,1,float16,float16,0,3.0758771896362305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,1,64,128,1,float16,fp8,0,3.0253012975056968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,1,64,128,1,fp8,fp8,0,3.985194524129232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,1,64,0,1,float16,float16,0,5.7138824462890625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,2,64,128,1,float16,float16,0,3.131418546040853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,2,64,128,1,float16,fp8,0,3.077050526936849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,1,64,0,1,fp8,fp8,0,5.198762575785319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,1,64,0,1,float16,fp8,0,5.601098378499349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,2,64,128,1,fp8,fp8,0,4.041861216227214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,2,64,0,1,float16,float16,0,5.6804962158203125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,4,64,128,1,float16,float16,0,3.144048055013021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,4,64,128,1,float16,fp8,0,3.088597297668457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,2,64,0,1,fp8,fp8,0,5.273301442464192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,2,64,0,1,float16,fp8,0,5.7468210856119795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,4,64,128,1,fp8,fp8,0,4.096538543701172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,4,64,0,1,float16,float16,0,5.7536055246988935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,4,64,0,1,fp8,fp8,0,5.273872057596843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,8,64,128,1,float16,float16,0,3.170938809712728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,4,64,0,1,float16,fp8,0,5.676917394002278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,8,64,128,1,float16,fp8,0,3.1297388076782227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,48,64,128,1,float16,float16,0,1.723141352335612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,8,64,128,1,fp8,fp8,0,4.116400082906087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,48,64,128,1,float16,fp8,0,1.7102239926656086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,8,64,0,1,float16,float16,0,5.760586420694987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,48,64,0,1,float16,float16,0,3.016826629638672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,8,64,0,1,fp8,fp8,0,5.336453119913737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,48,64,128,1,fp8,fp8,0,2.225653330485026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,8,64,0,1,float16,fp8,0,5.733840306599935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,1,64,128,1,float16,float16,0,1.5381919542948406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,48,64,0,1,float16,fp8,0,2.973877271016439
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,1,64,128,1,float16,fp8,0,1.5190720558166504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,48,64,0,1,fp8,fp8,0,2.848010698954264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,1,64,0,1,float16,float16,0,2.825584093729655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,1,64,128,1,fp8,fp8,0,1.9883999824523926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,2,64,128,1,float16,float16,0,1.5453119277954102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,2,64,128,1,float16,fp8,0,1.5238720575968425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,1,64,0,1,fp8,fp8,0,2.6020639737447104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,1,64,0,1,float16,fp8,0,2.773536046346029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,2,64,0,1,float16,float16,0,2.8073867162068686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,2,64,128,1,fp8,fp8,0,1.9961919784545898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,2,64,0,1,float16,fp8,0,2.8070507049560547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,2,64,0,1,fp8,fp8,0,2.613749345143636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,4,64,128,1,float16,float16,0,1.5430506070454915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,4,64,128,1,float16,fp8,0,1.5313173929850261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,4,64,0,1,float16,float16,0,2.8399041493733725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,4,64,128,1,fp8,fp8,0,2.0131093660990396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,8,64,128,1,float16,float16,0,1.5610826810201008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,4,64,0,1,float16,fp8,0,2.786970774332682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,4,64,0,1,fp8,fp8,0,2.6130879720052085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,8,64,128,1,float16,fp8,0,1.5348960558573406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,8,64,0,1,float16,float16,0,2.8472105662027993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,8,64,128,1,fp8,fp8,0,2.034053325653076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,48,64,128,1,float16,float16,0,0.8684106667836508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,48,64,128,1,float16,fp8,0,0.8691840171813965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,48,64,0,1,float16,float16,0,1.5090293884277344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,8,64,0,1,float16,fp8,0,2.831509272257487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,8,64,0,1,fp8,fp8,0,2.645888010660807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,48,64,128,1,fp8,fp8,0,1.1250240008036296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,48,64,0,1,float16,fp8,0,1.5129013061523438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,1,64,128,1,float16,float16,0,0.7924533685048422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,48,64,0,1,fp8,fp8,0,1.4251093864440918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,1,64,128,1,float16,fp8,0,0.7746773560841879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,1,64,0,1,float16,float16,0,1.4374613761901855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,1,64,128,1,fp8,fp8,0,1.0272160371144612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,1,64,0,1,float16,fp8,0,1.4305920600891113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,2,64,128,1,float16,float16,0,0.7956319650014242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,1,64,0,1,fp8,fp8,0,1.3192533651987712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,2,64,128,1,float16,fp8,0,0.7789333661397299
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,2,64,0,1,float16,float16,0,1.4465227127075195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,2,64,128,1,fp8,fp8,0,1.0174773534138997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,2,64,0,1,float16,fp8,0,1.4250613848368328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,4,64,128,1,float16,fp8,0,0.7812853654225668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,4,64,128,1,float16,float16,0,0.7978879610697428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,2,64,0,1,fp8,fp8,0,1.3241546948750813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,4,64,0,1,float16,float16,0,1.4476799964904785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,4,64,0,1,float16,fp8,0,1.4356160163879395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,4,64,128,1,fp8,fp8,0,1.0322133700052898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,8,64,128,1,float16,float16,0,0.8003093401590983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,8,64,0,1,float16,fp8,0,1.4430987040201824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,4,64,0,1,fp8,fp8,0,1.321775992711385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,8,64,128,1,float16,fp8,0,0.7941439946492513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,8,64,0,1,float16,float16,0,1.4445652961730957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,8,64,128,1,fp8,fp8,0,1.0379306475321453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,48,64,128,1,float16,float16,0,0.4561226765314738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,8,64,0,1,fp8,fp8,0,1.3454346656799316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,48,64,128,1,float16,fp8,0,0.46242666244506836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,48,64,0,1,float16,float16,0,0.7842506567637125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,48,64,128,1,fp8,fp8,0,0.596565326054891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,48,64,0,1,float16,fp8,0,0.791541337966919
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,48,64,0,1,fp8,fp8,0,0.7444746494293213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,1,64,128,1,float16,float16,0,0.4227946599324544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,1,64,0,1,float16,float16,0,0.7508479754130045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,1,64,128,1,float16,fp8,0,0.4158080021540324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,1,64,128,1,fp8,fp8,0,0.5457173188527426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,1,64,0,1,float16,fp8,0,0.7449759642283121
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,2,64,128,1,float16,float16,0,0.42298134167989093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,1,64,0,1,fp8,fp8,0,0.698421319325765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,2,64,0,1,float16,float16,0,0.7517920335133871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,2,64,128,1,float16,fp8,0,0.4168586730957031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,2,64,128,1,fp8,fp8,0,0.5487146774927775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,2,64,0,1,float16,fp8,0,0.7489173412322998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,2,64,0,1,fp8,fp8,0,0.6964266300201416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,4,64,128,1,float16,float16,0,0.4244266748428345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,4,64,0,1,float16,float16,0,0.7589120070139567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,4,64,128,1,float16,fp8,0,0.419648011525472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,4,64,128,1,fp8,fp8,0,0.5452160040537516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,4,64,0,1,float16,fp8,0,0.7479680379231771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,4,64,0,1,fp8,fp8,0,0.7019893328348795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,8,64,128,1,float16,float16,0,0.42909332116444904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,8,64,0,1,float16,float16,0,0.758080005645752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,8,64,128,1,float16,fp8,0,0.4222079912821452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,8,64,128,1,fp8,fp8,0,0.5558880170186361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,8,64,0,1,float16,fp8,0,0.7528479894002279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,48,64,128,1,float16,float16,0,0.2543519934018453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,8,64,0,1,fp8,fp8,0,0.6996906598409017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,48,64,0,1,fp8,fp8,0,0.3928426504135132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,1,64,128,1,float16,float16,0,0.2397759954134623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,48,64,128,1,float16,fp8,0,0.25923200448354083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,48,64,0,1,float16,float16,0,0.42470399538675946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,48,64,128,1,fp8,fp8,0,0.3330559929211934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,1,64,0,1,float16,fp8,0,0.40640532970428467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,48,64,0,1,float16,fp8,0,0.42643733819325763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,1,64,0,1,float16,float16,0,0.40808534622192383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,1,64,128,1,float16,fp8,0,0.2353279987970988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,1,64,128,1,fp8,fp8,0,0.3088373343149821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,1,64,0,1,fp8,fp8,0,0.3681386709213257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,2,64,128,1,float16,float16,0,0.24039467175801596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,2,64,0,1,float16,float16,0,0.4090240001678467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,2,64,128,1,float16,fp8,0,0.23741867144902548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,2,64,128,1,fp8,fp8,0,0.30726399024327594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,2,64,0,1,float16,fp8,0,0.40533332029978436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,2,64,0,1,fp8,fp8,0,0.36510932445526123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,4,64,128,1,float16,float16,0,0.24193066358566284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,4,64,0,1,float16,float16,0,0.41089598337809247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,4,64,128,1,float16,fp8,0,0.23853866259256998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,4,64,128,1,fp8,fp8,0,0.308351993560791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,4,64,0,1,float16,fp8,0,0.4070826768875122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,4,64,0,1,fp8,fp8,0,0.36770133177439374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,8,64,128,1,float16,float16,0,0.24186132351557413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,8,64,0,1,float16,float16,0,0.4114026625951131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,48,64,128,1,float16,fp8,0,0.17489065726598105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,8,64,128,1,float16,fp8,0,0.24059200286865234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,8,64,128,1,fp8,fp8,0,0.31279999017715454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,8,64,0,1,float16,fp8,0,0.41147732734680176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,8,64,0,1,fp8,fp8,0,0.36981332302093506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,48,64,128,1,float16,float16,0,0.17524800697962442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,48,64,0,1,float16,float16,0,0.25094399849573773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,48,64,128,1,fp8,fp8,0,0.21462933222452799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,48,64,0,1,float16,fp8,0,0.2502773404121399
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,48,64,0,1,fp8,fp8,0,0.23654399315516153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,1,64,128,1,float16,float16,0,0.17324266831080118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,1,64,0,1,float16,float16,0,0.24926400184631348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,1,64,128,1,float16,fp8,0,0.17263466119766235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,1,64,128,1,fp8,fp8,0,0.2103839914004008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,1,64,0,1,float16,fp8,0,0.24794133504231772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,1,64,0,1,fp8,fp8,0,0.2340373396873474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,2,64,128,1,float16,float16,0,0.17251733938852945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,2,64,0,1,float16,float16,0,0.24844799439112344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,2,64,128,1,float16,fp8,0,0.17381866772969565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,2,64,128,1,fp8,fp8,0,0.2112906575202942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,2,64,0,1,float16,fp8,0,0.24842133124669394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,2,64,0,1,fp8,fp8,0,0.23390400409698486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,4,64,128,1,float16,float16,0,0.17299733559290567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,4,64,0,1,float16,float16,0,0.24779200553894043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,4,64,128,1,float16,fp8,0,0.17292799552281699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,4,64,128,1,fp8,fp8,0,0.21221333742141724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,4,64,0,1,float16,fp8,0,0.24835733572642008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,4,64,0,1,fp8,fp8,0,0.23453332980473837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,8,64,128,1,float16,float16,0,0.17337065935134888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,8,64,0,1,float16,float16,0,0.24830400943756104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,8,64,128,1,float16,fp8,0,0.17351466417312622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,8,64,128,1,fp8,fp8,0,0.2130240003267924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,8,64,0,1,float16,fp8,0,0.24779200553894043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,8,64,0,1,fp8,fp8,0,0.23533866802851358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,1,64,128,1,float16,float16,0,4.0825761159261065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,1,64,128,1,float16,fp8,0,4.027157465616862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,1,64,0,1,float16,float16,0,5.9633331298828125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,1,64,128,1,fp8,fp8,0,5.193072001139323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,2,64,128,1,float16,float16,0,4.066447893778483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,1,64,0,1,fp8,fp8,0,5.389269510904948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,1,64,0,1,float16,fp8,0,5.950431823730469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,2,64,128,1,float16,fp8,0,4.02184518178304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,2,64,0,1,float16,float16,0,5.97437858581543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,2,64,128,1,fp8,fp8,0,5.293504079182942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,4,64,128,1,float16,float16,0,4.0969492594401045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,2,64,0,1,float16,fp8,0,5.896799723307292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,2,64,0,1,fp8,fp8,0,5.512165069580078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,4,64,128,1,float16,fp8,0,4.062485376993815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,4,64,0,1,float16,float16,0,5.974613189697266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,4,64,128,1,fp8,fp8,0,5.34449577331543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,4,64,0,1,float16,fp8,0,5.938287734985352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,8,64,128,1,float16,float16,0,4.1626027425130205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,4,64,0,1,fp8,fp8,0,5.542442957560222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,8,64,128,1,float16,fp8,0,4.091914812723796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,8,64,0,1,float16,float16,0,6.044981638590495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,48,64,128,1,float16,float16,0,2.29750394821167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,8,64,128,1,fp8,fp8,0,5.3590240478515625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,48,64,0,1,float16,float16,0,3.2464211781819663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,48,64,128,1,float16,fp8,0,2.2574559847513833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,8,64,0,1,float16,fp8,0,6.033418655395508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,8,64,0,1,fp8,fp8,0,5.612672170003255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,48,64,128,1,fp8,fp8,0,2.9467414220174155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,1,64,128,1,float16,float16,0,2.0171359380086265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,48,64,0,1,float16,fp8,0,3.2140372594197593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,48,64,0,1,fp8,fp8,0,3.0362345377604165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,1,64,128,1,float16,fp8,0,1.9928372701009114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,1,64,0,1,float16,float16,0,2.9381707509358725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,1,64,128,1,fp8,fp8,0,2.5869599978129068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,2,64,128,1,float16,float16,0,2.0374293327331543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,1,64,0,1,fp8,fp8,0,2.7157119115193686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,1,64,0,1,float16,fp8,0,2.900719960530599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,2,64,0,1,float16,float16,0,2.9759041468302407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,2,64,128,1,float16,fp8,0,1.9952905972798665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,2,64,0,1,fp8,fp8,0,2.720213254292806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,2,64,0,1,float16,fp8,0,2.9192107518514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,2,64,128,1,fp8,fp8,0,2.6284640630086265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,4,64,128,1,float16,float16,0,2.042261282602946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,4,64,128,1,float16,fp8,0,2.003114700317383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,4,64,0,1,float16,float16,0,2.970229466756185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,4,64,128,1,fp8,fp8,0,2.6495946248372397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,4,64,0,1,float16,fp8,0,2.950021425882975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,4,64,0,1,fp8,fp8,0,2.7310187021891275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,8,64,128,1,float16,float16,0,2.045098622639974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,8,64,128,1,float16,fp8,0,2.0225067138671875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,8,64,0,1,float16,float16,0,2.9934666951497397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,8,64,128,1,fp8,fp8,0,2.6421546936035156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,8,64,0,1,float16,fp8,0,2.958458582560221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,48,64,128,1,float16,float16,0,1.1332053343454997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,8,64,0,1,fp8,fp8,0,2.7545973459879556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,48,64,0,1,float16,float16,0,1.6044960021972656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,48,64,128,1,float16,fp8,0,1.1273173491160076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,48,64,128,1,fp8,fp8,0,1.4637333552042644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,48,64,0,1,float16,fp8,0,1.6061065991719563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,1,64,128,1,float16,float16,0,1.0158452987670898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,48,64,0,1,fp8,fp8,0,1.5073973337809246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,1,64,128,1,float16,fp8,0,0.9984959761301676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,1,64,0,1,float16,float16,0,1.4924960136413574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,1,64,128,1,fp8,fp8,0,1.3130719661712646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,1,64,0,1,float16,fp8,0,1.472453276316325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,1,64,0,1,fp8,fp8,0,1.3680906295776367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,2,64,128,1,float16,float16,0,1.0262293020884197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,2,64,0,1,float16,float16,0,1.4970134099324544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,2,64,128,1,float16,fp8,0,1.0034613609313965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,2,64,128,1,fp8,fp8,0,1.3262133598327637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,2,64,0,1,float16,fp8,0,1.475754737854004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,2,64,0,1,fp8,fp8,0,1.3719040552775066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,4,64,0,1,float16,fp8,0,1.4804266293843586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,4,64,128,1,float16,float16,0,1.0264053344726562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,4,64,0,1,float16,float16,0,1.4974346160888672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,4,64,128,1,float16,fp8,0,1.0145173072814941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,4,64,128,1,fp8,fp8,0,1.3321706453959148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,4,64,0,1,fp8,fp8,0,1.3777813911437988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,8,64,128,1,float16,float16,0,1.035205364227295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,8,64,128,1,float16,fp8,0,1.0199146270751953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,48,64,0,1,float16,float16,0,0.8221493562062582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,8,64,0,1,float16,float16,0,1.5009867350260417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,8,64,128,1,fp8,fp8,0,1.3373386065165203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,8,64,0,1,float16,fp8,0,1.4912692705790203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,8,64,0,1,fp8,fp8,0,1.3953280448913574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,48,64,128,1,float16,float16,0,0.5838880141576132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,48,64,128,1,float16,fp8,0,0.5848426818847656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,48,64,128,1,fp8,fp8,0,0.7579360008239746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,48,64,0,1,float16,fp8,0,0.822821299235026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,48,64,0,1,fp8,fp8,0,0.7769066492716471
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,1,64,128,1,float16,float16,0,0.5316106478373209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,1,64,0,1,float16,float16,0,0.7738880316416422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,1,64,128,1,float16,fp8,0,0.5247679948806763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,1,64,128,1,fp8,fp8,0,0.6898559729258219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,1,64,0,1,float16,fp8,0,0.7633439699808756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,1,64,0,1,fp8,fp8,0,0.7076533635457357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,2,64,128,1,float16,float16,0,0.5335306723912557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,2,64,0,1,float16,float16,0,0.7714560031890869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,2,64,128,1,float16,fp8,0,0.5250613292058309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,2,64,128,1,fp8,fp8,0,0.6907467047373453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,2,64,0,1,float16,fp8,0,0.7662719885508219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,2,64,0,1,fp8,fp8,0,0.7158026695251465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,4,64,128,1,float16,float16,0,0.5355413357416788
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,4,64,0,1,float16,float16,0,0.773301362991333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,4,64,128,1,float16,fp8,0,0.5271146694819132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,4,64,128,1,fp8,fp8,0,0.6926666895548502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,4,64,0,1,float16,fp8,0,0.7664000193277994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,4,64,0,1,fp8,fp8,0,0.7162346839904785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,8,64,128,1,float16,float16,0,0.5412373145421346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,8,64,0,1,float16,float16,0,0.7820533116658529
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,8,64,128,1,float16,fp8,0,0.5329066514968872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,48,64,128,1,float16,fp8,0,0.31590400139490765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,8,64,128,1,fp8,fp8,0,0.6986933549245199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,8,64,0,1,float16,fp8,0,0.7741333643595377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,8,64,0,1,fp8,fp8,0,0.7207146485646566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,48,64,128,1,float16,float16,0,0.3105333248774211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,48,64,0,1,float16,float16,0,0.4339040120442708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,48,64,128,1,fp8,fp8,0,0.40965867042541504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,48,64,0,1,float16,fp8,0,0.43777068456013996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,48,64,0,1,fp8,fp8,0,0.4073066711425781
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,1,64,128,1,float16,float16,0,0.2873599926630656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,1,64,0,1,float16,float16,0,0.41145598888397217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,1,64,128,1,float16,fp8,0,0.28295467297236127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,1,64,128,1,fp8,fp8,0,0.3743679920832316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,1,64,0,1,float16,fp8,0,0.4073919852574666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,1,64,0,1,fp8,fp8,0,0.3704479932785034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,2,64,128,1,float16,float16,0,0.28922667105992633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,2,64,0,1,float16,float16,0,0.41130133469899494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,2,64,128,1,float16,fp8,0,0.28545600175857544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,2,64,128,1,fp8,fp8,0,0.3741226593653361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,2,64,0,1,float16,fp8,0,0.40742401281992596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,2,64,0,1,fp8,fp8,0,0.3736213445663452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,4,64,0,1,float16,fp8,0,0.4096533457438151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,4,64,128,1,float16,float16,0,0.28943999608357746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,4,64,0,1,float16,float16,0,0.41416533788045246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,4,64,128,1,float16,fp8,0,0.28495466709136963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,4,64,128,1,fp8,fp8,0,0.37669865290323895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,4,64,0,1,fp8,fp8,0,0.37326931953430176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,8,64,128,1,float16,float16,0,0.29227733612060547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,8,64,0,1,float16,float16,0,0.4158720175425212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,8,64,128,1,float16,fp8,0,0.2897706627845764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,8,64,128,1,fp8,fp8,0,0.37938133875528973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,8,64,0,1,float16,fp8,0,0.41397865613301593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,8,64,0,1,fp8,fp8,0,0.37598931789398193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,48,64,128,1,float16,float16,0,0.17717333634694418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,48,64,0,1,float16,float16,0,0.23448532819747925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,48,64,128,1,float16,fp8,0,0.1811199982961019
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,48,64,128,1,fp8,fp8,0,0.2344906727472941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,48,64,0,1,float16,fp8,0,0.23747199773788452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,48,64,0,1,fp8,fp8,0,0.22510933876037598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,1,64,128,1,float16,float16,0,0.16660267114639282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,1,64,0,1,float16,float16,0,0.2171786626180013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,1,64,128,1,float16,fp8,0,0.16408000389734903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,1,64,128,1,fp8,fp8,0,0.21725332736968994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,1,64,0,1,float16,fp8,0,0.2155946691830953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,1,64,0,1,fp8,fp8,0,0.20709333817164102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,2,64,128,1,float16,float16,0,0.1669173240661621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,2,64,0,1,float16,float16,0,0.21888534228006998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,2,64,128,1,float16,fp8,0,0.16506133476893106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,2,64,128,1,fp8,fp8,0,0.21545066436131796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,2,64,0,1,float16,fp8,0,0.2163146734237671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,2,64,0,1,fp8,fp8,0,0.2075093388557434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,4,64,128,1,float16,float16,0,0.16661333044370016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,4,64,0,1,float16,float16,0,0.21925334135691324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,4,64,128,1,float16,fp8,0,0.1660373310248057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,4,64,128,1,fp8,fp8,0,0.21779733896255493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,4,64,0,1,float16,fp8,0,0.21870400508244833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,4,64,0,1,fp8,fp8,0,0.20747733116149902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,8,64,128,1,float16,float16,0,0.16805332899093628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,48,64,128,1,float16,float16,0,0.12545599540074667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,8,64,0,1,float16,float16,0,0.22020800908406576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,8,64,128,1,float16,fp8,0,0.1662773291269938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,8,64,128,1,fp8,fp8,0,0.21897600094477335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,8,64,0,1,float16,fp8,0,0.21855467557907104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,8,64,0,1,fp8,fp8,0,0.20956265926361084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,48,64,0,1,float16,float16,0,0.14881599942843118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,48,64,128,1,float16,fp8,0,0.1255626678466797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,48,64,128,1,fp8,fp8,0,0.1551040013631185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,48,64,0,1,float16,fp8,0,0.14896532893180847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,48,64,0,1,fp8,fp8,0,0.14044266939163208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,1,64,128,1,float16,float16,0,0.12105600039164226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,1,64,0,1,float16,float16,0,0.14544000228246054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,1,64,128,1,float16,fp8,0,0.12181333700815837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,1,64,128,1,fp8,fp8,0,0.14619200428326926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,1,64,0,1,float16,fp8,0,0.145797332127889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,1,64,0,1,fp8,fp8,0,0.13883733749389648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,2,64,128,1,float16,float16,0,0.12334932883580525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,2,64,0,1,float16,float16,0,0.14522666732470194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,2,64,128,1,float16,fp8,0,0.12119999527931213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,2,64,128,1,fp8,fp8,0,0.14390400052070618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,2,64,0,1,float16,fp8,0,0.14619200428326926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,2,64,0,1,fp8,fp8,0,0.13868799805641174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,4,64,128,1,float16,float16,0,0.12244799733161926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,4,64,0,1,float16,float16,0,0.14536000291506448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,4,64,128,1,float16,fp8,0,0.12171733379364014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,4,64,128,1,fp8,fp8,0,0.14365866780281067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,4,64,0,1,float16,fp8,0,0.1471733351548513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,4,64,0,1,fp8,fp8,0,0.13850667079289755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,8,64,128,1,float16,float16,0,0.12190399567286174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,8,64,0,1,float16,float16,0,0.14620799819628397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,8,64,128,1,float16,fp8,0,0.12232533097267151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,8,64,128,1,fp8,fp8,0,0.14686933159828186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,8,64,0,1,float16,fp8,0,0.14661866426467896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,8,64,0,1,fp8,fp8,0,0.13858133554458618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,1,64,128,1,float16,float16,0,3.020266532897949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,1,64,0,1,float16,float16,0,3.875962575276693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,1,64,128,1,float16,fp8,0,2.9892800649007163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,1,64,128,1,fp8,fp8,0,3.828826586405436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,1,64,0,1,fp8,fp8,0,3.497690518697103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,2,64,128,1,float16,float16,0,3.0222934087117515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,1,64,0,1,float16,fp8,0,3.8427680333455405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,2,64,128,1,float16,fp8,0,2.970613479614258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,2,64,0,1,float16,float16,0,3.8797438939412436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,2,64,128,1,fp8,fp8,0,3.8635307947794595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,2,64,0,1,float16,fp8,0,3.829893430074056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,2,64,0,1,fp8,fp8,0,3.5256694157918296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,4,64,128,1,float16,float16,0,3.02998415629069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,4,64,128,1,float16,fp8,0,2.9718399047851562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,4,64,0,1,float16,float16,0,3.8968321482340493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,4,64,128,1,fp8,fp8,0,3.9313761393229165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,4,64,0,1,float16,fp8,0,3.8680747350056968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,8,64,128,1,float16,float16,0,3.0597171783447266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,4,64,0,1,fp8,fp8,0,3.5727628072102866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,8,64,128,1,float16,fp8,0,3.0182774861653647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,8,64,0,1,float16,float16,0,3.944981257120768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,8,64,128,1,fp8,fp8,0,3.956965446472168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,48,64,128,1,float16,float16,0,1.6933973630269368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,8,64,0,1,float16,fp8,0,3.9014612833658853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,8,64,0,1,fp8,fp8,0,3.638570785522461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,48,64,0,1,float16,float16,0,2.12936004002889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,48,64,128,1,float16,fp8,0,1.6691199938456218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,48,64,128,1,fp8,fp8,0,2.1884427070617676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,1,64,128,1,float16,float16,0,1.4932319323221843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,1,64,0,1,float16,float16,0,1.9429334004720051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,48,64,0,1,fp8,fp8,0,2.0014400482177734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,48,64,0,1,float16,fp8,0,2.113866647084554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,1,64,128,1,float16,fp8,0,1.4643252690633137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,1,64,128,1,fp8,fp8,0,1.927061398824056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,1,64,0,1,float16,fp8,0,1.9096372922261555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,1,64,0,1,fp8,fp8,0,1.751738707224528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,2,64,128,1,float16,float16,0,1.5102027257283528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,2,64,128,1,float16,fp8,0,1.4786027272542317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,2,64,0,1,float16,float16,0,1.9355093638102214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,2,64,128,1,fp8,fp8,0,1.9388373692830403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,2,64,0,1,float16,fp8,0,1.9139787356058757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,2,64,0,1,fp8,fp8,0,1.7663680712382
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,4,64,128,1,float16,float16,0,1.5164532661437988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,4,64,128,1,float16,fp8,0,1.4950133959452312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,4,64,0,1,float16,float16,0,1.9478079477945964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,4,64,0,1,float16,fp8,0,1.9203519821166992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,4,64,128,1,fp8,fp8,0,1.957914670308431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,4,64,0,1,fp8,fp8,0,1.7766613960266113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,8,64,128,1,float16,float16,0,1.5226240158081055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,8,64,128,1,float16,fp8,0,1.512170632680257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,8,64,0,1,float16,float16,0,1.9599040349324544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,8,64,128,1,fp8,fp8,0,1.9934080441792805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,48,64,128,1,float16,float16,0,0.8502986431121826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,48,64,0,1,float16,float16,0,1.0696053504943848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,8,64,0,1,float16,fp8,0,1.9427146911621094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,48,64,128,1,float16,fp8,0,0.8464319705963135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,8,64,0,1,fp8,fp8,0,1.799242655436198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,48,64,128,1,fp8,fp8,0,1.0977013111114502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,48,64,0,1,float16,fp8,0,1.0676639874776204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,48,64,0,1,fp8,fp8,0,1.0053760210673015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,1,64,128,1,float16,float16,0,0.7640799681345621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,1,64,0,1,float16,float16,0,0.9837120374043783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,1,64,128,1,float16,fp8,0,0.7501920064290365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,1,64,128,1,fp8,fp8,0,0.9865226745605469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,1,64,0,1,float16,fp8,0,0.9692426522572836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,1,64,0,1,fp8,fp8,0,0.9000106652577718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,2,64,128,1,float16,float16,0,0.7663946946461996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,2,64,0,1,float16,float16,0,0.9886773427327474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,2,64,128,1,float16,fp8,0,0.7555359999338785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,2,64,128,1,fp8,fp8,0,0.9902400175730387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,2,64,0,1,float16,fp8,0,0.9809439977010092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,2,64,0,1,fp8,fp8,0,0.8948533535003662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,4,64,128,1,float16,float16,0,0.7688159942626953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,4,64,0,1,float16,float16,0,0.9927893479665121
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,4,64,128,1,float16,fp8,0,0.7580373287200928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,4,64,128,1,fp8,fp8,0,1.0001599788665771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,4,64,0,1,float16,fp8,0,0.9807360172271729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,4,64,0,1,fp8,fp8,0,0.9046133359273275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,8,64,128,1,float16,float16,0,0.7807626724243164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,8,64,0,1,float16,float16,0,0.9970719814300537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,8,64,128,1,float16,fp8,0,0.7672533194224039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,8,64,128,1,fp8,fp8,0,1.011354684829712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,8,64,0,1,float16,fp8,0,0.9910293420155843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,48,64,128,1,float16,float16,0,0.4412906567255656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,8,64,0,1,fp8,fp8,0,0.9156053066253662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,48,64,0,1,float16,float16,0,0.551797350247701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,48,64,128,1,float16,fp8,0,0.4459679921468099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,48,64,128,1,fp8,fp8,0,0.5764106512069702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,1,64,0,1,float16,float16,0,0.5166986783345541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,48,64,0,1,float16,fp8,0,0.5540800094604492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,48,64,0,1,fp8,fp8,0,0.5245013236999512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,1,64,128,1,float16,float16,0,0.40460801124572754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,1,64,128,1,float16,fp8,0,0.3946666717529297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,1,64,128,1,fp8,fp8,0,0.519322673479716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,1,64,0,1,float16,fp8,0,0.5101919968922933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,1,64,0,1,fp8,fp8,0,0.47113601366678876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,2,64,128,1,float16,float16,0,0.4038453499476115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,2,64,0,1,float16,float16,0,0.5158666769663492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,2,64,128,1,float16,fp8,0,0.3978399833043416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,2,64,128,1,fp8,fp8,0,0.5247040192286173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,2,64,0,1,float16,fp8,0,0.5105119943618774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,2,64,0,1,fp8,fp8,0,0.472213347752889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,4,64,128,1,float16,float16,0,0.4083626667658488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,4,64,0,1,float16,float16,0,0.518453319867452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,8,64,128,1,float16,float16,0,0.4082933266957601
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,4,64,128,1,float16,fp8,0,0.4006613492965698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,4,64,128,1,fp8,fp8,0,0.5243146816889445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,4,64,0,1,float16,fp8,0,0.513210654258728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,4,64,0,1,fp8,fp8,0,0.47842665513356525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,8,64,0,1,float16,float16,0,0.5198933283487955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,8,64,128,1,float16,fp8,0,0.40706666310628253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,8,64,128,1,fp8,fp8,0,0.531333327293396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,8,64,0,1,float16,fp8,0,0.5159306526184082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,8,64,0,1,fp8,fp8,0,0.4795786539713542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,48,64,128,1,float16,float16,0,0.23876800139745077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,48,64,0,1,float16,float16,0,0.2971573273340861
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,48,64,128,1,float16,fp8,0,0.24290666977564493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,48,64,128,1,fp8,fp8,0,0.31566933790842694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,48,64,0,1,float16,fp8,0,0.2998720010121663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,48,64,0,1,fp8,fp8,0,0.27642667293548584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,1,64,128,1,float16,float16,0,0.22076267004013062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,1,64,0,1,float16,fp8,0,0.2766186594963074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,1,64,0,1,float16,float16,0,0.27690666913986206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,1,64,128,1,float16,fp8,0,0.21730132897694907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,1,64,128,1,fp8,fp8,0,0.288917342821757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,1,64,0,1,fp8,fp8,0,0.2498133381207784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,2,64,128,1,float16,float16,0,0.22025599082310995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,2,64,0,1,float16,float16,0,0.27848533789316815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,2,64,128,1,float16,fp8,0,0.21797333161036173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,2,64,128,1,fp8,fp8,0,0.2879573305447896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,2,64,0,1,float16,fp8,0,0.27638934055964154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,2,64,0,1,fp8,fp8,0,0.25251734256744385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,4,64,0,1,float16,fp8,0,0.27740800380706787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,4,64,128,1,float16,float16,0,0.22164799769719443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,4,64,0,1,float16,float16,0,0.280074675877889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,4,64,128,1,float16,fp8,0,0.22013866901397705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,4,64,128,1,fp8,fp8,0,0.29151999950408936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,4,64,0,1,fp8,fp8,0,0.2524906595547994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,8,64,128,1,float16,float16,0,0.22573866446812949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,8,64,0,1,float16,float16,0,0.2818293372790019
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,8,64,128,1,float16,fp8,0,0.22209600607554117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,8,64,128,1,fp8,fp8,0,0.29135467608769733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,8,64,0,1,float16,fp8,0,0.2805280089378357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,8,64,0,1,fp8,fp8,0,0.25568532943725586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,48,64,128,1,float16,float16,0,0.14039466778437296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,48,64,0,1,float16,float16,0,0.16123732924461365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,48,64,128,1,float16,fp8,0,0.14230933785438538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,48,64,128,1,fp8,fp8,0,0.18278932571411133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,48,64,0,1,float16,fp8,0,0.16320533553759256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,48,64,0,1,fp8,fp8,0,0.1560533344745636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,1,64,128,1,float16,float16,0,0.1285813351472219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,1,64,0,1,float16,float16,0,0.15055466691652933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,1,64,128,1,float16,fp8,0,0.12890666723251343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,1,64,128,1,fp8,fp8,0,0.16591466466585794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,1,64,0,1,float16,fp8,0,0.14966400464375815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,1,64,0,1,fp8,fp8,0,0.1413386662801107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,2,64,128,1,float16,float16,0,0.13014933466911316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,2,64,0,1,float16,float16,0,0.14899733662605286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,2,64,128,1,float16,fp8,0,0.12785599629084268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,2,64,128,1,fp8,fp8,0,0.165610671043396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,2,64,0,1,float16,fp8,0,0.14992533127466837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,2,64,0,1,fp8,fp8,0,0.14246400197347006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,4,64,128,1,float16,float16,0,0.13014400005340576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,4,64,0,1,float16,float16,0,0.15065600474675497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,4,64,128,1,float16,fp8,0,0.12922133008639017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,8,64,128,1,float16,fp8,0,0.13078400492668152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,4,64,128,1,fp8,fp8,0,0.1655306617418925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,4,64,0,1,float16,fp8,0,0.1488800048828125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,4,64,0,1,fp8,fp8,0,0.14359999696413675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,8,64,128,1,float16,float16,0,0.13184000054995218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,8,64,0,1,float16,float16,0,0.15107733011245728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,8,64,128,1,fp8,fp8,0,0.17173866430918375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,8,64,0,1,float16,fp8,0,0.15157866477966309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,8,64,0,1,fp8,fp8,0,0.14597866932551065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,48,64,128,1,float16,float16,0,0.09851200381914775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,48,64,0,1,float16,float16,0,0.107232004404068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,48,64,128,1,float16,fp8,0,0.09892800450325012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,48,64,128,1,fp8,fp8,0,0.12382933497428894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,48,64,0,1,float16,fp8,0,0.10799466570218404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,48,64,0,1,fp8,fp8,0,0.10288000106811523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,1,64,128,1,float16,float16,0,0.09462933739026387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,1,64,0,1,float16,float16,0,0.10572800040245056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,1,64,128,1,float16,fp8,0,0.09422399600346883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,1,64,128,1,fp8,fp8,0,0.11378666758537292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,1,64,0,1,float16,fp8,0,0.10617066423098247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,1,64,0,1,fp8,fp8,0,0.10110400120417277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,2,64,128,1,float16,float16,0,0.09483200311660767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,2,64,0,1,float16,float16,0,0.10519466797510783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,2,64,128,1,float16,fp8,0,0.09470933675765991
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,2,64,128,1,fp8,fp8,0,0.11492799719174702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,2,64,0,1,float16,fp8,0,0.10504532853762309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,2,64,0,1,fp8,fp8,0,0.1002293328444163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,4,64,128,1,float16,float16,0,0.09470933675765991
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,8,64,128,1,float16,float16,0,0.09453866879145305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,4,64,0,1,float16,float16,0,0.10655466715494792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,4,64,128,1,float16,fp8,0,0.09485333164532979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,4,64,128,1,fp8,fp8,0,0.11372799674669902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,4,64,0,1,float16,fp8,0,0.10550933082898457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,4,64,0,1,fp8,fp8,0,0.10149332880973816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,8,64,0,1,float16,float16,0,0.10481066505114238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,8,64,128,1,float16,fp8,0,0.0956053336461385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,8,64,128,1,fp8,fp8,0,0.11365866661071777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,8,64,0,1,float16,fp8,0,0.10668800274531047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,8,64,0,1,fp8,fp8,0,0.10089066624641418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,1,64,128,1,float16,float16,0,3.66973876953125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,1,64,128,1,float16,fp8,0,3.6825440724690757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,1,64,0,1,float16,float16,0,4.224794705708821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,1,64,0,1,fp8,fp8,0,3.7736425399780273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,1,64,0,1,float16,fp8,0,4.2215572992960615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,2,64,128,1,float16,float16,0,3.7280639012654624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,1,64,128,1,fp8,fp8,0,4.868495941162109
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,2,64,0,1,float16,float16,0,4.262858708699544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,2,64,128,1,float16,fp8,0,3.703632036844889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,2,64,0,1,float16,fp8,0,4.238773345947266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,2,64,0,1,fp8,fp8,0,3.8113012313842773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,2,64,128,1,fp8,fp8,0,4.911994616190593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,4,64,128,1,float16,float16,0,3.8320000966389975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,4,64,128,1,float16,fp8,0,3.817647933959961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,4,64,0,1,float16,float16,0,4.396266619364421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,4,64,128,1,fp8,fp8,0,5.067413330078125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,4,64,0,1,fp8,fp8,0,4.012655893961589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,4,64,0,1,float16,fp8,0,4.33844788869222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,8,64,128,1,float16,float16,0,3.8702081044514975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,8,64,128,1,float16,fp8,0,3.88099733988444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,8,64,0,1,float16,float16,0,4.394458770751953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,8,64,128,1,fp8,fp8,0,5.127546628316243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,48,64,128,1,float16,float16,0,2.030677318572998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,48,64,0,1,float16,float16,0,2.320453325907389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,48,64,128,1,float16,fp8,0,2.0159200032552085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,8,64,0,1,float16,fp8,0,4.403696060180664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,8,64,0,1,fp8,fp8,0,4.024330774943034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,48,64,128,1,fp8,fp8,0,2.5794453620910645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,48,64,0,1,fp8,fp8,0,2.0307092666625977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,48,64,0,1,float16,fp8,0,2.2836586634318032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,1,64,128,1,float16,float16,0,1.8434933026631672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,1,64,128,1,float16,fp8,0,1.848031997680664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,1,64,0,1,float16,float16,0,2.1240533192952475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,1,64,128,1,fp8,fp8,0,2.426047960917155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,1,64,0,1,float16,fp8,0,2.108762741088867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,1,64,0,1,fp8,fp8,0,1.902506669362386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,2,64,128,1,float16,float16,0,1.8593066533406575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,2,64,128,1,float16,fp8,0,1.8538773854573567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,2,64,0,1,float16,float16,0,2.1417439778645835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,2,64,128,1,fp8,fp8,0,2.467317263285319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,2,64,0,1,float16,fp8,0,2.140773296356201
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,2,64,0,1,fp8,fp8,0,1.9160693486531575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,4,64,128,1,float16,float16,0,1.9106559753417969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,4,64,128,1,float16,fp8,0,1.906325340270996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,4,64,0,1,float16,float16,0,2.186175982157389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,4,64,0,1,float16,fp8,0,2.1888160705566406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,4,64,128,1,fp8,fp8,0,2.533226648966471
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,4,64,0,1,fp8,fp8,0,2.0021440188090005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,8,64,128,1,float16,float16,0,1.9284693400065105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,8,64,128,1,float16,fp8,0,1.9277973175048828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,8,64,0,1,float16,float16,0,2.1864585876464844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,48,64,128,1,float16,float16,0,1.0239733060201008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,8,64,128,1,fp8,fp8,0,2.5413440068562827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,8,64,0,1,float16,fp8,0,2.199514706929525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,48,64,0,1,float16,float16,0,1.160138686498006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,8,64,0,1,fp8,fp8,0,2.028437296549479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,48,64,128,1,float16,fp8,0,1.0150346755981445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,48,64,128,1,fp8,fp8,0,1.2971733411153157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,48,64,0,1,float16,fp8,0,1.145301342010498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,1,64,128,1,float16,float16,0,0.9323253631591797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,48,64,0,1,fp8,fp8,0,1.0286613305409749
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,1,64,0,1,float16,float16,0,1.0720213254292805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,1,64,128,1,float16,fp8,0,0.9399200280507406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,1,64,128,1,fp8,fp8,0,1.2417439619700115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,1,64,0,1,float16,fp8,0,1.0732213656107585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,1,64,0,1,fp8,fp8,0,0.9560533364613851
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,2,64,128,1,float16,float16,0,0.93886931737264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,2,64,0,1,float16,float16,0,1.079535961151123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,2,64,128,1,fp8,fp8,0,1.2436319986979167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,2,64,128,1,float16,fp8,0,0.9423946539560953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,2,64,0,1,float16,fp8,0,1.0791893005371094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,4,64,128,1,float16,float16,0,0.9677066802978516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,2,64,0,1,fp8,fp8,0,0.975541353225708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,4,64,0,1,float16,float16,0,1.0978506406148274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,4,64,128,1,float16,fp8,0,0.9610506693522135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,4,64,0,1,float16,fp8,0,1.101482629776001
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,4,64,128,1,fp8,fp8,0,1.27347199122111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,4,64,0,1,fp8,fp8,0,0.995365301767985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,8,64,128,1,float16,float16,0,0.9624959627787272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,8,64,0,1,float16,float16,0,1.1020320256551106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,8,64,128,1,float16,fp8,0,0.9659519990285238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,8,64,128,1,fp8,fp8,0,1.2798826694488525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,8,64,0,1,float16,fp8,0,1.0949066480000813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,48,64,128,1,float16,float16,0,0.5248426596323649
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,8,64,0,1,fp8,fp8,0,1.0140000184377034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,48,64,0,1,float16,float16,0,0.5965119997660319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,48,64,128,1,float16,fp8,0,0.520522673924764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,48,64,128,1,fp8,fp8,0,0.6688586870829264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,48,64,0,1,float16,fp8,0,0.5932000080744425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,48,64,0,1,fp8,fp8,0,0.5213173230489095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,1,64,128,1,float16,float16,0,0.47856001059214276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,1,64,0,1,float16,float16,0,0.5473599831263224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,1,64,128,1,float16,fp8,0,0.4799040158589681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,1,64,128,1,fp8,fp8,0,0.6301386753718058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,1,64,0,1,float16,fp8,0,0.5480533440907797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,1,64,0,1,fp8,fp8,0,0.492304007212321
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,2,64,128,1,float16,float16,0,0.48282134532928467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,2,64,0,1,float16,float16,0,0.5500106811523438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,2,64,128,1,float16,fp8,0,0.4808586835861206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,2,64,128,1,fp8,fp8,0,0.635807991027832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,2,64,0,1,float16,fp8,0,0.5507946809132894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,2,64,0,1,fp8,fp8,0,0.49715201059977215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,4,64,128,1,float16,float16,0,0.4923413197199504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,4,64,0,1,float16,float16,0,0.5626933177312216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,4,64,128,1,float16,fp8,0,0.4907146692276001
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,4,64,128,1,fp8,fp8,0,0.6454240083694458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,4,64,0,1,float16,fp8,0,0.559770663579305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,4,64,0,1,fp8,fp8,0,0.5061066548029581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,8,64,128,1,float16,float16,0,0.49185601870218915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,8,64,0,1,float16,float16,0,0.5637919902801514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,8,64,128,1,float16,fp8,0,0.4937280019124349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,8,64,128,1,fp8,fp8,0,0.6551093260447184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,8,64,0,1,float16,fp8,0,0.5599573453267416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,8,64,0,1,fp8,fp8,0,0.5097333192825317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,48,64,128,1,fp8,fp8,0,0.3521653413772583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,48,64,128,1,float16,float16,0,0.27898667256037396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,48,64,0,1,float16,float16,0,0.31487466891606647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,48,64,128,1,float16,fp8,0,0.2767680088678996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,48,64,0,1,float16,fp8,0,0.3131306568781535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,48,64,0,1,fp8,fp8,0,0.2670346697171529
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,1,64,128,1,float16,float16,0,0.2519200046857198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,1,64,0,1,float16,float16,0,0.28889065980911255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,1,64,128,1,float16,fp8,0,0.2528480092684428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,1,64,128,1,fp8,fp8,0,0.33461864789326984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,1,64,0,1,float16,fp8,0,0.2897866765658061
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,1,64,0,1,fp8,fp8,0,0.25325334072113037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,2,64,128,1,float16,float16,0,0.2525973320007324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,2,64,0,1,float16,float16,0,0.28801600138346356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,2,64,128,1,float16,fp8,0,0.25381867090861004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,2,64,128,1,fp8,fp8,0,0.3367893298467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,2,64,0,1,float16,fp8,0,0.28843732674916583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,2,64,0,1,fp8,fp8,0,0.2574666738510132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,4,64,128,1,float16,float16,0,0.2593653400739034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,4,64,0,1,float16,float16,0,0.2939466635386149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,4,64,128,1,float16,fp8,0,0.25885866085688275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,4,64,128,1,fp8,fp8,0,0.3396586577097575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,4,64,0,1,fp8,fp8,0,0.2556053400039673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,4,64,0,1,float16,fp8,0,0.29578665892283124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,8,64,128,1,float16,float16,0,0.2602720061937968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,8,64,0,1,float16,float16,0,0.29579732815424603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,8,64,128,1,float16,fp8,0,0.2614933252334595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,8,64,128,1,fp8,fp8,0,0.3423733313878377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,8,64,0,1,float16,fp8,0,0.2938026587168376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,8,64,0,1,fp8,fp8,0,0.26001065969467163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,48,64,128,1,float16,float16,0,0.1527466674645742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,48,64,0,1,float16,float16,0,0.16839466492335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,48,64,128,1,float16,fp8,0,0.1536746621131897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,48,64,128,1,fp8,fp8,0,0.1928000052769979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,48,64,0,1,float16,fp8,0,0.16716800133387247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,48,64,0,1,fp8,fp8,0,0.1453279952208201
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,1,64,128,1,float16,float16,0,0.13748266299565634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,1,64,0,1,float16,float16,0,0.14966400464375815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,1,64,128,1,float16,fp8,0,0.13682132959365845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,1,64,128,1,fp8,fp8,0,0.1839466691017151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,1,64,0,1,float16,fp8,0,0.15009066462516785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,1,64,0,1,fp8,fp8,0,0.1351573367913564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,2,64,128,1,float16,float16,0,0.1402453382809957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,2,64,0,1,float16,float16,0,0.15036799510320029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,2,64,128,1,float16,fp8,0,0.1397119959195455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,2,64,128,1,fp8,fp8,0,0.1835093299547831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,2,64,0,1,float16,fp8,0,0.15015467007954916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,2,64,0,1,fp8,fp8,0,0.13523200154304504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,4,64,128,1,float16,float16,0,0.14113066593805948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,4,64,0,1,float16,float16,0,0.1534826656182607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,4,64,128,1,float16,fp8,0,0.14149333039919534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,4,64,128,1,fp8,fp8,0,0.18747733036677042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,4,64,0,1,float16,fp8,0,0.15260799725850424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,4,64,0,1,fp8,fp8,0,0.13902399937311807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,8,64,128,1,float16,float16,0,0.1406880021095276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,8,64,0,1,float16,float16,0,0.15317866206169128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,48,64,0,1,float16,float16,0,0.0922986666361491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,8,64,128,1,float16,fp8,0,0.14262400070826212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,48,64,128,1,fp8,fp8,0,0.11395733555157979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,8,64,128,1,fp8,fp8,0,0.18904000520706177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,8,64,0,1,float16,fp8,0,0.15506133437156677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,8,64,0,1,fp8,fp8,0,0.1399946709473928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,48,64,128,1,float16,float16,0,0.08945066730181377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,48,64,128,1,float16,fp8,0,0.08801066875457764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,48,64,0,1,float16,fp8,0,0.0916266640027364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,48,64,0,1,fp8,fp8,0,0.08390399813652039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,1,64,128,1,float16,float16,0,0.0780320018529892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,1,64,0,1,float16,float16,0,0.08455999692281087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,1,64,128,1,float16,fp8,0,0.07797333101431529
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,1,64,128,1,fp8,fp8,0,0.10160533587137859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,1,64,0,1,float16,fp8,0,0.0831520011027654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,1,64,0,1,fp8,fp8,0,0.0758186678091685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,2,64,128,1,float16,float16,0,0.0782239983479182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,2,64,0,1,float16,float16,0,0.08425600330034892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,2,64,128,1,float16,fp8,0,0.07899199922879536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,2,64,128,1,fp8,fp8,0,0.10283199946085612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,2,64,0,1,float16,fp8,0,0.08437333504358928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,2,64,0,1,fp8,fp8,0,0.07681066791216533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,4,64,128,1,float16,float16,0,0.07974400122960408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,4,64,0,1,float16,float16,0,0.08646399776140849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,4,64,128,1,float16,fp8,0,0.08083199958006541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,4,64,128,1,fp8,fp8,0,0.1072160005569458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,8,64,128,1,fp8,fp8,0,0.1071519951025645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,4,64,0,1,float16,fp8,0,0.08553066849708557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,8,64,0,1,fp8,fp8,0,0.0784746656815211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,4,64,0,1,fp8,fp8,0,0.07888533174991608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,8,64,128,1,float16,float16,0,0.08228266735871632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,8,64,0,1,float16,float16,0,0.08559466401735942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,8,64,128,1,float16,fp8,0,0.08077333370844524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,8,64,0,1,float16,fp8,0,0.08546666304270427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,48,64,128,1,float16,float16,0,0.05277866621812185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,1,64,128,1,float16,float16,0,0.04840533435344696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,48,64,0,1,float16,float16,0,0.05429333448410034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,1,64,128,1,float16,fp8,0,0.048911998669306435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,48,64,128,1,float16,fp8,0,0.05292266607284546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,48,64,128,1,fp8,fp8,0,0.0724426656961441
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,48,64,0,1,float16,fp8,0,0.054010664423306785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,1,64,0,1,fp8,fp8,0,0.04677866895993551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,48,64,0,1,fp8,fp8,0,0.04949333270390829
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,1,64,0,1,float16,float16,0,0.04986133178075155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,1,64,128,1,fp8,fp8,0,0.06487999856472015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,1,64,0,1,float16,fp8,0,0.05034666756788889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,2,64,128,1,float16,float16,0,0.04956266780694326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,2,64,0,1,float16,float16,0,0.051088000337282814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,2,64,128,1,float16,fp8,0,0.04967466493447622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,2,64,128,1,fp8,fp8,0,0.06623466809590657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,2,64,0,1,float16,fp8,0,0.05019199848175049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,4,64,128,1,fp8,fp8,0,0.06559999783833821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,2,64,0,1,fp8,fp8,0,0.04811733464399973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,4,64,128,1,float16,float16,0,0.04955733319123586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,4,64,0,1,float16,float16,0,0.05226133267084757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,4,64,128,1,float16,fp8,0,0.05115733544031779
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,4,64,0,1,float16,fp8,0,0.05167999863624573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,4,64,0,1,fp8,fp8,0,0.04789333542188009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,8,64,128,1,float16,float16,0,0.050885334610939026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,8,64,0,1,float16,float16,0,0.05202133456865946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,8,64,128,1,float16,fp8,0,0.05016533533732096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,8,64,128,1,fp8,fp8,0,0.06533333162466685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,8,64,0,1,float16,fp8,0,0.052933335304260254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,8,64,0,1,fp8,fp8,0,0.04808000226815542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,1,64,128,1,float16,float16,0,3.4753494262695312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,1,64,0,1,float16,float16,0,3.4419307708740234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,1,64,128,1,float16,fp8,0,3.4611145655314126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,1,64,0,1,float16,fp8,0,3.435610771179199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,1,64,128,1,fp8,fp8,0,4.563983917236328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,1,64,0,1,fp8,fp8,0,3.0630451838175454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,2,64,128,1,float16,float16,0,3.524245262145996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,2,64,0,1,float16,float16,0,3.4899412790934243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,2,64,128,1,float16,fp8,0,3.5225439071655273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,2,64,0,1,float16,fp8,0,3.495919863382975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,2,64,128,1,fp8,fp8,0,4.585242589314778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,2,64,0,1,fp8,fp8,0,3.0962985356648765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,4,64,128,1,float16,float16,0,3.62281068166097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,4,64,0,1,float16,float16,0,3.623407999674479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,4,64,128,1,float16,fp8,0,3.616037368774414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,4,64,0,1,float16,fp8,0,3.5970614751180015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,4,64,128,1,fp8,fp8,0,4.781824111938477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,4,64,0,1,fp8,fp8,0,3.2874186833699546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,8,64,128,1,float16,float16,0,3.6459732055664062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,8,64,0,1,float16,float16,0,3.624074618021647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,8,64,128,1,float16,fp8,0,3.6811253229777017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,48,64,128,1,float16,float16,0,1.949178695678711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,8,64,0,1,float16,fp8,0,3.664095878601074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,48,64,0,1,float16,float16,0,1.9278720219930012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,8,64,128,1,fp8,fp8,0,4.818938573201497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,8,64,0,1,fp8,fp8,0,3.31440003712972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,48,64,128,1,float16,fp8,0,1.9102026621500652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,48,64,0,1,float16,fp8,0,1.8884906768798828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,48,64,128,1,fp8,fp8,0,2.4235146840413413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,48,64,0,1,fp8,fp8,0,1.6666347185770671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,1,64,128,1,float16,float16,0,1.7496959368387859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,1,64,0,1,float16,float16,0,1.7316053708394368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,1,64,128,1,float16,fp8,0,1.7513173421223958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,1,64,0,1,fp8,fp8,0,1.5363146464029949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,1,64,128,1,fp8,fp8,0,2.296229362487793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,1,64,0,1,float16,fp8,0,1.7260746955871582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,2,64,0,1,float16,float16,0,1.7435894012451172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,2,64,128,1,float16,float16,0,1.764586607615153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,2,64,128,1,float16,fp8,0,1.7614453633626301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,2,64,0,1,fp8,fp8,0,1.557685375213623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,2,64,128,1,fp8,fp8,0,2.314805348714193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,4,64,128,1,float16,fp8,0,1.8170773188273113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,2,64,0,1,float16,fp8,0,1.7510132789611816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,4,64,128,1,float16,float16,0,1.8216800689697266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,4,64,0,1,float16,float16,0,1.79585599899292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,4,64,0,1,float16,fp8,0,1.8015626271565754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,4,64,0,1,fp8,fp8,0,1.635466734568278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,4,64,128,1,fp8,fp8,0,2.3952372868855796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,8,64,128,1,float16,float16,0,1.8253599802652996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,8,64,0,1,float16,float16,0,1.815882682800293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,8,64,128,1,float16,fp8,0,1.8390827178955078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,48,64,128,1,float16,float16,0,0.9769173463185629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,8,64,0,1,float16,fp8,0,1.8113919893900554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,48,64,0,1,float16,float16,0,0.9614346822102865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,8,64,0,1,fp8,fp8,0,1.6490613619486492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,8,64,128,1,fp8,fp8,0,2.408496061960856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,48,64,128,1,float16,fp8,0,0.9651467005411783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,48,64,0,1,fp8,fp8,0,0.8400266965230306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,48,64,128,1,fp8,fp8,0,1.2372693220774333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,48,64,0,1,float16,fp8,0,0.9560800393422445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,1,64,128,1,float16,float16,0,0.8849706649780273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,1,64,0,1,float16,float16,0,0.8729120095570883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,1,64,128,1,float16,fp8,0,0.8905866940816244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,1,64,128,1,fp8,fp8,0,1.1679946581522624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,1,64,0,1,float16,fp8,0,0.8782239754994711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,1,64,0,1,fp8,fp8,0,0.7730773289998373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,2,64,128,1,float16,float16,0,0.8975626627604166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,2,64,0,1,float16,float16,0,0.8845653533935547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,2,64,128,1,float16,fp8,0,0.8924053510030111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,2,64,128,1,fp8,fp8,0,1.171679973602295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,2,64,0,1,float16,fp8,0,0.8854773044586182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,2,64,0,1,fp8,fp8,0,0.7873866558074951
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,4,64,128,1,float16,float16,0,0.9165013631184896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,4,64,0,1,float16,float16,0,0.9084800084431967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,4,64,128,1,float16,fp8,0,0.918341318766276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,4,64,128,1,fp8,fp8,0,1.2060373624165852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,4,64,0,1,float16,fp8,0,0.900165319442749
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,4,64,0,1,fp8,fp8,0,0.8139839967091879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,8,64,128,1,float16,float16,0,0.9164266586303711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,8,64,0,1,float16,float16,0,0.9059999783833822
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,8,64,128,1,float16,fp8,0,0.9122453530629476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,48,64,128,1,float16,float16,0,0.5020373264948527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,8,64,128,1,fp8,fp8,0,1.210810661315918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,8,64,0,1,float16,fp8,0,0.9049599965413412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,8,64,0,1,fp8,fp8,0,0.8249546686808268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,48,64,0,1,float16,float16,0,0.4984000126520793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,48,64,128,1,float16,fp8,0,0.4973440170288086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,48,64,128,1,fp8,fp8,0,0.6337600151697794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,48,64,0,1,float16,fp8,0,0.48796268304189044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,48,64,0,1,fp8,fp8,0,0.4300373395284017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,1,64,128,1,float16,float16,0,0.45636268456776935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,1,64,0,1,float16,float16,0,0.44706666469573975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,1,64,128,1,float16,fp8,0,0.45691200097401935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,1,64,128,1,fp8,fp8,0,0.5955520073572794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,1,64,0,1,float16,fp8,0,0.44734398523966473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,1,64,0,1,fp8,fp8,0,0.39709333578745526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,2,64,128,1,float16,float16,0,0.4574506680170695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,2,64,0,1,float16,float16,0,0.45343466599782306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,2,64,128,1,float16,fp8,0,0.4596373240152995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,2,64,128,1,fp8,fp8,0,0.6005973418553671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,2,64,0,1,float16,fp8,0,0.45420801639556885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,2,64,0,1,fp8,fp8,0,0.4035573403040568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,4,64,128,1,fp8,fp8,0,0.6184426546096802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,4,64,128,1,float16,float16,0,0.4680639902750651
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,4,64,0,1,float16,float16,0,0.4602880080540975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,4,64,128,1,float16,fp8,0,0.46792534987131756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,4,64,0,1,float16,fp8,0,0.4609440167744954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,4,64,0,1,fp8,fp8,0,0.41545601685841876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,8,64,128,1,fp8,fp8,0,0.6167946656545004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,8,64,128,1,float16,float16,0,0.4699466625849406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,8,64,0,1,float16,float16,0,0.46105066935221356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,8,64,128,1,float16,fp8,0,0.47036266326904297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,8,64,0,1,float16,fp8,0,0.45977067947387695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,8,64,0,1,fp8,fp8,0,0.41803733507792157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,48,64,128,1,float16,float16,0,0.26571200291315716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,48,64,0,1,float16,float16,0,0.2604479988416036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,48,64,128,1,float16,fp8,0,0.2659413417180379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,48,64,128,1,fp8,fp8,0,0.33449065685272217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,48,64,0,1,float16,fp8,0,0.25723199049631756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,48,64,0,1,fp8,fp8,0,0.2228906750679016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,1,64,128,1,float16,float16,0,0.24046399195988974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,1,64,0,1,float16,float16,0,0.23466666539510092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,1,64,128,1,float16,fp8,0,0.24226667483647665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,2,64,128,1,float16,fp8,0,0.24276266495386759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,1,64,128,1,fp8,fp8,0,0.3131733338038127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,1,64,0,1,float16,fp8,0,0.2349440058072408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,1,64,0,1,fp8,fp8,0,0.208079993724823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,2,64,128,1,float16,float16,0,0.2421226700146993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,2,64,0,1,float16,float16,0,0.23825599749883017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,2,64,128,1,fp8,fp8,0,0.317738672097524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,2,64,0,1,float16,fp8,0,0.23641600211461386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,2,64,0,1,fp8,fp8,0,0.21052267154057822
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,4,64,128,1,float16,float16,0,0.24688533941904703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,4,64,0,1,float16,float16,0,0.24201067288716635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,8,64,0,1,float16,float16,0,0.24322134256362915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,4,64,128,1,float16,fp8,0,0.24885867039362589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,4,64,128,1,fp8,fp8,0,0.3216373324394226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,4,64,0,1,float16,fp8,0,0.24315200249354044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,4,64,0,1,fp8,fp8,0,0.2137333353360494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,8,64,128,1,float16,float16,0,0.24664533138275146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,8,64,128,1,float16,fp8,0,0.24670400222142538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,8,64,128,1,fp8,fp8,0,0.323472003142039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,8,64,0,1,float16,fp8,0,0.2425653338432312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,8,64,0,1,fp8,fp8,0,0.21772799889246622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,48,64,128,1,float16,float16,0,0.14776000380516052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,48,64,0,1,float16,float16,0,0.14498133460680643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,48,64,128,1,float16,fp8,0,0.14542933305104574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,48,64,128,1,fp8,fp8,0,0.18476800123850504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,48,64,0,1,float16,fp8,0,0.14215999841690063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,48,64,0,1,fp8,fp8,0,0.12383466958999634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,1,64,128,1,float16,float16,0,0.1318933367729187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,1,64,0,1,float16,float16,0,0.1267359952131907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,1,64,128,1,float16,fp8,0,0.1328373352686564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,1,64,128,1,fp8,fp8,0,0.173199991385142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,1,64,0,1,float16,fp8,0,0.12575999895731607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,1,64,0,1,fp8,fp8,0,0.11204800009727478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,2,64,128,1,float16,float16,0,0.13100266456604004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,2,64,0,1,float16,float16,0,0.127210666735967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,2,64,128,1,float16,fp8,0,0.13243732849756876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,2,64,128,1,fp8,fp8,0,0.17511467138926187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,2,64,0,1,float16,fp8,0,0.12688533465067545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,4,64,0,1,float16,fp8,0,0.12794133027394614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,2,64,0,1,fp8,fp8,0,0.11337066690127055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,4,64,128,1,float16,float16,0,0.1343786617120107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,4,64,0,1,float16,float16,0,0.12896533807118735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,4,64,128,1,float16,fp8,0,0.13449600338935852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,4,64,128,1,fp8,fp8,0,0.17786665757497153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,4,64,0,1,fp8,fp8,0,0.11691733201344807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,8,64,128,1,float16,float16,0,0.135754664738973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,8,64,0,1,float16,float16,0,0.12827733159065247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,8,64,128,1,float16,fp8,0,0.13689066966374716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,8,64,128,1,fp8,fp8,0,0.17838933070500693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,8,64,0,1,float16,fp8,0,0.12928533554077148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,8,64,0,1,fp8,fp8,0,0.11647466818491618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,48,64,128,1,float16,float16,0,0.08637866377830505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,48,64,0,1,float16,float16,0,0.07967466612656911
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,48,64,128,1,float16,fp8,0,0.08574933807055156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,48,64,128,1,fp8,fp8,0,0.10761599739392598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,48,64,0,1,float16,fp8,0,0.07983999947706859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,48,64,0,1,fp8,fp8,0,0.07099733253320058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,1,64,128,1,float16,float16,0,0.0745066652695338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,1,64,0,1,float16,float16,0,0.0705866664648056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,1,64,128,1,float16,fp8,0,0.0761706680059433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,1,64,128,1,fp8,fp8,0,0.09805333614349365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,1,64,0,1,float16,fp8,0,0.0703413337469101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,1,64,0,1,fp8,fp8,0,0.06428800026575725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,2,64,128,1,float16,float16,0,0.0759626676638921
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,2,64,0,1,float16,float16,0,0.07165866593519847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,2,64,128,1,float16,fp8,0,0.07675200204054515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,2,64,128,1,fp8,fp8,0,0.09693333506584167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,2,64,0,1,float16,fp8,0,0.07214400172233582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,2,64,0,1,fp8,fp8,0,0.06479999919732411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,4,64,128,1,float16,float16,0,0.07803733150164287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,4,64,0,1,float16,float16,0,0.0724426656961441
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,4,64,128,1,float16,fp8,0,0.07765866816043854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,4,64,128,1,fp8,fp8,0,0.09920000036557515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,4,64,0,1,float16,fp8,0,0.07320000231266022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,4,64,0,1,fp8,fp8,0,0.06648000081380208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,8,64,128,1,float16,float16,0,0.0783733328183492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,8,64,0,1,float16,float16,0,0.07477866609891255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,8,64,128,1,float16,fp8,0,0.07789866626262665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,8,64,128,1,fp8,fp8,0,0.09980266292889912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,8,64,0,1,float16,fp8,0,0.07392000158627827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,8,64,0,1,fp8,fp8,0,0.06675200164318085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,48,64,128,1,float16,float16,0,0.052298665046691895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,48,64,0,1,float16,float16,0,0.0470773329337438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,48,64,128,1,float16,fp8,0,0.050885334610939026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,48,64,128,1,fp8,fp8,0,0.06810133159160614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,48,64,0,1,float16,fp8,0,0.04635733366012573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,48,64,0,1,fp8,fp8,0,0.04285866518815359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,1,64,0,1,fp8,fp8,0,0.0407679999868075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,2,64,128,1,float16,float16,0,0.04794133206208547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,1,64,128,1,float16,float16,0,0.04810666541258494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,1,64,0,1,float16,float16,0,0.043562665581703186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,1,64,128,1,float16,fp8,0,0.048826664686203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,1,64,128,1,fp8,fp8,0,0.062074666221936546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,1,64,0,1,float16,fp8,0,0.04354133208592733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,2,64,0,1,float16,float16,0,0.04378666480382284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,2,64,128,1,float16,fp8,0,0.04867733518282572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,2,64,128,1,fp8,fp8,0,0.06173333525657654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,2,64,0,1,float16,fp8,0,0.04422399898370107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,2,64,0,1,fp8,fp8,0,0.04062933226426443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,4,64,128,1,float16,float16,0,0.04962133367856344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,4,64,0,1,float16,float16,0,0.04506133496761322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,4,64,128,1,float16,fp8,0,0.04923733572165171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,4,64,128,1,fp8,fp8,0,0.06277333199977875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,4,64,0,1,float16,fp8,0,0.04573333263397217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,4,64,0,1,fp8,fp8,0,0.04126933217048645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,8,64,128,1,float16,float16,0,0.04890666902065277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,8,64,0,1,float16,float16,0,0.04445866743723551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,8,64,128,1,float16,fp8,0,0.0498879998922348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,8,64,128,1,fp8,fp8,0,0.06386666496594746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,8,64,0,1,float16,fp8,0,0.04514666895071665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,8,64,0,1,fp8,fp8,0,0.041573333243529
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,48,64,128,1,float16,float16,0,0.03701333453257879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,48,64,0,1,float16,float16,0,0.03419200082619985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,48,64,128,1,float16,fp8,0,0.036602665980656944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,48,64,128,1,fp8,fp8,0,0.04221333563327789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,48,64,0,1,float16,fp8,0,0.034330666065216064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,48,64,0,1,fp8,fp8,0,0.03155199935038885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,1,64,128,1,float16,float16,0,0.03426666557788849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,1,64,0,1,float16,float16,0,0.03158933420976003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,1,64,128,1,float16,fp8,0,0.035232000052928925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,1,64,128,1,fp8,fp8,0,0.04027733455101649
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,1,64,0,1,float16,fp8,0,0.031957333286603294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,1,64,0,1,fp8,fp8,0,0.02959466725587845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,2,64,128,1,float16,float16,0,0.0349386657277743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,2,64,0,1,float16,float16,0,0.03221333275238673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,2,64,128,1,float16,fp8,0,0.03454933315515518
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,2,64,128,1,fp8,fp8,0,0.040607998768488564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,2,64,0,1,float16,fp8,0,0.03252266595760981
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,2,64,0,1,fp8,fp8,0,0.029674666623274486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,4,64,128,1,float16,float16,0,0.03509333233038584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,4,64,0,1,float16,float16,0,0.032287999987602234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,4,64,128,1,float16,fp8,0,0.03573333223660787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,4,64,128,1,fp8,fp8,0,0.04159466673930486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,4,64,0,1,float16,fp8,0,0.032501332461833954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,4,64,0,1,fp8,fp8,0,0.03053866575161616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,8,64,128,1,float16,float16,0,0.03612799942493439
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,8,64,0,1,float16,float16,0,0.0322773332397143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,8,64,128,1,float16,fp8,0,0.03552533437808355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,8,64,128,1,fp8,fp8,0,0.04192000130812327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,8,64,0,1,float16,fp8,0,0.033173332611719765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,8,64,0,1,fp8,fp8,0,0.03070399910211563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,1,64,128,1,float16,float16,0,1.4681065877278645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,1,64,0,1,float16,float16,0,1.4265066782633464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,1,64,128,1,float16,fp8,0,1.4756852785746257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,1,64,0,1,float16,fp8,0,1.4223839441935222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,1,64,0,1,fp8,fp8,0,1.3213333288828533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,1,64,128,1,fp8,fp8,0,2.0083893140157065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,2,64,128,1,float16,float16,0,1.4961759249369304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,2,64,0,1,float16,float16,0,1.4476693471272786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,2,64,128,1,float16,fp8,0,1.480730692545573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,2,64,0,1,fp8,fp8,0,1.356650670369466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,2,64,0,1,float16,fp8,0,1.4455040295918782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,2,64,128,1,fp8,fp8,0,2.0349599520365396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,4,64,128,1,float16,float16,0,1.5349547068277996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,4,64,0,1,float16,float16,0,1.4964159329732258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,4,64,128,1,float16,fp8,0,1.5372427304585774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,4,64,0,1,float16,fp8,0,1.4878452618916829
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,4,64,128,1,fp8,fp8,0,2.0904693603515625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,4,64,0,1,fp8,fp8,0,1.4212692578633626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,8,64,128,1,float16,float16,0,1.5557972590128581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,8,64,0,1,float16,float16,0,1.5210879643758137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,8,64,128,1,float16,fp8,0,1.5561973253885906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,48,64,0,1,float16,float16,0,0.8164052963256836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,48,64,128,1,float16,float16,0,0.8362987041473389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,8,64,0,1,float16,fp8,0,1.5151893297831218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,48,64,128,1,float16,fp8,0,0.8192319869995117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,8,64,128,1,fp8,fp8,0,2.1107199986775718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,8,64,0,1,fp8,fp8,0,1.4301279385884602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,48,64,128,1,fp8,fp8,0,1.0766026973724365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,48,64,0,1,float16,fp8,0,0.804207960764567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,48,64,0,1,fp8,fp8,0,0.7348852952321371
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,1,64,128,1,float16,float16,0,0.7434720198313395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,1,64,128,1,float16,fp8,0,0.7463253339131674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,1,64,0,1,float16,float16,0,0.7260586420694987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,1,64,0,1,float16,fp8,0,0.7247786521911621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,1,64,128,1,fp8,fp8,0,1.0088319778442383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,1,64,0,1,fp8,fp8,0,0.6671786308288574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,2,64,128,1,float16,float16,0,0.7538932959238688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,2,64,0,1,float16,float16,0,0.7302026748657227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,2,64,128,1,float16,fp8,0,0.7535040378570557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,2,64,128,1,fp8,fp8,0,1.020576000213623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,2,64,0,1,float16,fp8,0,0.7330613136291504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,4,64,128,1,float16,fp8,0,0.7708053588867188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,2,64,0,1,fp8,fp8,0,0.6783466339111328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,4,64,128,1,float16,float16,0,0.7721652984619141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,4,64,0,1,float16,float16,0,0.7567306359608968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,4,64,0,1,float16,fp8,0,0.7532906532287598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,4,64,128,1,fp8,fp8,0,1.0484426816304524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,4,64,0,1,fp8,fp8,0,0.7071359952290853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,8,64,128,1,float16,float16,0,0.7700746854146322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,8,64,0,1,float16,fp8,0,0.7551679611206055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,8,64,0,1,float16,float16,0,0.7523466746012369
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,8,64,128,1,float16,fp8,0,0.7736426989237467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,8,64,128,1,fp8,fp8,0,1.0653546651204426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,48,64,128,1,fp8,fp8,0,0.5462133487065634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,48,64,0,1,float16,fp8,0,0.4161493380864461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,8,64,0,1,fp8,fp8,0,0.7262933254241943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,1,64,128,1,float16,float16,0,0.38171199957529706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,48,64,128,1,float16,float16,0,0.43348264694213867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,48,64,0,1,float16,float16,0,0.423583984375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,48,64,128,1,float16,fp8,0,0.42532265186309814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,48,64,0,1,fp8,fp8,0,0.37062935034434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,1,64,0,1,float16,float16,0,0.3723626534144084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,1,64,128,1,float16,fp8,0,0.38044265906016034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,1,64,128,1,fp8,fp8,0,0.5128906567891439
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,1,64,0,1,float16,fp8,0,0.3715306520462036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,1,64,0,1,fp8,fp8,0,0.34419198830922443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,2,64,128,1,fp8,fp8,0,0.5218933423360189
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,2,64,0,1,float16,fp8,0,0.3746346632639567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,2,64,128,1,float16,float16,0,0.3851999839146932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,2,64,0,1,float16,float16,0,0.3738986651102702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,2,64,128,1,float16,fp8,0,0.38427734375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,2,64,0,1,fp8,fp8,0,0.3500639994939168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,4,64,128,1,fp8,fp8,0,0.5236959854761759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,4,64,128,1,float16,float16,0,0.39607465267181396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,4,64,0,1,float16,float16,0,0.38768001397450763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,4,64,128,1,float16,fp8,0,0.3962826728820801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,4,64,0,1,float16,fp8,0,0.38630398114522296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,4,64,0,1,fp8,fp8,0,0.3526879946390788
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,8,64,128,1,float16,float16,0,0.3954026699066162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,8,64,0,1,float16,float16,0,0.38654931386311847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,8,64,128,1,float16,fp8,0,0.39561065038045246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,8,64,128,1,fp8,fp8,0,0.5378826856613159
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,8,64,0,1,float16,fp8,0,0.3880426486333211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,8,64,0,1,fp8,fp8,0,0.36879467964172363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,48,64,128,1,float16,float16,0,0.23043199380238852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,48,64,0,1,float16,float16,0,0.22774400313695273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,48,64,128,1,float16,fp8,0,0.22643200556437174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,48,64,128,1,fp8,fp8,0,0.2806346615155538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,48,64,0,1,float16,fp8,0,0.22554133335749307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,1,64,128,1,fp8,fp8,0,0.26608532667160034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,48,64,0,1,fp8,fp8,0,0.19585599501927695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,1,64,128,1,float16,float16,0,0.20129066705703735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,1,64,0,1,float16,float16,0,0.19601066907246908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,1,64,128,1,float16,fp8,0,0.20138132572174072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,1,64,0,1,float16,fp8,0,0.19528534015019736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,1,64,0,1,fp8,fp8,0,0.1839466691017151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,2,64,128,1,float16,float16,0,0.20177600781122842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,2,64,0,1,float16,float16,0,0.19577600558598837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,2,64,128,1,float16,fp8,0,0.20306666692097983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,4,64,128,1,float16,fp8,0,0.20638400316238403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,2,64,128,1,fp8,fp8,0,0.2701173424720764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,2,64,0,1,float16,fp8,0,0.19739200671513876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,2,64,0,1,fp8,fp8,0,0.1859626571337382
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,4,64,128,1,float16,float16,0,0.20645866791407266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,4,64,0,1,float16,float16,0,0.2012959917386373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,4,64,128,1,fp8,fp8,0,0.2717760006586711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,4,64,0,1,float16,fp8,0,0.20311999320983887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,4,64,0,1,fp8,fp8,0,0.18617600202560425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,8,64,128,1,float16,float16,0,0.20972800254821777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,8,64,0,1,float16,float16,0,0.20565332969029745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,8,64,128,1,float16,fp8,0,0.2087679902712504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,8,64,128,1,fp8,fp8,0,0.2767733335494995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,8,64,0,1,float16,fp8,0,0.20302400986353555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,8,64,0,1,fp8,fp8,0,0.19125332434972128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,48,64,128,1,float16,float16,0,0.12735467155774435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,48,64,0,1,float16,float16,0,0.12599999705950418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,48,64,128,1,float16,fp8,0,0.1272266705830892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,48,64,128,1,fp8,fp8,0,0.14990933736165366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,48,64,0,1,float16,fp8,0,0.12343466281890869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,48,64,0,1,fp8,fp8,0,0.11072533329327901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,1,64,0,1,float16,fp8,0,0.10357333223025005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,1,64,128,1,float16,float16,0,0.10530666510264079
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,1,64,0,1,float16,float16,0,0.10262399911880493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,1,64,128,1,float16,fp8,0,0.10679466525713603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,1,64,128,1,fp8,fp8,0,0.1418560047944387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,1,64,0,1,fp8,fp8,0,0.10035733381907146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,2,64,0,1,float16,fp8,0,0.10428266723950703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,2,64,128,1,float16,float16,0,0.10706667105356853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,2,64,0,1,float16,float16,0,0.10337600111961365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,2,64,128,1,float16,fp8,0,0.10590400298436482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,4,64,128,1,float16,fp8,0,0.1086133321126302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,2,64,128,1,fp8,fp8,0,0.14190933108329773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,2,64,0,1,fp8,fp8,0,0.10099732875823975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,4,64,128,1,float16,float16,0,0.10877866546312968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,4,64,0,1,float16,float16,0,0.10776000221570332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,4,64,128,1,fp8,fp8,0,0.14325867096583048
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,4,64,0,1,float16,fp8,0,0.10583466291427612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,4,64,0,1,fp8,fp8,0,0.10285333792368571
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,8,64,128,1,float16,float16,0,0.11014933387438457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,8,64,0,1,float16,float16,0,0.10789866248766582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,8,64,128,1,float16,fp8,0,0.11026133100191753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,8,64,128,1,fp8,fp8,0,0.14666666587193808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,8,64,0,1,float16,fp8,0,0.10694400469462077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,8,64,0,1,fp8,fp8,0,0.10459199547767639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,48,64,128,1,float16,float16,0,0.07262933254241943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,48,64,0,1,float16,float16,0,0.07479466497898102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,48,64,128,1,float16,fp8,0,0.07110400001207988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,48,64,128,1,fp8,fp8,0,0.08614400029182434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,48,64,0,1,float16,fp8,0,0.07336000104745229
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,48,64,0,1,fp8,fp8,0,0.06460799773534139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,1,64,128,1,float16,float16,0,0.061162665486335754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,1,64,0,1,float16,float16,0,0.06197333335876465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,1,64,128,1,float16,fp8,0,0.061333333452542625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,1,64,128,1,fp8,fp8,0,0.07675200204054515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,1,64,0,1,float16,fp8,0,0.06404266754786174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,1,64,0,1,fp8,fp8,0,0.057328000664711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,2,64,128,1,float16,float16,0,0.06387733419736226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,2,64,0,1,float16,float16,0,0.06241066753864288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,2,64,128,1,float16,fp8,0,0.06211199859778086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,2,64,128,1,fp8,fp8,0,0.07771199941635132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,2,64,0,1,float16,fp8,0,0.06261866788069408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,2,64,0,1,fp8,fp8,0,0.0576853354771932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,4,64,128,1,float16,float16,0,0.06382933259010315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,4,64,0,1,float16,float16,0,0.06402666866779327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,4,64,128,1,float16,fp8,0,0.06482666730880737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,4,64,128,1,fp8,fp8,0,0.07934933404127757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,4,64,0,1,float16,fp8,0,0.06443200012048085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,4,64,0,1,fp8,fp8,0,0.058373332023620605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,8,64,128,1,float16,float16,0,0.06363200147946675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,8,64,0,1,float16,float16,0,0.06413333117961884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,8,64,128,1,float16,fp8,0,0.06407466530799866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,8,64,128,1,fp8,fp8,0,0.08009600142637889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,8,64,0,1,float16,fp8,0,0.06618133187294006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,8,64,0,1,fp8,fp8,0,0.06007466713587443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,48,64,128,1,float16,float16,0,0.041135999063650765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,48,64,0,1,float16,float16,0,0.041306667029857635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,48,64,128,1,float16,fp8,0,0.04085333396991094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,48,64,128,1,fp8,fp8,0,0.04898133377234141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,48,64,0,1,float16,fp8,0,0.04001066585381826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,48,64,0,1,fp8,fp8,0,0.038047999143600464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,1,64,128,1,float16,float16,0,0.038373333712418876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,1,64,0,1,float16,float16,0,0.0373333344856898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,1,64,128,1,float16,fp8,0,0.03806400050719579
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,1,64,128,1,fp8,fp8,0,0.04553600152333578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,1,64,0,1,float16,fp8,0,0.037317333122094475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,1,64,0,1,fp8,fp8,0,0.03497066597143809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,2,64,128,1,float16,float16,0,0.0386559988061587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,2,64,0,1,float16,float16,0,0.03729599962631861
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,2,64,128,1,float16,fp8,0,0.03862400104602178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,2,64,128,1,fp8,fp8,0,0.046096002062161766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,2,64,0,1,float16,fp8,0,0.0373279998699824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,2,64,0,1,fp8,fp8,0,0.03573866685231527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,4,64,128,1,float16,float16,0,0.03877866764863332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,4,64,0,1,float16,float16,0,0.03782933453718821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,4,64,128,1,float16,fp8,0,0.039461334546407066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,4,64,128,1,fp8,fp8,0,0.04728533327579498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,4,64,0,1,float16,fp8,0,0.038389332592487335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,4,64,0,1,fp8,fp8,0,0.03741333385308584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,8,64,128,1,float16,float16,0,0.03913066784540812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,8,64,0,1,float16,float16,0,0.03811733424663544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,8,64,128,1,float16,fp8,0,0.03923733284076055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,8,64,128,1,fp8,fp8,0,0.04625066618124644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,8,64,0,1,float16,fp8,0,0.03872533390919367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,8,64,0,1,fp8,fp8,0,0.03753600021203359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,48,64,128,1,float16,float16,0,0.030661332110563915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,48,64,0,1,float16,float16,0,0.03091199944416682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,48,64,128,1,float16,fp8,0,0.03084266682465871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,48,64,128,1,fp8,fp8,0,0.03610666592915853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,48,64,0,1,float16,fp8,0,0.030154667794704437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,48,64,0,1,fp8,fp8,0,0.028512001037597656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,1,64,128,1,float16,float16,0,0.02874133239189784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,1,64,0,1,float16,float16,0,0.028389332195123036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,1,64,128,1,float16,fp8,0,0.028410665690898895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,1,64,128,1,fp8,fp8,0,0.03482666611671448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,1,64,0,1,float16,fp8,0,0.028064000109831493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,1,64,0,1,fp8,fp8,0,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,2,64,128,1,float16,float16,0,0.028405333558718365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,2,64,0,1,float16,float16,0,0.028336000939210255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,2,64,128,1,float16,fp8,0,0.028933333853880566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,2,64,128,1,fp8,fp8,0,0.03397866586844126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,2,64,0,1,float16,fp8,0,0.028490667541821797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,2,64,0,1,fp8,fp8,0,0.026885333160559338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,4,64,128,1,float16,float16,0,0.02887466549873352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,4,64,0,1,float16,float16,0,0.028373333315054577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,4,64,128,1,float16,fp8,0,0.02924266705910365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,4,64,128,1,fp8,fp8,0,0.035429333647092186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,4,64,0,1,float16,fp8,0,0.028501334289709728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,4,64,0,1,fp8,fp8,0,0.028783999383449554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,8,64,128,1,float16,float16,0,0.028778667251269024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,8,64,0,1,float16,float16,0,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,8,64,128,1,float16,fp8,0,0.029050665597120922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,8,64,128,1,fp8,fp8,0,0.03509333233038584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,8,64,0,1,float16,fp8,0,0.028832000990708668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,8,64,0,1,fp8,fp8,0,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,48,64,128,1,float16,float16,0,0.01987733319401741
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,48,64,0,1,float16,float16,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,48,64,128,1,float16,fp8,0,0.019674666225910187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,1,64,128,1,float16,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,48,64,128,1,fp8,fp8,0,0.023904000719388325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,48,64,0,1,float16,fp8,0,0.020197333147128422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,48,64,0,1,fp8,fp8,0,0.0205226664741834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,1,64,128,1,float16,float16,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,2,64,0,1,float16,float16,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,1,64,0,1,float16,float16,0,0.01860800012946129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,1,64,128,1,fp8,fp8,0,0.02260799954334895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,1,64,0,1,float16,fp8,0,0.018383999665578205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,1,64,0,1,fp8,fp8,0,0.020256000260512035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,2,64,128,1,float16,float16,0,0.018373332917690277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,2,64,128,1,float16,fp8,0,0.018837332725524902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,2,64,128,1,fp8,fp8,0,0.02386133372783661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,2,64,0,1,float16,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,2,64,0,1,fp8,fp8,0,0.01959466685851415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,4,64,128,1,float16,float16,0,0.01863466699918111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,4,64,0,1,float16,float16,0,0.018757333358128864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,4,64,128,1,float16,fp8,0,0.019706666469573975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,4,64,128,1,fp8,fp8,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,4,64,0,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,4,64,0,1,fp8,fp8,0,0.022154666483402252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,8,64,128,1,float16,float16,0,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,8,64,0,1,float16,float16,0,0.01870399961868922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,8,64,128,1,float16,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,8,64,128,1,fp8,fp8,0,0.023546665906906128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,8,64,0,1,float16,fp8,0,0.018650667121013004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,8,64,0,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,1,64,128,1,float16,float16,0,0.5420533418655396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,1,64,0,1,float16,float16,0,0.5440853436787924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,1,64,128,1,float16,fp8,0,0.5401333173116049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,1,64,128,1,fp8,fp8,0,0.6604693333307902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,1,64,0,1,float16,fp8,0,0.5410506725311279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,1,64,0,1,fp8,fp8,0,0.6611466805140177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,2,64,128,1,fp8,fp8,0,0.6552480061848959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,2,64,128,1,float16,float16,0,0.5503520170847574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,2,64,0,1,float16,float16,0,0.549509326616923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,2,64,128,1,float16,fp8,0,0.5458293358484904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,2,64,0,1,float16,fp8,0,0.54585067431132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,2,64,0,1,fp8,fp8,0,0.6593653361002604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,4,64,128,1,float16,float16,0,0.575823982556661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,4,64,0,1,float16,float16,0,0.5768586794535319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,4,64,128,1,float16,fp8,0,0.570799986521403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,4,64,128,1,fp8,fp8,0,0.7079199949900309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,8,64,128,1,float16,float16,0,0.5799893140792847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,4,64,0,1,float16,fp8,0,0.5680106480916342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,4,64,0,1,fp8,fp8,0,0.7038826942443848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,8,64,0,1,float16,float16,0,0.5776533285776774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,8,64,128,1,float16,fp8,0,0.5716266632080078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,8,64,128,1,fp8,fp8,0,0.712165355682373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,48,64,128,1,float16,float16,0,0.3280319968859355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,48,64,0,1,float16,float16,0,0.3266506592432658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,8,64,0,1,float16,fp8,0,0.5737599929173788
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,8,64,0,1,fp8,fp8,0,0.7155360380808512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,48,64,128,1,float16,fp8,0,0.32018667459487915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,1,64,0,1,float16,float16,0,0.2770400047302246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,48,64,128,1,fp8,fp8,0,0.36735467116038006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,48,64,0,1,float16,fp8,0,0.31888532638549805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,48,64,0,1,fp8,fp8,0,0.367301344871521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,1,64,128,1,float16,float16,0,0.2763413389523824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,1,64,128,1,float16,fp8,0,0.2762826681137085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,1,64,128,1,fp8,fp8,0,0.3386506636937459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,1,64,0,1,float16,fp8,0,0.2757386763890584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,1,64,0,1,fp8,fp8,0,0.3383786678314209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,2,64,128,1,float16,float16,0,0.2792106668154399
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,2,64,0,1,float16,float16,0,0.28124799331029254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,2,64,128,1,float16,fp8,0,0.2797600030899048
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,2,64,128,1,fp8,fp8,0,0.3385973374048869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,2,64,0,1,float16,fp8,0,0.2791626652081807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,2,64,0,1,fp8,fp8,0,0.33924798170725506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,4,64,128,1,float16,float16,0,0.292197326819102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,4,64,0,1,float16,float16,0,0.2911146680514018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,4,64,128,1,float16,fp8,0,0.2911199927330017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,4,64,128,1,fp8,fp8,0,0.3568426767985026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,4,64,0,1,float16,fp8,0,0.29263999064763385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,4,64,0,1,fp8,fp8,0,0.35732801755269367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,8,64,128,1,float16,float16,0,0.2930453419685364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,8,64,0,1,float16,float16,0,0.29341866572697956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,8,64,128,1,float16,fp8,0,0.2892000079154968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,8,64,128,1,fp8,fp8,0,0.3620479901631673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,8,64,0,1,float16,fp8,0,0.2899679938952128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,8,64,0,1,fp8,fp8,0,0.36163198947906494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,48,64,128,1,float16,float16,0,0.17705599466959634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,48,64,0,1,float16,float16,0,0.17563199996948242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,48,64,128,1,float16,fp8,0,0.17176000277201334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,48,64,128,1,fp8,fp8,0,0.1948960026105245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,48,64,0,1,float16,fp8,0,0.17197867234547934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,48,64,0,1,fp8,fp8,0,0.19472533464431763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,1,64,128,1,float16,float16,0,0.14575999975204468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,1,64,0,1,float16,float16,0,0.14658666650454202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,1,64,128,1,float16,fp8,0,0.14693333705266318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,1,64,128,1,fp8,fp8,0,0.18121600151062012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,1,64,0,1,float16,fp8,0,0.14512532949447632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,1,64,0,1,fp8,fp8,0,0.18095999956130981
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,2,64,128,1,float16,float16,0,0.14760532975196838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,2,64,0,1,float16,float16,0,0.14777066310246786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,2,64,128,1,float16,fp8,0,0.14735999703407288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,2,64,128,1,fp8,fp8,0,0.1819360057512919
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,2,64,0,1,float16,fp8,0,0.14758400122324625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,2,64,0,1,fp8,fp8,0,0.17857599258422852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,4,64,128,1,float16,float16,0,0.15335999925931296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,4,64,0,1,float16,float16,0,0.15264532963434854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,4,64,128,1,float16,fp8,0,0.1534826656182607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,4,64,128,1,fp8,fp8,0,0.1873226761817932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,4,64,0,1,float16,fp8,0,0.15275200208028158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,4,64,0,1,fp8,fp8,0,0.18806399901707968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,8,64,128,1,float16,float16,0,0.15409599741299948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,8,64,0,1,float16,float16,0,0.15440533558527628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,8,64,128,1,float16,fp8,0,0.1535360018412272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,8,64,128,1,fp8,fp8,0,0.19084266821543375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,8,64,0,1,float16,fp8,0,0.15334399541219076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,8,64,0,1,fp8,fp8,0,0.18953599532445273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,48,64,128,1,float16,float16,0,0.0981226662794749
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,48,64,0,1,float16,float16,0,0.09823466340700786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,48,64,128,1,float16,fp8,0,0.09657067060470581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,48,64,128,1,fp8,fp8,0,0.10983467102050781
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,48,64,0,1,float16,fp8,0,0.09603733817736308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,48,64,0,1,fp8,fp8,0,0.10799466570218404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,1,64,0,1,fp8,fp8,0,0.10009066263834636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,1,64,128,1,float16,float16,0,0.07938666641712189
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,1,64,0,1,float16,float16,0,0.07941866914431255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,1,64,128,1,float16,fp8,0,0.07976533472537994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,1,64,128,1,fp8,fp8,0,0.09899200002352397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,1,64,0,1,float16,fp8,0,0.07945600152015686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,2,64,128,1,float16,float16,0,0.07850133379300435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,2,64,0,1,float16,float16,0,0.07911466558774312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,2,64,128,1,float16,fp8,0,0.07964799801508586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,2,64,128,1,fp8,fp8,0,0.09917866190274556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,2,64,0,1,float16,fp8,0,0.0796853353579839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,2,64,0,1,fp8,fp8,0,0.09907199939092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,4,64,128,1,float16,float16,0,0.08257066706816356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,4,64,0,1,float16,float16,0,0.08229866623878479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,4,64,128,1,float16,fp8,0,0.08229333162307739
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,4,64,128,1,fp8,fp8,0,0.10205866893132527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,4,64,0,1,float16,fp8,0,0.0825439989566803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,4,64,0,1,fp8,fp8,0,0.10141866405804952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,8,64,0,1,fp8,fp8,0,0.10443733135859172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,8,64,128,1,float16,float16,0,0.08374933401743571
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,8,64,0,1,float16,float16,0,0.08418132861455281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,8,64,128,1,float16,fp8,0,0.08243733147780101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,8,64,128,1,fp8,fp8,0,0.10363200306892395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,8,64,0,1,float16,fp8,0,0.08250666658083598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,48,64,128,1,float16,float16,0,0.057376002271970115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,48,64,0,1,float16,float16,0,0.05675200124581655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,1,64,0,1,float16,float16,0,0.045456002155939736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,48,64,128,1,float16,fp8,0,0.05576533575852712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,48,64,128,1,fp8,fp8,0,0.06338666876157124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,48,64,0,1,float16,fp8,0,0.056032001972198486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,48,64,0,1,fp8,fp8,0,0.06391466657320659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,1,64,128,1,float16,float16,0,0.04539200166861216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,1,64,128,1,float16,fp8,0,0.04600533346335093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,1,64,128,1,fp8,fp8,0,0.057258665561676025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,1,64,0,1,float16,fp8,0,0.045567999283472695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,1,64,0,1,fp8,fp8,0,0.057061334451039634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,2,64,128,1,float16,float16,0,0.046613335609436035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,2,64,0,1,float16,float16,0,0.04674133161703745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,2,64,128,1,float16,fp8,0,0.04641066491603851
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,2,64,128,1,fp8,fp8,0,0.05715733269850413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,2,64,0,1,float16,fp8,0,0.04659200211366018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,2,64,0,1,fp8,fp8,0,0.05859733124574026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,4,64,128,1,float16,float16,0,0.047498668233553566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,4,64,0,1,float16,float16,0,0.04773866633574168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,8,64,128,1,float16,fp8,0,0.04822933177153269
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,4,64,128,1,float16,fp8,0,0.04830400149027506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,4,64,128,1,fp8,fp8,0,0.059674665331840515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,4,64,0,1,float16,fp8,0,0.04771733283996582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,4,64,0,1,fp8,fp8,0,0.06010133524735769
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,8,64,128,1,float16,float16,0,0.04877333343029022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,8,64,0,1,float16,float16,0,0.04827199876308441
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,48,64,128,1,float16,fp8,0,0.030192000170548756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,8,64,128,1,fp8,fp8,0,0.059802666306495667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,8,64,0,1,float16,fp8,0,0.04859733581542969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,8,64,0,1,fp8,fp8,0,0.05974400043487549
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,48,64,128,1,float16,float16,0,0.03047466774781545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,48,64,0,1,float16,float16,0,0.03012799968322118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,48,64,128,1,fp8,fp8,0,0.0386613334218661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,48,64,0,1,float16,fp8,0,0.029829333225886028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,48,64,0,1,fp8,fp8,0,0.037589333951473236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,1,64,128,1,float16,float16,0,0.02757866680622101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,1,64,0,1,float16,float16,0,0.02794666588306427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,1,64,128,1,float16,fp8,0,0.027893332143624622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,1,64,128,1,fp8,fp8,0,0.03536533315976461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,1,64,0,1,float16,fp8,0,0.028309332827727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,1,64,0,1,fp8,fp8,0,0.035349334279696144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,2,64,128,1,float16,float16,0,0.027664000789324444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,2,64,0,1,float16,float16,0,0.02790933350721995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,2,64,128,1,float16,fp8,0,0.028357334434986115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,2,64,128,1,fp8,fp8,0,0.03651199986537298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,2,64,0,1,float16,fp8,0,0.027888000011444092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,2,64,0,1,fp8,fp8,0,0.03729599962631861
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,4,64,128,1,float16,float16,0,0.028373333315054577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,4,64,0,1,float16,float16,0,0.028373333315054577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,4,64,128,1,float16,fp8,0,0.02908266584078471
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,4,64,128,1,fp8,fp8,0,0.03714133302370707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,4,64,0,1,float16,fp8,0,0.029135999580224354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,4,64,0,1,fp8,fp8,0,0.03658133248488108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,8,64,128,1,float16,float16,0,0.029189333319664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,8,64,0,1,float16,float16,0,0.0286613330245018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,8,64,128,1,float16,fp8,0,0.02870933214823405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,8,64,128,1,fp8,fp8,0,0.03686933219432831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,8,64,0,1,float16,fp8,0,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,8,64,0,1,fp8,fp8,0,0.037248000502586365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,48,64,128,1,float16,float16,0,0.023546665906906128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,48,64,0,1,float16,float16,0,0.02387733260790507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,48,64,128,1,float16,fp8,0,0.023898666103680927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,48,64,0,1,float16,fp8,0,0.023962666591008503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,48,64,128,1,fp8,fp8,0,0.02863999952872594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,48,64,0,1,fp8,fp8,0,0.029391999046007793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,1,64,128,1,float16,float16,0,0.022085333863894146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,1,64,0,1,float16,float16,0,0.022426667312781017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,1,64,128,1,float16,fp8,0,0.021925332645575207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,1,64,128,1,fp8,fp8,0,0.027914665639400482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,1,64,0,1,float16,fp8,0,0.022272000710169475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,1,64,0,1,fp8,fp8,0,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,2,64,128,1,float16,float16,0,0.021712000171343487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,2,64,0,1,float16,float16,0,0.0220320001244545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,2,64,128,1,float16,fp8,0,0.02252800017595291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,2,64,128,1,fp8,fp8,0,0.02756800005833308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,4,64,128,1,fp8,fp8,0,0.028021333118279774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,2,64,0,1,float16,fp8,0,0.02183466653029124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,2,64,0,1,fp8,fp8,0,0.02796799937884013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,4,64,128,1,float16,float16,0,0.022245332598686218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,4,64,0,1,float16,float16,0,0.02242133269707362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,4,64,128,1,float16,fp8,0,0.022304000953833263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,4,64,0,1,float16,fp8,0,0.022757334013779957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,4,64,0,1,fp8,fp8,0,0.028304000695546467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,8,64,128,1,float16,float16,0,0.02292266736427943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,48,64,128,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,8,64,0,1,float16,float16,0,0.022282667458057404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,8,64,128,1,float16,fp8,0,0.0234400009115537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,8,64,128,1,fp8,fp8,0,0.02794666588306427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,8,64,0,1,float16,fp8,0,0.02235200007756551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,8,64,0,1,fp8,fp8,0,0.027978666126728058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,48,64,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,48,64,128,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,48,64,128,1,fp8,fp8,0,0.020869334538777668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,1,64,128,1,fp8,fp8,0,0.020288000504175823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,48,64,0,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,48,64,0,1,fp8,fp8,0,0.0205226664741834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,1,64,128,1,float16,float16,0,0.015658666690190632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,1,64,0,1,float16,float16,0,0.01573866605758667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,1,64,128,1,float16,fp8,0,0.0162773331006368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,1,64,0,1,float16,fp8,0,0.016447999825080235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,1,64,0,1,fp8,fp8,0,0.020186666399240494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,2,64,0,1,fp8,fp8,0,0.01952533299724261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,2,64,128,1,float16,float16,0,0.01646399994691213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,2,64,0,1,float16,float16,0,0.015957333147525787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,2,64,128,1,float16,fp8,0,0.016303999970356624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,2,64,128,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,2,64,0,1,float16,fp8,0,0.01632000009218852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,4,64,128,1,float16,float16,0,0.01621333385507266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,4,64,0,1,float16,float16,0,0.016735999534527462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,4,64,128,1,float16,fp8,0,0.01621866722901662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,4,64,128,1,fp8,fp8,0,0.020351999749739964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,4,64,0,1,float16,fp8,0,0.016741332908471424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,4,64,0,1,fp8,fp8,0,0.01969066634774208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,8,64,128,1,float16,float16,0,0.016688000410795212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,8,64,0,1,float16,float16,0,0.015872000406185787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,8,64,128,1,float16,fp8,0,0.016522667060295742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,8,64,128,1,fp8,fp8,0,0.020629333953062694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,8,64,0,1,float16,fp8,0,0.016176000237464905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,8,64,0,1,fp8,fp8,0,0.020373333245515823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,48,64,128,1,float16,float16,0,0.014261333892742792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,48,64,0,1,float16,float16,0,0.0141546664138635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,48,64,128,1,float16,fp8,0,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,48,64,128,1,fp8,fp8,0,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,48,64,0,1,float16,fp8,0,0.014736000448465347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,48,64,0,1,fp8,fp8,0,0.019813333948453266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,1,64,128,1,float16,float16,0,0.013594667116800943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,2,64,128,1,float16,float16,0,0.014229333649079004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,1,64,0,1,float16,float16,0,0.014245333770910898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,1,64,128,1,float16,fp8,0,0.014271999398867289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,1,64,128,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,1,64,0,1,float16,fp8,0,0.014533333480358124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,1,64,0,1,fp8,fp8,0,0.01851733277241389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,2,64,0,1,float16,float16,0,0.014090667168299357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,2,64,128,1,float16,fp8,0,0.014053333550691605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,2,64,128,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,2,64,0,1,float16,fp8,0,0.014597332725922266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,2,64,0,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,4,64,128,1,float16,float16,0,0.014096000542243322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,4,64,0,1,float16,float16,0,0.013674666484196981
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,4,64,128,1,float16,fp8,0,0.01452800010641416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,4,64,128,1,fp8,fp8,0,0.01833600054184596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,4,64,0,1,float16,fp8,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,4,64,0,1,fp8,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,8,64,128,1,float16,float16,0,0.0143306665122509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,8,64,0,1,float16,float16,0,0.014127999544143677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,8,64,128,1,float16,fp8,0,0.014085333794355392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,8,64,128,1,fp8,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,8,64,0,1,float16,fp8,0,0.014192000031471252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,8,64,0,1,fp8,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,1,64,128,1,float16,float16,0,0.2562133272488912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,1,64,0,1,float16,float16,0,0.25682665904362995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,1,64,128,1,float16,fp8,0,0.2532479961713155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,2,64,128,1,float16,float16,0,0.25916266441345215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,1,64,128,1,fp8,fp8,0,0.4503093163172404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,1,64,0,1,float16,fp8,0,0.2545439998308818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,1,64,0,1,fp8,fp8,0,0.45138665040334064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,2,64,0,1,float16,float16,0,0.26050132513046265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,2,64,128,1,float16,fp8,0,0.25284266471862793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,2,64,128,1,fp8,fp8,0,0.4538613160451253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,2,64,0,1,float16,fp8,0,0.25249600410461426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,2,64,0,1,fp8,fp8,0,0.4497599999109904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,4,64,128,1,float16,float16,0,0.2682933410008748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,4,64,0,1,float16,float16,0,0.26770132780075073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,4,64,128,1,float16,fp8,0,0.26683199405670166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,4,64,128,1,fp8,fp8,0,0.48051734765370685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,4,64,0,1,float16,fp8,0,0.26770132780075073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,4,64,0,1,fp8,fp8,0,0.4769759972890218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,8,64,128,1,float16,float16,0,0.2695466677347819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,8,64,0,1,float16,float16,0,0.27011199792226154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,8,64,128,1,float16,fp8,0,0.2693919936815898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,8,64,128,1,fp8,fp8,0,0.480565349260966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,8,64,0,1,float16,fp8,0,0.2675679922103882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,48,64,128,1,float16,float16,0,0.16355199615160623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,8,64,0,1,fp8,fp8,0,0.4837599992752075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,48,64,0,1,float16,float16,0,0.16499200463294983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,48,64,128,1,float16,fp8,0,0.15994667013486227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,48,64,128,1,fp8,fp8,0,0.25649066766103107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,48,64,0,1,float16,fp8,0,0.16062399744987488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,48,64,0,1,fp8,fp8,0,0.25570666790008545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,1,64,0,1,float16,fp8,0,0.1350879967212677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,1,64,128,1,float16,float16,0,0.13620799779891968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,1,64,0,1,float16,float16,0,0.1357866624991099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,1,64,128,1,float16,fp8,0,0.13523733615875244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,1,64,128,1,fp8,fp8,0,0.23948800563812256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,1,64,0,1,fp8,fp8,0,0.23718400796254477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,2,64,128,1,float16,float16,0,0.1371893286705017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,2,64,0,1,float16,float16,0,0.13649066289265951
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,2,64,128,1,float16,fp8,0,0.13556266824404398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,2,64,128,1,fp8,fp8,0,0.23873066902160645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,2,64,0,1,float16,fp8,0,0.13595733046531677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,2,64,0,1,fp8,fp8,0,0.24013332525889078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,4,64,0,1,float16,fp8,0,0.14221333463986716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,4,64,128,1,float16,float16,0,0.14192533493041992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,4,64,0,1,float16,float16,0,0.141077329715093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,4,64,128,1,float16,fp8,0,0.14192000031471252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,4,64,128,1,fp8,fp8,0,0.24614399671554565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,4,64,0,1,fp8,fp8,0,0.24751466512680054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,8,64,128,1,float16,float16,0,0.14300266901652017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,8,64,0,1,float16,float16,0,0.14342400431632996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,8,64,128,1,float16,fp8,0,0.14239466190338135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,8,64,128,1,fp8,fp8,0,0.2521226604779561
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,8,64,0,1,float16,fp8,0,0.14152533809343973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,48,64,128,1,float16,float16,0,0.09168533484141032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,8,64,0,1,fp8,fp8,0,0.25019200642903644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,48,64,0,1,float16,float16,0,0.09123733639717102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,48,64,128,1,float16,fp8,0,0.08918933073679607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,48,64,128,1,fp8,fp8,0,0.14065066973368326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,48,64,0,1,float16,fp8,0,0.08910399675369263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,48,64,0,1,fp8,fp8,0,0.1395199994246165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,1,64,128,1,float16,float16,0,0.07252799967924754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,1,64,0,1,float16,float16,0,0.07250666618347168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,1,64,128,1,float16,fp8,0,0.07296533385912578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,1,64,128,1,fp8,fp8,0,0.12892799576123556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,1,64,0,1,float16,fp8,0,0.07284266750017802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,2,64,0,1,float16,fp8,0,0.0738613357146581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,1,64,0,1,fp8,fp8,0,0.13076800107955933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,2,64,128,1,float16,float16,0,0.07274666428565979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,2,64,0,1,float16,float16,0,0.07307733098665874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,2,64,128,1,float16,fp8,0,0.07228800157705943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,2,64,128,1,fp8,fp8,0,0.1281546652317047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,2,64,0,1,fp8,fp8,0,0.12869866689046225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,4,64,128,1,float16,float16,0,0.0761706680059433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,4,64,0,1,float16,float16,0,0.07780266801516215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,4,64,128,1,float16,fp8,0,0.07666133344173431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,4,64,128,1,fp8,fp8,0,0.13149333000183105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,4,64,0,1,float16,fp8,0,0.07560533285140991
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,4,64,0,1,fp8,fp8,0,0.13218667109807333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,8,64,128,1,float16,float16,0,0.0764160007238388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,8,64,0,1,float16,float16,0,0.07789866626262665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,8,64,128,1,float16,fp8,0,0.07633600135644276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,8,64,128,1,fp8,fp8,0,0.13423466682434082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,8,64,0,1,float16,fp8,0,0.07693333427111308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,8,64,0,1,fp8,fp8,0,0.13403200109799704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,48,64,128,1,float16,float16,0,0.05142400165398916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,48,64,0,1,float16,float16,0,0.05202133456865946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,48,64,128,1,float16,fp8,0,0.04980800052483877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,48,64,128,1,fp8,fp8,0,0.0783733328183492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,48,64,0,1,float16,fp8,0,0.05091733237107595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,48,64,0,1,fp8,fp8,0,0.07981866598129272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,1,64,128,1,float16,float16,0,0.0423573354880015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,1,64,0,1,float16,float16,0,0.04204266766707102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,1,64,128,1,float16,fp8,0,0.04207466542720795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,1,64,128,1,fp8,fp8,0,0.07144000132878621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,1,64,0,1,float16,fp8,0,0.04223999877770742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,1,64,0,1,fp8,fp8,0,0.07197866837183635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,2,64,128,1,float16,float16,0,0.042223999897638954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,2,64,0,1,float16,float16,0,0.04283200204372406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,2,64,128,1,float16,fp8,0,0.04251199960708618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,2,64,128,1,fp8,fp8,0,0.07283199826876323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,2,64,0,1,float16,fp8,0,0.041759997606277466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,2,64,0,1,fp8,fp8,0,0.0728053351243337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,4,64,128,1,float16,float16,0,0.044810667634010315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,4,64,0,1,fp8,fp8,0,0.07478400071461995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,4,64,0,1,float16,float16,0,0.04400533437728882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,4,64,128,1,float16,fp8,0,0.044480000933011375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,4,64,128,1,fp8,fp8,0,0.07567999760309856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,4,64,0,1,float16,fp8,0,0.04414399961630503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,8,64,128,1,float16,float16,0,0.04419200122356415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,8,64,0,1,float16,float16,0,0.045381332437197365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,8,64,128,1,float16,fp8,0,0.04433600107828776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,8,64,128,1,fp8,fp8,0,0.07515199979146321
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,8,64,0,1,float16,fp8,0,0.04460266729195913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,8,64,0,1,fp8,fp8,0,0.07566399872303009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,48,64,128,1,float16,float16,0,0.027632000545660656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,48,64,0,1,float16,float16,0,0.027935999135176342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,48,64,128,1,float16,fp8,0,0.028805332879225414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,48,64,128,1,fp8,fp8,0,0.04554133117198944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,48,64,0,1,float16,fp8,0,0.02807466685771942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,48,64,0,1,fp8,fp8,0,0.046037331223487854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,1,64,128,1,float16,float16,0,0.025648000339667004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,1,64,0,1,float16,float16,0,0.025386666258176167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,1,64,128,1,float16,fp8,0,0.02595199892918269
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,1,64,128,1,fp8,fp8,0,0.04381866753101349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,1,64,0,1,float16,fp8,0,0.0260959987839063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,1,64,0,1,fp8,fp8,0,0.043866669138272606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,2,64,0,1,float16,float16,0,0.026586666703224182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,2,64,128,1,float16,float16,0,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,2,64,128,1,float16,fp8,0,0.026170666019121807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,2,64,128,1,fp8,fp8,0,0.04314666489760081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,2,64,0,1,float16,fp8,0,0.026677332818508148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,2,64,0,1,fp8,fp8,0,0.04424533247947693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,4,64,128,1,float16,float16,0,0.026816000541051228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,4,64,0,1,float16,float16,0,0.027285332481066387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,4,64,128,1,float16,fp8,0,0.02752000093460083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,4,64,128,1,fp8,fp8,0,0.04474133253097534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,4,64,0,1,float16,fp8,0,0.027050666511058807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,4,64,0,1,fp8,fp8,0,0.04478399952252706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,8,64,128,1,float16,float16,0,0.026821332673231762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,8,64,0,1,float16,float16,0,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,8,64,128,1,float16,fp8,0,0.026538667579491932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,8,64,128,1,fp8,fp8,0,0.04533866544564565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,8,64,0,1,float16,fp8,0,0.027690666417280834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,8,64,0,1,fp8,fp8,0,0.04478399952252706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,48,64,128,1,float16,float16,0,0.02089066555102666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,48,64,0,1,float16,float16,0,0.021183999876181286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,48,64,128,1,float16,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,48,64,128,1,fp8,fp8,0,0.03166933357715607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,48,64,0,1,float16,fp8,0,0.020853333175182343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,48,64,0,1,fp8,fp8,0,0.0317493329445521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,1,64,128,1,float16,float16,0,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,1,64,0,1,float16,float16,0,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,1,64,128,1,float16,fp8,0,0.019893333315849304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,1,64,128,1,fp8,fp8,0,0.029733332494894665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,1,64,0,1,float16,fp8,0,0.019973333925008774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,1,64,0,1,fp8,fp8,0,0.030410667260487873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,2,64,128,1,float16,float16,0,0.01977066695690155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,2,64,0,1,float16,float16,0,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,2,64,128,1,float16,fp8,0,0.020282667130231857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,2,64,128,1,fp8,fp8,0,0.029701332251230877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,2,64,0,1,float16,fp8,0,0.020234666764736176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,2,64,0,1,fp8,fp8,0,0.029680001238981884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,4,64,128,1,float16,float16,0,0.019973333925008774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,4,64,0,1,float16,float16,0,0.02057066683967908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,4,64,128,1,float16,fp8,0,0.020736000190178554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,4,64,128,1,fp8,fp8,0,0.0305173322558403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,4,64,0,1,float16,fp8,0,0.021498667697111767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,4,64,0,1,fp8,fp8,0,0.030767999589443207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,8,64,128,1,float16,float16,0,0.02021866664290428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,48,64,128,1,float16,float16,0,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,8,64,0,1,float16,float16,0,0.02037866661945979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,8,64,128,1,float16,fp8,0,0.020303999384244282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,8,64,128,1,fp8,fp8,0,0.030858665704727173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,48,64,0,1,float16,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,8,64,0,1,float16,fp8,0,0.020138667275508244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,8,64,0,1,fp8,fp8,0,0.03048533449570338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,48,64,0,1,float16,float16,0,0.015717333803574245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,48,64,128,1,float16,fp8,0,0.015530666957298914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,48,64,128,1,fp8,fp8,0,0.0236160010099411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,48,64,0,1,fp8,fp8,0,0.023237332701683044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,1,64,128,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,2,64,128,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,1,64,0,1,float16,float16,0,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,1,64,128,1,float16,fp8,0,0.014581333845853806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,1,64,128,1,fp8,fp8,0,0.02290133386850357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,1,64,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,1,64,0,1,fp8,fp8,0,0.022629333039124806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,2,64,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,2,64,128,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,2,64,128,1,fp8,fp8,0,0.02274666726589203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,2,64,0,1,float16,fp8,0,0.014602666099866232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,2,64,0,1,fp8,fp8,0,0.02295999974012375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,4,64,128,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,4,64,0,1,float16,float16,0,0.0144213338692983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,4,64,128,1,float16,fp8,0,0.01586666703224182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,4,64,128,1,fp8,fp8,0,0.022890667120615642
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,4,64,0,1,float16,fp8,0,0.014597332725922266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,4,64,0,1,fp8,fp8,0,0.022730665902296703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,8,64,128,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,8,64,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,8,64,128,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,8,64,128,1,fp8,fp8,0,0.023445333043734234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,8,64,0,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,8,64,0,1,fp8,fp8,0,0.022437334060668945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,48,64,128,1,float16,float16,0,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,48,64,0,1,float16,float16,0,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,48,64,128,1,float16,fp8,0,0.013829333086808523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,48,64,128,1,fp8,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,1,64,128,1,fp8,fp8,0,0.018810667097568512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,48,64,0,1,float16,fp8,0,0.013616000612576803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,48,64,0,1,fp8,fp8,0,0.01977066695690155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,1,64,128,1,float16,float16,0,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,1,64,0,1,float16,float16,0,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,1,64,128,1,float16,fp8,0,0.013237333546082178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,1,64,0,1,float16,fp8,0,0.0138026662170887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,1,64,0,1,fp8,fp8,0,0.018629333625237148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,2,64,128,1,float16,float16,0,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,2,64,0,1,float16,float16,0,0.013349333157142004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,2,64,128,1,float16,fp8,0,0.013776000589132309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,2,64,128,1,fp8,fp8,0,0.018533332894245785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,2,64,0,1,float16,fp8,0,0.013354666531085968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,2,64,0,1,fp8,fp8,0,0.018794666975736618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,4,64,128,1,float16,float16,0,0.013381333400805792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,4,64,0,1,float16,float16,0,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,4,64,128,1,float16,fp8,0,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,4,64,128,1,fp8,fp8,0,0.0195573332409064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,4,64,0,1,float16,fp8,0,0.0138026662170887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,4,64,0,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,8,64,128,1,float16,float16,0,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,8,64,0,1,float16,float16,0,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,8,64,128,1,float16,fp8,0,0.013647999614477158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,8,64,128,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,8,64,0,1,float16,fp8,0,0.013765333841244379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,8,64,0,1,fp8,fp8,0,0.019850666324297588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,48,64,128,1,float16,float16,0,0.012373333175977072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,48,64,0,1,float16,float16,0,0.011424000064531961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,48,64,128,1,float16,fp8,0,0.012106666962305704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,48,64,128,1,fp8,fp8,0,0.018031999468803406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,48,64,0,1,float16,fp8,0,0.013221333424250284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,48,64,0,1,fp8,fp8,0,0.01844266677896182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,1,64,128,1,float16,float16,0,0.01191466674208641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,1,64,0,1,float16,float16,0,0.012304000556468964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,1,64,128,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,1,64,128,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,1,64,0,1,float16,fp8,0,0.012128000458081564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,1,64,0,1,fp8,fp8,0,0.01802666609485944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,2,64,128,1,float16,float16,0,0.012080000092585882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,4,64,128,1,float16,float16,0,0.01179733375708262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,2,64,0,1,float16,float16,0,0.012069333344697952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,2,64,128,1,float16,fp8,0,0.012373333175977072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,2,64,128,1,fp8,fp8,0,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,2,64,0,1,float16,fp8,0,0.012655999511480331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,2,64,0,1,fp8,fp8,0,0.017818666994571686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,4,64,0,1,float16,float16,0,0.0120319997270902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,4,64,128,1,float16,fp8,0,0.012613333761692047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,4,64,128,1,fp8,fp8,0,0.017488000293572743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,4,64,0,1,float16,fp8,0,0.0124746672809124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,4,64,0,1,fp8,fp8,0,0.017653333644072216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,8,64,128,1,float16,float16,0,0.012330666184425354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,8,64,0,1,float16,float16,0,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,8,64,128,1,float16,fp8,0,0.01250133290886879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,8,64,128,1,fp8,fp8,0,0.01801066721479098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,8,64,0,1,float16,fp8,0,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,8,64,0,1,fp8,fp8,0,0.017477333545684814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,1,64,128,1,float16,float16,0,0.1521440049012502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,1,64,0,1,float16,float16,0,0.1520799994468689
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,1,64,128,1,float16,fp8,0,0.1518826683362325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,2,64,0,1,float16,float16,0,0.15196266770362854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,1,64,128,1,fp8,fp8,0,0.38042132059733075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,1,64,0,1,float16,fp8,0,0.15173332889874777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,1,64,0,1,fp8,fp8,0,0.37912531693776447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,2,64,128,1,float16,float16,0,0.15254400173823038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,2,64,128,1,float16,fp8,0,0.15146666765213013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,2,64,128,1,fp8,fp8,0,0.37859201431274414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,2,64,0,1,float16,fp8,0,0.15178133050600687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,2,64,0,1,fp8,fp8,0,0.37702401479085285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,4,64,128,1,float16,float16,0,0.15833066900571188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,4,64,0,1,float16,float16,0,0.15737600127855936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,4,64,128,1,float16,fp8,0,0.15573333700497946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,4,64,0,1,float16,fp8,0,0.15683199961980185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,4,64,128,1,fp8,fp8,0,0.384986678759257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,4,64,0,1,fp8,fp8,0,0.3879786729812622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,8,64,128,1,float16,float16,0,0.1593546668688456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,8,64,0,1,float16,float16,0,0.1586079994837443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,8,64,128,1,float16,fp8,0,0.15953600406646729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,8,64,128,1,fp8,fp8,0,0.389301339785258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,8,64,0,1,float16,fp8,0,0.1592693328857422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,48,64,128,1,float16,float16,0,0.09592533111572266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,8,64,0,1,fp8,fp8,0,0.3893333276112874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,48,64,0,1,float16,float16,0,0.09649067123730977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,48,64,128,1,float16,fp8,0,0.09290666381518047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,48,64,0,1,float16,fp8,0,0.09299733241399129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,48,64,128,1,fp8,fp8,0,0.21124267578125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,48,64,0,1,fp8,fp8,0,0.21146132548650107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,1,64,128,1,float16,float16,0,0.0814933329820633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,1,64,0,1,float16,float16,0,0.08188800017038982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,1,64,128,1,float16,fp8,0,0.0819893330335617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,1,64,128,1,fp8,fp8,0,0.19926400979359946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,1,64,0,1,float16,fp8,0,0.08120533327261607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,2,64,128,1,float16,float16,0,0.08161599934101105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,1,64,0,1,fp8,fp8,0,0.20332799355189005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,2,64,0,1,float16,float16,0,0.08216533561547597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,2,64,128,1,float16,fp8,0,0.08178133269151051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,2,64,0,1,float16,fp8,0,0.08210133512814839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,2,64,128,1,fp8,fp8,0,0.20011732975641885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,2,64,0,1,fp8,fp8,0,0.19869865973790488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,4,64,128,1,float16,float16,0,0.0846026639143626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,4,64,128,1,float16,fp8,0,0.08342400193214417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,4,64,0,1,float16,float16,0,0.0855519970258077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,4,64,128,1,fp8,fp8,0,0.20169599850972494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,4,64,0,1,float16,fp8,0,0.08334933718045552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,4,64,0,1,fp8,fp8,0,0.20237332582473755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,8,64,128,1,float16,float16,0,0.08556800087292989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,8,64,0,1,float16,float16,0,0.08656000097592671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,8,64,128,1,float16,fp8,0,0.08470933636029561
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,8,64,128,1,fp8,fp8,0,0.2037973403930664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,8,64,0,1,float16,fp8,0,0.08450667063395183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,48,64,128,1,float16,float16,0,0.05362666646639506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,8,64,0,1,fp8,fp8,0,0.2041226625442505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,48,64,0,1,float16,float16,0,0.054511999090512596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,48,64,128,1,float16,fp8,0,0.052229334910710655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,48,64,128,1,fp8,fp8,0,0.11561066905657451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,48,64,0,1,float16,fp8,0,0.05219733218352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,48,64,0,1,fp8,fp8,0,0.11406399806340535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,1,64,128,1,float16,float16,0,0.045034666856129967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,1,64,0,1,float16,float16,0,0.04456000030040741
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,1,64,128,1,float16,fp8,0,0.04540266593297323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,1,64,128,1,fp8,fp8,0,0.10772266983985901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,1,64,0,1,float16,fp8,0,0.04470933477083842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,1,64,0,1,fp8,fp8,0,0.10809600353240967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,2,64,128,1,float16,float16,0,0.04528533418973287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,4,64,128,1,float16,float16,0,0.04669333497683207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,2,64,0,1,float16,float16,0,0.045312002301216125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,2,64,128,1,float16,fp8,0,0.045552000403404236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,2,64,128,1,fp8,fp8,0,0.10697600245475769
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,2,64,0,1,float16,fp8,0,0.0452106644709905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,2,64,0,1,fp8,fp8,0,0.10764799515406291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,8,64,128,1,float16,float16,0,0.04626133541266123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,4,64,0,1,float16,float16,0,0.04659200211366018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,4,64,128,1,float16,fp8,0,0.04646400113900503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,8,64,0,1,float16,fp8,0,0.04637866715590159
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,4,64,128,1,fp8,fp8,0,0.10921066999435425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,4,64,0,1,float16,fp8,0,0.046629334489504494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,4,64,0,1,fp8,fp8,0,0.111135999361674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,8,64,0,1,float16,float16,0,0.04640533526738485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,8,64,128,1,float16,fp8,0,0.047151997685432434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,8,64,128,1,fp8,fp8,0,0.1104746659596761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,8,64,0,1,fp8,fp8,0,0.11011200149854024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,48,64,128,1,float16,float16,0,0.029487999776999157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,48,64,0,1,float16,float16,0,0.02889599899450938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,48,64,128,1,float16,fp8,0,0.028730665644009907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,48,64,128,1,fp8,fp8,0,0.06468800206979115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,48,64,0,1,float16,fp8,0,0.028762665887673695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,48,64,0,1,fp8,fp8,0,0.06405866642793019
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,1,64,0,1,fp8,fp8,0,0.06196799874305725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,1,64,128,1,float16,float16,0,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,1,64,0,1,float16,float16,0,0.02752533306678136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,1,64,128,1,float16,fp8,0,0.02757866680622101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,1,64,128,1,fp8,fp8,0,0.06071466704209646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,1,64,0,1,float16,fp8,0,0.027258666853109997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,2,64,128,1,float16,float16,0,0.027722666660944622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,2,64,0,1,float16,float16,0,0.027850667635599773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,2,64,128,1,float16,fp8,0,0.027877333263556164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,2,64,128,1,fp8,fp8,0,0.061162665486335754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,2,64,0,1,float16,fp8,0,0.02775466690460841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,2,64,0,1,fp8,fp8,0,0.06306666632493337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,4,64,128,1,float16,float16,0,0.028042666614055634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,4,64,0,1,float16,float16,0,0.027962667246659596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,4,64,128,1,float16,fp8,0,0.02882666637500127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,4,64,128,1,fp8,fp8,0,0.06263466676076253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,4,64,0,1,float16,fp8,0,0.028522667785485584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,4,64,0,1,fp8,fp8,0,0.062309334675470986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,8,64,128,1,float16,float16,0,0.0283146674434344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,8,64,0,1,float16,float16,0,0.027999999622503918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,8,64,128,1,float16,fp8,0,0.028250666956106823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,8,64,128,1,fp8,fp8,0,0.061978667974472046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,8,64,0,1,float16,fp8,0,0.028853334486484528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,8,64,0,1,fp8,fp8,0,0.0626453310251236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,48,64,128,1,float16,float16,0,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,48,64,0,1,float16,float16,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,48,64,128,1,float16,fp8,0,0.020869334538777668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,48,64,128,1,fp8,fp8,0,0.04095466683308283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,48,64,0,1,float16,fp8,0,0.02067733307679494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,48,64,0,1,fp8,fp8,0,0.040474665661652885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,1,64,128,1,float16,float16,0,0.020138667275508244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,1,64,0,1,float16,float16,0,0.0201706662774086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,1,64,128,1,float16,fp8,0,0.02032533288002014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,2,64,0,1,float16,float16,0,0.020224000016848247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,1,64,128,1,fp8,fp8,0,0.03955200066169103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,1,64,0,1,float16,fp8,0,0.019546666493018467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,1,64,0,1,fp8,fp8,0,0.03877866764863332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,2,64,128,1,float16,float16,0,0.01987733319401741
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,2,64,128,1,float16,fp8,0,0.020506666352351505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,2,64,128,1,fp8,fp8,0,0.039834665755430855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,2,64,0,1,float16,fp8,0,0.020303999384244282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,2,64,0,1,fp8,fp8,0,0.03937600056330363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,4,64,128,1,float16,float16,0,0.01985599969824155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,4,64,0,1,fp8,fp8,0,0.03994133323431015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,8,64,128,1,float16,float16,0,0.019882666567961376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,4,64,0,1,float16,float16,0,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,4,64,128,1,float16,fp8,0,0.02059200033545494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,4,64,128,1,fp8,fp8,0,0.04052799940109253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,4,64,0,1,float16,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,8,64,0,1,float16,float16,0,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,8,64,128,1,float16,fp8,0,0.020394666741291683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,8,64,128,1,fp8,fp8,0,0.03992533435424169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,8,64,0,1,float16,fp8,0,0.01978133370478948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,8,64,0,1,fp8,fp8,0,0.039818666875362396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,48,64,128,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,1,64,128,1,float16,float16,0,0.014197333405415217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,48,64,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,48,64,128,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,48,64,128,1,fp8,fp8,0,0.02700799951950709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,48,64,0,1,float16,fp8,0,0.01470400020480156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,48,64,0,1,fp8,fp8,0,0.026837334036827087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,1,64,0,1,float16,float16,0,0.014005333185195923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,1,64,128,1,float16,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,1,64,128,1,fp8,fp8,0,0.026133333643277485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,2,64,0,1,float16,fp8,0,0.014746667196353277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,1,64,0,1,float16,fp8,0,0.014618666221698126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,1,64,0,1,fp8,fp8,0,0.026602665583292644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,2,64,128,1,float16,float16,0,0.014384000251690546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,2,64,0,1,float16,float16,0,0.014170666535695394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,2,64,128,1,float16,fp8,0,0.014416000495354334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,2,64,128,1,fp8,fp8,0,0.026000000536441803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,2,64,0,1,fp8,fp8,0,0.025744001070658367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,4,64,128,1,float16,float16,0,0.014463999619086584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,4,64,0,1,float16,float16,0,0.01441066712141037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,4,64,128,1,float16,fp8,0,0.014783999572197596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,4,64,128,1,fp8,fp8,0,0.026522666215896606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,4,64,0,1,float16,fp8,0,0.014560000350077948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,4,64,0,1,fp8,fp8,0,0.02621866762638092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,8,64,0,1,fp8,fp8,0,0.026730666557947796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,8,64,128,1,float16,float16,0,0.014346666634082794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,8,64,0,1,float16,float16,0,0.013893333574136099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,8,64,128,1,float16,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,8,64,128,1,fp8,fp8,0,0.02664000044266383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,8,64,0,1,float16,fp8,0,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,48,64,128,1,float16,float16,0,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,48,64,0,1,float16,float16,0,0.013013333082199097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,48,64,128,1,float16,fp8,0,0.013834666460752487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,48,64,128,1,fp8,fp8,0,0.022405333817005157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,48,64,0,1,float16,fp8,0,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,48,64,0,1,fp8,fp8,0,0.02293333411216736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,1,64,128,1,float16,float16,0,0.013141332815090815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,1,64,0,1,float16,float16,0,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,1,64,128,1,float16,fp8,0,0.013295999417702356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,1,64,128,1,fp8,fp8,0,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,1,64,0,1,float16,fp8,0,0.013658666362365087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,1,64,0,1,fp8,fp8,0,0.022106667359670002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,2,64,128,1,float16,float16,0,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,2,64,0,1,float16,float16,0,0.013642666240533194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,2,64,128,1,float16,fp8,0,0.013738666971524557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,2,64,128,1,fp8,fp8,0,0.022042666872342426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,2,64,0,1,float16,fp8,0,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,2,64,0,1,fp8,fp8,0,0.021727999051411945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,4,64,128,1,float16,float16,0,0.013493333011865616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,4,64,0,1,float16,float16,0,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,4,64,128,1,float16,fp8,0,0.013525333255529404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,4,64,128,1,fp8,fp8,0,0.022650666534900665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,4,64,0,1,float16,fp8,0,0.014021333307027817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,4,64,0,1,fp8,fp8,0,0.022117334107557934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,8,64,128,1,float16,float16,0,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,8,64,0,1,float16,float16,0,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,8,64,128,1,float16,fp8,0,0.01403733342885971
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,8,64,128,1,fp8,fp8,0,0.022458667556444805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,8,64,0,1,float16,fp8,0,0.01357866699496905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,8,64,0,1,fp8,fp8,0,0.022895999252796173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,48,64,128,1,float16,float16,0,0.011855999628702799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,48,64,0,1,float16,float16,0,0.011440000186363855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,48,64,128,1,float16,fp8,0,0.012026666353146235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,48,64,128,1,fp8,fp8,0,0.017562666287024815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,48,64,0,1,float16,fp8,0,0.012421333541472753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,48,64,0,1,fp8,fp8,0,0.01786133274435997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,1,64,128,1,float16,float16,0,0.0116799995303154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,1,64,0,1,float16,float16,0,0.012138667205969492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,1,64,128,1,float16,fp8,0,0.012202666451533636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,1,64,128,1,fp8,fp8,0,0.01826133330663045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,1,64,0,1,float16,fp8,0,0.012026666353146235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,1,64,0,1,fp8,fp8,0,0.017759999881188076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,2,64,128,1,float16,float16,0,0.012144000579913458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,2,64,0,1,float16,float16,0,0.01146666705608368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,2,64,128,1,float16,fp8,0,0.012042666474978128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,2,64,128,1,fp8,fp8,0,0.01826133330663045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,2,64,0,1,float16,fp8,0,0.012586666891972223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,2,64,0,1,fp8,fp8,0,0.018245333184798557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,4,64,128,1,float16,float16,0,0.011354666203260422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,4,64,0,1,float16,float16,0,0.01198400060335795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,4,64,128,1,float16,fp8,0,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,4,64,128,1,fp8,fp8,0,0.01754133279124896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,4,64,0,1,float16,fp8,0,0.01240533341964086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,4,64,0,1,fp8,fp8,0,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,8,64,128,1,float16,float16,0,0.011461333682139715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,8,64,0,1,float16,float16,0,0.011434666812419891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,8,64,128,1,float16,fp8,0,0.012202666451533636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,8,64,128,1,fp8,fp8,0,0.017727999637524288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,8,64,0,1,float16,fp8,0,0.011834666132926941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,8,64,0,1,fp8,fp8,0,0.018415999909241993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,48,64,128,1,float16,float16,0,0.011648000528415045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,48,64,0,1,float16,float16,0,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,48,64,128,1,float16,fp8,0,0.011333333949247995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,48,64,128,1,fp8,fp8,0,0.017685333887736004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,48,64,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,48,64,0,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,1,64,128,1,float16,float16,0,0.011519999553759893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,1,64,0,1,float16,float16,0,0.011850666254758835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,1,64,128,1,float16,fp8,0,0.011541333049535751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,1,64,128,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,1,64,0,1,float16,fp8,0,0.011584000041087469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,1,64,0,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,2,64,128,1,float16,float16,0,0.01138666644692421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,2,64,0,1,float16,float16,0,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,2,64,128,1,float16,fp8,0,0.012080000092585882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,2,64,128,1,fp8,fp8,0,0.017871999492247898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,2,64,0,1,float16,fp8,0,0.011882666498422623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,2,64,0,1,fp8,fp8,0,0.01757866640885671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,4,64,128,1,float16,float16,0,0.011354666203260422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,4,64,0,1,float16,float16,0,0.012453333785136541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,4,64,128,1,float16,fp8,0,0.01180800050497055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,4,64,128,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,4,64,0,1,float16,fp8,0,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,4,64,0,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,8,64,128,1,float16,float16,0,0.011312000453472137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,8,64,0,1,float16,float16,0,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,8,64,128,1,float16,fp8,0,0.011930666863918304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,8,64,128,1,fp8,fp8,0,0.017957333475351334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,8,64,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,8,64,0,1,fp8,fp8,0,0.0174346665541331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,1,64,128,1,float16,float16,0,0.1112000048160553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,1,64,0,1,float16,float16,0,0.11098666985829671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,1,64,128,1,float16,fp8,0,0.11061333616574605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,1,64,128,1,fp8,fp8,0,0.3403466542561849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,1,64,0,1,float16,fp8,0,0.11025599638621013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,2,64,128,1,float16,float16,0,0.11079466342926025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,1,64,0,1,fp8,fp8,0,0.3402880032857259
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,2,64,0,1,float16,float16,0,0.11127466956774394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,2,64,128,1,float16,fp8,0,0.11142399907112122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,2,64,0,1,float16,fp8,0,0.1108746627966563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,2,64,128,1,fp8,fp8,0,0.3432106574376424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,2,64,0,1,fp8,fp8,0,0.34038400650024414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,4,64,128,1,float16,float16,0,0.11344533165295918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,4,64,0,1,float16,float16,0,0.11414399743080139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,4,64,128,1,float16,fp8,0,0.11377599835395813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,4,64,128,1,fp8,fp8,0,0.3414880037307739
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,4,64,0,1,float16,fp8,0,0.11337066690127055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,4,64,0,1,fp8,fp8,0,0.3455040057500203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,8,64,128,1,float16,float16,0,0.11502400040626526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,8,64,0,1,float16,float16,0,0.11427199840545654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,8,64,128,1,float16,fp8,0,0.113946666320165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,8,64,128,1,fp8,fp8,0,0.3445440133412679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,8,64,0,1,float16,fp8,0,0.11404800415039062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,48,64,128,1,float16,float16,0,0.06500266492366791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,8,64,0,1,fp8,fp8,0,0.34320000807444256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,48,64,0,1,float16,float16,0,0.06506666541099548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,48,64,128,1,float16,fp8,0,0.06326400240262349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,48,64,128,1,fp8,fp8,0,0.18529599905014038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,48,64,0,1,float16,fp8,0,0.062319998939832054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,48,64,0,1,fp8,fp8,0,0.1837493379910787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,1,64,0,1,fp8,fp8,0,0.17795199155807495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,1,64,128,1,float16,float16,0,0.05937066674232483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,1,64,0,1,float16,float16,0,0.059157331784566246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,1,64,128,1,float16,fp8,0,0.058592001597086586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,1,64,128,1,fp8,fp8,0,0.17766932646433511
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,1,64,0,1,float16,fp8,0,0.059664001067479454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,2,64,128,1,float16,float16,0,0.05942399799823761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,2,64,0,1,float16,float16,0,0.05975999931494395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,2,64,128,1,float16,fp8,0,0.05929600199063619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,2,64,128,1,fp8,fp8,0,0.17641599973042807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,2,64,0,1,float16,fp8,0,0.059279998143514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,2,64,0,1,fp8,fp8,0,0.1769333283106486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,4,64,128,1,float16,float16,0,0.061199997862180076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,4,64,0,1,float16,float16,0,0.06163200239340464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,4,64,128,1,float16,fp8,0,0.06058133145173391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,4,64,128,1,fp8,fp8,0,0.1800480087598165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,4,64,0,1,float16,fp8,0,0.06047466893990835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,4,64,0,1,fp8,fp8,0,0.17922133207321167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,8,64,128,1,float16,float16,0,0.06150400141874949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,8,64,0,1,float16,float16,0,0.06083733340104421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,8,64,128,1,float16,fp8,0,0.06137066582838694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,8,64,128,1,fp8,fp8,0,0.1814346710840861
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,8,64,0,1,float16,fp8,0,0.06112533311049143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,8,64,0,1,fp8,fp8,0,0.1797973314921061
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,48,64,128,1,float16,float16,0,0.03486400097608566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,48,64,0,1,float16,float16,0,0.035045333206653595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,48,64,128,1,float16,fp8,0,0.035088000198205314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,48,64,128,1,fp8,fp8,0,0.10029866298039754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,48,64,0,1,float16,fp8,0,0.034714666505654655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,48,64,0,1,fp8,fp8,0,0.09893332918485005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,1,64,128,1,float16,float16,0,0.03480533262093862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,1,64,0,1,float16,float16,0,0.034559999903043113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,1,64,128,1,float16,fp8,0,0.03489066660404205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,1,64,128,1,fp8,fp8,0,0.09756267070770264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,1,64,0,1,float16,fp8,0,0.03459733227888743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,1,64,0,1,fp8,fp8,0,0.0981226662794749
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,2,64,128,1,float16,float16,0,0.03439466655254364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,2,64,0,1,float16,float16,0,0.03479466587305069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,2,64,128,1,float16,fp8,0,0.03468266626199087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,2,64,128,1,fp8,fp8,0,0.09705066680908203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,2,64,0,1,float16,fp8,0,0.035391998787721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,2,64,0,1,fp8,fp8,0,0.09673600395520528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,4,64,128,1,float16,float16,0,0.03579200059175491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,4,64,0,1,float16,float16,0,0.035071998834609985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,4,64,128,1,float16,fp8,0,0.035429333647092186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,4,64,128,1,fp8,fp8,0,0.0986293355623881
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,4,64,0,1,float16,fp8,0,0.036176001032193504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,4,64,0,1,fp8,fp8,0,0.09870400031407674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,8,64,128,1,float16,float16,0,0.035029334326585136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,8,64,0,1,float16,float16,0,0.035461333890755974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,8,64,128,1,float16,fp8,0,0.03566933423280716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,8,64,128,1,fp8,fp8,0,0.09846400221188863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,8,64,0,1,float16,fp8,0,0.035546667873859406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,8,64,0,1,fp8,fp8,0,0.09852799773216248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,48,64,128,1,float16,float16,0,0.024330665667851765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,48,64,0,1,float16,float16,0,0.023706667125225067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,48,64,128,1,float16,fp8,0,0.024442667762438457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,48,64,128,1,fp8,fp8,0,0.05968533456325531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,48,64,0,1,float16,fp8,0,0.024165332317352295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,1,64,0,1,float16,fp8,0,0.023951999843120575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,48,64,0,1,fp8,fp8,0,0.059152002135912575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,1,64,128,1,float16,float16,0,0.023599999646345775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,1,64,0,1,float16,float16,0,0.023973333338896435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,1,64,128,1,float16,fp8,0,0.023743999501069386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,1,64,128,1,fp8,fp8,0,0.05723733206590017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,1,64,0,1,fp8,fp8,0,0.05901333192984263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,2,64,128,1,float16,float16,0,0.02366400013367335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,2,64,0,1,float16,float16,0,0.023813332120577495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,2,64,128,1,float16,fp8,0,0.023546665906906128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,2,64,128,1,fp8,fp8,0,0.05765333275000254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,2,64,0,1,float16,fp8,0,0.024192000428835552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,2,64,0,1,fp8,fp8,0,0.057477335135142006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,4,64,128,1,float16,float16,0,0.023989332218964893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,4,64,0,1,float16,float16,0,0.02418133368094762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,4,64,128,1,float16,fp8,0,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,4,64,128,1,fp8,fp8,0,0.058517331878344216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,4,64,0,1,float16,fp8,0,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,4,64,0,1,fp8,fp8,0,0.05902933577696482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,8,64,128,1,float16,float16,0,0.024271999796231587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,8,64,0,1,float16,float16,0,0.024959998826185863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,8,64,128,1,float16,fp8,0,0.024304000039895374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,8,64,128,1,fp8,fp8,0,0.05824000140031179
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,8,64,0,1,float16,fp8,0,0.024069334069887798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,8,64,0,1,fp8,fp8,0,0.05830933153629303
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,48,64,128,1,float16,float16,0,0.016309333344300587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,48,64,0,1,float16,float16,0,0.01579733317097028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,48,64,128,1,float16,fp8,0,0.016234666109085083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,48,64,128,1,fp8,fp8,0,0.036805334190527596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,48,64,0,1,float16,fp8,0,0.016250666230916977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,48,64,0,1,fp8,fp8,0,0.036805334190527596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,1,64,128,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,1,64,0,1,float16,float16,0,0.01598400001724561
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,1,64,128,1,float16,fp8,0,0.015802666544914246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,1,64,128,1,fp8,fp8,0,0.035391998787721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,1,64,0,1,float16,fp8,0,0.01600533351302147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,1,64,0,1,fp8,fp8,0,0.03566933423280716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,2,64,128,1,float16,float16,0,0.015568000574906668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,4,64,128,1,float16,float16,0,0.015962666521469753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,2,64,0,1,float16,float16,0,0.01575999955336253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,2,64,128,1,float16,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,2,64,128,1,fp8,fp8,0,0.03570666660865148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,2,64,0,1,float16,fp8,0,0.016010666886965435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,2,64,0,1,fp8,fp8,0,0.03549866626660029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,4,64,0,1,float16,float16,0,0.01584533353646596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,4,64,128,1,float16,fp8,0,0.015589332828919092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,4,64,128,1,fp8,fp8,0,0.03606399893760681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,8,64,128,1,fp8,fp8,0,0.03575466573238373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,4,64,0,1,float16,fp8,0,0.01609066625436147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,4,64,0,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,8,64,128,1,float16,float16,0,0.016154666741689045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,8,64,0,1,float16,float16,0,0.015861333658297855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,8,64,128,1,float16,fp8,0,0.015856000284353893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,48,64,128,1,fp8,fp8,0,0.026314665873845417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,8,64,0,1,float16,fp8,0,0.015834666788578033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,8,64,0,1,fp8,fp8,0,0.03580799947182337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,48,64,128,1,float16,float16,0,0.013440000514189402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,48,64,0,1,float16,float16,0,0.01341333364446958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,48,64,128,1,float16,fp8,0,0.013397333522637686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,48,64,0,1,float16,fp8,0,0.013269333789745966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,48,64,0,1,fp8,fp8,0,0.02589333305756251
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,1,64,128,1,float16,float16,0,0.013610667238632837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,1,64,0,1,float16,float16,0,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,1,64,128,1,float16,fp8,0,0.01404800017674764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,2,64,128,1,float16,fp8,0,0.013855999956528345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,1,64,128,1,fp8,fp8,0,0.02585600068171819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,1,64,0,1,float16,fp8,0,0.013503999759753546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,1,64,0,1,fp8,fp8,0,0.02533866713444392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,2,64,128,1,float16,float16,0,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,2,64,0,1,float16,float16,0,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,2,64,128,1,fp8,fp8,0,0.025775998830795288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,2,64,0,1,float16,fp8,0,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,2,64,0,1,fp8,fp8,0,0.02619733413060506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,4,64,128,1,float16,float16,0,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,4,64,0,1,float16,float16,0,0.013397333522637686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,8,64,128,1,float16,float16,0,0.013829333086808523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,4,64,128,1,float16,fp8,0,0.0138026662170887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,4,64,128,1,fp8,fp8,0,0.025583999852339428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,4,64,0,1,float16,fp8,0,0.01394133393963178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,4,64,0,1,fp8,fp8,0,0.026373334228992462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,8,64,0,1,float16,float16,0,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,8,64,128,1,float16,fp8,0,0.013845333208640417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,8,64,128,1,fp8,fp8,0,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,8,64,0,1,float16,fp8,0,0.013450667262077332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,8,64,0,1,fp8,fp8,0,0.02625600000222524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,48,64,0,1,float16,fp8,0,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,48,64,128,1,float16,float16,0,0.011887999872366587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,48,64,0,1,float16,float16,0,0.012144000579913458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,48,64,128,1,float16,fp8,0,0.011952000359694162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,48,64,128,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,48,64,0,1,fp8,fp8,0,0.021525333325068157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,1,64,128,1,float16,float16,0,0.012298667182525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,1,64,0,1,float16,float16,0,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,1,64,128,1,float16,fp8,0,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,1,64,128,1,fp8,fp8,0,0.021669333179791767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,1,64,0,1,float16,fp8,0,0.012543999900420507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,1,64,0,1,fp8,fp8,0,0.020938667158285778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,2,64,128,1,float16,float16,0,0.011567999919255575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,2,64,0,1,float16,float16,0,0.012042666474978128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,2,64,128,1,float16,fp8,0,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,2,64,128,1,fp8,fp8,0,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,2,64,0,1,float16,fp8,0,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,2,64,0,1,fp8,fp8,0,0.021749332547187805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,4,64,128,1,float16,float16,0,0.012240000069141388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,4,64,0,1,float16,float16,0,0.011765333513418833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,4,64,128,1,float16,fp8,0,0.012159999459981918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,4,64,128,1,fp8,fp8,0,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,4,64,0,1,float16,fp8,0,0.012602667013804117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,4,64,0,1,fp8,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,8,64,128,1,float16,float16,0,0.012261333564917246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,8,64,0,1,float16,float16,0,0.012554666648308435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,8,64,128,1,float16,fp8,0,0.012613333761692047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,8,64,128,1,fp8,fp8,0,0.020938667158285778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,8,64,0,1,float16,fp8,0,0.012432000289360682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,48,64,0,1,fp8,fp8,0,0.01749333366751671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,8,64,0,1,fp8,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,48,64,128,1,float16,float16,0,0.011306667079528173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,48,64,0,1,float16,float16,0,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,48,64,128,1,float16,fp8,0,0.011754666765530905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,48,64,128,1,fp8,fp8,0,0.018272000054518383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,48,64,0,1,float16,fp8,0,0.011493333925803503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,1,64,128,1,float16,float16,0,0.011370666325092316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,1,64,0,1,float16,float16,0,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,1,64,128,1,float16,fp8,0,0.011503999431928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,1,64,128,1,fp8,fp8,0,0.01863466699918111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,1,64,0,1,float16,fp8,0,0.011626667032639185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,1,64,0,1,fp8,fp8,0,0.017509333789348602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,2,64,0,1,fp8,fp8,0,0.018298666924238205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,2,64,128,1,float16,float16,0,0.0116799995303154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,2,64,0,1,float16,float16,0,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,2,64,128,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,2,64,128,1,fp8,fp8,0,0.017658667018016178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,2,64,0,1,float16,fp8,0,0.011551999797423681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,4,64,128,1,float16,float16,0,0.011258666714032492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,4,64,0,1,float16,float16,0,0.01145600030819575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,4,64,128,1,float16,fp8,0,0.012080000092585882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,4,64,128,1,fp8,fp8,0,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,4,64,0,1,float16,fp8,0,0.011328000575304031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,8,64,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,4,64,0,1,fp8,fp8,0,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,8,64,128,1,float16,float16,0,0.011391999820868174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,8,64,0,1,float16,float16,0,0.011157333850860596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,8,64,128,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,8,64,128,1,fp8,fp8,0,0.017909333109855652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,8,64,0,1,fp8,fp8,0,0.01781333362062772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,48,64,128,1,float16,float16,0,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,48,64,0,1,float16,float16,0,0.011237333218256632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,48,64,128,1,float16,fp8,0,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,1,64,128,1,float16,fp8,0,0.011328000575304031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,48,64,128,1,fp8,fp8,0,0.017466666797796886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,48,64,0,1,float16,fp8,0,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,48,64,0,1,fp8,fp8,0,0.01749333366751671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,1,64,128,1,float16,float16,0,0.011541333049535751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,1,64,0,1,float16,float16,0,0.011616000284751257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,1,64,128,1,fp8,fp8,0,0.017946666727463405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,1,64,0,1,float16,fp8,0,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,1,64,0,1,fp8,fp8,0,0.017781333376963932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,2,64,128,1,float16,float16,0,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,2,64,0,1,float16,float16,0,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,2,64,128,1,float16,fp8,0,0.011578666667143503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,2,64,128,1,fp8,fp8,0,0.017551999539136887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,2,64,0,1,float16,fp8,0,0.012015999605258306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,2,64,0,1,fp8,fp8,0,0.018351999421914417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,4,64,128,1,float16,float16,0,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,4,64,0,1,float16,float16,0,0.011226666470368704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,4,64,128,1,float16,fp8,0,0.011440000186363855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,4,64,128,1,fp8,fp8,0,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,4,64,0,1,float16,fp8,0,0.011514666179815928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,4,64,0,1,fp8,fp8,0,0.01773333301146825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,8,64,0,1,fp8,fp8,0,0.017605333278576534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,8,64,128,1,float16,float16,0,0.011328000575304031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,8,64,0,1,float16,float16,0,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,8,64,128,1,float16,fp8,0,0.011621333658695221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,8,64,128,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,8,64,0,1,float16,fp8,0,0.011578666667143503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,1,64,128,1,float16,float16,0,3.54912535349528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,1,64,128,1,float16,fp8,0,3.5043999354044595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,2,64,128,1,float16,float16,0,3.6143999099731445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,1,64,128,1,fp8,fp8,0,4.58131726582845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,2,64,128,1,float16,fp8,0,3.5731093088785806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,2,64,128,1,fp8,fp8,0,4.623477300008138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,4,64,128,1,float16,float16,0,3.631253242492676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,4,64,128,1,float16,fp8,0,3.589050610860189
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,1,64,0,1,fp8,fp8,0,21.482218424479168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,4,64,128,1,fp8,fp8,0,4.669562657674153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,1,64,0,1,float16,float16,0,23.874374389648438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,1,64,0,1,float16,fp8,0,23.821131388346355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,2,64,0,1,float16,float16,0,24.136878967285156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,8,64,128,1,float16,float16,0,3.673983891805013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,2,64,0,1,fp8,fp8,0,21.548016866048176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,2,64,0,1,float16,fp8,0,23.671541849772137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,8,64,128,1,float16,fp8,0,3.6394131978352866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,8,64,128,1,fp8,fp8,0,4.687546730041504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,4,64,0,1,float16,float16,0,23.979141235351562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,40,64,128,1,float16,float16,0,2.013199965159098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,40,64,128,1,float16,fp8,0,2.004271984100342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,40,64,128,1,fp8,fp8,0,2.6054986317952475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,4,64,0,1,fp8,fp8,0,21.559903462727863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,4,64,0,1,float16,fp8,0,24.6231206258138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,40,64,0,1,float16,float16,0,12.124783833821615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,1,64,128,1,float16,float16,0,1.807103951772054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,8,64,0,1,float16,float16,0,24.28418223063151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,1,64,128,1,float16,fp8,0,1.787552038828532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,40,64,0,1,float16,fp8,0,12.241781870524088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,40,64,0,1,fp8,fp8,0,11.107823689778646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,8,64,0,1,fp8,fp8,0,21.731979370117188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,1,64,128,1,fp8,fp8,0,2.3411146799723306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,2,64,128,1,float16,float16,0,1.8428053855895996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,2,64,128,1,float16,fp8,0,1.8021492958068848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,8,64,0,1,float16,fp8,0,24.68243153889974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,2,64,128,1,fp8,fp8,0,2.3760693868001304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,4,64,128,1,float16,float16,0,1.8515146573384602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,1,64,0,1,float16,float16,0,11.947680155436197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,4,64,128,1,float16,fp8,0,1.8262613614400227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,1,64,0,1,fp8,fp8,0,10.827845255533854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,1,64,0,1,float16,fp8,0,12.000367482503256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,4,64,128,1,fp8,fp8,0,2.4164692560831704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,2,64,0,1,float16,float16,0,12.205061594645182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,8,64,128,1,float16,float16,0,1.8761332829793294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,2,64,0,1,fp8,fp8,0,10.878378550211588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,2,64,0,1,float16,fp8,0,12.089040120442709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,8,64,128,1,float16,fp8,0,1.8467520078023274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,4,64,0,1,float16,float16,0,12.055093129475912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,8,64,128,1,fp8,fp8,0,2.4503679275512695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,40,64,128,1,float16,float16,0,1.0505332946777344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,40,64,128,1,float16,fp8,0,1.0669013659159343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,40,64,128,1,fp8,fp8,0,1.3744799296061199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,4,64,0,1,float16,fp8,0,12.099797566731771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,4,64,0,1,fp8,fp8,0,10.878181457519531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,1,64,128,1,float16,float16,0,0.9877973397572836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,40,64,0,1,float16,float16,0,6.197290420532227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,8,64,0,1,float16,float16,0,12.098485310872396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,1,64,128,1,float16,fp8,0,0.9791626930236816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,40,64,0,1,float16,fp8,0,6.2010453542073565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,8,64,0,1,fp8,fp8,0,10.9627685546875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,1,64,128,1,fp8,fp8,0,1.2565546830495198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,8,64,0,1,float16,fp8,0,11.933748881022135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,40,64,0,1,fp8,fp8,0,5.650719960530599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,2,64,128,1,float16,float16,0,0.9931466579437256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,2,64,128,1,float16,fp8,0,0.9743999640146891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,2,64,128,1,fp8,fp8,0,1.2607519626617432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,1,64,0,1,float16,float16,0,6.081520080566406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,4,64,128,1,float16,float16,0,0.991050640741984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,4,64,128,1,float16,fp8,0,0.9782613118489584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,1,64,0,1,float16,fp8,0,6.082101186116536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,1,64,0,1,fp8,fp8,0,5.57366943359375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,4,64,128,1,fp8,fp8,0,1.2674240271250408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,2,64,0,1,float16,float16,0,6.143999735514323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,8,64,128,1,float16,float16,0,0.9912532965342203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,2,64,0,1,fp8,fp8,0,5.615749359130859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,2,64,0,1,float16,fp8,0,6.1382185618082685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,8,64,128,1,float16,fp8,0,0.9885439872741699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,4,64,0,1,float16,float16,0,6.081759770711263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,8,64,128,1,fp8,fp8,0,1.2807679971059163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,40,64,128,1,float16,float16,0,0.6912533442179362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,40,64,128,1,float16,fp8,0,0.6892159779866537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,4,64,0,1,float16,fp8,0,6.0778452555338545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,40,64,128,1,fp8,fp8,0,0.8281546433766683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,4,64,0,1,fp8,fp8,0,5.571727752685547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,1,64,128,1,float16,float16,0,0.6907573541005453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,40,64,0,1,float16,float16,0,3.2782773971557617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,8,64,0,1,float16,float16,0,6.09767468770345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,8,64,0,1,float16,fp8,0,6.135904312133789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,8,64,0,1,fp8,fp8,0,5.591146469116211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,1,64,128,1,float16,fp8,0,0.6916000048319498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,1,64,128,1,fp8,fp8,0,0.8275520006815592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,40,64,0,1,fp8,fp8,0,3.0269012451171875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,40,64,0,1,float16,fp8,0,3.2601601282755532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,1,64,0,1,float16,float16,0,3.2807626724243164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,2,64,128,1,float16,float16,0,0.692197322845459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,2,64,128,1,float16,fp8,0,0.6952853202819824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,1,64,0,1,float16,fp8,0,3.3907200495402017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,2,64,128,1,fp8,fp8,0,0.8247733116149902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,1,64,0,1,fp8,fp8,0,3.0080960591634116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,4,64,128,1,float16,float16,0,0.6892480055491129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,2,64,0,1,float16,float16,0,3.276362737019857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,4,64,128,1,float16,fp8,0,0.6883093516031901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,2,64,0,1,fp8,fp8,0,2.997503916422526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,2,64,0,1,float16,fp8,0,3.281253178914388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,4,64,128,1,fp8,fp8,0,0.8256373405456543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,8,64,128,1,float16,float16,0,0.6875306765238444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,4,64,0,1,float16,float16,0,3.3019841512044272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,8,64,128,1,float16,fp8,0,0.6899306774139404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,4,64,0,1,float16,fp8,0,3.2688318888346353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,8,64,128,1,fp8,fp8,0,0.8240640163421631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,4,64,0,1,fp8,fp8,0,3.016090710957845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,8,64,0,1,float16,float16,0,3.3342666625976562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,1,64,128,1,float16,float16,0,2.6777706146240234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,8,64,0,1,float16,fp8,0,3.2669121424357095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,8,64,0,1,fp8,fp8,0,3.014362653096517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,1,64,128,1,float16,fp8,0,2.6056853930155435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,1,64,128,1,fp8,fp8,0,3.4248479207356772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,2,64,128,1,float16,float16,0,2.7108640670776367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,2,64,128,1,float16,fp8,0,2.681152025858561
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,2,64,128,1,fp8,fp8,0,3.4691041310628257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,4,64,128,1,float16,float16,0,2.7116638819376626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,4,64,128,1,float16,fp8,0,2.687317212422689
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,1,64,0,1,float16,float16,0,13.818191528320312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,1,64,0,1,fp8,fp8,0,12.542884826660156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,1,64,0,1,float16,fp8,0,13.850218454996744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,2,64,0,1,float16,float16,0,13.988656361897787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,4,64,128,1,fp8,fp8,0,3.49018128712972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,2,64,0,1,float16,fp8,0,13.817845662434896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,2,64,0,1,fp8,fp8,0,12.574442545572916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,8,64,128,1,float16,float16,0,2.7555519739786782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,8,64,128,1,float16,fp8,0,2.71120548248291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,4,64,0,1,float16,float16,0,14.172533671061197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,8,64,128,1,fp8,fp8,0,3.544447898864746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,40,64,128,1,float16,float16,0,1.4837600390116374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,40,64,128,1,float16,fp8,0,1.495482603708903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,40,64,128,1,fp8,fp8,0,1.942090670267741
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,4,64,0,1,fp8,fp8,0,12.605147043863932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,4,64,0,1,float16,fp8,0,13.982714335123697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,1,64,128,1,float16,float16,0,1.3552160263061523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,40,64,0,1,float16,float16,0,7.237855911254883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,1,64,128,1,float16,fp8,0,1.3314879735310872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,8,64,0,1,fp8,fp8,0,12.636912027994791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,40,64,0,1,float16,fp8,0,7.087530771891276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,8,64,0,1,float16,float16,0,14.33023452758789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,1,64,128,1,fp8,fp8,0,1.76145601272583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,8,64,0,1,float16,fp8,0,13.94936498006185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,40,64,0,1,fp8,fp8,0,6.5335947672526045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,2,64,128,1,float16,float16,0,1.3721866607666016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,2,64,128,1,float16,fp8,0,1.3500480651855469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,2,64,128,1,fp8,fp8,0,1.775701363881429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,1,64,0,1,float16,float16,0,6.993253072102864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,4,64,128,1,float16,float16,0,1.3772692680358887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,4,64,128,1,float16,fp8,0,1.3565600713094075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,1,64,0,1,fp8,fp8,0,6.324239730834961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,1,64,0,1,float16,fp8,0,7.0098616282145185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,4,64,128,1,fp8,fp8,0,1.7880053520202637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,2,64,0,1,float16,float16,0,7.054496129353841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,2,64,0,1,float16,fp8,0,6.937061309814453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,2,64,0,1,fp8,fp8,0,6.385221481323242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,8,64,128,1,float16,float16,0,1.38864533106486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,8,64,128,1,float16,fp8,0,1.3742720286051433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,4,64,0,1,float16,float16,0,6.989840189615886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,8,64,128,1,fp8,fp8,0,1.8088372548421223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,40,64,128,1,float16,float16,0,0.7923946380615234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,40,64,128,1,float16,fp8,0,0.8061760266621908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,4,64,0,1,float16,fp8,0,6.937807718912761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,40,64,128,1,fp8,fp8,0,1.0365599791208904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,4,64,0,1,fp8,fp8,0,6.348997116088867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,1,64,128,1,float16,float16,0,0.7462026278177897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,8,64,0,1,float16,float16,0,7.095082600911458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,40,64,0,1,float16,float16,0,3.635162671407064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,8,64,0,1,fp8,fp8,0,6.439178466796875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,1,64,128,1,float16,fp8,0,0.735093355178833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,8,64,0,1,float16,fp8,0,6.949658711751302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,1,64,128,1,fp8,fp8,0,0.9510239760080973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,40,64,0,1,fp8,fp8,0,3.3771839141845703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,40,64,0,1,float16,fp8,0,3.6312907536824546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,2,64,128,1,float16,float16,0,0.7538560231526693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,1,64,0,1,float16,float16,0,3.592010815938314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,2,64,128,1,float16,fp8,0,0.7372053464253744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,2,64,128,1,fp8,fp8,0,0.9544106324513754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,1,64,0,1,float16,fp8,0,3.5648746490478516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,4,64,128,1,float16,float16,0,0.7498506704966227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,1,64,0,1,fp8,fp8,0,3.2532052993774414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,4,64,128,1,float16,fp8,0,0.739402691523234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,2,64,0,1,float16,float16,0,3.5751307805379233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,4,64,128,1,fp8,fp8,0,0.9584373633066813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,8,64,128,1,float16,float16,0,0.7547732988993326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,2,64,0,1,fp8,fp8,0,3.259968121846517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,2,64,0,1,float16,fp8,0,3.602341334025065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,8,64,128,1,float16,fp8,0,0.7468586762746176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,4,64,0,1,float16,float16,0,3.632944107055664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,8,64,128,1,fp8,fp8,0,0.9623573621114095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,4,64,0,1,float16,fp8,0,3.6268320083618164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,4,64,0,1,fp8,fp8,0,3.2593119939168296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,8,64,0,1,float16,float16,0,3.5833921432495117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,40,64,128,1,float16,float16,0,0.5244853496551514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,40,64,128,1,float16,fp8,0,0.5246933301289877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,40,64,128,1,fp8,fp8,0,0.6300693353017172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,40,64,0,1,float16,float16,0,1.9552693367004395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,1,64,128,1,float16,float16,0,0.5241546630859375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,8,64,0,1,fp8,fp8,0,3.317861239115397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,8,64,0,1,float16,fp8,0,3.5801706314086914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,40,64,0,1,float16,fp8,0,1.9656319618225098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,1,64,128,1,float16,fp8,0,0.5236373345057169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,40,64,0,1,fp8,fp8,0,1.753503958384196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,1,64,128,1,fp8,fp8,0,0.6318826675415039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,1,64,0,1,float16,float16,0,1.9539252916971843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,2,64,128,1,float16,float16,0,0.5226773420969645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,2,64,128,1,float16,fp8,0,0.5239359935124716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,1,64,0,1,float16,fp8,0,1.9583093325297039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,2,64,128,1,fp8,fp8,0,0.63155198097229
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,1,64,0,1,fp8,fp8,0,1.7543627421061199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,2,64,0,1,float16,float16,0,1.9600159327189128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,4,64,128,1,float16,float16,0,0.5248746474583944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,2,64,0,1,float16,fp8,0,1.9548907279968262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,4,64,128,1,float16,fp8,0,0.5243359804153442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,2,64,0,1,fp8,fp8,0,1.751706600189209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,4,64,128,1,fp8,fp8,0,0.6282879908879598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,4,64,0,1,float16,float16,0,1.95414400100708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,8,64,128,1,float16,float16,0,0.5215466817220052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,8,64,128,1,float16,fp8,0,0.5235999822616577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,8,64,128,1,fp8,fp8,0,0.6286186774571737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,4,64,0,1,fp8,fp8,0,1.7448320388793945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,4,64,0,1,float16,fp8,0,1.9584266344706218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,8,64,0,1,float16,float16,0,1.97107728322347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,8,64,0,1,float16,fp8,0,1.955349286397298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,8,64,0,1,fp8,fp8,0,1.7439306577046711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,1,64,128,1,float16,float16,0,2.1916373570760093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,1,64,128,1,float16,fp8,0,2.1550025939941406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,1,64,128,1,fp8,fp8,0,2.8500213623046875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,2,64,128,1,float16,float16,0,2.2340854008992515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,2,64,128,1,float16,fp8,0,2.207365353902181
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,2,64,128,1,fp8,fp8,0,2.8944854736328125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,4,64,128,1,float16,float16,0,2.246901353200277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,1,64,0,1,float16,float16,0,9.803269068400065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,1,64,0,1,fp8,fp8,0,8.934362411499023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,4,64,128,1,float16,fp8,0,2.2209973335266113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,1,64,0,1,float16,fp8,0,9.849301020304361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,2,64,0,1,float16,float16,0,9.962698618570963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,4,64,128,1,fp8,fp8,0,2.9290345509847007
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,2,64,0,1,fp8,fp8,0,8.979455947875977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,8,64,128,1,float16,float16,0,2.2843467394510903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,2,64,0,1,float16,fp8,0,10.063594818115234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,8,64,128,1,float16,fp8,0,2.259946664174398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,4,64,0,1,float16,float16,0,9.886234919230143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,8,64,128,1,fp8,fp8,0,2.935797373453776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,40,64,128,1,float16,float16,0,1.2339893182118733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,40,64,128,1,float16,fp8,0,1.2373013496398926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,4,64,0,1,fp8,fp8,0,8.999493281046549
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,40,64,128,1,fp8,fp8,0,1.6220746040344238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,4,64,0,1,float16,fp8,0,9.979679743448893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,1,64,128,1,float16,float16,0,1.1314133008321126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,40,64,0,1,float16,float16,0,5.066272099812825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,8,64,0,1,float16,float16,0,9.928842544555664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,1,64,128,1,float16,fp8,0,1.111674706141154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,8,64,0,1,fp8,fp8,0,9.095439910888672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,40,64,0,1,float16,fp8,0,5.145509401957194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,1,64,128,1,fp8,fp8,0,1.472266674041748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,8,64,0,1,float16,fp8,0,10.030170440673828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,40,64,0,1,fp8,fp8,0,4.6886240641276045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,2,64,128,1,float16,float16,0,1.1384373505910237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,1,64,0,1,float16,float16,0,5.069024085998535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,2,64,128,1,float16,fp8,0,1.1255253156026204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,2,64,128,1,fp8,fp8,0,1.4817652702331543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,4,64,128,1,float16,float16,0,1.1498560110727947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,1,64,0,1,float16,fp8,0,5.010885238647461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,1,64,0,1,fp8,fp8,0,4.525562604268392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,4,64,128,1,float16,fp8,0,1.1333973407745361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,2,64,0,1,float16,float16,0,4.9912214279174805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,4,64,128,1,fp8,fp8,0,1.5027732849121094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,2,64,0,1,float16,fp8,0,4.98580265045166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,8,64,128,1,float16,float16,0,1.1576639811197917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,2,64,0,1,fp8,fp8,0,4.530223846435547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,8,64,128,1,float16,fp8,0,1.150480031967163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,4,64,0,1,float16,float16,0,5.028021176656087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,8,64,128,1,fp8,fp8,0,1.5048160552978516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,40,64,128,1,float16,float16,0,0.6681226889292399
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,4,64,0,1,fp8,fp8,0,4.537082672119141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,4,64,0,1,float16,fp8,0,4.951130549112956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,40,64,128,1,float16,fp8,0,0.6757653554280599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,40,64,128,1,fp8,fp8,0,0.8771039644877116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,8,64,0,1,float16,float16,0,4.983711878458659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,40,64,0,1,float16,float16,0,2.6068266232808432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,1,64,128,1,float16,float16,0,0.6262346506118774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,1,64,128,1,float16,fp8,0,0.6160480181376139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,8,64,0,1,fp8,fp8,0,4.554394721984863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,40,64,0,1,float16,fp8,0,2.590208053588867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,8,64,0,1,float16,fp8,0,5.088608105977376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,1,64,128,1,fp8,fp8,0,0.7995626926422119
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,40,64,0,1,fp8,fp8,0,2.431946595509847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,1,64,0,1,float16,float16,0,2.5638133684794107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,2,64,128,1,float16,float16,0,0.6229279836018881
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,1,64,0,1,float16,fp8,0,2.55569060643514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,2,64,128,1,float16,fp8,0,0.6226079861323038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,1,64,0,1,fp8,fp8,0,2.3585546811421714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,2,64,128,1,fp8,fp8,0,0.8014506498972574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,4,64,128,1,float16,float16,0,0.6291360060373942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,2,64,0,1,float16,float16,0,2.5787413914998374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,4,64,128,1,float16,fp8,0,0.6223093271255493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,4,64,128,1,fp8,fp8,0,0.8081759611765543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,2,64,0,1,float16,fp8,0,2.5779946645100913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,2,64,0,1,fp8,fp8,0,2.3471892674764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,8,64,128,1,float16,float16,0,0.6346240043640137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,4,64,0,1,float16,float16,0,2.571664015452067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,8,64,128,1,float16,fp8,0,0.6300906737645467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,4,64,0,1,float16,fp8,0,2.5652213096618652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,8,64,128,1,fp8,fp8,0,0.8060906728108724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,4,64,0,1,fp8,fp8,0,2.370581309000651
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,40,64,128,1,float16,float16,0,0.44230401515960693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,8,64,0,1,float16,float16,0,2.573018709818522
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,40,64,128,1,float16,fp8,0,0.4407999912897746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,40,64,128,1,fp8,fp8,0,0.533519983291626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,40,64,0,1,float16,float16,0,1.43394136428833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,8,64,0,1,float16,fp8,0,2.548602739969889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,1,64,128,1,float16,float16,0,0.4398133357365926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,8,64,0,1,fp8,fp8,0,2.3538079261779785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,40,64,0,1,float16,fp8,0,1.442794640858968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,40,64,0,1,fp8,fp8,0,1.268783966700236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,1,64,128,1,float16,fp8,0,0.4416586558024089
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,1,64,128,1,fp8,fp8,0,0.528058648109436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,1,64,0,1,float16,float16,0,1.4425387382507324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,2,64,128,1,float16,float16,0,0.44192532698313397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,2,64,128,1,float16,fp8,0,0.4378986756006877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,1,64,0,1,float16,fp8,0,1.4270399411519368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,2,64,128,1,fp8,fp8,0,0.5308106740315756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,1,64,0,1,fp8,fp8,0,1.2688852945963542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,2,64,0,1,float16,float16,0,1.4267093340555828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,4,64,128,1,float16,float16,0,0.4408053159713745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,2,64,0,1,float16,fp8,0,1.4267306327819824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,2,64,0,1,fp8,fp8,0,1.2675306797027588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,4,64,128,1,float16,fp8,0,0.4399786790211995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,4,64,128,1,fp8,fp8,0,0.5314079920450846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,4,64,0,1,float16,float16,0,1.4233546257019043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,8,64,128,1,float16,float16,0,0.4406079848607381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,4,64,0,1,fp8,fp8,0,1.2690773010253906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,4,64,0,1,float16,fp8,0,1.429946740468343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,8,64,128,1,float16,fp8,0,0.4395466645558675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,8,64,128,1,fp8,fp8,0,0.5292853514353434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,8,64,0,1,float16,float16,0,1.4248746236165364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,8,64,0,1,float16,fp8,0,1.4280907313028972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,8,64,0,1,fp8,fp8,0,1.261349360148112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,1,64,128,1,float16,float16,0,3.500080108642578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,1,64,128,1,float16,fp8,0,3.4276908238728843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,2,64,128,1,float16,float16,0,3.5461225509643555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,1,64,128,1,fp8,fp8,0,4.494650522867839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,2,64,128,1,float16,fp8,0,3.528191884358724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,2,64,128,1,fp8,fp8,0,4.550485293070476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,4,64,128,1,float16,float16,0,3.5862773259480796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,1,64,0,1,fp8,fp8,0,11.811893463134766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,1,64,0,1,float16,float16,0,12.97988255818685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,1,64,0,1,float16,fp8,0,13.22756830851237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,2,64,0,1,float16,float16,0,13.041690826416016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,4,64,128,1,float16,fp8,0,3.549776077270508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,2,64,0,1,fp8,fp8,0,11.89236323038737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,4,64,128,1,fp8,fp8,0,4.581397374471028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,2,64,0,1,float16,fp8,0,13.109322865804037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,8,64,128,1,float16,float16,0,3.625274658203125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,8,64,128,1,float16,fp8,0,3.569941202799479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,4,64,0,1,float16,float16,0,13.115594228108725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,8,64,128,1,fp8,fp8,0,4.620293299357097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,40,64,128,1,float16,float16,0,1.9665759404500325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,40,64,128,1,float16,fp8,0,1.9407893816630046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,4,64,0,1,fp8,fp8,0,11.961034138997396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,4,64,0,1,float16,fp8,0,13.10601552327474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,40,64,128,1,fp8,fp8,0,2.529850641886393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,1,64,128,1,float16,float16,0,1.7529385884602864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,8,64,0,1,float16,float16,0,13.196554819742838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,40,64,0,1,float16,float16,0,6.7711842854817705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,8,64,0,1,fp8,fp8,0,11.981077829996744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,1,64,128,1,float16,fp8,0,1.7221333185831706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,8,64,0,1,float16,fp8,0,13.174858093261719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,1,64,128,1,fp8,fp8,0,2.2926294008890786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,40,64,0,1,float16,fp8,0,6.717658360799153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,40,64,0,1,fp8,fp8,0,6.206421534220378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,2,64,128,1,float16,float16,0,1.7812479337056477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,2,64,128,1,float16,fp8,0,1.7548747062683105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,1,64,0,1,float16,float16,0,6.563850402832031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,2,64,128,1,fp8,fp8,0,2.318160057067871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,4,64,128,1,float16,float16,0,1.7871732711791992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,1,64,0,1,fp8,fp8,0,5.940912246704102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,1,64,0,1,float16,fp8,0,6.478309631347656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,4,64,128,1,float16,fp8,0,1.7649013201395671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,2,64,0,1,float16,float16,0,6.813562393188477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,4,64,128,1,fp8,fp8,0,2.3452693621317544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,8,64,128,1,float16,float16,0,1.8037439982096355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,2,64,0,1,fp8,fp8,0,5.982287724812825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,2,64,0,1,float16,fp8,0,6.60530153910319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,4,64,0,1,float16,float16,0,6.592069625854492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,8,64,128,1,float16,fp8,0,1.775584061940511
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,8,64,128,1,fp8,fp8,0,2.3634026845296225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,40,64,128,1,float16,float16,0,0.9848266442616781
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,4,64,0,1,fp8,fp8,0,6.039098739624023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,4,64,0,1,float16,fp8,0,6.503440221150716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,40,64,128,1,float16,fp8,0,0.9930613040924072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,40,64,128,1,fp8,fp8,0,1.2931520144144695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,8,64,0,1,float16,float16,0,6.610970815022786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,40,64,0,1,float16,float16,0,3.398437182108561
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,8,64,0,1,float16,fp8,0,6.552725474039714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,1,64,128,1,float16,float16,0,0.9101013342539469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,1,64,128,1,float16,fp8,0,0.900218645731608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,8,64,0,1,fp8,fp8,0,6.083610534667969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,40,64,0,1,float16,fp8,0,3.391237258911133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,40,64,0,1,fp8,fp8,0,3.1685333251953125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,1,64,128,1,fp8,fp8,0,1.1818986733754475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,1,64,0,1,float16,float16,0,3.3493067423502603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,2,64,128,1,float16,float16,0,0.927237351735433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,2,64,128,1,float16,fp8,0,0.9018346468607584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,1,64,0,1,float16,fp8,0,3.271717389424642
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,1,64,0,1,fp8,fp8,0,3.043066660563151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,2,64,128,1,fp8,fp8,0,1.1857066949208577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,2,64,0,1,float16,float16,0,3.31934388478597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,4,64,128,1,float16,float16,0,0.9206293423970541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,4,64,128,1,float16,fp8,0,0.9076639811197916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,2,64,0,1,float16,fp8,0,3.3311732610066733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,2,64,0,1,fp8,fp8,0,3.0313920974731445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,4,64,128,1,fp8,fp8,0,1.1981759866078694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,8,64,128,1,float16,float16,0,0.9279999732971191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,8,64,128,1,float16,fp8,0,0.9249280293782552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,4,64,0,1,float16,float16,0,3.3300320307413735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,8,64,128,1,fp8,fp8,0,1.1992586453755696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,4,64,0,1,float16,fp8,0,3.3068161010742188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,40,64,128,1,float16,float16,0,0.5358719825744629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,4,64,0,1,fp8,fp8,0,3.032778739929199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,8,64,0,1,float16,float16,0,3.3272746404012046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,40,64,128,1,float16,fp8,0,0.5428213278452555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,40,64,128,1,fp8,fp8,0,0.7034506797790527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,40,64,0,1,float16,float16,0,1.7617759704589844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,8,64,0,1,fp8,fp8,0,3.065567970275879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,1,64,128,1,float16,float16,0,0.5045226812362671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,8,64,0,1,float16,fp8,0,3.3572425842285156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,40,64,0,1,float16,fp8,0,1.7651467323303223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,40,64,0,1,fp8,fp8,0,1.6391733487447102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,1,64,128,1,float16,fp8,0,0.4974079926808675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,1,64,128,1,fp8,fp8,0,0.6454559961954752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,1,64,0,1,float16,float16,0,1.7136212984720867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,2,64,128,1,float16,float16,0,0.5069226821263632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,1,64,0,1,float16,fp8,0,1.7145013809204102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,1,64,0,1,fp8,fp8,0,1.5889973640441895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,2,64,128,1,float16,fp8,0,0.49674665927886963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,2,64,128,1,fp8,fp8,0,0.6476106643676758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,2,64,0,1,float16,float16,0,1.7271092732747395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,4,64,128,1,float16,fp8,0,0.5020213524500529
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,2,64,0,1,float16,fp8,0,1.7195839881896973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,4,64,128,1,float16,float16,0,0.5091840028762817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,2,64,0,1,fp8,fp8,0,1.581914742787679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,4,64,128,1,fp8,fp8,0,0.6472906668980917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,4,64,0,1,float16,float16,0,1.7252693176269531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,8,64,128,1,float16,float16,0,0.5106613238652548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,4,64,0,1,fp8,fp8,0,1.5826239585876465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,8,64,128,1,float16,fp8,0,0.5079360008239746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,4,64,0,1,float16,fp8,0,1.7137333552042644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,8,64,128,1,fp8,fp8,0,0.6529706716537476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,8,64,0,1,float16,float16,0,1.720746676127116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,40,64,128,1,float16,float16,0,0.35630400975545246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,40,64,128,1,float16,fp8,0,0.35928531487782794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,8,64,0,1,fp8,fp8,0,1.5992213884989421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,8,64,0,1,float16,fp8,0,1.710858662923177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,40,64,0,1,float16,float16,0,0.9772693316141764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,40,64,128,1,fp8,fp8,0,0.43166399002075195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,1,64,128,1,float16,float16,0,0.35556264718373615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,40,64,0,1,float16,fp8,0,0.9801386992136637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,40,64,0,1,fp8,fp8,0,0.8702666759490967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,1,64,128,1,float16,fp8,0,0.3569173415501912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,1,64,0,1,float16,float16,0,0.9851253032684326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,1,64,128,1,fp8,fp8,0,0.4303520123163859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,2,64,128,1,float16,float16,0,0.35553598403930664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,1,64,0,1,float16,fp8,0,0.9819680054982504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,1,64,0,1,fp8,fp8,0,0.8684906959533691
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,2,64,128,1,float16,fp8,0,0.3563786745071411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,2,64,0,1,float16,float16,0,0.9807519912719727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,2,64,128,1,fp8,fp8,0,0.43386133511861164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,4,64,128,1,float16,float16,0,0.35596799850463867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,2,64,0,1,float16,fp8,0,0.985215981801351
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,2,64,0,1,fp8,fp8,0,0.8698986371358236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,4,64,128,1,float16,fp8,0,0.35550932089487713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,4,64,0,1,float16,float16,0,0.9803840319315592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,4,64,128,1,fp8,fp8,0,0.4307146469751994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,8,64,128,1,float16,float16,0,0.3562133312225342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,4,64,0,1,float16,fp8,0,0.9825173219045004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,4,64,0,1,fp8,fp8,0,0.8747466405232748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,8,64,128,1,float16,fp8,0,0.357040007909139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,8,64,128,1,fp8,fp8,0,0.43112532297770184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,8,64,0,1,float16,float16,0,0.9867626825968424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,8,64,0,1,float16,fp8,0,0.9833866755167643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,8,64,0,1,fp8,fp8,0,0.8702346483866373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,1,64,128,1,float16,float16,0,2.592730681101481
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,1,64,128,1,float16,fp8,0,2.5532533327738443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,1,64,128,1,fp8,fp8,0,3.36405881245931
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,2,64,128,1,float16,float16,0,2.654138724009196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,2,64,128,1,float16,fp8,0,2.625669320424398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,1,64,0,1,float16,float16,0,7.796735763549805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,2,64,128,1,fp8,fp8,0,3.4159892400105796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,1,64,0,1,fp8,fp8,0,7.110037485758464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,1,64,0,1,float16,fp8,0,7.670250574747722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,2,64,0,1,float16,float16,0,7.808437347412109
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,4,64,128,1,float16,float16,0,2.6867945988972983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,4,64,128,1,float16,fp8,0,2.643242677052816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,2,64,0,1,float16,fp8,0,7.8273970286051435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,4,64,128,1,fp8,fp8,0,3.459130605061849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,2,64,0,1,fp8,fp8,0,7.121045430501302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,8,64,128,1,float16,float16,0,2.696159998575846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,8,64,128,1,float16,fp8,0,2.690005302429199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,4,64,0,1,float16,float16,0,7.893168131510417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,8,64,128,1,fp8,fp8,0,3.48093318939209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,4,64,0,1,float16,fp8,0,7.768853505452474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,40,64,128,1,float16,float16,0,1.4604533513387044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,4,64,0,1,fp8,fp8,0,7.190624237060547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,40,64,128,1,float16,fp8,0,1.4410239855448406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,8,64,0,1,float16,float16,0,7.890757242838542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,40,64,128,1,fp8,fp8,0,1.907061258951823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,1,64,128,1,float16,float16,0,1.3020213445027669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,40,64,0,1,float16,float16,0,4.09225591023763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,8,64,0,1,float16,fp8,0,7.8572642008463545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,1,64,128,1,float16,fp8,0,1.2882239818572998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,8,64,0,1,fp8,fp8,0,7.175514856974284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,40,64,0,1,float16,fp8,0,4.0554507573445635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,40,64,0,1,fp8,fp8,0,3.750629425048828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,1,64,128,1,fp8,fp8,0,1.6893493334452312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,2,64,128,1,float16,float16,0,1.3203893502553303
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,1,64,0,1,float16,float16,0,3.920618693033854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,2,64,128,1,float16,fp8,0,1.299781322479248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,1,64,0,1,fp8,fp8,0,3.573941230773926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,1,64,0,1,float16,fp8,0,3.9534505208333335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,2,64,128,1,fp8,fp8,0,1.7101972897847493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,4,64,128,1,float16,float16,0,1.3339200019836426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,2,64,0,1,float16,float16,0,3.982576052347819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,4,64,128,1,float16,fp8,0,1.3074719905853271
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,4,64,128,1,fp8,fp8,0,1.7277812957763672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,2,64,0,1,fp8,fp8,0,3.582890510559082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,2,64,0,1,float16,fp8,0,3.8494720458984375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,8,64,128,1,float16,float16,0,1.3396746317545574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,4,64,0,1,float16,float16,0,3.9498774210611978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,8,64,128,1,float16,fp8,0,1.3239893118540447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,4,64,0,1,fp8,fp8,0,3.6270453135172525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,8,64,128,1,fp8,fp8,0,1.737328052520752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,4,64,0,1,float16,fp8,0,4.027210553487142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,40,64,128,1,float16,float16,0,0.7466186682383219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,40,64,128,1,float16,fp8,0,0.7595413525899252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,8,64,0,1,float16,float16,0,3.9151360193888345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,40,64,128,1,fp8,fp8,0,0.9748693307240804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,40,64,0,1,float16,float16,0,2.0463627179463706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,1,64,128,1,float16,float16,0,0.6968159675598145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,8,64,0,1,float16,fp8,0,3.890080134073893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,8,64,0,1,fp8,fp8,0,3.6263039906819663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,1,64,128,1,float16,fp8,0,0.6759306589762369
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,40,64,0,1,float16,fp8,0,2.052346706390381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,40,64,0,1,fp8,fp8,0,1.923349380493164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,1,64,128,1,fp8,fp8,0,0.8934346834818522
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,1,64,0,1,float16,float16,0,1.99236265818278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,2,64,128,1,float16,float16,0,0.6961493492126465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,2,64,128,1,float16,fp8,0,0.6847360134124756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,2,64,128,1,fp8,fp8,0,0.9045120080312093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,1,64,0,1,fp8,fp8,0,1.8354986508687336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,1,64,0,1,float16,fp8,0,1.9643893241882324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,2,64,0,1,float16,float16,0,1.9804800351460774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,4,64,128,1,float16,float16,0,0.6964426835378011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,2,64,0,1,float16,fp8,0,1.9728800455729167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,2,64,0,1,fp8,fp8,0,1.8403147061665852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,8,64,128,1,float16,float16,0,0.7012960116068522
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,4,64,128,1,float16,fp8,0,0.6876373291015625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,4,64,0,1,float16,fp8,0,1.9898932774861653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,4,64,128,1,fp8,fp8,0,0.9007999897003174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,4,64,0,1,float16,float16,0,2.0014185905456543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,4,64,0,1,fp8,fp8,0,1.8296747207641602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,8,64,128,1,float16,fp8,0,0.6988906860351562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,8,64,0,1,float16,float16,0,1.990213394165039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,8,64,128,1,fp8,fp8,0,0.9095199902852377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,40,64,128,1,float16,float16,0,0.4079893430074056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,8,64,0,1,fp8,fp8,0,1.8385440508524578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,40,64,128,1,float16,fp8,0,0.41208000977834064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,8,64,0,1,float16,fp8,0,1.9806826909383137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,40,64,0,1,float16,float16,0,1.0717333157857258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,40,64,128,1,fp8,fp8,0,0.5315200090408325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,40,64,0,1,float16,fp8,0,1.0709706942240398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,1,64,0,1,float16,float16,0,1.0414400100708008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,1,64,128,1,float16,float16,0,0.38192001978556317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,40,64,0,1,fp8,fp8,0,0.9769972960154215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,1,64,128,1,float16,fp8,0,0.3771786689758301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,1,64,128,1,fp8,fp8,0,0.49027733008066815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,2,64,128,1,float16,float16,0,0.3831520080566406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,1,64,0,1,float16,fp8,0,1.040010690689087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,1,64,0,1,fp8,fp8,0,0.9311253229777018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,2,64,128,1,float16,fp8,0,0.37726398309071857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,2,64,0,1,float16,float16,0,1.04147736231486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,2,64,128,1,fp8,fp8,0,0.4925440152486165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,4,64,128,1,float16,float16,0,0.38450666268666583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,2,64,0,1,float16,fp8,0,1.0364533265431721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,2,64,0,1,fp8,fp8,0,0.9349386692047119
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,4,64,128,1,float16,fp8,0,0.3797760009765625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,4,64,128,1,fp8,fp8,0,0.49083733558654785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,4,64,0,1,float16,float16,0,1.047327995300293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,4,64,0,1,float16,fp8,0,1.0410186449686687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,8,64,128,1,float16,float16,0,0.3869386514027913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,4,64,0,1,fp8,fp8,0,0.9416320323944092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,8,64,128,1,float16,fp8,0,0.3837013244628906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,8,64,0,1,float16,float16,0,1.0465386708577473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,8,64,128,1,fp8,fp8,0,0.4963466723759969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,8,64,0,1,float16,fp8,0,1.04639466603597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,40,64,128,1,float16,float16,0,0.2741866707801819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,8,64,0,1,fp8,fp8,0,0.9401333332061768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,40,64,0,1,float16,float16,0,0.5893813371658325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,40,64,128,1,float16,fp8,0,0.2739680012067159
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,40,64,128,1,fp8,fp8,0,0.33294399579366046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,40,64,0,1,float16,fp8,0,0.5899413426717123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,40,64,0,1,fp8,fp8,0,0.5469599962234497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,1,64,128,1,float16,float16,0,0.2734666665395101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,1,64,0,1,float16,float16,0,0.585370659828186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,1,64,128,1,float16,fp8,0,0.2728586594263713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,1,64,128,1,fp8,fp8,0,0.3309706648190816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,1,64,0,1,float16,fp8,0,0.5864799817403158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,1,64,0,1,fp8,fp8,0,0.5421813329060873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,2,64,128,1,float16,float16,0,0.274122675259908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,2,64,128,1,float16,fp8,0,0.27301865816116333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,2,64,0,1,float16,float16,0,0.585749348004659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,2,64,128,1,fp8,fp8,0,0.33155733346939087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,4,64,128,1,fp8,fp8,0,0.3327573339144389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,2,64,0,1,float16,fp8,0,0.5857653220494589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,2,64,0,1,fp8,fp8,0,0.5453439950942993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,4,64,128,1,float16,float16,0,0.27238933245340985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,4,64,128,1,float16,fp8,0,0.2734453280766805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,4,64,0,1,float16,float16,0,0.5858186483383179
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,8,64,128,1,float16,float16,0,0.2730026642481486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,4,64,0,1,float16,fp8,0,0.5879306793212891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,4,64,0,1,fp8,fp8,0,0.5455199877421061
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,8,64,128,1,float16,fp8,0,0.27455999453862506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,8,64,0,1,float16,float16,0,0.5842399994532267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,8,64,128,1,fp8,fp8,0,0.3304479916890462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,8,64,0,1,float16,fp8,0,0.5858879884084066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,8,64,0,1,fp8,fp8,0,0.5468053420384725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,1,64,128,1,float16,float16,0,3.4386186599731445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,1,64,128,1,float16,fp8,0,3.3928960164388022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,1,64,128,1,fp8,fp8,0,4.434368133544922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,2,64,128,1,float16,float16,0,3.5086987813313804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,2,64,128,1,float16,fp8,0,3.476319948832194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,1,64,0,1,float16,float16,0,7.646997451782227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,1,64,0,1,fp8,fp8,0,6.975562413533528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,1,64,0,1,float16,fp8,0,7.6163787841796875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,2,64,128,1,fp8,fp8,0,4.501274744669597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,2,64,0,1,float16,float16,0,7.759136199951172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,4,64,128,1,float16,float16,0,3.5342559814453125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,4,64,128,1,float16,fp8,0,3.5006027221679688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,2,64,0,1,fp8,fp8,0,7.039648056030273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,2,64,0,1,float16,fp8,0,7.678223927815755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,4,64,128,1,fp8,fp8,0,4.5589173634847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,8,64,128,1,float16,float16,0,3.572538693745931
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,4,64,0,1,float16,float16,0,7.686410903930664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,8,64,128,1,float16,fp8,0,3.544133186340332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,4,64,0,1,float16,fp8,0,7.722378412882487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,4,64,0,1,fp8,fp8,0,7.142650604248047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,8,64,128,1,fp8,fp8,0,4.603589375813802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,40,64,128,1,float16,float16,0,1.9557654062906902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,40,64,128,1,float16,fp8,0,1.9252907435099285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,8,64,0,1,float16,float16,0,7.6914933522542315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,40,64,128,1,fp8,fp8,0,2.510181268056234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,40,64,0,1,float16,float16,0,4.080522537231445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,8,64,0,1,float16,fp8,0,7.704469045003255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,8,64,0,1,fp8,fp8,0,7.112122853597005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,1,64,128,1,float16,float16,0,1.7138773600260417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,40,64,0,1,float16,fp8,0,4.098512013753255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,40,64,0,1,fp8,fp8,0,3.7883733113606772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,1,64,128,1,float16,fp8,0,1.6735626856486003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,1,64,128,1,fp8,fp8,0,2.2574240366617837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,1,64,0,1,float16,float16,0,3.8001600901285806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,2,64,128,1,float16,float16,0,1.7404425938924153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,1,64,0,1,fp8,fp8,0,3.4813334147135415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,2,64,128,1,float16,fp8,0,1.7059733072916667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,1,64,0,1,float16,fp8,0,3.8036746978759766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,2,64,128,1,fp8,fp8,0,2.25981871287028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,2,64,0,1,float16,float16,0,3.8511041005452475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,4,64,128,1,float16,float16,0,1.7384479840596516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,2,64,0,1,float16,fp8,0,3.821791966756185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,4,64,128,1,float16,fp8,0,1.721530596415202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,2,64,0,1,fp8,fp8,0,3.5521599451700845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,4,64,128,1,fp8,fp8,0,2.2864534060160318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,4,64,0,1,float16,float16,0,3.838298797607422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,8,64,128,1,float16,float16,0,1.7567946116129558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,8,64,128,1,float16,fp8,0,1.7442399660746257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,4,64,0,1,float16,fp8,0,3.939359982808431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,8,64,128,1,fp8,fp8,0,2.314682642618815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,4,64,0,1,fp8,fp8,0,3.566464106241862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,40,64,128,1,float16,float16,0,0.959333340326945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,8,64,0,1,float16,float16,0,3.8912318547566733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,40,64,128,1,float16,fp8,0,0.9784320195515951
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,40,64,128,1,fp8,fp8,0,1.2612160046895344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,40,64,0,1,float16,float16,0,2.0112160046895347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,8,64,0,1,float16,fp8,0,3.8293708165486655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,8,64,0,1,fp8,fp8,0,3.5942773818969727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,1,64,128,1,float16,float16,0,0.8847839832305908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,40,64,0,1,float16,fp8,0,2.014527956644694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,40,64,0,1,fp8,fp8,0,1.8885706265767415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,1,64,128,1,float16,fp8,0,0.8538080056508383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,1,64,128,1,fp8,fp8,0,1.1393919785817463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,1,64,0,1,float16,float16,0,1.934384028116862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,2,64,128,1,float16,float16,0,0.8835999965667725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,1,64,0,1,float16,fp8,0,1.9060319264729817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,1,64,0,1,fp8,fp8,0,1.796112060546875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,2,64,128,1,float16,fp8,0,0.8618720372517904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,2,64,128,1,fp8,fp8,0,1.142367998758952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,2,64,0,1,float16,float16,0,1.9208159446716309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,4,64,128,1,float16,float16,0,0.8833386898040771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,2,64,0,1,float16,fp8,0,1.923850695292155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,2,64,0,1,fp8,fp8,0,1.7890186309814453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,4,64,128,1,float16,fp8,0,0.8680480321248373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,4,64,0,1,float16,float16,0,1.9508053461710613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,4,64,128,1,fp8,fp8,0,1.1501013437906902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,4,64,0,1,float16,fp8,0,1.911776065826416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,8,64,128,1,float16,float16,0,0.9045066833496094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,4,64,0,1,fp8,fp8,0,1.7817066510518391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,8,64,128,1,float16,fp8,0,0.8785226345062256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,8,64,0,1,float16,float16,0,1.9306666056315105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,8,64,128,1,fp8,fp8,0,1.1610933144887288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,40,64,128,1,float16,float16,0,0.5069493452707926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,40,64,0,1,float16,float16,0,1.0328426361083984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,8,64,0,1,float16,fp8,0,1.931711991628011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,40,64,128,1,float16,fp8,0,0.508346676826477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,8,64,0,1,fp8,fp8,0,1.7928214073181152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,1,64,128,1,float16,float16,0,0.466597318649292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,40,64,128,1,fp8,fp8,0,0.6604746580123901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,40,64,0,1,float16,fp8,0,1.0436426798502605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,1,64,128,1,float16,fp8,0,0.4604213237762451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,40,64,0,1,fp8,fp8,0,0.9836906592051188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,1,64,0,1,float16,float16,0,0.9966186682383219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,1,64,128,1,fp8,fp8,0,0.6033546527226766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,1,64,0,1,float16,fp8,0,0.9896053473154703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,2,64,128,1,float16,float16,0,0.4681386550267537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,1,64,0,1,fp8,fp8,0,0.9236693382263184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,2,64,0,1,float16,float16,0,1.0054773489634197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,4,64,128,1,float16,float16,0,0.47017065684000653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,2,64,128,1,float16,fp8,0,0.4623733361562093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,2,64,128,1,fp8,fp8,0,0.6060266494750977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,2,64,0,1,float16,fp8,0,0.9899093310038248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,2,64,0,1,fp8,fp8,0,0.9194453557332357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,4,64,128,1,float16,fp8,0,0.46267199516296387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,4,64,0,1,float16,float16,0,1.0015839735666912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,4,64,128,1,fp8,fp8,0,0.6137066682179769
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,8,64,128,1,float16,float16,0,0.4747680028279622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,4,64,0,1,float16,fp8,0,0.9959786732991537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,4,64,0,1,fp8,fp8,0,0.9334186712900797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,8,64,128,1,float16,fp8,0,0.4671359856923421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,8,64,0,1,float16,float16,0,1.0028639634450276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,8,64,128,1,fp8,fp8,0,0.6139253377914429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,40,64,128,1,float16,float16,0,0.2799999912579854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,8,64,0,1,float16,fp8,0,0.9980533123016357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,8,64,0,1,fp8,fp8,0,0.9382666746775309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,40,64,0,1,float16,float16,0,0.5549546480178833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,40,64,128,1,float16,fp8,0,0.2847306728363037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,40,64,128,1,fp8,fp8,0,0.36722131570180255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,40,64,0,1,float16,fp8,0,0.5541546742121378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,40,64,0,1,fp8,fp8,0,0.5076373418172201
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,1,64,128,1,float16,float16,0,0.2643946607907613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,1,64,0,1,float16,float16,0,0.5368320147196451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,1,64,128,1,float16,fp8,0,0.2600586613019307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,1,64,128,1,fp8,fp8,0,0.3384480079015096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,1,64,0,1,float16,fp8,0,0.5355626742045084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,1,64,0,1,fp8,fp8,0,0.4774506489435832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,2,64,128,1,float16,float16,0,0.26311999559402466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,2,64,128,1,float16,fp8,0,0.26078933477401733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,2,64,0,1,float16,float16,0,0.5385226806004842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,2,64,128,1,fp8,fp8,0,0.33843199412027997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,2,64,0,1,float16,fp8,0,0.5359359979629517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,2,64,0,1,fp8,fp8,0,0.48235201835632324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,4,64,128,1,float16,float16,0,0.26504000027974445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,4,64,128,1,float16,fp8,0,0.2611146569252014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,4,64,0,1,float16,float16,0,0.5360639890034994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,4,64,128,1,fp8,fp8,0,0.34175999959309894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,4,64,0,1,float16,fp8,0,0.5361973444620768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,4,64,0,1,fp8,fp8,0,0.4824106693267822
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,8,64,128,1,float16,float16,0,0.26717867453893024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,8,64,0,1,float16,float16,0,0.5410613218943278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,8,64,128,1,float16,fp8,0,0.2654133240381877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,8,64,128,1,fp8,fp8,0,0.34308799107869464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,8,64,0,1,float16,fp8,0,0.535642663637797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,8,64,0,1,fp8,fp8,0,0.4855733315149943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,40,64,128,1,float16,float16,0,0.19139200448989868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,40,64,0,1,float16,float16,0,0.3170773386955261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,40,64,128,1,float16,fp8,0,0.19211200873057047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,40,64,128,1,fp8,fp8,0,0.23384533325831094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,40,64,0,1,float16,fp8,0,0.317194660504659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,40,64,0,1,fp8,fp8,0,0.29622934261957806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,1,64,128,1,float16,float16,0,0.18801599740982056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,1,64,0,1,float16,float16,0,0.3154826760292053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,1,64,128,1,float16,fp8,0,0.19021334250768027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,1,64,128,1,fp8,fp8,0,0.23141332467397055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,1,64,0,1,float16,fp8,0,0.3150186737378438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,1,64,0,1,fp8,fp8,0,0.2955893278121948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,2,64,128,1,float16,float16,0,0.18992000818252563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,2,64,0,1,float16,float16,0,0.3131626645723979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,2,64,128,1,float16,fp8,0,0.18913066387176514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,2,64,128,1,fp8,fp8,0,0.23292799790700278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,4,64,128,1,fp8,fp8,0,0.23136534293492636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,2,64,0,1,float16,fp8,0,0.3144320050875346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,2,64,0,1,fp8,fp8,0,0.29550399382909137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,4,64,128,1,float16,float16,0,0.1897546648979187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,4,64,0,1,float16,float16,0,0.313482662041982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,4,64,128,1,float16,fp8,0,0.19056532780329385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,4,64,0,1,float16,fp8,0,0.3152959942817688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,4,64,0,1,fp8,fp8,0,0.2944906751314799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,8,64,128,1,float16,float16,0,0.1906613310178121
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,8,64,0,1,float16,float16,0,0.31482134262720746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,8,64,128,1,float16,fp8,0,0.19041067361831665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,8,64,128,1,fp8,fp8,0,0.23297599951426187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,8,64,0,1,float16,fp8,0,0.31572266419728595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,8,64,0,1,fp8,fp8,0,0.2972053289413452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,1,64,128,1,float16,float16,0,2.539445400238037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,1,64,128,1,float16,fp8,0,2.5081653594970703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,1,64,128,1,fp8,fp8,0,3.3188533782958984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,2,64,128,1,float16,float16,0,2.60969607035319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,1,64,0,1,float16,float16,0,4.723461469014485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,1,64,0,1,float16,fp8,0,4.678805351257324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,2,64,128,1,float16,fp8,0,2.554639975229899
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,1,64,0,1,fp8,fp8,0,4.317482630411784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,2,64,128,1,fp8,fp8,0,3.36407470703125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,4,64,128,1,float16,float16,0,2.6372106870015464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,2,64,0,1,float16,float16,0,4.728864034016927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,2,64,0,1,float16,fp8,0,4.744261423746745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,2,64,0,1,fp8,fp8,0,4.384453455607097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,4,64,128,1,float16,fp8,0,2.5797120730082193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,4,64,0,1,float16,float16,0,4.8096052805582685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,4,64,128,1,fp8,fp8,0,3.3953332901000977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,8,64,128,1,float16,float16,0,2.6530027389526367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,8,64,128,1,float16,fp8,0,2.620181401570638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,4,64,0,1,fp8,fp8,0,4.435503959655762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,4,64,0,1,float16,fp8,0,4.701962788899739
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,8,64,0,1,float16,float16,0,4.884005228678386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,8,64,128,1,fp8,fp8,0,3.4147841135660806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,40,64,128,1,float16,float16,0,1.4375893274943035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,40,64,0,1,float16,float16,0,2.5190614064534507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,40,64,128,1,float16,fp8,0,1.4150080680847168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,40,64,128,1,fp8,fp8,0,1.8577386538187664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,8,64,0,1,float16,fp8,0,4.804762522379558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,8,64,0,1,fp8,fp8,0,4.454506556193034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,40,64,0,1,float16,fp8,0,2.511770725250244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,40,64,0,1,fp8,fp8,0,2.362127939860026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,1,64,128,1,float16,float16,0,1.2680266698201497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,1,64,128,1,float16,fp8,0,1.2496586640675862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,1,64,128,1,fp8,fp8,0,1.6530399322509766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,1,64,0,1,float16,float16,0,2.3581493695576987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,1,64,0,1,fp8,fp8,0,2.1696747144063315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,1,64,0,1,float16,fp8,0,2.331754684448242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,2,64,128,1,float16,float16,0,1.2977333068847656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,2,64,128,1,float16,fp8,0,1.2546453475952148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,2,64,0,1,float16,float16,0,2.344330628712972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,2,64,128,1,fp8,fp8,0,1.6659679412841797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,4,64,128,1,float16,float16,0,1.2927893002827961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,2,64,0,1,float16,fp8,0,2.323359966278076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,2,64,0,1,fp8,fp8,0,2.1882346471150718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,4,64,128,1,float16,fp8,0,1.2787093321482341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,4,64,0,1,float16,float16,0,2.367856025695801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,4,64,128,1,fp8,fp8,0,1.671877384185791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,8,64,128,1,float16,float16,0,1.3047146797180176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,4,64,0,1,float16,fp8,0,2.3321173985799155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,4,64,0,1,fp8,fp8,0,2.182682673136393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,8,64,128,1,float16,fp8,0,1.287450631459554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,8,64,128,1,fp8,fp8,0,1.7150239944458008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,40,64,128,1,float16,float16,0,0.7274026870727539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,8,64,0,1,float16,float16,0,2.3646720250447593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,40,64,128,1,float16,fp8,0,0.7262506484985352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,40,64,0,1,float16,float16,0,1.261029322942098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,8,64,0,1,float16,fp8,0,2.3495519955952964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,8,64,0,1,fp8,fp8,0,2.191866715749105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,40,64,128,1,fp8,fp8,0,0.9439093271891276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,40,64,0,1,float16,fp8,0,1.268234650293986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,40,64,0,1,fp8,fp8,0,1.2005919615427654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,1,64,128,1,float16,float16,0,0.660858670870463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,1,64,128,1,float16,fp8,0,0.6492746671040853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,1,64,0,1,float16,float16,0,1.199893315633138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,1,64,128,1,fp8,fp8,0,0.8553546269734701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,1,64,0,1,float16,fp8,0,1.1828587055206299
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,2,64,128,1,float16,float16,0,0.666048010190328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,1,64,0,1,fp8,fp8,0,1.107914686203003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,2,64,0,1,float16,fp8,0,1.1909866333007812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,2,64,128,1,float16,fp8,0,0.6541546583175659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,2,64,0,1,float16,float16,0,1.2082506815592449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,2,64,128,1,fp8,fp8,0,0.8587520122528076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,4,64,128,1,float16,float16,0,0.6617226600646973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,2,64,0,1,fp8,fp8,0,1.1116053263346355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,4,64,128,1,float16,fp8,0,0.6576533317565918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,4,64,0,1,float16,float16,0,1.2119839986165364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,4,64,128,1,fp8,fp8,0,0.8631040255228678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,4,64,0,1,float16,fp8,0,1.1973066329956055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,8,64,128,1,float16,float16,0,0.6699253718058268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,4,64,0,1,fp8,fp8,0,1.1223519643147786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,8,64,128,1,float16,fp8,0,0.663045326868693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,8,64,0,1,float16,float16,0,1.2013386885325115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,8,64,128,1,fp8,fp8,0,0.8735199769337972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,40,64,128,1,float16,float16,0,0.38395198186238605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,8,64,0,1,float16,fp8,0,1.2090880076090496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,40,64,0,1,float16,fp8,0,0.6572106679280599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,8,64,0,1,fp8,fp8,0,1.1254560152689617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,40,64,128,1,float16,fp8,0,0.38844799995422363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,40,64,0,1,float16,float16,0,0.6555306514104208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,40,64,128,1,fp8,fp8,0,0.5044106642405192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,40,64,0,1,fp8,fp8,0,0.6163520018259684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,1,64,128,1,float16,float16,0,0.35571734110514325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,1,64,0,1,float16,float16,0,0.6271359920501709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,1,64,128,1,float16,fp8,0,0.3488159974416097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,1,64,128,1,fp8,fp8,0,0.46092267831166583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,1,64,0,1,float16,fp8,0,0.6241226593653361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,1,64,0,1,fp8,fp8,0,0.5684373378753662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,2,64,128,1,float16,float16,0,0.3574506839116414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,2,64,128,1,float16,fp8,0,0.34882132212320965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,2,64,0,1,float16,float16,0,0.632144014040629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,2,64,128,1,fp8,fp8,0,0.4623146851857503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,2,64,0,1,float16,fp8,0,0.6295040051142374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,2,64,0,1,fp8,fp8,0,0.5692746639251709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,4,64,128,1,float16,float16,0,0.3573066790898641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,4,64,0,1,float16,float16,0,0.6350719928741455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,4,64,128,1,float16,fp8,0,0.35099732875823975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,4,64,128,1,fp8,fp8,0,0.46406932671864826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,4,64,0,1,float16,fp8,0,0.629525343577067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,8,64,128,1,float16,float16,0,0.3588106632232666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,4,64,0,1,fp8,fp8,0,0.5747466484705607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,8,64,0,1,float16,float16,0,0.6358559926350912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,8,64,128,1,float16,fp8,0,0.3578559954961141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,8,64,128,1,fp8,fp8,0,0.4657866557439168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,40,64,0,1,float16,float16,0,0.34855465094248456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,8,64,0,1,float16,fp8,0,0.6329813400904337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,40,64,128,1,float16,float16,0,0.21525333325068155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,8,64,0,1,fp8,fp8,0,0.5767413377761841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,40,64,128,1,float16,fp8,0,0.2196000019709269
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,40,64,128,1,fp8,fp8,0,0.2834933400154114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,40,64,0,1,float16,fp8,0,0.35079999764760333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,40,64,0,1,fp8,fp8,0,0.32897599538167316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,1,64,128,1,float16,float16,0,0.20247467358907065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,1,64,0,1,float16,float16,0,0.32872533798217773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,1,64,128,1,float16,fp8,0,0.19979200760523477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,1,64,128,1,fp8,fp8,0,0.26234666506449383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,1,64,0,1,float16,fp8,0,0.3272320032119751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,1,64,0,1,fp8,fp8,0,0.30846933523813885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,2,64,128,1,float16,float16,0,0.2032159964243571
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,2,64,0,1,float16,float16,0,0.32735999425252277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,2,64,128,1,float16,fp8,0,0.2009119987487793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,2,64,128,1,fp8,fp8,0,0.2649173339207967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,2,64,0,1,float16,fp8,0,0.32604267199834186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,2,64,0,1,fp8,fp8,0,0.30985067288080853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,4,64,128,1,float16,float16,0,0.20408000548680624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,4,64,0,1,float16,float16,0,0.32969067494074505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,4,64,128,1,float16,fp8,0,0.2018186648686727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,4,64,128,1,fp8,fp8,0,0.2648693323135376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,4,64,0,1,float16,fp8,0,0.3287946581840515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,4,64,0,1,fp8,fp8,0,0.31229867537816364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,8,64,128,1,float16,float16,0,0.20434133211771646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,8,64,0,1,float16,float16,0,0.3317546645800273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,8,64,128,1,float16,fp8,0,0.20402665932973227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,8,64,128,1,fp8,fp8,0,0.2664159933725993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,8,64,0,1,float16,fp8,0,0.3304640054702759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,8,64,0,1,fp8,fp8,0,0.3118720054626465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,40,64,128,1,float16,float16,0,0.15017066399256387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,40,64,0,1,float16,float16,0,0.21313599745432535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,40,64,128,1,float16,fp8,0,0.1506666640440623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,40,64,128,1,fp8,fp8,0,0.18609599272410074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,40,64,0,1,float16,fp8,0,0.21202667554219565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,40,64,0,1,fp8,fp8,0,0.2002133329709371
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,1,64,128,1,float16,float16,0,0.14662399888038635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,1,64,0,1,float16,float16,0,0.20964266856511435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,1,64,128,1,float16,fp8,0,0.14727999766667685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,1,64,128,1,fp8,fp8,0,0.17855999867121378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,1,64,0,1,float16,fp8,0,0.20854399601618448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,1,64,0,1,fp8,fp8,0,0.197818656762441
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,2,64,128,1,float16,float16,0,0.14643733700116476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,2,64,0,1,float16,float16,0,0.20898133516311646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,2,64,128,1,float16,fp8,0,0.14671466747919717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,2,64,128,1,fp8,fp8,0,0.17830399672190347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,2,64,0,1,float16,fp8,0,0.2084640065828959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,2,64,0,1,fp8,fp8,0,0.19854400555292764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,4,64,0,1,float16,fp8,0,0.2098346749941508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,8,64,128,1,float16,float16,0,0.14826666315396628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,4,64,128,1,float16,float16,0,0.14703466494878134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,4,64,0,1,float16,float16,0,0.20970666408538818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,4,64,128,1,float16,fp8,0,0.14774933457374573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,4,64,128,1,fp8,fp8,0,0.17747733990351358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,4,64,0,1,fp8,fp8,0,0.19775466124216715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,8,64,0,1,float16,float16,0,0.21191465854644775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,8,64,128,1,float16,fp8,0,0.1493333379427592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,8,64,128,1,fp8,fp8,0,0.18300267060597739
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,8,64,0,1,float16,fp8,0,0.20941867431004843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,8,64,0,1,fp8,fp8,0,0.19934932390848795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,1,64,128,1,float16,float16,0,3.4014291763305664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,1,64,128,1,float16,fp8,0,3.3342612584431968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,2,64,128,1,float16,float16,0,3.380181312561035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,1,64,0,1,float16,float16,0,4.9737599690755205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,1,64,128,1,fp8,fp8,0,4.355962753295898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,1,64,0,1,fp8,fp8,0,4.54307746887207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,1,64,0,1,float16,fp8,0,4.972544034322103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,2,64,0,1,float16,float16,0,4.967743873596191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,2,64,128,1,float16,fp8,0,3.345424016316732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,2,64,128,1,fp8,fp8,0,4.389882723490397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,4,64,128,1,float16,float16,0,3.4071998596191406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,4,64,128,1,float16,fp8,0,3.3854026794433594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,2,64,0,1,float16,fp8,0,4.914501190185547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,2,64,0,1,fp8,fp8,0,4.632266680399577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,4,64,0,1,float16,float16,0,5.0216318766276045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,4,64,128,1,fp8,fp8,0,4.460405349731445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,4,64,0,1,float16,fp8,0,4.980501174926758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,8,64,128,1,float16,float16,0,3.4684747060139975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,8,64,128,1,float16,fp8,0,3.41266663869222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,4,64,0,1,fp8,fp8,0,4.625232060750325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,8,64,0,1,float16,float16,0,5.029589335123698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,8,64,128,1,fp8,fp8,0,4.5516001383463545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,40,64,128,1,float16,float16,0,1.900810718536377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,40,64,128,1,float16,fp8,0,1.8808053334554036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,8,64,0,1,float16,fp8,0,4.991648038228353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,40,64,0,1,float16,float16,0,2.682671864827474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,8,64,0,1,fp8,fp8,0,4.746383984883626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,40,64,128,1,fp8,fp8,0,2.4600693384806314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,40,64,0,1,float16,fp8,0,2.6829280853271484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,1,64,128,1,float16,float16,0,1.6703839302062988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,1,64,128,1,float16,fp8,0,1.652623971303304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,40,64,0,1,fp8,fp8,0,2.541205406188965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,1,64,0,1,float16,float16,0,2.4639786084493003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,1,64,128,1,fp8,fp8,0,2.184037367502848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,1,64,0,1,fp8,fp8,0,2.270751953125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,2,64,128,1,float16,float16,0,1.6846933364868164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,1,64,0,1,float16,fp8,0,2.403173287709554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,2,64,128,1,float16,fp8,0,1.674517313639323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,2,64,0,1,float16,float16,0,2.4611733754475913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,2,64,128,1,fp8,fp8,0,2.19378662109375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,4,64,128,1,float16,float16,0,1.7066879272460938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,2,64,0,1,fp8,fp8,0,2.2879625956217446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,2,64,0,1,float16,fp8,0,2.4573280016581216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,4,64,128,1,float16,fp8,0,1.6631147066752117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,4,64,0,1,float16,float16,0,2.4704160690307617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,4,64,128,1,fp8,fp8,0,2.2030399640401206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,8,64,128,1,float16,float16,0,1.7151039441426594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,4,64,0,1,float16,fp8,0,2.448527971903483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,4,64,0,1,fp8,fp8,0,2.2837653160095215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,8,64,128,1,float16,fp8,0,1.7013707160949707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,8,64,0,1,float16,float16,0,2.511349360148112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,40,64,128,1,float16,float16,0,0.9450559616088867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,8,64,128,1,fp8,fp8,0,2.22707192103068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,40,64,128,1,float16,fp8,0,0.9401760101318359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,8,64,0,1,float16,fp8,0,2.469226678212484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,40,64,0,1,float16,float16,0,1.337813377380371
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,8,64,0,1,fp8,fp8,0,2.3275359471639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,40,64,0,1,float16,fp8,0,1.3430293401082356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,40,64,128,1,fp8,fp8,0,1.2281920115152996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,40,64,0,1,fp8,fp8,0,1.2621066570281982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,1,64,128,1,float16,float16,0,0.8458346525828043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,1,64,128,1,float16,fp8,0,0.8341866334279379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,1,64,0,1,float16,float16,0,1.2468266487121582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,1,64,128,1,fp8,fp8,0,1.102234681447347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,1,64,0,1,float16,fp8,0,1.2283146381378174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,1,64,0,1,fp8,fp8,0,1.15066130956014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,2,64,128,1,float16,float16,0,0.8537173271179199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,2,64,0,1,float16,float16,0,1.2464319864908855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,2,64,128,1,float16,fp8,0,0.8365279833475748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,2,64,128,1,fp8,fp8,0,1.106496016184489
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,2,64,0,1,float16,fp8,0,1.2323893706003826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,4,64,0,1,float16,float16,0,1.2581706841786702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,2,64,0,1,fp8,fp8,0,1.1547253131866455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,4,64,128,1,float16,float16,0,0.85807998975118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,4,64,128,1,float16,fp8,0,0.8446186383565267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,4,64,128,1,fp8,fp8,0,1.1112533410390217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,4,64,0,1,fp8,fp8,0,1.1541279951731365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,4,64,0,1,float16,fp8,0,1.2342666784922283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,8,64,128,1,float16,float16,0,0.8673173586527506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,8,64,0,1,float16,float16,0,1.2650667031606038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,8,64,128,1,float16,fp8,0,0.856655995051066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,8,64,128,1,fp8,fp8,0,1.12718399365743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,40,64,128,1,float16,float16,0,0.4875626564025879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,8,64,0,1,float16,fp8,0,1.247114658355713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,8,64,0,1,fp8,fp8,0,1.1657013098398845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,40,64,0,1,float16,float16,0,0.6848639647165934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,40,64,128,1,float16,fp8,0,0.4924480120340983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,40,64,128,1,fp8,fp8,0,0.6430186827977499
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,40,64,0,1,float16,fp8,0,0.6911679903666178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,40,64,0,1,fp8,fp8,0,0.6558239857355753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,1,64,128,1,float16,float16,0,0.44653864701588947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,1,64,128,1,float16,fp8,0,0.437824010848999
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,1,64,0,1,float16,float16,0,0.6454453468322754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,1,64,128,1,fp8,fp8,0,0.5760266780853271
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,2,64,0,1,float16,float16,0,0.6465813318888346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,2,64,128,1,float16,fp8,0,0.4402720133463542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,1,64,0,1,float16,fp8,0,0.6392480134963989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,1,64,0,1,fp8,fp8,0,0.5967520078023275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,2,64,128,1,float16,float16,0,0.44894933700561523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,2,64,128,1,fp8,fp8,0,0.5819199879964193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,2,64,0,1,float16,fp8,0,0.6418346563975016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,2,64,0,1,fp8,fp8,0,0.5963199933369955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,4,64,128,1,float16,float16,0,0.4504479964574178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,4,64,0,1,float16,float16,0,0.6517279942830404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,4,64,128,1,float16,fp8,0,0.44730134805043537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,4,64,128,1,fp8,fp8,0,0.5888426701227824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,4,64,0,1,float16,fp8,0,0.6425173282623291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,8,64,128,1,float16,float16,0,0.453546682993571
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,8,64,0,1,float16,fp8,0,0.6505226691563925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,4,64,0,1,fp8,fp8,0,0.6019466718037924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,8,64,0,1,float16,float16,0,0.6487253506978353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,8,64,128,1,float16,fp8,0,0.4496533473332723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,8,64,128,1,fp8,fp8,0,0.5928746859232584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,40,64,128,1,float16,float16,0,0.26339733600616455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,8,64,0,1,fp8,fp8,0,0.6069120168685913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,40,64,0,1,float16,float16,0,0.3667519887288411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,40,64,128,1,float16,fp8,0,0.2681120038032532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,40,64,128,1,fp8,fp8,0,0.34543466567993164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,1,64,0,1,float16,float16,0,0.34646932284037274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,40,64,0,1,float16,fp8,0,0.36691733201344806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,1,64,128,1,fp8,fp8,0,0.3178559939066569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,40,64,0,1,fp8,fp8,0,0.34227200349171955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,1,64,128,1,float16,float16,0,0.24355733394622803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,1,64,128,1,float16,fp8,0,0.23993066946665445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,1,64,0,1,fp8,fp8,0,0.3129066626230876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,1,64,0,1,float16,fp8,0,0.34166399637858075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,2,64,128,1,float16,float16,0,0.24462932348251343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,2,64,0,1,float16,float16,0,0.3502346674601237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,2,64,128,1,float16,fp8,0,0.24088533719380698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,2,64,0,1,float16,fp8,0,0.34443732102711994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,2,64,128,1,fp8,fp8,0,0.31860266129175824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,2,64,0,1,fp8,fp8,0,0.31381332874298096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,4,64,128,1,float16,float16,0,0.24438399076461792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,4,64,0,1,float16,float16,0,0.3494453430175781
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,4,64,128,1,float16,fp8,0,0.24288533131281534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,4,64,128,1,fp8,fp8,0,0.32080533107121784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,4,64,0,1,float16,fp8,0,0.3463946580886841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,4,64,0,1,fp8,fp8,0,0.31597334146499634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,8,64,128,1,float16,float16,0,0.24753600358963013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,8,64,0,1,float16,float16,0,0.3489706516265869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,8,64,128,1,float16,fp8,0,0.246288001537323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,8,64,128,1,fp8,fp8,0,0.3229706684748332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,8,64,0,1,float16,fp8,0,0.3490399916966756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,8,64,0,1,fp8,fp8,0,0.31892265876134235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,40,64,128,1,float16,float16,0,0.15294399857521057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,40,64,128,1,float16,fp8,0,0.154341330130895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,40,64,0,1,float16,float16,0,0.196015993754069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,40,64,0,1,float16,fp8,0,0.19833600521087646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,40,64,128,1,fp8,fp8,0,0.20133866866429648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,40,64,0,1,fp8,fp8,0,0.19056000312169394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,1,64,128,1,float16,float16,0,0.141184002161026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,1,64,0,1,float16,float16,0,0.18389334281285605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,1,64,128,1,float16,fp8,0,0.14139733711878458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,1,64,128,1,fp8,fp8,0,0.18419732650121054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,1,64,0,1,float16,fp8,0,0.18276800711949667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,1,64,0,1,fp8,fp8,0,0.17538134256998697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,2,64,128,1,float16,float16,0,0.1420693298180898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,2,64,0,1,float16,float16,0,0.1849600076675415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,2,64,128,1,float16,fp8,0,0.14109333356221518
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,2,64,128,1,fp8,fp8,0,0.1848586599032084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,2,64,0,1,float16,fp8,0,0.184063990910848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,2,64,0,1,fp8,fp8,0,0.17575999101003012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,4,64,128,1,float16,float16,0,0.14260799686113992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,4,64,0,1,float16,float16,0,0.18491200606028238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,4,64,128,1,float16,fp8,0,0.14110400279362997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,4,64,128,1,fp8,fp8,0,0.18574933211008707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,4,64,0,1,float16,fp8,0,0.18427733580271402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,4,64,0,1,fp8,fp8,0,0.17630932728449503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,8,64,128,1,float16,float16,0,0.14458133776982626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,8,64,0,1,float16,float16,0,0.1856586734453837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,8,64,128,1,float16,fp8,0,0.14312000075976053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,40,64,128,1,float16,fp8,0,0.10932266712188721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,8,64,128,1,fp8,fp8,0,0.1883093317349752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,8,64,0,1,float16,fp8,0,0.1846133271853129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,8,64,0,1,fp8,fp8,0,0.1782346765200297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,40,64,128,1,float16,float16,0,0.10759466886520386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,40,64,0,1,float16,float16,0,0.127920001745224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,40,64,128,1,fp8,fp8,0,0.1332533359527588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,40,64,0,1,float16,fp8,0,0.12813333670298258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,40,64,0,1,fp8,fp8,0,0.12131200234095256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,1,64,128,1,float16,float16,0,0.10391466816266377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,1,64,0,1,float16,float16,0,0.12478400270144145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,1,64,128,1,float16,fp8,0,0.10426132877667744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,1,64,128,1,fp8,fp8,0,0.12499733765920003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,1,64,0,1,float16,fp8,0,0.12538133064905801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,1,64,0,1,fp8,fp8,0,0.11843732992808025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,2,64,128,1,float16,float16,0,0.10367467006047566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,2,64,0,1,float16,float16,0,0.12531200051307678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,2,64,128,1,float16,fp8,0,0.10414933164914449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,2,64,128,1,fp8,fp8,0,0.1251359979311625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,2,64,0,1,float16,fp8,0,0.1251359979311625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,2,64,0,1,fp8,fp8,0,0.11838933825492859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,4,64,128,1,float16,float16,0,0.10409599542617798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,4,64,0,1,float16,float16,0,0.1239520013332367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,4,64,128,1,float16,fp8,0,0.10365866621335347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,4,64,128,1,fp8,fp8,0,0.12712533275286356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,4,64,0,1,float16,fp8,0,0.12588799993197122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,4,64,0,1,fp8,fp8,0,0.11944533387819926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,8,64,128,1,float16,float16,0,0.10419733325640361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,8,64,0,1,float16,float16,0,0.12544533610343933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,8,64,128,1,float16,fp8,0,0.10410133004188538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,8,64,128,1,fp8,fp8,0,0.12838932871818542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,8,64,0,1,float16,fp8,0,0.12475732962290446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,8,64,0,1,fp8,fp8,0,0.11896533767382304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,1,64,128,1,float16,float16,0,2.515018622080485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,1,64,128,1,float16,fp8,0,2.485957304636637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,1,64,0,1,float16,float16,0,3.2313919067382812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,1,64,128,1,fp8,fp8,0,3.2188053131103516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,1,64,0,1,fp8,fp8,0,2.923930803934733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,2,64,128,1,float16,float16,0,2.5075467427571616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,1,64,0,1,float16,fp8,0,3.2040799458821616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,2,64,128,1,float16,fp8,0,2.4644959767659507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,2,64,0,1,float16,float16,0,3.2292000452677407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,2,64,128,1,fp8,fp8,0,3.254309336344401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,2,64,0,1,float16,fp8,0,3.182117462158203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,2,64,0,1,fp8,fp8,0,2.9867680867513022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,4,64,128,1,float16,float16,0,2.5191359519958496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,4,64,128,1,float16,fp8,0,2.4814613660176597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,4,64,0,1,float16,float16,0,3.237914721171061
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,4,64,128,1,fp8,fp8,0,3.274709383646647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,8,64,128,1,float16,float16,0,2.544314702351888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,4,64,0,1,float16,fp8,0,3.217754681905111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,4,64,0,1,fp8,fp8,0,2.992549260457357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,8,64,0,1,float16,float16,0,3.284634590148926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,8,64,128,1,float16,fp8,0,2.5163413683573403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,40,64,128,1,float16,float16,0,1.4190773963928223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,40,64,128,1,float16,fp8,0,1.3997759819030762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,8,64,128,1,fp8,fp8,0,3.3257118860880532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,40,64,0,1,float16,float16,0,1.7733866373697917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,8,64,0,1,fp8,fp8,0,3.044912020365397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,8,64,0,1,float16,fp8,0,3.2373758951822915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,40,64,128,1,fp8,fp8,0,1.8153120676676433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,40,64,0,1,float16,fp8,0,1.7726507186889648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,40,64,0,1,fp8,fp8,0,1.6805920600891113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,1,64,128,1,float16,float16,0,1.24836270014445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,1,64,128,1,float16,fp8,0,1.2244959672292073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,1,64,0,1,float16,float16,0,1.6113972663879395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,1,64,0,1,fp8,fp8,0,1.4685920079549153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,1,64,128,1,fp8,fp8,0,1.613386631011963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,1,64,0,1,float16,fp8,0,1.58515199025472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,2,64,128,1,float16,float16,0,1.266858657201131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,2,64,128,1,float16,fp8,0,1.2354826927185059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,2,64,0,1,float16,float16,0,1.629642645517985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,2,64,128,1,fp8,fp8,0,1.6244746843973796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,2,64,0,1,float16,fp8,0,1.5959466298421223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,2,64,0,1,fp8,fp8,0,1.4859573046366374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,4,64,128,1,float16,float16,0,1.2605013052622478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,4,64,0,1,float16,float16,0,1.6217600504557292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,4,64,128,1,float16,fp8,0,1.2437439759572346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,4,64,128,1,fp8,fp8,0,1.6358879407246907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,4,64,0,1,float16,fp8,0,1.6160106658935547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,4,64,0,1,fp8,fp8,0,1.4893706639607747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,8,64,128,1,float16,float16,0,1.274821360905965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,8,64,0,1,float16,float16,0,1.635818640391032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,8,64,128,1,float16,fp8,0,1.2581813335418701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,8,64,128,1,fp8,fp8,0,1.6561813354492188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,40,64,128,1,float16,float16,0,0.7137280305226644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,8,64,0,1,float16,fp8,0,1.6237759590148926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,8,64,0,1,fp8,fp8,0,1.5111467043558757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,40,64,0,1,float16,float16,0,0.8933173020680746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,40,64,128,1,float16,fp8,0,0.7100160121917725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,40,64,128,1,fp8,fp8,0,0.9178720315297445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,40,64,0,1,float16,fp8,0,0.8891200224558512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,40,64,0,1,fp8,fp8,0,0.840229352315267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,1,64,128,1,float16,float16,0,0.6431626478830973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,1,64,0,1,float16,float16,0,0.8230773607889811
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,1,64,128,1,float16,fp8,0,0.6286666790644327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,1,64,128,1,fp8,fp8,0,0.8289439678192139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,1,64,0,1,float16,fp8,0,0.8106026649475098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,1,64,0,1,fp8,fp8,0,0.7528266906738281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,2,64,128,1,float16,float16,0,0.6428906520207723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,2,64,128,1,float16,fp8,0,0.6363733212153116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,2,64,0,1,float16,float16,0,0.8263200124104818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,2,64,128,1,fp8,fp8,0,0.8359253406524658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,2,64,0,1,float16,fp8,0,0.8172693252563477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,2,64,0,1,fp8,fp8,0,0.7555200258890787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,4,64,128,1,float16,float16,0,0.6463786760965983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,4,64,0,1,float16,float16,0,0.8275199731191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,4,64,128,1,float16,fp8,0,0.6386346817016602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,4,64,128,1,fp8,fp8,0,0.8478346665700277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,4,64,0,1,float16,fp8,0,0.8240106900533041
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,4,64,0,1,fp8,fp8,0,0.7634613513946533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,8,64,128,1,float16,float16,0,0.6518986622492472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,8,64,0,1,float16,float16,0,0.8346292972564697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,8,64,128,1,float16,fp8,0,0.644597331682841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,8,64,128,1,fp8,fp8,0,0.8477706909179688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,8,64,0,1,float16,fp8,0,0.8304959932963053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,40,64,128,1,float16,float16,0,0.371509313583374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,8,64,0,1,fp8,fp8,0,0.7703839937845866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,40,64,0,1,float16,float16,0,0.465280016263326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,40,64,128,1,float16,fp8,0,0.37373332182566327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,40,64,128,1,fp8,fp8,0,0.48420266310373944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,40,64,0,1,float16,fp8,0,0.4671039978663127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,40,64,0,1,fp8,fp8,0,0.43246400356292725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,1,64,128,1,float16,float16,0,0.33931199709574383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,1,64,0,1,float16,float16,0,0.43243201573689777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,1,64,128,1,float16,fp8,0,0.33432531356811523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,1,64,128,1,fp8,fp8,0,0.4404640197753906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,1,64,0,1,float16,fp8,0,0.4288746515909831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,1,64,0,1,fp8,fp8,0,0.38946131865183514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,2,64,128,1,float16,float16,0,0.34010132153828937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,2,64,0,1,float16,float16,0,0.4326293468475342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,2,64,128,1,float16,fp8,0,0.3368106683095296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,2,64,128,1,fp8,fp8,0,0.4432479937871297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,2,64,0,1,float16,fp8,0,0.4288160006205241
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,2,64,0,1,fp8,fp8,0,0.3919359842936198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,4,64,128,1,float16,float16,0,0.3421066602071126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,4,64,0,1,float16,float16,0,0.43485331535339355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,4,64,128,1,float16,fp8,0,0.34067201614379883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,4,64,128,1,fp8,fp8,0,0.444543997446696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,4,64,0,1,float16,fp8,0,0.431877334912618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,4,64,0,1,fp8,fp8,0,0.39375468095143634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,8,64,128,1,float16,float16,0,0.345578670501709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,8,64,0,1,float16,float16,0,0.43853334585825604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,8,64,128,1,float16,fp8,0,0.34271466732025146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,8,64,128,1,fp8,fp8,0,0.4498026768366496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,8,64,0,1,float16,fp8,0,0.4374026854832967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,8,64,0,1,fp8,fp8,0,0.3991893529891968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,40,64,128,1,float16,float16,0,0.20389866828918457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,40,64,0,1,float16,float16,0,0.2515733242034912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,40,64,128,1,float16,fp8,0,0.20623467365900675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,40,64,128,1,fp8,fp8,0,0.2675413290659587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,40,64,0,1,float16,fp8,0,0.2536426583925883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,40,64,0,1,fp8,fp8,0,0.2345973253250122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,1,64,128,1,float16,float16,0,0.18770132462183634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,1,64,0,1,float16,float16,0,0.22645332415898642
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,1,64,128,1,float16,fp8,0,0.18478399515151978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,1,64,128,1,fp8,fp8,0,0.24616533517837524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,1,64,0,1,float16,fp8,0,0.22157333294550577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,1,64,0,1,fp8,fp8,0,0.21075199047724405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,2,64,128,1,float16,float16,0,0.18845333655675253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,2,64,0,1,float16,float16,0,0.22659200429916382
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,2,64,128,1,float16,fp8,0,0.1864479978879293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,2,64,128,1,fp8,fp8,0,0.24791999657948813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,2,64,0,1,float16,fp8,0,0.22353599468866983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,2,64,0,1,fp8,fp8,0,0.21293334166208902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,4,64,128,1,float16,float16,0,0.18917866547902426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,4,64,0,1,float16,float16,0,0.22736533482869467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,4,64,128,1,float16,fp8,0,0.18778133392333984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,4,64,128,1,fp8,fp8,0,0.24890132745107016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,4,64,0,1,float16,fp8,0,0.22401066621144614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,4,64,0,1,fp8,fp8,0,0.21546665827433267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,8,64,128,1,float16,float16,0,0.19078399737675986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,8,64,0,1,float16,float16,0,0.22805333137512207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,8,64,128,1,float16,fp8,0,0.19262399276097616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,8,64,128,1,fp8,fp8,0,0.25151999791463214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,8,64,0,1,float16,fp8,0,0.2284640073776245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,8,64,0,1,fp8,fp8,0,0.21751999855041504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,40,64,128,1,float16,float16,0,0.1216319998105367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,40,64,0,1,float16,float16,0,0.1386293371518453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,40,64,128,1,float16,fp8,0,0.12255466977755229
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,40,64,128,1,fp8,fp8,0,0.16114133596420288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,40,64,0,1,float16,fp8,0,0.13989866773287454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,40,64,0,1,fp8,fp8,0,0.13542399803797403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,1,64,128,1,float16,float16,0,0.10980799794197083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,1,64,0,1,float16,float16,0,0.1283626655737559
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,1,64,128,1,float16,fp8,0,0.10975999633471172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,1,64,128,1,fp8,fp8,0,0.140773336092631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,1,64,0,1,float16,fp8,0,0.12727999687194824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,2,64,128,1,float16,float16,0,0.11168533563613892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,1,64,0,1,fp8,fp8,0,0.12025066216786702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,2,64,0,1,float16,float16,0,0.12851199507713318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,2,64,128,1,float16,fp8,0,0.10939733187357585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,2,64,128,1,fp8,fp8,0,0.142277330160141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,2,64,0,1,float16,fp8,0,0.12754666805267334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,2,64,0,1,fp8,fp8,0,0.12110400199890137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,4,64,128,1,float16,float16,0,0.11170132954915364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,4,64,0,1,float16,float16,0,0.12892267107963562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,4,64,128,1,float16,fp8,0,0.11090667049090068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,4,64,128,1,fp8,fp8,0,0.1456106702486674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,4,64,0,1,float16,fp8,0,0.12838400403658548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,4,64,0,1,fp8,fp8,0,0.12315199772516887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,8,64,128,1,float16,float16,0,0.1128000020980835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,8,64,0,1,float16,float16,0,0.130021333694458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,8,64,128,1,float16,fp8,0,0.11422933141390483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,8,64,128,1,fp8,fp8,0,0.14897599816322327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,8,64,0,1,float16,fp8,0,0.12846933801968893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,8,64,0,1,fp8,fp8,0,0.1253546675046285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,40,64,128,1,float16,float16,0,0.08673600355784099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,40,64,0,1,float16,float16,0,0.09356266260147095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,40,64,128,1,float16,fp8,0,0.08730133374532063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,40,64,128,1,fp8,fp8,0,0.10986666878064473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,40,64,0,1,float16,fp8,0,0.0939520001411438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,40,64,0,1,fp8,fp8,0,0.08957866827646892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,1,64,128,1,float16,float16,0,0.08261333405971527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,1,64,0,1,float16,float16,0,0.0918986697991689
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,1,64,128,1,float16,fp8,0,0.08331199983755748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,1,64,128,1,fp8,fp8,0,0.0999679962793986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,1,64,0,1,float16,fp8,0,0.09191999832789104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,1,64,0,1,fp8,fp8,0,0.08746666709582011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,2,64,128,1,float16,float16,0,0.08278400202592213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,2,64,0,1,float16,float16,0,0.09158399701118469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,2,64,128,1,float16,fp8,0,0.08290666838486989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,2,64,128,1,fp8,fp8,0,0.10030933221181233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,2,64,0,1,float16,fp8,0,0.09225599964459737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,2,64,0,1,fp8,fp8,0,0.08806399504343669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,4,64,0,1,fp8,fp8,0,0.08802133798599243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,4,64,128,1,float16,float16,0,0.08298133313655853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,4,64,0,1,float16,float16,0,0.09117866555849712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,4,64,128,1,float16,fp8,0,0.08286400139331818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,4,64,128,1,fp8,fp8,0,0.10064533352851868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,4,64,0,1,float16,fp8,0,0.09148266911506653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,8,64,128,1,float16,float16,0,0.08292266726493835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,8,64,0,1,float16,float16,0,0.09195733070373535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,8,64,128,1,float16,fp8,0,0.08332266906897227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,8,64,128,1,fp8,fp8,0,0.10066133737564087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,8,64,0,1,float16,fp8,0,0.09139200051625569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,8,64,0,1,fp8,fp8,0,0.08738133311271667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,1,64,128,1,float16,float16,0,3.0249547958374023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,1,64,128,1,float16,fp8,0,3.00983460744222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,1,64,0,1,float16,float16,0,3.44484806060791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,1,64,0,1,fp8,fp8,0,3.0932480494181314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,1,64,0,1,float16,fp8,0,3.458693186442057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,1,64,128,1,fp8,fp8,0,3.962736129760742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,2,64,128,1,float16,float16,0,3.0389601389567056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,2,64,0,1,float16,float16,0,3.480031967163086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,2,64,128,1,float16,fp8,0,3.0561440785725913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,2,64,128,1,fp8,fp8,0,3.9865598678588867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,2,64,0,1,fp8,fp8,0,3.1026719411214194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,2,64,0,1,float16,fp8,0,3.4709494908650718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,4,64,128,1,float16,float16,0,3.052090644836426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,4,64,0,1,float16,float16,0,3.5022665659586587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,4,64,128,1,float16,fp8,0,3.048229217529297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,4,64,128,1,fp8,fp8,0,4.020170529683431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,4,64,0,1,fp8,fp8,0,3.1270453135172525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,4,64,0,1,float16,fp8,0,3.4796374638875327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,8,64,128,1,float16,float16,0,3.1307414372762046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,8,64,128,1,float16,fp8,0,3.127610524495443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,8,64,0,1,float16,float16,0,3.582394599914551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,8,64,128,1,fp8,fp8,0,4.060175895690918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,40,64,128,1,float16,float16,0,1.7378506660461426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,8,64,0,1,float16,fp8,0,3.581823984781901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,40,64,128,1,float16,fp8,0,1.7007039388020833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,40,64,0,1,float16,float16,0,1.957274595896403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,8,64,0,1,fp8,fp8,0,3.1708319981892905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,40,64,128,1,fp8,fp8,0,2.166602611541748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,40,64,0,1,float16,fp8,0,1.9262879689534504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,40,64,0,1,fp8,fp8,0,1.718506654103597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,1,64,128,1,float16,float16,0,1.5125813484191895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,1,64,0,1,float16,float16,0,1.729621410369873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,1,64,128,1,float16,fp8,0,1.5134453773498535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,1,64,128,1,fp8,fp8,0,1.989232063293457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,1,64,0,1,float16,fp8,0,1.7342185974121094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,1,64,0,1,fp8,fp8,0,1.5390346844991047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,2,64,128,1,float16,float16,0,1.5223946571350098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,2,64,0,1,float16,float16,0,1.745973269144694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,2,64,128,1,float16,fp8,0,1.5209919611612956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,2,64,0,1,fp8,fp8,0,1.5528799692789714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,2,64,0,1,float16,fp8,0,1.7398932774861653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,2,64,128,1,fp8,fp8,0,2.005861282348633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,4,64,128,1,float16,float16,0,1.5286399523417156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,4,64,0,1,float16,float16,0,1.7452853520711262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,4,64,128,1,float16,fp8,0,1.525594711303711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,4,64,0,1,float16,fp8,0,1.7428266207377117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,4,64,0,1,fp8,fp8,0,1.5650399525960286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,4,64,128,1,fp8,fp8,0,2.0132479667663574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,8,64,128,1,float16,float16,0,1.5717333157857258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,8,64,0,1,float16,float16,0,1.7942026456197102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,8,64,128,1,float16,fp8,0,1.5721333821614583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,8,64,0,1,float16,fp8,0,1.790511926015218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,8,64,128,1,fp8,fp8,0,2.0281386375427246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,40,64,128,1,float16,float16,0,0.8778826395670573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,8,64,0,1,fp8,fp8,0,1.582047939300537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,40,64,0,1,float16,float16,0,0.9887093702952067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,40,64,128,1,float16,fp8,0,0.8584319750467936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,40,64,128,1,fp8,fp8,0,1.0929919878641765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,40,64,0,1,float16,fp8,0,0.9726933638254801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,40,64,0,1,fp8,fp8,0,0.859328031539917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,1,64,128,1,float16,float16,0,0.7716853618621826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,1,64,0,1,float16,float16,0,0.8817120393117269
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,1,64,128,1,float16,fp8,0,0.7725120385487875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,1,64,0,1,float16,fp8,0,0.8822666803995768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,1,64,128,1,fp8,fp8,0,1.0162346363067627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,1,64,0,1,fp8,fp8,0,0.7857386271158854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,2,64,128,1,float16,float16,0,0.7750826676686605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,2,64,0,1,float16,float16,0,0.884335994720459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,2,64,128,1,float16,fp8,0,0.7763093312581381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,2,64,128,1,fp8,fp8,0,1.0132426420847576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,2,64,0,1,float16,fp8,0,0.8830239772796631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,2,64,0,1,fp8,fp8,0,0.7827040354410807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,4,64,128,1,float16,float16,0,0.7809279759724935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,4,64,0,1,float16,float16,0,0.8896586894989014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,4,64,128,1,float16,fp8,0,0.7810986836751302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,4,64,128,1,fp8,fp8,0,1.0136586825052898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,4,64,0,1,float16,fp8,0,0.8905173142751058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,4,64,0,1,fp8,fp8,0,0.7876266638437907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,8,64,128,1,float16,float16,0,0.7862079938252767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,8,64,0,1,float16,float16,0,0.8955199718475342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,8,64,128,1,float16,fp8,0,0.7902080217997233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,8,64,128,1,fp8,fp8,0,1.033999999364217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,8,64,0,1,float16,fp8,0,0.8978079954783121
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,40,64,128,1,float16,float16,0,0.45129601160685223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,8,64,0,1,fp8,fp8,0,0.7988800207773844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,40,64,0,1,float16,float16,0,0.5084746678670248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,40,64,128,1,float16,fp8,0,0.44366931915283203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,1,64,128,1,float16,float16,0,0.4023040135701497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,40,64,128,1,fp8,fp8,0,0.5573600133260092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,40,64,0,1,float16,fp8,0,0.5034613211949667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,40,64,0,1,fp8,fp8,0,0.43882131576538086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,1,64,0,1,float16,float16,0,0.4566986560821533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,1,64,128,1,float16,fp8,0,0.4004053274790446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,1,64,128,1,fp8,fp8,0,0.5241119861602783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,1,64,0,1,float16,fp8,0,0.45664000511169434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,1,64,0,1,fp8,fp8,0,0.4078506628672282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,2,64,128,1,fp8,fp8,0,0.527509331703186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,2,64,128,1,float16,float16,0,0.40216533342997235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,2,64,0,1,float16,float16,0,0.4607146581013997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,4,64,128,1,float16,float16,0,0.4034880002339681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,4,64,0,1,float16,float16,0,0.4606506824493408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,2,64,128,1,float16,fp8,0,0.4030933380126953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,2,64,0,1,float16,fp8,0,0.45896001656850177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,2,64,0,1,fp8,fp8,0,0.40784533818562824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,4,64,128,1,float16,fp8,0,0.40620799859364826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,4,64,128,1,fp8,fp8,0,0.527946670850118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,4,64,0,1,float16,fp8,0,0.4639306863149007
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,4,64,0,1,fp8,fp8,0,0.40801068147023517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,8,64,128,1,float16,float16,0,0.40806933244069415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,8,64,0,1,float16,float16,0,0.46425068378448486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,8,64,128,1,float16,fp8,0,0.4076639811197917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,8,64,128,1,fp8,fp8,0,0.529258648554484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,8,64,0,1,float16,fp8,0,0.46428267161051434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,40,64,128,1,float16,float16,0,0.23923200368881226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,8,64,0,1,fp8,fp8,0,0.4132266839345296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,40,64,0,1,float16,float16,0,0.2698933283487956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,40,64,128,1,float16,fp8,0,0.2365493377049764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,40,64,128,1,fp8,fp8,0,0.2991679906845093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,40,64,0,1,float16,fp8,0,0.26636266708374023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,40,64,0,1,fp8,fp8,0,0.22834134101867676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,1,64,128,1,float16,float16,0,0.2129759987195333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,1,64,0,1,float16,float16,0,0.24546666940053305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,1,64,128,1,float16,fp8,0,0.21555733680725098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,1,64,128,1,fp8,fp8,0,0.2822773257891337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,1,64,0,1,float16,fp8,0,0.24514132738113403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,1,64,0,1,fp8,fp8,0,0.2124320069948832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,2,64,128,1,float16,float16,0,0.21617066860198975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,2,64,0,1,float16,float16,0,0.24357332785924277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,2,64,128,1,float16,fp8,0,0.21684267123540243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,2,64,128,1,fp8,fp8,0,0.2851360042889913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,2,64,0,1,float16,fp8,0,0.2452639937400818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,2,64,0,1,fp8,fp8,0,0.2138986587524414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,4,64,128,1,float16,float16,0,0.21810134251912436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,4,64,0,1,float16,float16,0,0.24741333723068237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,4,64,128,1,float16,fp8,0,0.2183893322944641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,4,64,128,1,fp8,fp8,0,0.28760000069936115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,4,64,0,1,float16,fp8,0,0.24766399463017783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,4,64,0,1,fp8,fp8,0,0.2158506711324056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,8,64,128,1,float16,float16,0,0.21817066272099814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,8,64,0,1,float16,float16,0,0.24836800495783487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,8,64,128,1,float16,fp8,0,0.22040534019470215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,8,64,128,1,fp8,fp8,0,0.28299200534820557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,8,64,0,1,float16,fp8,0,0.246671994527181
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,8,64,0,1,fp8,fp8,0,0.21606934070587158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,40,64,128,1,float16,float16,0,0.13361066579818726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,40,64,0,1,float16,float16,0,0.14324266711870828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,40,64,128,1,float16,fp8,0,0.13079999883969626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,40,64,128,1,fp8,fp8,0,0.16852800051371256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,40,64,0,1,float16,fp8,0,0.14201066891352335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,40,64,0,1,fp8,fp8,0,0.12652800480524698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,1,64,128,1,float16,float16,0,0.11904000242551167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,1,64,0,1,float16,float16,0,0.12923733393351236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,1,64,128,1,float16,fp8,0,0.11962133646011353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,1,64,128,1,fp8,fp8,0,0.15963199734687805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,1,64,0,1,float16,fp8,0,0.1283519963423411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,1,64,0,1,fp8,fp8,0,0.11645866433779399
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,2,64,0,1,fp8,fp8,0,0.1167733371257782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,2,64,128,1,float16,float16,0,0.12132267157236735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,2,64,0,1,float16,float16,0,0.12993599971135458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,2,64,128,1,float16,fp8,0,0.11980266372362773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,2,64,128,1,fp8,fp8,0,0.16003732879956564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,2,64,0,1,float16,fp8,0,0.12993599971135458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,4,64,128,1,float16,float16,0,0.12044266859690349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,4,64,0,1,float16,float16,0,0.13035200039545694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,4,64,128,1,float16,fp8,0,0.12219199538230896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,4,64,128,1,fp8,fp8,0,0.16079466541608176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,4,64,0,1,float16,fp8,0,0.13223999738693237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,4,64,0,1,fp8,fp8,0,0.11758400003115337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,8,64,128,1,float16,float16,0,0.1221386690934499
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,8,64,0,1,float16,float16,0,0.13099732995033264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,8,64,128,1,float16,fp8,0,0.12225600083669026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,8,64,128,1,fp8,fp8,0,0.16286399960517883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,8,64,0,1,float16,fp8,0,0.1318933367729187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,8,64,0,1,fp8,fp8,0,0.1197760005791982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,40,64,128,1,float16,float16,0,0.07906133433183034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,40,64,0,1,float16,float16,0,0.0788373351097107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,40,64,128,1,float16,fp8,0,0.07851199805736542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,40,64,128,1,fp8,fp8,0,0.10128000378608704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,40,64,0,1,float16,fp8,0,0.07890133559703827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,40,64,0,1,fp8,fp8,0,0.07261333366235097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,1,64,128,1,float16,float16,0,0.06980800131956737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,1,64,0,1,float16,float16,0,0.07275199890136719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,1,64,128,1,float16,fp8,0,0.06981866558392842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,1,64,128,1,fp8,fp8,0,0.08789867162704468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,1,64,0,1,float16,fp8,0,0.07307733098665874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,1,64,0,1,fp8,fp8,0,0.06699199974536896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,2,64,128,1,float16,float16,0,0.07027199864387512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,2,64,0,1,float16,float16,0,0.07367999851703644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,2,64,128,1,float16,fp8,0,0.07002133131027222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,2,64,128,1,fp8,fp8,0,0.08932800094286601
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,2,64,0,1,float16,fp8,0,0.07375999788443248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,2,64,0,1,fp8,fp8,0,0.06609599788983662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,4,64,128,1,float16,float16,0,0.07028799752394359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,4,64,0,1,float16,float16,0,0.07484800120194753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,4,64,128,1,float16,fp8,0,0.07157333195209503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,4,64,128,1,fp8,fp8,0,0.09040000041325887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,4,64,0,1,float16,fp8,0,0.07534400125344594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,4,64,0,1,fp8,fp8,0,0.06780800223350525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,8,64,128,1,float16,float16,0,0.07247999807198842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,8,64,0,1,float16,float16,0,0.07458133498827617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,40,64,0,1,float16,float16,0,0.05082666873931885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,8,64,128,1,float16,fp8,0,0.07167999943097432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,8,64,128,1,fp8,fp8,0,0.09424533446629842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,8,64,0,1,float16,fp8,0,0.07428266604741414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,8,64,0,1,fp8,fp8,0,0.06810133159160614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,40,64,128,1,float16,float16,0,0.04961066444714864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,40,64,128,1,float16,fp8,0,0.050479998191197716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,40,64,128,1,fp8,fp8,0,0.060405333836873375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,40,64,0,1,float16,fp8,0,0.05131733417510986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,40,64,0,1,fp8,fp8,0,0.04624533156553904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,1,64,128,1,float16,float16,0,0.04689066608746847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,1,64,0,1,float16,float16,0,0.04795200129350027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,1,64,128,1,float16,fp8,0,0.04775999983151754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,1,64,128,1,fp8,fp8,0,0.057904000083605446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,1,64,0,1,float16,fp8,0,0.047541335225105286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,1,64,0,1,fp8,fp8,0,0.04446933170159658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,2,64,128,1,float16,float16,0,0.04773866633574168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,2,64,0,1,float16,float16,0,0.04749333361784617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,2,64,128,1,float16,fp8,0,0.04804266492525736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,2,64,128,1,fp8,fp8,0,0.05795200169086456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,2,64,0,1,fp8,fp8,0,0.04387199878692627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,2,64,0,1,float16,fp8,0,0.04828799764315287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,4,64,128,1,float16,float16,0,0.04781333108743032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,8,64,128,1,float16,float16,0,0.0488319993019104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,4,64,0,1,float16,float16,0,0.048538664976755776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,4,64,128,1,float16,fp8,0,0.047770669062932335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,4,64,128,1,fp8,fp8,0,0.05843733251094818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,4,64,0,1,float16,fp8,0,0.04837866624196371
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,4,64,0,1,fp8,fp8,0,0.044922664761543274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,8,64,0,1,float16,float16,0,0.04896533489227295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,8,64,128,1,float16,fp8,0,0.04921066761016846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,8,64,128,1,fp8,fp8,0,0.05890666445096334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,8,64,0,1,float16,fp8,0,0.04854399959246317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,8,64,0,1,fp8,fp8,0,0.045696000258127846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,1,64,128,1,float16,float16,0,2.8521547317504883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,1,64,0,1,float16,float16,0,2.8318773905436196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,1,64,128,1,float16,fp8,0,2.8554986317952475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,1,64,0,1,float16,fp8,0,2.8279574712117515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,1,64,0,1,fp8,fp8,0,2.4824372927347818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,1,64,128,1,fp8,fp8,0,3.7542505264282227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,2,64,128,1,float16,float16,0,2.885109265645345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,2,64,0,1,float16,float16,0,2.876986821492513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,2,64,128,1,float16,fp8,0,2.878053347269694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,2,64,0,1,fp8,fp8,0,2.5036746660868325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,2,64,0,1,float16,fp8,0,2.8687146504720054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,2,64,128,1,fp8,fp8,0,3.7607733408610025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,4,64,128,1,float16,float16,0,2.9157546361287436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,4,64,0,1,float16,float16,0,2.8905760447184243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,4,64,128,1,float16,fp8,0,2.8964319229125977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,4,64,0,1,float16,fp8,0,2.8542188008626304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,4,64,128,1,fp8,fp8,0,3.7770506540934243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,4,64,0,1,fp8,fp8,0,2.5202506383260093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,8,64,128,1,float16,float16,0,2.9846134185791016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,8,64,0,1,float16,float16,0,2.957119941711426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,8,64,128,1,float16,fp8,0,2.966106732686361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,8,64,0,1,float16,fp8,0,2.9543094635009766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,8,64,128,1,fp8,fp8,0,3.8415358861287436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,40,64,128,1,float16,float16,0,1.659114678700765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,40,64,0,1,float16,float16,0,1.6431840260823567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,8,64,0,1,fp8,fp8,0,2.580671946207682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,40,64,128,1,float16,fp8,0,1.627354621887207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,40,64,0,1,float16,fp8,0,1.6115093231201172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,40,64,128,1,fp8,fp8,0,2.0404906272888184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,40,64,0,1,fp8,fp8,0,1.4094079335530598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,1,64,128,1,float16,float16,0,1.4414560000101726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,1,64,0,1,float16,float16,0,1.4193387031555176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,1,64,128,1,float16,fp8,0,1.4328640302022297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,1,64,0,1,fp8,fp8,0,1.2436426480611165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,2,64,128,1,float16,float16,0,1.4443306922912598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,1,64,0,1,float16,fp8,0,1.4176054000854492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,1,64,128,1,fp8,fp8,0,1.8714507420857747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,2,64,0,1,float16,float16,0,1.4259360631306965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,2,64,128,1,float16,fp8,0,1.4504960378011067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,2,64,0,1,float16,fp8,0,1.422368049621582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,2,64,128,1,fp8,fp8,0,1.8875999450683594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,2,64,0,1,fp8,fp8,0,1.2537706693013508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,4,64,0,1,float16,float16,0,1.4328907330830891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,4,64,128,1,float16,float16,0,1.4475092887878418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,4,64,128,1,float16,fp8,0,1.4476265907287598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,4,64,0,1,float16,fp8,0,1.4388373692830403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,4,64,0,1,fp8,fp8,0,1.263760010401408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,4,64,128,1,fp8,fp8,0,1.8912426630655925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,8,64,128,1,float16,float16,0,1.4956053098042805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,8,64,0,1,float16,float16,0,1.4781600634257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,8,64,128,1,float16,fp8,0,1.4909547170003254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,8,64,0,1,float16,fp8,0,1.4744693438212078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,8,64,128,1,fp8,fp8,0,1.9183893203735352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,40,64,128,1,float16,float16,0,0.8344426949818929
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,40,64,0,1,float16,float16,0,0.8246346314748129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,8,64,0,1,fp8,fp8,0,1.2893226941426594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,40,64,128,1,float16,fp8,0,0.8218932946523031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,40,64,128,1,fp8,fp8,0,1.0274453163146973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,40,64,0,1,float16,fp8,0,0.8099946975708008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,40,64,0,1,fp8,fp8,0,0.7036320368448893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,1,64,128,1,float16,float16,0,0.7348159948984782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,1,64,0,1,float16,float16,0,0.7200960318247477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,1,64,128,1,float16,fp8,0,0.7338399887084961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,1,64,128,1,fp8,fp8,0,0.9584533373514811
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,1,64,0,1,float16,fp8,0,0.7231679757436117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,1,64,0,1,fp8,fp8,0,0.631279985109965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,2,64,128,1,float16,float16,0,0.7359306812286377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,2,64,0,1,float16,float16,0,0.7235626379648844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,2,64,128,1,float16,fp8,0,0.7363893191019694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,2,64,0,1,float16,fp8,0,0.7250879605611166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,2,64,128,1,fp8,fp8,0,0.9543466567993164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,2,64,0,1,fp8,fp8,0,0.6339840094248453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,4,64,128,1,float16,float16,0,0.7421546777089437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,4,64,0,1,float16,float16,0,0.7272853056589762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,4,64,128,1,float16,fp8,0,0.741429328918457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,4,64,128,1,fp8,fp8,0,0.9592586358388265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,4,64,0,1,float16,fp8,0,0.7288053035736084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,4,64,0,1,fp8,fp8,0,0.6343520085016886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,8,64,128,1,float16,float16,0,0.7492372989654541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,8,64,0,1,float16,float16,0,0.7353653113047282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,8,64,128,1,float16,fp8,0,0.7453333536783854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,8,64,128,1,fp8,fp8,0,0.971834659576416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,8,64,0,1,float16,fp8,0,0.7373332977294922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,40,64,128,1,float16,float16,0,0.432586669921875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,8,64,0,1,fp8,fp8,0,0.647983988126119
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,40,64,0,1,float16,float16,0,0.42496001720428467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,40,64,128,1,float16,fp8,0,0.4249866803487142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,40,64,128,1,fp8,fp8,0,0.5265119870503744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,40,64,0,1,float16,fp8,0,0.41674665609995526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,40,64,0,1,fp8,fp8,0,0.3594133456548055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,1,64,128,1,float16,float16,0,0.3800853490829468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,1,64,0,1,float16,float16,0,0.37142399946848553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,1,64,128,1,float16,fp8,0,0.3800160090128581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,1,64,128,1,fp8,fp8,0,0.49677332242329914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,1,64,0,1,float16,fp8,0,0.37254401048024494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,1,64,0,1,fp8,fp8,0,0.3282666603724162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,2,64,128,1,float16,float16,0,0.3834880193074544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,2,64,0,1,float16,float16,0,0.37438400586446124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,2,64,128,1,float16,fp8,0,0.3821973403294881
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,2,64,128,1,fp8,fp8,0,0.49466665585835773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,2,64,0,1,float16,fp8,0,0.37428800264994305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,2,64,0,1,fp8,fp8,0,0.33009066184361774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,4,64,128,1,float16,float16,0,0.38471468289693195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,4,64,0,1,float16,float16,0,0.3766506512959798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,4,64,128,1,float16,fp8,0,0.385535995165507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,4,64,128,1,fp8,fp8,0,0.5010773340861002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,8,64,128,1,float16,fp8,0,0.3872586488723755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,4,64,0,1,float16,fp8,0,0.3768266836802165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,4,64,0,1,fp8,fp8,0,0.32917867104212445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,8,64,128,1,float16,float16,0,0.386735995610555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,8,64,0,1,float16,float16,0,0.3815786838531494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,8,64,128,1,fp8,fp8,0,0.5007946491241455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,8,64,0,1,float16,fp8,0,0.380570650100708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,40,64,128,1,float16,float16,0,0.22910932699839273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,40,64,0,1,fp8,fp8,0,0.19025067488352457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,8,64,0,1,fp8,fp8,0,0.3333599964777629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,40,64,0,1,float16,float16,0,0.22346667448679605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,40,64,128,1,float16,fp8,0,0.22527466217676798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,40,64,128,1,fp8,fp8,0,0.28229333957036334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,40,64,0,1,float16,fp8,0,0.22130666176478067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,1,64,128,1,float16,float16,0,0.2035199999809265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,1,64,0,1,float16,float16,0,0.19847466548283896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,1,64,128,1,float16,fp8,0,0.20451200008392334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,1,64,128,1,fp8,fp8,0,0.26867733399073285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,1,64,0,1,float16,fp8,0,0.19859200716018677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,1,64,0,1,fp8,fp8,0,0.17361599206924438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,2,64,128,1,float16,float16,0,0.2062399983406067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,2,64,0,1,float16,float16,0,0.2002453406651815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,2,64,128,1,float16,fp8,0,0.20567466815312704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,2,64,128,1,fp8,fp8,0,0.26782933870951336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,2,64,0,1,float16,fp8,0,0.20052266120910645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,2,64,0,1,fp8,fp8,0,0.17662400007247925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,4,64,128,1,float16,float16,0,0.20597867170969644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,4,64,0,1,float16,float16,0,0.20102399587631226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,4,64,128,1,float16,fp8,0,0.20645334323247275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,4,64,128,1,fp8,fp8,0,0.2683573365211487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,4,64,0,1,float16,fp8,0,0.2020053267478943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,4,64,0,1,fp8,fp8,0,0.17637866735458374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,8,64,128,1,float16,float16,0,0.20775999625523886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,8,64,0,1,float16,float16,0,0.20364266633987427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,8,64,128,1,float16,fp8,0,0.20756266514460245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,8,64,128,1,fp8,fp8,0,0.26734399795532227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,8,64,0,1,float16,fp8,0,0.20295999447504678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,8,64,0,1,fp8,fp8,0,0.17795199155807495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,40,64,128,1,float16,float16,0,0.12808000048001608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,40,64,0,1,float16,float16,0,0.12518933415412903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,40,64,128,1,float16,fp8,0,0.12691733241081238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,40,64,128,1,fp8,fp8,0,0.1604106624921163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,1,64,128,1,fp8,fp8,0,0.14940266807874045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,40,64,0,1,float16,fp8,0,0.12269866466522217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,40,64,0,1,fp8,fp8,0,0.10636799534161885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,1,64,128,1,float16,float16,0,0.11451199650764465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,1,64,0,1,float16,float16,0,0.10808533430099487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,1,64,128,1,float16,fp8,0,0.11450133721033733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,1,64,0,1,float16,fp8,0,0.1083679993947347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,1,64,0,1,fp8,fp8,0,0.09815999865531921
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,2,64,128,1,float16,float16,0,0.11414399743080139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,2,64,0,1,float16,float16,0,0.1086186667283376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,2,64,128,1,float16,fp8,0,0.11428800225257874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,2,64,128,1,fp8,fp8,0,0.1502293348312378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,2,64,0,1,float16,fp8,0,0.10904000202814738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,2,64,0,1,fp8,fp8,0,0.09833066662152608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,4,64,128,1,float16,float16,0,0.11522133151690166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,4,64,0,1,float16,float16,0,0.11017066240310669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,8,64,0,1,float16,float16,0,0.11062933007876079
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,4,64,128,1,float16,fp8,0,0.11595732967058818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,4,64,128,1,fp8,fp8,0,0.1508853336175283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,4,64,0,1,float16,fp8,0,0.10998400052388509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,4,64,0,1,fp8,fp8,0,0.09949866930643718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,8,64,128,1,float16,float16,0,0.11595199505488078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,8,64,128,1,float16,fp8,0,0.11678933103879292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,8,64,128,1,fp8,fp8,0,0.1529866655667623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,8,64,0,1,float16,fp8,0,0.11075199643770854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,8,64,0,1,fp8,fp8,0,0.10060266653696696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,40,64,128,1,float16,float16,0,0.07486400008201599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,40,64,0,1,float16,float16,0,0.06949333349863689
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,40,64,128,1,float16,fp8,0,0.07381866872310638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,40,64,128,1,fp8,fp8,0,0.09603733817736308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,1,64,128,1,fp8,fp8,0,0.0853706697622935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,40,64,0,1,float16,fp8,0,0.06896533568700154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,40,64,0,1,fp8,fp8,0,0.06331199904282887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,1,64,128,1,float16,float16,0,0.06621333460013072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,1,64,0,1,float16,float16,0,0.06232533355553945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,1,64,128,1,float16,fp8,0,0.06612266600131989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,1,64,0,1,float16,fp8,0,0.06322133541107178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,1,64,0,1,fp8,fp8,0,0.05565333366394043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,2,64,128,1,float16,float16,0,0.06650133430957794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,2,64,0,1,float16,float16,0,0.06303466856479645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,2,64,128,1,float16,fp8,0,0.0664106657107671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,2,64,128,1,fp8,fp8,0,0.08348799745241801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,2,64,0,1,float16,fp8,0,0.06303466856479645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,2,64,0,1,fp8,fp8,0,0.05603733162085215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,4,64,128,1,float16,float16,0,0.06701866785685222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,4,64,0,1,float16,float16,0,0.06243200103441874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,4,64,128,1,float16,fp8,0,0.06753066678841908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,8,64,128,1,float16,fp8,0,0.06670933465162913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,4,64,128,1,fp8,fp8,0,0.0867039958635966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,4,64,0,1,float16,fp8,0,0.06353599826494853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,4,64,0,1,fp8,fp8,0,0.05724266668160757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,8,64,128,1,float16,float16,0,0.0673333356777827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,8,64,0,1,float16,float16,0,0.06397866706053416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,8,64,128,1,fp8,fp8,0,0.08616532882054646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,8,64,0,1,float16,fp8,0,0.06412266691525777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,8,64,0,1,fp8,fp8,0,0.05817066629727682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,40,64,128,1,float16,float16,0,0.04633066554864248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,40,64,0,1,float16,float16,0,0.04331733286380768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,40,64,128,1,float16,fp8,0,0.04582933088143667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,40,64,128,1,fp8,fp8,0,0.05612266560395559
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,40,64,0,1,float16,fp8,0,0.04342933495839437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,40,64,0,1,fp8,fp8,0,0.03867200016975403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,1,64,128,1,float16,float16,0,0.044213334719340004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,1,64,0,1,float16,float16,0,0.04090133309364319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,1,64,128,1,float16,fp8,0,0.043605332573254905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,1,64,128,1,fp8,fp8,0,0.05349866549173991
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,1,64,0,1,float16,fp8,0,0.0407679999868075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,1,64,0,1,fp8,fp8,0,0.03793599953254064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,2,64,128,1,float16,float16,0,0.04403733213742574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,2,64,0,1,float16,float16,0,0.04052799940109253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,2,64,128,1,float16,fp8,0,0.04410133262475332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,2,64,128,1,fp8,fp8,0,0.053632001082102455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,2,64,0,1,float16,fp8,0,0.041573333243529
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,2,64,0,1,fp8,fp8,0,0.037130666275819145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,4,64,128,1,float16,float16,0,0.04333333174387614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,4,64,0,1,float16,float16,0,0.041493333876132965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,4,64,128,1,float16,fp8,0,0.044581333796183266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,4,64,128,1,fp8,fp8,0,0.05332799752553304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,4,64,0,1,float16,fp8,0,0.041722665230433144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,4,64,0,1,fp8,fp8,0,0.03772799919048945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,8,64,128,1,float16,float16,0,0.0448586642742157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,8,64,0,1,float16,float16,0,0.0417546679576238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,8,64,128,1,float16,fp8,0,0.04499199986457825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,8,64,128,1,fp8,fp8,0,0.05478399991989136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,8,64,0,1,float16,fp8,0,0.042837331692377724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,8,64,0,1,fp8,fp8,0,0.038106667498747505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,40,64,128,1,float16,float16,0,0.03598399957021078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,40,64,0,1,float16,float16,0,0.031386665999889374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,40,64,128,1,float16,fp8,0,0.03528533379236857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,40,64,128,1,fp8,fp8,0,0.04162666698296865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,40,64,0,1,float16,fp8,0,0.03241066634654999
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,40,64,0,1,fp8,fp8,0,0.029189333319664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,1,64,128,1,float16,float16,0,0.034661332766215004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,1,64,0,1,float16,float16,0,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,1,64,128,1,float16,fp8,0,0.03382399926582972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,1,64,128,1,fp8,fp8,0,0.04072533299525579
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,1,64,0,1,float16,fp8,0,0.03148799886306127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,1,64,0,1,fp8,fp8,0,0.027072000006834667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,2,64,128,1,float16,float16,0,0.033376000821590424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,2,64,0,1,float16,float16,0,0.028602667152881622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,2,64,128,1,float16,fp8,0,0.03449599941571554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,2,64,128,1,fp8,fp8,0,0.03993066648642222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,2,64,0,1,float16,fp8,0,0.03153600047032038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,2,64,0,1,fp8,fp8,0,0.02699733277161916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,4,64,128,1,float16,float16,0,0.03456533451875051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,4,64,0,1,float16,float16,0,0.030069333811601002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,4,64,128,1,float16,fp8,0,0.03420799970626831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,4,64,128,1,fp8,fp8,0,0.040463998913764954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,4,64,0,1,float16,fp8,0,0.02975466599067052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,4,64,0,1,fp8,fp8,0,0.027215999861558277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,8,64,128,1,float16,float16,0,0.03443199892838796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,8,64,0,1,float16,float16,0,0.029135999580224354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,8,64,128,1,float16,fp8,0,0.03430933256944021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,8,64,128,1,fp8,fp8,0,0.04110399881998698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,8,64,0,1,float16,fp8,0,0.030661332110563915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,8,64,0,1,fp8,fp8,0,0.028197333216667175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,1,64,0,1,float16,float16,0,1.1654613018035889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,1,64,128,1,float16,float16,0,1.208133300145467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,1,64,0,1,fp8,fp8,0,1.0708373387654622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,1,64,128,1,float16,fp8,0,1.2026560306549072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,1,64,0,1,float16,fp8,0,1.1655200322469075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,1,64,128,1,fp8,fp8,0,1.6421012878417969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,2,64,128,1,float16,float16,0,1.2116533120473225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,2,64,0,1,float16,float16,0,1.174832026163737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,2,64,128,1,float16,fp8,0,1.2085013389587402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,2,64,0,1,float16,fp8,0,1.175994634628296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,2,64,128,1,fp8,fp8,0,1.6485493977864583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,2,64,0,1,fp8,fp8,0,1.0809866587320964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,4,64,128,1,float16,float16,0,1.2222613493601482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,4,64,0,1,float16,float16,0,1.1829439798990886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,4,64,128,1,float16,fp8,0,1.2169919808705647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,4,64,128,1,fp8,fp8,0,1.6557440757751465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,4,64,0,1,float16,fp8,0,1.1843252976735432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,4,64,0,1,fp8,fp8,0,1.0917226473490398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,8,64,128,1,float16,float16,0,1.273738702138265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,8,64,0,1,float16,float16,0,1.237615982691447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,8,64,0,1,float16,fp8,0,1.2280159791310628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,8,64,128,1,float16,fp8,0,1.260101318359375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,8,64,128,1,fp8,fp8,0,1.674517313639323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,40,64,128,1,float16,float16,0,0.7172533671061198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,8,64,0,1,fp8,fp8,0,1.1096320152282715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,40,64,0,1,float16,float16,0,0.7059946854909261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,40,64,128,1,float16,fp8,0,0.7024853229522705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,40,64,128,1,fp8,fp8,0,0.8919626871744791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,40,64,0,1,float16,fp8,0,0.6887093385060629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,40,64,0,1,fp8,fp8,0,0.605674664179484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,1,64,128,1,float16,float16,0,0.6123573382695516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,1,64,0,1,float16,float16,0,0.5955040057500204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,1,64,128,1,float16,fp8,0,0.6104426781336466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,1,64,128,1,fp8,fp8,0,0.8288533687591553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,1,64,0,1,float16,fp8,0,0.5937013228734335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,1,64,0,1,fp8,fp8,0,0.5417973200480143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,2,64,128,1,float16,float16,0,0.6164746681849161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,2,64,0,1,float16,float16,0,0.600272019704183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,2,64,128,1,float16,fp8,0,0.61516801516215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,2,64,128,1,fp8,fp8,0,0.8289813200632731
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,2,64,0,1,float16,fp8,0,0.6001866658528646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,2,64,0,1,fp8,fp8,0,0.5433066685994467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,4,64,128,1,float16,float16,0,0.6179999907811483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,4,64,0,1,float16,float16,0,0.6021386782328287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,4,64,128,1,float16,fp8,0,0.6206933259963989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,4,64,128,1,fp8,fp8,0,0.8334026336669922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,4,64,0,1,float16,fp8,0,0.6027520100275675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,8,64,0,1,float16,float16,0,0.6105493307113647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,4,64,0,1,fp8,fp8,0,0.5502506494522095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,8,64,128,1,float16,float16,0,0.6248693466186523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,8,64,128,1,float16,fp8,0,0.6286826531092325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,8,64,128,1,fp8,fp8,0,0.8453386624654134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,8,64,0,1,float16,fp8,0,0.6158453226089478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,40,64,128,1,float16,float16,0,0.3726773262023926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,8,64,0,1,fp8,fp8,0,0.5556426843007406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,40,64,0,1,float16,float16,0,0.36689066886901855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,40,64,128,1,float16,fp8,0,0.3663040002187093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,40,64,128,1,fp8,fp8,0,0.45530664920806885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,40,64,0,1,float16,fp8,0,0.36105600992838544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,40,64,0,1,fp8,fp8,0,0.31413867076237995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,1,64,128,1,float16,float16,0,0.317631999651591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,1,64,0,1,float16,float16,0,0.3090026577313741
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,1,64,128,1,float16,fp8,0,0.31843199332555133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,1,64,128,1,fp8,fp8,0,0.4247893492380778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,1,64,0,1,float16,fp8,0,0.3107893268267314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,1,64,0,1,fp8,fp8,0,0.28313066562016803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,2,64,128,1,float16,float16,0,0.3190240065256755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,2,64,0,1,float16,float16,0,0.3134346604347229
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,2,64,128,1,float16,fp8,0,0.3203466733296712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,2,64,128,1,fp8,fp8,0,0.4265066782633464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,2,64,0,1,float16,fp8,0,0.3121119936307271
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,2,64,0,1,fp8,fp8,0,0.28359999259312946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,4,64,128,1,float16,float16,0,0.3222080071767171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,4,64,0,1,float16,float16,0,0.31256532669067383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,4,64,128,1,float16,fp8,0,0.3216320077578227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,4,64,128,1,fp8,fp8,0,0.43038400014241535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,4,64,0,1,float16,fp8,0,0.3150986631711324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,8,64,128,1,fp8,fp8,0,0.4298666715621948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,4,64,0,1,fp8,fp8,0,0.28738667567571
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,8,64,128,1,float16,float16,0,0.32701865832010907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,8,64,0,1,float16,float16,0,0.3182719945907593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,8,64,128,1,float16,fp8,0,0.3270240028699239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,8,64,0,1,float16,fp8,0,0.3185279965400696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,8,64,0,1,fp8,fp8,0,0.2898826599121094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,40,64,128,1,float16,float16,0,0.19882132609685263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,40,64,0,1,float16,float16,0,0.19604265689849854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,40,64,128,1,float16,fp8,0,0.19571733474731445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,40,64,128,1,fp8,fp8,0,0.2397813399632772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,40,64,0,1,float16,fp8,0,0.19287467002868652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,40,64,0,1,fp8,fp8,0,0.16941867272059122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,1,64,128,1,float16,float16,0,0.1681706706682841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,1,64,0,1,float16,float16,0,0.16378666957219443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,1,64,128,1,float16,fp8,0,0.1686613361040751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,1,64,128,1,fp8,fp8,0,0.22315732638041177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,1,64,0,1,float16,fp8,0,0.1637493371963501
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,1,64,0,1,fp8,fp8,0,0.15481066703796387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,2,64,128,1,float16,float16,0,0.16901866594950357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,2,64,0,1,float16,float16,0,0.1645813286304474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,2,64,128,1,float16,fp8,0,0.16993065675099692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,2,64,128,1,fp8,fp8,0,0.22483734289805093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,2,64,0,1,float16,fp8,0,0.16595199704170227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,2,64,0,1,fp8,fp8,0,0.15457066893577576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,4,64,128,1,float16,float16,0,0.17069333791732788
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,4,64,0,1,float16,float16,0,0.16685867309570312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,4,64,128,1,float16,fp8,0,0.17092265685399374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,4,64,0,1,float16,fp8,0,0.1674506664276123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,4,64,128,1,fp8,fp8,0,0.225765327612559
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,4,64,0,1,fp8,fp8,0,0.15627200404802957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,8,64,128,1,float16,float16,0,0.17283199230829874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,8,64,0,1,float16,float16,0,0.1685333251953125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,8,64,128,1,float16,fp8,0,0.17314666509628296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,8,64,128,1,fp8,fp8,0,0.22562134265899658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,40,64,128,1,float16,fp8,0,0.10711999734242757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,8,64,0,1,float16,fp8,0,0.16895999511082968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,8,64,0,1,fp8,fp8,0,0.15773866573969522
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,40,64,128,1,float16,float16,0,0.10939199725786845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,40,64,0,1,float16,float16,0,0.11213333408037822
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,40,64,128,1,fp8,fp8,0,0.13168533643086752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,40,64,0,1,float16,fp8,0,0.1104159951210022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,40,64,0,1,fp8,fp8,0,0.09628267089525859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,1,64,128,1,float16,float16,0,0.09177066882451375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,1,64,0,1,float16,float16,0,0.09578133622805278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,1,64,128,1,float16,fp8,0,0.09244267145792644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,1,64,128,1,fp8,fp8,0,0.12077333529790242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,1,64,0,1,float16,fp8,0,0.09531199932098389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,1,64,0,1,fp8,fp8,0,0.08641599615414937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,2,64,128,1,float16,float16,0,0.09275733431180318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,2,64,0,1,fp8,fp8,0,0.08772800366083781
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,2,64,0,1,float16,float16,0,0.09585600097974141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,2,64,128,1,float16,fp8,0,0.09360532959302266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,2,64,128,1,fp8,fp8,0,0.12175466616948445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,2,64,0,1,float16,fp8,0,0.09578133622805278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,4,64,128,1,float16,float16,0,0.093231995900472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,4,64,0,1,float16,float16,0,0.09658132990201314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,4,64,128,1,float16,fp8,0,0.09423999985059102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,4,64,128,1,fp8,fp8,0,0.12222933769226074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,4,64,0,1,float16,fp8,0,0.09703999757766724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,4,64,0,1,fp8,fp8,0,0.08825600147247314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,8,64,128,1,float16,float16,0,0.09458133578300476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,40,64,0,1,float16,float16,0,0.06423466900984447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,8,64,0,1,float16,float16,0,0.09773332873980205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,8,64,128,1,float16,fp8,0,0.09488532940546672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,8,64,128,1,fp8,fp8,0,0.1241333285967509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,8,64,0,1,float16,fp8,0,0.09839466214179993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,40,64,0,1,fp8,fp8,0,0.05630399783452352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,8,64,0,1,fp8,fp8,0,0.08935466408729553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,40,64,128,1,float16,float16,0,0.0643093337615331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,40,64,128,1,float16,fp8,0,0.06352533400058746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,40,64,128,1,fp8,fp8,0,0.07492266595363617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,40,64,0,1,float16,fp8,0,0.06320000191529591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,1,64,128,1,float16,float16,0,0.057616000374158226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,1,64,0,1,float16,float16,0,0.05740800003210703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,1,64,128,1,float16,fp8,0,0.05755733450253805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,1,64,128,1,fp8,fp8,0,0.06651199857393901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,1,64,0,1,float16,fp8,0,0.05734399954477946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,1,64,0,1,fp8,fp8,0,0.04948799808820089
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,2,64,0,1,fp8,fp8,0,0.049813335140546165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,2,64,128,1,float16,float16,0,0.05795733133951823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,2,64,0,1,float16,float16,0,0.057914664347966514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,2,64,128,1,float16,fp8,0,0.05751466751098633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,2,64,128,1,fp8,fp8,0,0.06762666503588359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,4,64,0,1,float16,fp8,0,0.05890133480230967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,2,64,0,1,float16,fp8,0,0.05704533557097117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,4,64,128,1,float16,float16,0,0.058287998040517174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,4,64,0,1,float16,float16,0,0.057904000083605446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,4,64,128,1,float16,fp8,0,0.05851200222969055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,8,64,128,1,fp8,fp8,0,0.06895466645558675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,4,64,128,1,fp8,fp8,0,0.06926933427651723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,4,64,0,1,fp8,fp8,0,0.051370665431022644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,40,64,128,1,float16,float16,0,0.04138666639725367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,8,64,128,1,float16,float16,0,0.059061333537101746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,8,64,0,1,float16,float16,0,0.05821333328882853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,8,64,128,1,float16,fp8,0,0.058506667613983154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,8,64,0,1,float16,fp8,0,0.058693334460258484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,40,64,0,1,fp8,fp8,0,0.0367253323396047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,8,64,0,1,fp8,fp8,0,0.05266666909058889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,40,64,0,1,float16,float16,0,0.039877332746982574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,40,64,128,1,float16,fp8,0,0.04119999955097834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,40,64,128,1,fp8,fp8,0,0.0472320020198822
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,40,64,0,1,float16,fp8,0,0.041536000867684685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,1,64,128,1,float16,float16,0,0.03772799919048945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,1,64,0,1,float16,float16,0,0.03733866661787033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,1,64,128,1,float16,fp8,0,0.03819733361403147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,1,64,128,1,fp8,fp8,0,0.04568533102671305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,1,64,0,1,float16,fp8,0,0.037402667105197906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,1,64,0,1,fp8,fp8,0,0.03515200068553289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,2,64,128,1,float16,float16,0,0.03835200021664301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,2,64,0,1,float16,float16,0,0.037690666814645134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,2,64,128,1,float16,fp8,0,0.03828266759713491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,2,64,128,1,fp8,fp8,0,0.04572799801826477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,2,64,0,1,float16,fp8,0,0.03756266583998998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,2,64,0,1,fp8,fp8,0,0.035962666074434914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,4,64,128,1,float16,float16,0,0.038405333956082664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,4,64,0,1,float16,float16,0,0.03757333258787791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,4,64,128,1,float16,fp8,0,0.03862400104602178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,4,64,128,1,fp8,fp8,0,0.0459146648645401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,4,64,0,1,float16,fp8,0,0.03770666569471359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,4,64,0,1,fp8,fp8,0,0.03579733272393545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,8,64,128,1,float16,float16,0,0.03855466594298681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,8,64,0,1,float16,float16,0,0.038047999143600464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,8,64,128,1,float16,fp8,0,0.039279999832312264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,8,64,128,1,fp8,fp8,0,0.047168001532554626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,8,64,0,1,float16,fp8,0,0.038693333665529885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,8,64,0,1,fp8,fp8,0,0.037647999823093414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,40,64,128,1,float16,float16,0,0.02994133283694585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,40,64,0,1,float16,float16,0,0.0295413335164388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,40,64,128,1,float16,fp8,0,0.029461334149042766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,40,64,128,1,fp8,fp8,0,0.035605333745479584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,40,64,0,1,float16,fp8,0,0.029904000461101532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,40,64,0,1,fp8,fp8,0,0.028725333511829376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,1,64,128,1,float16,float16,0,0.028170667588710785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,1,64,0,1,float16,float16,0,0.027301333844661713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,1,64,128,1,float16,fp8,0,0.027829334139823914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,1,64,128,1,fp8,fp8,0,0.03387200087308884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,1,64,0,1,float16,fp8,0,0.027562665442625683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,1,64,0,1,fp8,fp8,0,0.027402666707833607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,2,64,128,1,float16,float16,0,0.027893332143624622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,2,64,0,1,float16,float16,0,0.027600000301996868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,2,64,128,1,float16,fp8,0,0.027615999182065327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,2,64,128,1,fp8,fp8,0,0.03370666752258936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,2,64,0,1,float16,fp8,0,0.027066667874654133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,2,64,0,1,fp8,fp8,0,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,4,64,128,1,float16,float16,0,0.02771199991305669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,4,64,0,1,float16,float16,0,0.02754666656255722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,4,64,128,1,float16,fp8,0,0.0283146674434344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,4,64,128,1,fp8,fp8,0,0.034741332133611046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,4,64,0,1,float16,fp8,0,0.027290667096773785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,4,64,0,1,fp8,fp8,0,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,8,64,128,1,float16,float16,0,0.028010666370391846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,8,64,0,1,float16,float16,0,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,8,64,128,1,float16,fp8,0,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,8,64,128,1,fp8,fp8,0,0.03446933378775915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,8,64,0,1,float16,fp8,0,0.027829334139823914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,8,64,0,1,fp8,fp8,0,0.02794133375088374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,40,64,128,1,float16,float16,0,0.020213333268960316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,40,64,0,1,float16,float16,0,0.019717333217461903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,40,64,128,1,float16,fp8,0,0.020367999871571858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,40,64,128,1,fp8,fp8,0,0.024853333830833435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,40,64,0,1,float16,fp8,0,0.019989332805077236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,40,64,0,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,1,64,128,1,float16,float16,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,1,64,0,1,float16,float16,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,1,64,128,1,float16,fp8,0,0.019466667125622433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,1,64,128,1,fp8,fp8,0,0.024682665864626568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,1,64,0,1,float16,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,1,64,0,1,fp8,fp8,0,0.02014933278163274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,2,64,128,1,float16,float16,0,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,2,64,0,1,fp8,fp8,0,0.020784000555674236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,2,64,0,1,float16,float16,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,2,64,128,1,float16,fp8,0,0.019727999965349834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,2,64,128,1,fp8,fp8,0,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,2,64,0,1,float16,fp8,0,0.01956266661485036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,4,64,128,1,float16,float16,0,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,4,64,0,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,4,64,128,1,float16,fp8,0,0.01982933282852173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,4,64,128,1,fp8,fp8,0,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,8,64,128,1,fp8,fp8,0,0.024336000283559162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,4,64,0,1,float16,fp8,0,0.019589333484570186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,4,64,0,1,fp8,fp8,0,0.02059200033545494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,8,64,128,1,float16,float16,0,0.01979200045267741
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,8,64,0,1,float16,float16,0,0.019424000134070713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,8,64,128,1,float16,fp8,0,0.020069333414236706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,8,64,0,1,float16,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,8,64,0,1,fp8,fp8,0,0.021429332594076794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,1,64,128,1,float16,float16,0,0.44261332352956134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,1,64,0,1,float16,float16,0,0.4459093411763509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,1,64,128,1,float16,fp8,0,0.4431680043538411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,1,64,128,1,fp8,fp8,0,0.537770668665568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,1,64,0,1,float16,fp8,0,0.44606932004292804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,1,64,0,1,fp8,fp8,0,0.5382826725641886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,2,64,128,1,float16,float16,0,0.44603200753529865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,2,64,128,1,float16,fp8,0,0.44577598571777344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,2,64,0,1,float16,float16,0,0.44788801670074463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,2,64,128,1,fp8,fp8,0,0.5396480162938436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,2,64,0,1,float16,fp8,0,0.4453973372777303
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,2,64,0,1,fp8,fp8,0,0.5398186842600504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,4,64,128,1,float16,float16,0,0.4524746735890706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,4,64,0,1,float16,float16,0,0.45206932226816815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,4,64,128,1,float16,fp8,0,0.4496266841888428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,4,64,128,1,fp8,fp8,0,0.5427999893824259
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,4,64,0,1,float16,fp8,0,0.45218666394551593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,4,64,0,1,fp8,fp8,0,0.54258131980896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,8,64,128,1,float16,fp8,0,0.46169598897298175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,8,64,128,1,float16,float16,0,0.4584266742070516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,8,64,0,1,float16,float16,0,0.4591199954350789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,8,64,0,1,float16,fp8,0,0.4594026803970337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,8,64,128,1,fp8,fp8,0,0.5541653235753378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,8,64,0,1,fp8,fp8,0,0.5528159936269125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,40,64,128,1,float16,float16,0,0.2860320011774699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,40,64,0,1,float16,float16,0,0.2863626678784688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,40,64,128,1,float16,fp8,0,0.27808000644048053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,40,64,128,1,fp8,fp8,0,0.3087199926376343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,40,64,0,1,float16,fp8,0,0.27738134066263836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,40,64,0,1,fp8,fp8,0,0.3110239903132121
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,1,64,128,1,float16,float16,0,0.23041067520777384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,1,64,0,1,float16,float16,0,0.2299306591351827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,1,64,128,1,float16,fp8,0,0.23102933168411255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,2,64,0,1,float16,float16,0,0.231605331103007
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,2,64,128,1,float16,fp8,0,0.23262399435043335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,1,64,128,1,fp8,fp8,0,0.28039999802907306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,1,64,0,1,float16,fp8,0,0.23163733879725137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,1,64,0,1,fp8,fp8,0,0.2796906630198161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,2,64,128,1,float16,float16,0,0.23170133431752524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,2,64,128,1,fp8,fp8,0,0.27988799413045246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,2,64,0,1,float16,fp8,0,0.23362666368484497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,2,64,0,1,fp8,fp8,0,0.2805546720822652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,4,64,128,1,float16,float16,0,0.23490132888158163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,4,64,0,1,float16,float16,0,0.23560533920923868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,4,64,128,1,float16,fp8,0,0.23492799202601114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,4,64,128,1,fp8,fp8,0,0.282970666885376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,4,64,0,1,float16,fp8,0,0.2355253299077352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,4,64,0,1,fp8,fp8,0,0.28732800483703613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,8,64,128,1,float16,float16,0,0.23884799083073935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,8,64,0,1,float16,float16,0,0.23876800139745077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,8,64,128,1,float16,fp8,0,0.23930132389068604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,8,64,128,1,fp8,fp8,0,0.2872053384780884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,8,64,0,1,float16,fp8,0,0.23799999554951987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,8,64,0,1,fp8,fp8,0,0.28591465950012207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,40,64,128,1,float16,float16,0,0.1518933375676473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,40,64,0,1,float16,float16,0,0.15333867073059082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,40,64,128,1,float16,fp8,0,0.14918933312098184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,40,64,128,1,fp8,fp8,0,0.16865599155426025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,40,64,0,1,float16,fp8,0,0.1495146652062734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,40,64,0,1,fp8,fp8,0,0.16645866632461548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,1,64,128,1,float16,float16,0,0.12317333618799846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,1,64,0,1,float16,float16,0,0.12364799777666728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,1,64,128,1,float16,fp8,0,0.12317867080370586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,1,64,128,1,fp8,fp8,0,0.15289066235224405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,1,64,0,1,float16,fp8,0,0.12386133273442586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,2,64,128,1,fp8,fp8,0,0.15373333295186362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,1,64,0,1,fp8,fp8,0,0.15314666430155435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,2,64,128,1,float16,float16,0,0.12346667051315308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,2,64,0,1,float16,float16,0,0.12456533312797546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,2,64,128,1,float16,fp8,0,0.1251306633154551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,2,64,0,1,float16,fp8,0,0.12462932864824931
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,4,64,128,1,float16,float16,0,0.1263040006160736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,2,64,0,1,fp8,fp8,0,0.15365866820017496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,4,64,0,1,float16,float16,0,0.12616533041000366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,4,64,128,1,float16,fp8,0,0.12621333201726279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,4,64,128,1,fp8,fp8,0,0.1550986667474111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,4,64,0,1,float16,fp8,0,0.12569066882133484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,4,64,0,1,fp8,fp8,0,0.1551093359788259
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,8,64,128,1,float16,float16,0,0.12841066718101501
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,8,64,0,1,float16,float16,0,0.1279039978981018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,8,64,128,1,float16,fp8,0,0.1291306714216868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,8,64,128,1,fp8,fp8,0,0.15706132849057516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,40,64,128,1,fp8,fp8,0,0.09611733754475911
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,8,64,0,1,float16,fp8,0,0.12782399853070578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,8,64,0,1,fp8,fp8,0,0.156442662080129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,40,64,128,1,float16,float16,0,0.08290133376916249
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,40,64,0,1,float16,float16,0,0.08451199531555176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,40,64,128,1,float16,fp8,0,0.08230400085449219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,40,64,0,1,float16,fp8,0,0.08240533371766408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,1,64,0,1,float16,fp8,0,0.06994133194287618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,40,64,0,1,fp8,fp8,0,0.09617599844932556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,1,64,128,1,float16,float16,0,0.06889600058396657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,1,64,0,1,float16,float16,0,0.06878933310508728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,1,64,128,1,float16,fp8,0,0.06869866450627644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,1,64,128,1,fp8,fp8,0,0.08573333422342937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,1,64,0,1,fp8,fp8,0,0.08628267049789429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,2,64,128,1,float16,float16,0,0.06897599995136261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,2,64,0,1,float16,float16,0,0.06969066460927327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,2,64,128,1,float16,fp8,0,0.07003733515739441
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,2,64,128,1,fp8,fp8,0,0.08661333719889323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,2,64,0,1,float16,fp8,0,0.06969066460927327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,4,64,0,1,float16,fp8,0,0.07085866729418437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,2,64,0,1,fp8,fp8,0,0.08687466382980347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,4,64,128,1,float16,float16,0,0.06944533189137776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,4,64,0,1,float16,float16,0,0.07001066704591115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,4,64,128,1,float16,fp8,0,0.07012266914049785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,4,64,128,1,fp8,fp8,0,0.08806932965914409
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,4,64,0,1,fp8,fp8,0,0.08774399757385254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,8,64,128,1,float16,float16,0,0.07082666456699371
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,8,64,0,1,float16,float16,0,0.07074666519959767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,8,64,128,1,float16,fp8,0,0.07192533214886983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,8,64,128,1,fp8,fp8,0,0.08730133374532063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,8,64,0,1,float16,fp8,0,0.07181866466999054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,8,64,0,1,fp8,fp8,0,0.08798399567604065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,40,64,128,1,float16,float16,0,0.04644800225893656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,40,64,0,1,float16,float16,0,0.047210668524106346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,40,64,128,1,float16,fp8,0,0.04565866788228353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,40,64,128,1,fp8,fp8,0,0.05562133093674978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,40,64,0,1,float16,fp8,0,0.045781334241231285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,40,64,0,1,fp8,fp8,0,0.05515199899673462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,1,64,128,1,float16,float16,0,0.04072533299525579
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,1,64,0,1,float16,float16,0,0.040378667414188385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,1,64,128,1,float16,fp8,0,0.04015466570854187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,1,64,128,1,fp8,fp8,0,0.0495413343111674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,1,64,0,1,float16,fp8,0,0.04001600046952566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,1,64,0,1,fp8,fp8,0,0.05003733436266581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,2,64,128,1,float16,float16,0,0.040448000033696495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,2,64,0,1,float16,float16,0,0.03959999978542328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,2,64,128,1,float16,fp8,0,0.04028266668319702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,2,64,128,1,fp8,fp8,0,0.04994133114814758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,2,64,0,1,float16,fp8,0,0.04016000032424927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,4,64,0,1,float16,fp8,0,0.04114133367935816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,2,64,0,1,fp8,fp8,0,0.050479998191197716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,4,64,128,1,float16,float16,0,0.04078399886687597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,4,64,0,1,float16,float16,0,0.040762667854626976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,4,64,128,1,float16,fp8,0,0.04120533416668574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,4,64,128,1,fp8,fp8,0,0.05064000189304352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,4,64,0,1,fp8,fp8,0,0.05097599824269613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,8,64,128,1,float16,float16,0,0.04191466669241587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,8,64,0,1,float16,float16,0,0.041493333876132965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,8,64,128,1,float16,fp8,0,0.04205866654713949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,8,64,128,1,fp8,fp8,0,0.05212800204753876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,8,64,0,1,float16,fp8,0,0.041840001940727234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,8,64,0,1,fp8,fp8,0,0.051669334371884666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,40,64,128,1,float16,float16,0,0.02865600089232127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,40,64,0,1,fp8,fp8,0,0.03669333209594091
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,40,64,0,1,float16,float16,0,0.028880000114440918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,40,64,128,1,float16,fp8,0,0.030165334542592365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,40,64,128,1,fp8,fp8,0,0.03677866607904434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,40,64,0,1,float16,fp8,0,0.03001066545645396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,1,64,128,1,float16,float16,0,0.027066667874654133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,1,64,0,1,float16,float16,0,0.026591998835404713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,1,64,128,1,float16,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,1,64,128,1,fp8,fp8,0,0.03498133271932602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,1,64,0,1,float16,fp8,0,0.027317332724730175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,1,64,0,1,fp8,fp8,0,0.03486400097608566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,2,64,128,1,float16,float16,0,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,2,64,0,1,float16,float16,0,0.027136000494162243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,2,64,128,1,float16,fp8,0,0.02646933247645696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,2,64,128,1,fp8,fp8,0,0.0351946676770846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,2,64,0,1,float16,fp8,0,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,2,64,0,1,fp8,fp8,0,0.03513066718975703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,4,64,128,1,float16,float16,0,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,4,64,0,1,float16,float16,0,0.02731200059254964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,4,64,128,1,float16,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,4,64,128,1,fp8,fp8,0,0.034789333740870156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,4,64,0,1,float16,fp8,0,0.02758399893840154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,4,64,0,1,fp8,fp8,0,0.034671999514102936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,8,64,128,1,float16,float16,0,0.027989332874615986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,8,64,0,1,float16,float16,0,0.027701333165168762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,8,64,128,1,float16,fp8,0,0.027829334139823914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,8,64,128,1,fp8,fp8,0,0.03601066768169403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,8,64,0,1,float16,fp8,0,0.02757333219051361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,8,64,0,1,fp8,fp8,0,0.0358240008354187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,40,64,128,1,float16,float16,0,0.022805333137512207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,40,64,0,1,float16,float16,0,0.02292799949645996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,40,64,128,1,float16,fp8,0,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,40,64,128,1,fp8,fp8,0,0.0283146674434344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,40,64,0,1,float16,fp8,0,0.023237332701683044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,40,64,0,1,fp8,fp8,0,0.02739733209212621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,1,64,128,1,float16,float16,0,0.021541332205136616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,1,64,0,1,float16,float16,0,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,1,64,128,1,float16,fp8,0,0.022053333620230358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,1,64,128,1,fp8,fp8,0,0.02685333291689555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,1,64,0,1,float16,fp8,0,0.021498667697111767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,1,64,0,1,fp8,fp8,0,0.026613332331180573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,2,64,128,1,float16,float16,0,0.021727999051411945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,2,64,0,1,float16,float16,0,0.02088533341884613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,4,64,0,1,float16,float16,0,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,4,64,128,1,float16,fp8,0,0.02195200075705846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,2,64,128,1,float16,fp8,0,0.021712000171343487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,2,64,128,1,fp8,fp8,0,0.02629333237806956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,2,64,0,1,float16,fp8,0,0.02180800090233485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,2,64,0,1,fp8,fp8,0,0.027024000883102417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,4,64,128,1,float16,float16,0,0.021935999393463135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,4,64,128,1,fp8,fp8,0,0.0268053337931633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,4,64,0,1,float16,fp8,0,0.021802666286627453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,8,64,0,1,float16,fp8,0,0.022570667167504627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,4,64,0,1,fp8,fp8,0,0.02699200063943863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,8,64,128,1,float16,float16,0,0.022181332111358643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,8,64,0,1,float16,float16,0,0.021477334201335907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,8,64,128,1,float16,fp8,0,0.021754667162895203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,8,64,128,1,fp8,fp8,0,0.0276853342851003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,8,64,0,1,fp8,fp8,0,0.027242665489514668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,40,64,128,1,float16,float16,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,40,64,0,1,float16,float16,0,0.016730666160583496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,40,64,128,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,40,64,128,1,fp8,fp8,0,0.020853333175182343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,40,64,0,1,float16,fp8,0,0.016565332810084026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,40,64,0,1,fp8,fp8,0,0.02033599962790807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,1,64,128,1,float16,float16,0,0.0164533331990242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,1,64,0,1,float16,float16,0,0.015765332927306492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,1,64,128,1,float16,fp8,0,0.016554666062196095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,1,64,128,1,fp8,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,1,64,0,1,float16,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,1,64,0,1,fp8,fp8,0,0.020207999895016353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,2,64,128,1,float16,float16,0,0.015589332828919092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,2,64,0,1,float16,float16,0,0.016074666132529575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,2,64,128,1,float16,fp8,0,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,2,64,128,1,fp8,fp8,0,0.01970133309563001
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,2,64,0,1,float16,fp8,0,0.01661866654952367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,2,64,0,1,fp8,fp8,0,0.019413333386182785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,4,64,128,1,float16,float16,0,0.016303999970356624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,4,64,0,1,float16,float16,0,0.016469333320856094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,4,64,128,1,float16,fp8,0,0.016250666230916977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,4,64,128,1,fp8,fp8,0,0.020037333170572918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,4,64,0,1,float16,fp8,0,0.016613333175579708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,4,64,0,1,fp8,fp8,0,0.019600000232458115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,8,64,128,1,float16,float16,0,0.016399999459584553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,8,64,0,1,float16,float16,0,0.01565333331624667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,8,64,128,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,8,64,128,1,fp8,fp8,0,0.02075200031201045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,8,64,0,1,float16,fp8,0,0.01642666632930438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,8,64,0,1,fp8,fp8,0,0.020010666300853092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,40,64,128,1,float16,float16,0,0.014298666268587112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,40,64,0,1,float16,float16,0,0.013797332843144735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,40,64,128,1,float16,fp8,0,0.014554666976133982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,40,64,128,1,fp8,fp8,0,0.01883200059334437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,40,64,0,1,float16,fp8,0,0.014522666732470194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,40,64,0,1,fp8,fp8,0,0.019834666202465694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,1,64,128,1,float16,float16,0,0.013967999567588171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,1,64,0,1,float16,float16,0,0.01441066712141037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,1,64,128,1,float16,fp8,0,0.014671999961137772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,1,64,128,1,fp8,fp8,0,0.0184906671444575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,1,64,0,1,float16,fp8,0,0.014362666755914688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,1,64,0,1,fp8,fp8,0,0.01829333355029424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,2,64,128,1,float16,float16,0,0.01402666668097178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,2,64,0,1,float16,float16,0,0.013936000565687815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,2,64,128,1,float16,fp8,0,0.01461333284775416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,2,64,128,1,fp8,fp8,0,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,2,64,0,1,float16,fp8,0,0.014010666559139887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,4,64,128,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,2,64,0,1,fp8,fp8,0,0.018768000106016796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,4,64,128,1,float16,float16,0,0.013616000612576803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,4,64,0,1,float16,float16,0,0.014170666535695394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,4,64,128,1,float16,fp8,0,0.014181333283583323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,4,64,0,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,4,64,0,1,fp8,fp8,0,0.018672000616788864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,8,64,128,1,float16,float16,0,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,8,64,0,1,float16,float16,0,0.014015999933083853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,8,64,128,1,float16,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,8,64,128,1,fp8,fp8,0,0.01945066700379054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,8,64,0,1,float16,fp8,0,0.013951999445756277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,1,64,0,1,float16,float16,0,0.21249600251515707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,1,64,128,1,float16,fp8,0,0.21255467335383096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,8,64,0,1,fp8,fp8,0,0.019498666127522785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,1,64,128,1,float16,float16,0,0.21271467208862305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,1,64,0,1,fp8,fp8,0,0.37865598996480304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,1,64,128,1,fp8,fp8,0,0.38225066661834717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,1,64,0,1,float16,fp8,0,0.2129653294881185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,2,64,128,1,float16,float16,0,0.21414399147033691
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,2,64,0,1,float16,float16,0,0.21422400077184042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,2,64,128,1,float16,fp8,0,0.214303990205129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,2,64,0,1,float16,fp8,0,0.21331733465194702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,2,64,128,1,fp8,fp8,0,0.3775093158086141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,2,64,0,1,fp8,fp8,0,0.381877342859904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,4,64,128,1,float16,float16,0,0.21782932678858438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,4,64,0,1,float16,float16,0,0.21741332610448202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,4,64,128,1,float16,fp8,0,0.21739200750986734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,4,64,0,1,float16,fp8,0,0.21702933311462402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,4,64,128,1,fp8,fp8,0,0.3818879922231038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,4,64,0,1,fp8,fp8,0,0.3824853499730428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,8,64,128,1,float16,float16,0,0.22342934211095175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,8,64,128,1,float16,fp8,0,0.2222613294919332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,8,64,0,1,float16,float16,0,0.2218559980392456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,8,64,128,1,fp8,fp8,0,0.38315733273824054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,8,64,0,1,float16,fp8,0,0.22073066234588623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,8,64,0,1,fp8,fp8,0,0.38501866658528644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,40,64,128,1,float16,float16,0,0.14335466424624124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,40,64,0,1,float16,float16,0,0.14243200421333313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,40,64,128,1,float16,fp8,0,0.13876266280810037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,40,64,128,1,fp8,fp8,0,0.21663999557495117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,40,64,0,1,float16,fp8,0,0.13883733749389648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,40,64,0,1,fp8,fp8,0,0.21583465735117593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,1,64,128,1,float16,float16,0,0.11321066816647847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,1,64,0,1,float16,float16,0,0.11375466982523601
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,1,64,128,1,float16,fp8,0,0.11406399806340535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,1,64,128,1,fp8,fp8,0,0.20119466384251913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,1,64,0,1,float16,fp8,0,0.11364266276359558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,1,64,0,1,fp8,fp8,0,0.20222399632136026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,2,64,128,1,float16,float16,0,0.11401599645614624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,2,64,0,1,float16,float16,0,0.11530133088429768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,2,64,128,1,float16,fp8,0,0.11441600322723389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,2,64,128,1,fp8,fp8,0,0.20191999276479086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,2,64,0,1,float16,fp8,0,0.115365336338679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,2,64,0,1,fp8,fp8,0,0.20152533054351807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,4,64,128,1,float16,float16,0,0.11635200182596843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,4,64,0,1,float16,float16,0,0.11639466881752014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,4,64,128,1,float16,fp8,0,0.11682132879892985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,4,64,128,1,fp8,fp8,0,0.20383467276891074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,4,64,0,1,float16,fp8,0,0.11658133069674174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,4,64,0,1,fp8,fp8,0,0.20398932695388794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,8,64,128,1,float16,float16,0,0.11979200442632039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,8,64,0,1,float16,float16,0,0.11834667126337688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,8,64,128,1,float16,fp8,0,0.11930666367212932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,8,64,128,1,fp8,fp8,0,0.20456532637278238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,8,64,0,1,float16,fp8,0,0.11877333124478658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,8,64,0,1,fp8,fp8,0,0.2039253314336141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,40,64,128,1,float16,float16,0,0.07814933359622955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,40,64,0,1,float16,float16,0,0.07793599863847096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,40,64,128,1,float16,fp8,0,0.07714133461316426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,1,64,128,1,float16,fp8,0,0.06325866778691609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,40,64,128,1,fp8,fp8,0,0.12085866928100586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,40,64,0,1,float16,fp8,0,0.07707733412583669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,40,64,0,1,fp8,fp8,0,0.1211946705977122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,1,64,128,1,float16,float16,0,0.06358933448791504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,1,64,0,1,float16,float16,0,0.06333866715431213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,1,64,128,1,fp8,fp8,0,0.11109866698582967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,1,64,0,1,float16,fp8,0,0.06369066735108693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,1,64,0,1,fp8,fp8,0,0.11055466532707214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,2,64,128,1,float16,float16,0,0.06303999821345012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,2,64,0,1,float16,float16,0,0.0640533318122228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,2,64,128,1,float16,fp8,0,0.06369066735108693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,2,64,128,1,fp8,fp8,0,0.11176000038782756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,2,64,0,1,float16,fp8,0,0.06451733410358429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,2,64,0,1,fp8,fp8,0,0.11143466830253601
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,4,64,128,1,float16,float16,0,0.06491200129191081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,4,64,0,1,float16,float16,0,0.06390933195749919
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,4,64,128,1,float16,fp8,0,0.06390400230884552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,4,64,128,1,fp8,fp8,0,0.11365333199501038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,4,64,0,1,float16,fp8,0,0.06507733464241028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,4,64,0,1,fp8,fp8,0,0.11236799756685893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,8,64,128,1,float16,float16,0,0.06560533245404561
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,8,64,0,1,float16,float16,0,0.06493866443634033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,8,64,128,1,float16,fp8,0,0.06580266853173573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,8,64,128,1,fp8,fp8,0,0.11329600214958191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,8,64,0,1,float16,fp8,0,0.06496533254782359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,8,64,0,1,fp8,fp8,0,0.11406399806340535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,40,64,128,1,float16,float16,0,0.044405331214269005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,40,64,0,1,float16,float16,0,0.0444160004456838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,1,64,128,1,float16,float16,0,0.03791466603676478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,40,64,128,1,float16,fp8,0,0.04186666508515676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,40,64,128,1,fp8,fp8,0,0.06901866694291432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,40,64,0,1,float16,fp8,0,0.04249600072701772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,40,64,0,1,fp8,fp8,0,0.06817066669464111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,1,64,0,1,float16,float16,0,0.037530665596326195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,1,64,128,1,float16,fp8,0,0.03782399992148081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,2,64,0,1,float16,float16,0,0.03721600025892258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,1,64,128,1,fp8,fp8,0,0.06267199913660686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,1,64,0,1,float16,fp8,0,0.03750933210055033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,1,64,0,1,fp8,fp8,0,0.06338133414586385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,2,64,128,1,float16,float16,0,0.038245332737763725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,2,64,128,1,float16,fp8,0,0.03775466730197271
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,2,64,128,1,fp8,fp8,0,0.061978667974472046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,4,64,128,1,float16,fp8,0,0.03905600061019262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,2,64,0,1,float16,fp8,0,0.037632000943024956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,2,64,0,1,fp8,fp8,0,0.06233066817124685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,4,64,128,1,float16,float16,0,0.038549333810806274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,4,64,0,1,float16,float16,0,0.03894400099913279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,4,64,128,1,fp8,fp8,0,0.06478400031725566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,4,64,0,1,float16,fp8,0,0.03895466774702072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,4,64,0,1,fp8,fp8,0,0.0637600024541219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,8,64,0,1,float16,fp8,0,0.03938133269548416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,8,64,128,1,float16,float16,0,0.03874133278926214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,8,64,0,1,float16,float16,0,0.039018665750821434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,8,64,128,1,float16,fp8,0,0.038480001191298165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,8,64,128,1,fp8,fp8,0,0.06524799764156342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,8,64,0,1,fp8,fp8,0,0.06457599997520447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,40,64,128,1,float16,float16,0,0.026719999810059864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,40,64,0,1,float16,float16,0,0.026506667335828144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,40,64,128,1,float16,fp8,0,0.026911998788515728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,40,64,128,1,fp8,fp8,0,0.041349334021409355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,40,64,0,1,float16,fp8,0,0.026885333160559338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,40,64,0,1,fp8,fp8,0,0.04107200105985006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,1,64,128,1,float16,float16,0,0.02475200096766154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,1,64,0,1,float16,float16,0,0.024522667129834492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,1,64,128,1,float16,fp8,0,0.024661332368850708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,1,64,128,1,fp8,fp8,0,0.03967999915281931
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,1,64,0,1,float16,fp8,0,0.0249439999461174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,2,64,0,1,float16,fp8,0,0.024698667228221893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,1,64,0,1,fp8,fp8,0,0.039701332648595176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,2,64,128,1,float16,float16,0,0.02476266771554947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,2,64,0,1,float16,float16,0,0.024346667031447094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,2,64,128,1,float16,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,2,64,128,1,fp8,fp8,0,0.03957866628964742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,2,64,0,1,fp8,fp8,0,0.039733332892258964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,4,64,128,1,float16,float16,0,0.02475733309984207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,4,64,0,1,float16,float16,0,0.02447466552257538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,4,64,128,1,float16,fp8,0,0.024586667617162068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,4,64,128,1,fp8,fp8,0,0.040394666294256844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,4,64,0,1,float16,fp8,0,0.024432001014550526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,4,64,0,1,fp8,fp8,0,0.04035733391841253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,8,64,128,1,float16,float16,0,0.025242666403452556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,8,64,0,1,float16,float16,0,0.025600001215934753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,8,64,128,1,float16,fp8,0,0.025514667232831318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,8,64,128,1,fp8,fp8,0,0.04037333279848099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,8,64,0,1,float16,fp8,0,0.02443733314673106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,8,64,0,1,fp8,fp8,0,0.04093866546948751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,40,64,128,1,float16,float16,0,0.019573333362738293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,40,64,0,1,float16,float16,0,0.02073066681623459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,40,64,128,1,float16,fp8,0,0.020373333245515823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,40,64,128,1,fp8,fp8,0,0.03048533449570338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,40,64,0,1,float16,fp8,0,0.02022933339079221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,40,64,0,1,fp8,fp8,0,0.030602666238943737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,1,64,128,1,float16,float16,0,0.018133333573738735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,1,64,0,1,float16,float16,0,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,1,64,128,1,float16,fp8,0,0.018031999468803406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,1,64,128,1,fp8,fp8,0,0.029114666084448498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,1,64,0,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,1,64,0,1,fp8,fp8,0,0.029333333174387615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,2,64,128,1,float16,float16,0,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,2,64,0,1,float16,float16,0,0.0194560003777345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,2,64,128,1,float16,fp8,0,0.018645333747069042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,2,64,128,1,fp8,fp8,0,0.029317334294319153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,2,64,0,1,float16,fp8,0,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,2,64,0,1,fp8,fp8,0,0.029253333806991577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,4,64,128,1,float16,float16,0,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,4,64,0,1,float16,float16,0,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,4,64,128,1,float16,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,4,64,128,1,fp8,fp8,0,0.028864001234372456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,4,64,0,1,float16,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,4,64,0,1,fp8,fp8,0,0.029509333272775013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,8,64,128,1,float16,float16,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,8,64,0,1,float16,float16,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,8,64,128,1,float16,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,40,64,128,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,8,64,128,1,fp8,fp8,0,0.029818666477998097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,8,64,0,1,float16,fp8,0,0.01966399947802226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,8,64,0,1,fp8,fp8,0,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,40,64,128,1,float16,float16,0,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,40,64,0,1,float16,float16,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,40,64,128,1,fp8,fp8,0,0.024090667565663654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,40,64,0,1,float16,fp8,0,0.015530666957298914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,40,64,0,1,fp8,fp8,0,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,1,64,0,1,float16,fp8,0,0.015552000453074774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,1,64,128,1,float16,float16,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,1,64,0,1,float16,float16,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,1,64,128,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,1,64,128,1,fp8,fp8,0,0.022661333282788593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,1,64,0,1,fp8,fp8,0,0.023237332701683044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,2,64,128,1,float16,float16,0,0.014432000617186228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,2,64,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,2,64,128,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,2,64,128,1,fp8,fp8,0,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,2,64,0,1,float16,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,2,64,0,1,fp8,fp8,0,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,4,64,128,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,4,64,0,1,float16,float16,0,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,4,64,128,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,4,64,128,1,fp8,fp8,0,0.023503998915354412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,4,64,0,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,4,64,0,1,fp8,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,8,64,128,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,8,64,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,8,64,128,1,float16,fp8,0,0.015487999965747198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,8,64,128,1,fp8,fp8,0,0.0234400009115537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,8,64,0,1,float16,fp8,0,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,8,64,0,1,fp8,fp8,0,0.023578666150569916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,40,64,128,1,float16,float16,0,0.013290667285521826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,40,64,0,1,float16,float16,0,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,40,64,128,1,float16,fp8,0,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,40,64,128,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,40,64,0,1,float16,fp8,0,0.013690666606028875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,40,64,0,1,fp8,fp8,0,0.019530666371186573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,1,64,128,1,float16,float16,0,0.013274667163689932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,1,64,0,1,float16,float16,0,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,1,64,128,1,float16,fp8,0,0.013653332988421122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,1,64,128,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,1,64,0,1,float16,fp8,0,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,1,64,0,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,2,64,128,1,float16,float16,0,0.01314666618903478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,2,64,0,1,float16,float16,0,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,2,64,128,1,float16,fp8,0,0.013744000345468521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,2,64,128,1,fp8,fp8,0,0.01956266661485036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,2,64,0,1,float16,fp8,0,0.01331199953953425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,2,64,0,1,fp8,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,4,64,128,1,float16,float16,0,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,4,64,0,1,float16,float16,0,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,4,64,128,1,float16,fp8,0,0.01340266689658165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,4,64,128,1,fp8,fp8,0,0.019493332753578823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,4,64,0,1,float16,fp8,0,0.013471999516089758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,4,64,0,1,fp8,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,8,64,128,1,float16,float16,0,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,8,64,0,1,float16,float16,0,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,8,64,128,1,float16,fp8,0,0.013354666531085968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,8,64,128,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,8,64,0,1,float16,fp8,0,0.01350933313369751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,8,64,0,1,fp8,fp8,0,0.019493332753578823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,40,64,128,1,float16,float16,0,0.012106666962305704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,40,64,0,1,float16,float16,0,0.012085333466529846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,40,64,128,1,float16,fp8,0,0.012272000312805176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,40,64,128,1,fp8,fp8,0,0.01836799954374631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,40,64,0,1,float16,fp8,0,0.012272000312805176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,40,64,0,1,fp8,fp8,0,0.018245333184798557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,1,64,128,1,float16,float16,0,0.012085333466529846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,1,64,0,1,float16,float16,0,0.012629333883523941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,1,64,128,1,float16,fp8,0,0.012213333199421564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,1,64,128,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,1,64,0,1,float16,fp8,0,0.01246400053302447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,1,64,0,1,fp8,fp8,0,0.018288000176350277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,2,64,128,1,float16,float16,0,0.012042666474978128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,2,64,0,1,float16,float16,0,0.01251199965675672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,2,64,128,1,float16,fp8,0,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,2,64,128,1,fp8,fp8,0,0.0183146670460701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,2,64,0,1,float16,fp8,0,0.012346666306257248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,2,64,0,1,fp8,fp8,0,0.018378666291634243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,4,64,128,1,float16,float16,0,0.01239466667175293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,4,64,0,1,float16,float16,0,0.01249066616098086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,4,64,128,1,float16,fp8,0,0.012437333663304647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,4,64,128,1,fp8,fp8,0,0.018383999665578205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,4,64,0,1,float16,fp8,0,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,4,64,0,1,fp8,fp8,0,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,8,64,128,1,float16,float16,0,0.012543999900420507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,8,64,0,1,float16,float16,0,0.012613333761692047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,8,64,128,1,float16,fp8,0,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,8,64,128,1,fp8,fp8,0,0.018672000616788864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,8,64,0,1,float16,fp8,0,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,8,64,0,1,fp8,fp8,0,0.018288000176350277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,1,64,128,1,float16,float16,0,0.1290613313515981
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,1,64,0,1,float16,float16,0,0.12852266430854797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,1,64,128,1,float16,fp8,0,0.12897066275278726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,1,64,0,1,float16,fp8,0,0.12813333670298258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,1,64,128,1,fp8,fp8,0,0.3200426697731018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,1,64,0,1,fp8,fp8,0,0.31798932949701947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,2,64,128,1,float16,float16,0,0.12828266620635986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,2,64,0,1,float16,float16,0,0.12889066338539124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,2,64,128,1,float16,fp8,0,0.12903466820716858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,2,64,128,1,fp8,fp8,0,0.3192266623179118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,2,64,0,1,float16,fp8,0,0.12916266918182373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,2,64,0,1,fp8,fp8,0,0.31700799862543744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,4,64,128,1,float16,float16,0,0.13142933448155722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,4,64,0,1,float16,float16,0,0.13147200147310892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,4,64,128,1,float16,fp8,0,0.1301813324292501
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,4,64,128,1,fp8,fp8,0,0.31938666105270386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,4,64,0,1,float16,fp8,0,0.13127467036247253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,4,64,0,1,fp8,fp8,0,0.3207520047823588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,8,64,128,1,float16,float16,0,0.13235732913017273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,8,64,0,1,float16,float16,0,0.13301333785057068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,8,64,128,1,float16,fp8,0,0.1318933367729187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,8,64,128,1,fp8,fp8,0,0.32050132751464844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,8,64,0,1,float16,fp8,0,0.13300800323486328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,40,64,128,1,float16,float16,0,0.0825493335723877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,8,64,0,1,fp8,fp8,0,0.321397324403127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,40,64,0,1,float16,float16,0,0.08317333459854126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,40,64,128,1,float16,fp8,0,0.08076266447703044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,40,64,0,1,float16,fp8,0,0.0801333338022232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,40,64,128,1,fp8,fp8,0,0.18127999703089395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,40,64,0,1,fp8,fp8,0,0.17945067087809244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,1,64,128,1,float16,float16,0,0.07014933228492737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,1,64,0,1,float16,float16,0,0.06959466636180878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,1,64,128,1,float16,fp8,0,0.07020799815654755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,1,64,128,1,fp8,fp8,0,0.17018665870030722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,1,64,0,1,float16,fp8,0,0.06942399839560191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,2,64,128,1,float16,fp8,0,0.07054399947325389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,1,64,0,1,fp8,fp8,0,0.16897066434224448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,2,64,128,1,float16,float16,0,0.07041066884994507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,2,64,0,1,float16,float16,0,0.07065600156784058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,2,64,128,1,fp8,fp8,0,0.17122133572896323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,2,64,0,1,float16,fp8,0,0.07070933282375336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,2,64,0,1,fp8,fp8,0,0.16946667432785034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,4,64,128,1,float16,float16,0,0.07062399884064992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,4,64,0,1,float16,float16,0,0.07055999835332234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,4,64,128,1,float16,fp8,0,0.07144000132878621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,4,64,128,1,fp8,fp8,0,0.1721973419189453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,4,64,0,1,float16,fp8,0,0.07151466608047485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,4,64,0,1,fp8,fp8,0,0.1721120079358419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,8,64,128,1,float16,float16,0,0.07162133355935414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,8,64,0,1,float16,float16,0,0.07248533268769582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,8,64,128,1,float16,fp8,0,0.07247466842333476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,8,64,128,1,fp8,fp8,0,0.17121066649754843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,8,64,0,1,float16,fp8,0,0.07197866837183635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,8,64,0,1,fp8,fp8,0,0.17250667015711466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,40,64,128,1,float16,float16,0,0.045040001471837364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,40,64,0,1,float16,float16,0,0.044821331898371376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,40,64,128,1,float16,fp8,0,0.04222933451334635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,40,64,128,1,fp8,fp8,0,0.09896000226338704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,40,64,0,1,float16,fp8,0,0.0428959975639979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,40,64,0,1,fp8,fp8,0,0.09802132844924927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,1,64,128,1,float16,float16,0,0.039146666725476585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,1,64,0,1,float16,float16,0,0.03935466706752777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,1,64,128,1,float16,fp8,0,0.039306665460268654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,1,64,128,1,fp8,fp8,0,0.09317333499590556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,1,64,0,1,float16,fp8,0,0.03874133278926214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,2,64,128,1,fp8,fp8,0,0.093941330909729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,1,64,0,1,fp8,fp8,0,0.09363733728726704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,2,64,128,1,float16,float16,0,0.03953066716591517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,2,64,0,1,float16,float16,0,0.03937600056330363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,4,64,128,1,float16,fp8,0,0.040394666294256844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,2,64,128,1,float16,fp8,0,0.03977066775163015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,2,64,0,1,float16,fp8,0,0.039333333571751915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,2,64,0,1,fp8,fp8,0,0.09387733538945515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,4,64,128,1,float16,float16,0,0.040074666341145836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,4,64,0,1,float16,float16,0,0.04113066693147024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,4,64,128,1,fp8,fp8,0,0.09454933802286784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,4,64,0,1,float16,fp8,0,0.04064533362785975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,4,64,0,1,fp8,fp8,0,0.09463999668757121
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,8,64,128,1,float16,float16,0,0.04108799993991852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,8,64,0,1,float16,float16,0,0.04091733445723852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,8,64,128,1,float16,fp8,0,0.04121066629886627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,8,64,128,1,fp8,fp8,0,0.09397332866986592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,8,64,0,1,float16,fp8,0,0.04134399940570196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,8,64,0,1,fp8,fp8,0,0.094842662413915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,40,64,128,1,float16,float16,0,0.027834666272004444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,1,64,128,1,float16,float16,0,0.026858667532602947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,40,64,0,1,float16,float16,0,0.02755733331044515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,40,64,128,1,float16,fp8,0,0.027210667729377747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,40,64,128,1,fp8,fp8,0,0.05746666590372721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,40,64,0,1,float16,fp8,0,0.027530667682488758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,40,64,0,1,fp8,fp8,0,0.05628266433874766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,1,64,0,1,float16,float16,0,0.026661333938439686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,1,64,128,1,float16,fp8,0,0.026752000053723652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,2,64,128,1,float16,fp8,0,0.026101333399613697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,1,64,128,1,fp8,fp8,0,0.05666666726271311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,1,64,0,1,float16,fp8,0,0.026661333938439686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,2,64,0,1,fp8,fp8,0,0.054885332783063255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,1,64,0,1,fp8,fp8,0,0.055904000997543335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,2,64,128,1,float16,float16,0,0.026842666169007618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,2,64,0,1,float16,float16,0,0.026591998835404713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,2,64,128,1,fp8,fp8,0,0.055957332253456116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,2,64,0,1,float16,fp8,0,0.027002667387326557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,4,64,0,1,fp8,fp8,0,0.05643199880917867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,4,64,128,1,float16,float16,0,0.026778665681680042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,4,64,0,1,float16,float16,0,0.026543999711672466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,4,64,128,1,float16,fp8,0,0.026186667382717133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,4,64,128,1,fp8,fp8,0,0.055018668373425804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,4,64,0,1,float16,fp8,0,0.02701333413521449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,8,64,128,1,float16,float16,0,0.027002667387326557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,8,64,0,1,float16,float16,0,0.026949333647886913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,8,64,128,1,float16,fp8,0,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,8,64,128,1,fp8,fp8,0,0.05667200187842051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,8,64,0,1,float16,fp8,0,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,8,64,0,1,fp8,fp8,0,0.056090667843818665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,40,64,128,1,float16,float16,0,0.019343999524911244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,40,64,0,1,float16,float16,0,0.019632000476121902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,40,64,128,1,float16,fp8,0,0.01987733319401741
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,40,64,128,1,fp8,fp8,0,0.03760000069936117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,40,64,0,1,float16,fp8,0,0.01985599969824155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,40,64,0,1,fp8,fp8,0,0.0374293327331543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,1,64,128,1,float16,float16,0,0.018288000176350277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,1,64,0,1,float16,float16,0,0.0184906671444575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,1,64,128,1,float16,fp8,0,0.01878400022784869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,1,64,128,1,fp8,fp8,0,0.03581333408753077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,2,64,128,1,float16,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,1,64,0,1,float16,fp8,0,0.018522666146357853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,1,64,0,1,fp8,fp8,0,0.03596800069014231
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,2,64,128,1,float16,float16,0,0.01869333287080129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,2,64,0,1,float16,float16,0,0.018570666511853535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,2,64,128,1,fp8,fp8,0,0.03587199995915095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,2,64,0,1,float16,fp8,0,0.01848000039656957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,2,64,0,1,fp8,fp8,0,0.036090667049090065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,4,64,128,1,float16,float16,0,0.018464000274737675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,4,64,0,1,float16,float16,0,0.01878400022784869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,4,64,128,1,float16,fp8,0,0.018816000471512478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,8,64,0,1,float16,float16,0,0.01871466636657715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,4,64,128,1,fp8,fp8,0,0.03605333218971888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,4,64,0,1,float16,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,8,64,0,1,float16,fp8,0,0.019626667102177937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,4,64,0,1,fp8,fp8,0,0.036415999134381614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,8,64,128,1,float16,float16,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,8,64,128,1,float16,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,8,64,128,1,fp8,fp8,0,0.03658666710058848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,8,64,0,1,fp8,fp8,0,0.03654933224121729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,40,64,128,1,float16,float16,0,0.015610666324694952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,40,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,40,64,128,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,1,64,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,40,64,128,1,fp8,fp8,0,0.027509334186712902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,40,64,0,1,float16,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,40,64,0,1,fp8,fp8,0,0.02792533238728841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,1,64,128,1,float16,float16,0,0.014202666779359182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,1,64,128,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,1,64,128,1,fp8,fp8,0,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,1,64,0,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,1,64,0,1,fp8,fp8,0,0.0268053337931633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,2,64,128,1,float16,float16,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,2,64,0,1,fp8,fp8,0,0.026485333840052288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,2,64,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,2,64,128,1,float16,fp8,0,0.0144213338692983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,2,64,128,1,fp8,fp8,0,0.026954665780067444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,4,64,128,1,fp8,fp8,0,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,2,64,0,1,float16,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,4,64,128,1,float16,float16,0,0.01470400020480156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,4,64,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,4,64,128,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,4,64,0,1,float16,fp8,0,0.01444799949725469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,4,64,0,1,fp8,fp8,0,0.026842666169007618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,8,64,128,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,8,64,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,8,64,128,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,8,64,128,1,fp8,fp8,0,0.027674667537212372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,8,64,0,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,8,64,0,1,fp8,fp8,0,0.026885333160559338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,40,64,128,1,float16,float16,0,0.013487999637921652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,40,64,0,1,fp8,fp8,0,0.023226665953795116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,40,64,0,1,float16,float16,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,40,64,128,1,float16,fp8,0,0.014384000251690546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,40,64,128,1,fp8,fp8,0,0.023002666731675465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,40,64,0,1,float16,fp8,0,0.014309333016475042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,1,64,128,1,float16,float16,0,0.013221333424250284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,1,64,0,1,float16,float16,0,0.013946666071812311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,2,64,0,1,float16,float16,0,0.013893333574136099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,1,64,128,1,float16,fp8,0,0.014042666802803675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,1,64,128,1,fp8,fp8,0,0.023445333043734234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,1,64,0,1,float16,fp8,0,0.013839999834696451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,1,64,0,1,fp8,fp8,0,0.02346666653951009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,2,64,128,1,float16,float16,0,0.013904000322024027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,2,64,128,1,float16,fp8,0,0.013866666704416275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,2,64,128,1,fp8,fp8,0,0.022687998910744984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,2,64,0,1,float16,fp8,0,0.01403733342885971
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,2,64,0,1,fp8,fp8,0,0.023813332120577495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,4,64,128,1,float16,float16,0,0.014053333550691605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,4,64,0,1,float16,float16,0,0.013717333475748697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,4,64,128,1,float16,fp8,0,0.013983999689420065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,4,64,128,1,fp8,fp8,0,0.02294933299223582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,4,64,0,1,float16,fp8,0,0.013594667116800943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,4,64,0,1,fp8,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,8,64,128,1,float16,float16,0,0.013733333597580591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,8,64,0,1,float16,float16,0,0.014106666048367819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,8,64,128,1,float16,fp8,0,0.014378666877746582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,8,64,128,1,fp8,fp8,0,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,8,64,0,1,float16,fp8,0,0.014058666924635569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,8,64,0,1,fp8,fp8,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,40,64,128,1,float16,float16,0,0.01139733319481214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,40,64,0,1,float16,float16,0,0.011690666278203329
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,40,64,128,1,float16,fp8,0,0.012245333443085352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,40,64,128,1,fp8,fp8,0,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,40,64,0,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,40,64,0,1,fp8,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,1,64,128,1,float16,float16,0,0.012485332787036896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,1,64,0,1,float16,float16,0,0.012096000214417776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,1,64,128,1,float16,fp8,0,0.011968000481526056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,1,64,128,1,fp8,fp8,0,0.018351999421914417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,1,64,0,1,float16,fp8,0,0.013562666873137156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,1,64,0,1,fp8,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,2,64,128,1,float16,float16,0,0.011882666498422623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,2,64,0,1,float16,float16,0,0.0122079998254776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,2,64,128,1,float16,fp8,0,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,2,64,128,1,fp8,fp8,0,0.018405333161354065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,2,64,0,1,float16,fp8,0,0.01250133290886879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,2,64,0,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,4,64,128,1,float16,float16,0,0.01239466667175293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,4,64,0,1,float16,float16,0,0.011861333002646765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,4,64,128,1,float16,fp8,0,0.012506666282812754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,4,64,128,1,fp8,fp8,0,0.01882133384545644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,4,64,0,1,float16,fp8,0,0.012410666793584824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,4,64,0,1,fp8,fp8,0,0.018405333161354065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,8,64,128,1,float16,float16,0,0.012383999923865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,8,64,0,1,float16,float16,0,0.01392000044385592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,8,64,128,1,float16,fp8,0,0.012234666695197424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,8,64,128,1,fp8,fp8,0,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,8,64,0,1,float16,fp8,0,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,8,64,0,1,fp8,fp8,0,0.018394666413466137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,40,64,128,1,float16,float16,0,0.01191466674208641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,40,64,0,1,float16,float16,0,0.011578666667143503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,40,64,128,1,float16,fp8,0,0.012261333564917246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,1,64,0,1,float16,float16,0,0.012144000579913458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,40,64,128,1,fp8,fp8,0,0.01828266680240631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,40,64,0,1,float16,fp8,0,0.012042666474978128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,40,64,0,1,fp8,fp8,0,0.018405333161354065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,1,64,128,1,float16,float16,0,0.011637333780527115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,1,64,128,1,float16,fp8,0,0.012149333953857422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,1,64,128,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,1,64,0,1,float16,fp8,0,0.012527999778588613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,1,64,0,1,fp8,fp8,0,0.018042666216691334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,2,64,128,1,float16,float16,0,0.011994666109482447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,2,64,0,1,fp8,fp8,0,0.01863466699918111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,2,64,0,1,float16,float16,0,0.011855999628702799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,2,64,128,1,float16,fp8,0,0.011786667009194693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,2,64,128,1,fp8,fp8,0,0.018533332894245785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,2,64,0,1,float16,fp8,0,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,4,64,128,1,float16,float16,0,0.011989332735538483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,4,64,0,1,float16,float16,0,0.012293333808581034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,4,64,128,1,float16,fp8,0,0.014058666924635569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,4,64,128,1,fp8,fp8,0,0.01821333294113477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,4,64,0,1,float16,fp8,0,0.012618667135636011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,4,64,0,1,fp8,fp8,0,0.018207999567190807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,8,64,128,1,float16,float16,0,0.012245333443085352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,8,64,0,1,float16,float16,0,0.012106666962305704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,8,64,128,1,float16,fp8,0,0.01221866657336553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,8,64,128,1,fp8,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,8,64,0,1,float16,fp8,0,0.012576000144084295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,8,64,0,1,fp8,fp8,0,0.01846933364868164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,1,64,128,1,float16,float16,0,0.09470933675765991
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,1,64,0,1,float16,float16,0,0.09360000491142273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,1,64,128,1,float16,fp8,0,0.09450133641560872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,1,64,128,1,fp8,fp8,0,0.2866026759147644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,1,64,0,1,float16,fp8,0,0.09454933802286784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,1,64,0,1,fp8,fp8,0,0.2874079942703247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,2,64,128,1,float16,float16,0,0.09497066338857015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,2,64,0,1,float16,float16,0,0.09454933802286784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,2,64,128,1,float16,fp8,0,0.09490133325258891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,2,64,128,1,fp8,fp8,0,0.2863146662712097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,2,64,0,1,float16,fp8,0,0.0950933297475179
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,4,64,128,1,float16,float16,0,0.09546666344006856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,2,64,0,1,fp8,fp8,0,0.2875093420346578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,4,64,0,1,float16,float16,0,0.09582933783531189
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,4,64,128,1,float16,fp8,0,0.09607999523480733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,4,64,128,1,fp8,fp8,0,0.2879573305447896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,4,64,0,1,float16,fp8,0,0.09611733754475911
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,4,64,0,1,fp8,fp8,0,0.2871786753336589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,8,64,128,1,float16,float16,0,0.0969493289788564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,8,64,0,1,float16,float16,0,0.09703999757766724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,8,64,128,1,float16,fp8,0,0.09694400429725647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,8,64,128,1,fp8,fp8,0,0.2903040051460266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,8,64,0,1,float16,fp8,0,0.09689066807428996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,40,64,128,1,float16,float16,0,0.054757331808408104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,40,64,0,1,float16,float16,0,0.054048001766204834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,8,64,0,1,fp8,fp8,0,0.28856533765792847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,40,64,128,1,float16,fp8,0,0.053247998158137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,1,64,128,1,float16,fp8,0,0.050848002235094704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,40,64,128,1,fp8,fp8,0,0.15963199734687805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,40,64,0,1,float16,fp8,0,0.05305600166320801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,40,64,0,1,fp8,fp8,0,0.15843733151753744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,1,64,128,1,float16,float16,0,0.05160533388455709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,1,64,0,1,float16,float16,0,0.05142400165398916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,1,64,128,1,fp8,fp8,0,0.15083733201026917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,1,64,0,1,float16,fp8,0,0.051167999704678856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,1,64,0,1,fp8,fp8,0,0.15100266536076865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,2,64,128,1,float16,float16,0,0.05220800141493479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,2,64,0,1,float16,float16,0,0.052373334765434265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,4,64,0,1,float16,float16,0,0.05239466826121012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,2,64,128,1,float16,fp8,0,0.05148266752560934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,2,64,128,1,fp8,fp8,0,0.150709331035614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,2,64,0,1,float16,fp8,0,0.05180799961090088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,2,64,0,1,fp8,fp8,0,0.1516480048497518
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,4,64,128,1,float16,float16,0,0.05298133194446564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,4,64,128,1,float16,fp8,0,0.053226664662361145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,4,64,0,1,float16,fp8,0,0.05226666728655497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,4,64,128,1,fp8,fp8,0,0.15292800466219583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,4,64,0,1,fp8,fp8,0,0.1536799967288971
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,8,64,128,1,float16,float16,0,0.05328000088532766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,8,64,0,1,float16,float16,0,0.053269331653912864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,8,64,128,1,float16,fp8,0,0.05328533550103506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,8,64,128,1,fp8,fp8,0,0.15475733081499735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,8,64,0,1,float16,fp8,0,0.05365866422653198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,8,64,0,1,fp8,fp8,0,0.15235732992490134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,40,64,128,1,float16,float16,0,0.033071999748547874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,40,64,0,1,float16,float16,0,0.03265066693226496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,40,64,128,1,float16,fp8,0,0.032618666688601174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,40,64,128,1,fp8,fp8,0,0.08675199747085571
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,40,64,0,1,float16,fp8,0,0.03288000077009201
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,40,64,0,1,fp8,fp8,0,0.08726400136947632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,1,64,128,1,float16,float16,0,0.03179733455181122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,1,64,0,1,float16,float16,0,0.03253333270549774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,1,64,128,1,float16,fp8,0,0.03249600032965342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,1,64,128,1,fp8,fp8,0,0.08689066767692566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,1,64,0,1,float16,fp8,0,0.032245332996050514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,2,64,0,1,float16,fp8,0,0.032442666590213776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,1,64,0,1,fp8,fp8,0,0.08610666791598003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,2,64,128,1,float16,float16,0,0.03218133250872294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,4,64,128,1,float16,float16,0,0.03239466746648153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,2,64,0,1,float16,float16,0,0.03190399954716364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,2,64,128,1,float16,fp8,0,0.03259733319282532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,2,64,128,1,fp8,fp8,0,0.08545066912968953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,2,64,0,1,fp8,fp8,0,0.08586666981379192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,4,64,0,1,float16,float16,0,0.03299200038115183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,4,64,128,1,float16,fp8,0,0.03266666581233343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,4,64,128,1,fp8,fp8,0,0.08548266688982646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,4,64,0,1,float16,fp8,0,0.032287999987602234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,4,64,0,1,fp8,fp8,0,0.0867146650950114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,8,64,128,1,float16,float16,0,0.033088001112143196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,8,64,0,1,float16,float16,0,0.033258666594823204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,8,64,128,1,float16,fp8,0,0.03314133236805598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,8,64,128,1,fp8,fp8,0,0.08659733335177104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,8,64,0,1,float16,fp8,0,0.03325333446264267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,8,64,0,1,fp8,fp8,0,0.08559999863306682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,40,64,128,1,float16,float16,0,0.022437334060668945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,40,64,0,1,float16,float16,0,0.02239466706911723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,40,64,128,1,float16,fp8,0,0.022634667654832203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,40,64,128,1,fp8,fp8,0,0.05307200054327647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,40,64,0,1,float16,fp8,0,0.022458667556444805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,40,64,0,1,fp8,fp8,0,0.054416000843048096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,1,64,128,1,float16,float16,0,0.021733333667119343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,1,64,0,1,float16,float16,0,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,1,64,128,1,float16,fp8,0,0.02203733225663503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,2,64,128,1,float16,fp8,0,0.02201066662867864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,2,64,128,1,fp8,fp8,0,0.05110933383305868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,1,64,128,1,fp8,fp8,0,0.052069331208864846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,1,64,0,1,float16,fp8,0,0.022085333863894146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,1,64,0,1,fp8,fp8,0,0.05193600058555603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,2,64,128,1,float16,float16,0,0.02221333235502243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,2,64,0,1,float16,float16,0,0.021925332645575207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,2,64,0,1,float16,fp8,0,0.022416000564893086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,2,64,0,1,fp8,fp8,0,0.05202666421731313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,4,64,128,1,float16,float16,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,4,64,0,1,float16,float16,0,0.02181866765022278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,4,64,128,1,float16,fp8,0,0.022175999979178112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,4,64,128,1,fp8,fp8,0,0.05204799771308899
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,4,64,0,1,float16,fp8,0,0.022319999833901722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,4,64,0,1,fp8,fp8,0,0.05221866567929586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,8,64,128,1,float16,float16,0,0.022618666291236877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,8,64,0,1,float16,float16,0,0.022015998760859173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,8,64,128,1,float16,fp8,0,0.021829334398110706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,8,64,128,1,fp8,fp8,0,0.051498666405677795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,8,64,0,1,float16,fp8,0,0.022122666239738464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,8,64,0,1,fp8,fp8,0,0.052602668603261314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,40,64,128,1,float16,float16,0,0.015402667224407196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,40,64,0,1,float16,float16,0,0.015728000551462173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,40,64,128,1,float16,fp8,0,0.016261332978804905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,1,64,128,1,float16,fp8,0,0.01591466615597407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,40,64,128,1,fp8,fp8,0,0.033728001018365227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,40,64,0,1,float16,fp8,0,0.015706667055686314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,40,64,0,1,fp8,fp8,0,0.03408000121514002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,1,64,128,1,float16,float16,0,0.015583999454975128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,1,64,0,1,float16,float16,0,0.015637333194414776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,1,64,128,1,fp8,fp8,0,0.03306666761636734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,1,64,0,1,float16,fp8,0,0.015743999431530636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,1,64,0,1,fp8,fp8,0,0.03338133295377096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,2,64,128,1,float16,float16,0,0.015504000087579092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,2,64,0,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,2,64,128,1,float16,fp8,0,0.016234666109085083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,2,64,128,1,fp8,fp8,0,0.03299200038115183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,2,64,0,1,float16,fp8,0,0.015706667055686314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,2,64,0,1,fp8,fp8,0,0.033039999504884086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,4,64,128,1,float16,float16,0,0.015824000040690105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,4,64,0,1,float16,float16,0,0.01589866727590561
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,4,64,128,1,float16,fp8,0,0.015765332927306492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,4,64,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,4,64,0,1,float16,fp8,0,0.016250666230916977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,4,64,0,1,fp8,fp8,0,0.0330826664964358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,8,64,128,1,float16,float16,0,0.015557333827018738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,8,64,0,1,float16,float16,0,0.01581866666674614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,8,64,128,1,float16,fp8,0,0.016506666938463848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,8,64,128,1,fp8,fp8,0,0.03382933388153712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,8,64,0,1,float16,fp8,0,0.01609066625436147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,8,64,0,1,fp8,fp8,0,0.03311466674009959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,40,64,128,1,float16,float16,0,0.01357866699496905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,40,64,0,1,float16,float16,0,0.013946666071812311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,40,64,128,1,float16,fp8,0,0.01413333291808764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,40,64,128,1,fp8,fp8,0,0.0268053337931633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,40,64,0,1,float16,fp8,0,0.014474666366974512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,40,64,0,1,fp8,fp8,0,0.026906666656335194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,1,64,128,1,float16,float16,0,0.013967999567588171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,1,64,0,1,float16,float16,0,0.013210666676362356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,1,64,128,1,float16,fp8,0,0.014181333283583323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,1,64,128,1,fp8,fp8,0,0.026159999271233875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,1,64,0,1,float16,fp8,0,0.014117332796255747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,1,64,0,1,fp8,fp8,0,0.02659733345111211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,2,64,128,1,float16,float16,0,0.013749333719412485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,2,64,0,1,float16,float16,0,0.013845333208640417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,2,64,128,1,float16,fp8,0,0.01332266628742218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,2,64,128,1,fp8,fp8,0,0.026543999711672466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,2,64,0,1,float16,fp8,0,0.01413333291808764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,2,64,0,1,fp8,fp8,0,0.02643733223279317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,4,64,128,1,float16,float16,0,0.013962666193644205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,4,64,0,1,float16,float16,0,0.013610667238632837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,4,64,128,1,float16,fp8,0,0.014282666146755219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,4,64,128,1,fp8,fp8,0,0.026330667237440746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,4,64,0,1,float16,fp8,0,0.014346666634082794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,4,64,0,1,fp8,fp8,0,0.02649066597223282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,8,64,128,1,float16,float16,0,0.014149333039919535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,8,64,0,1,float16,float16,0,0.013855999956528345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,8,64,128,1,float16,fp8,0,0.014106666048367819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,8,64,128,1,fp8,fp8,0,0.026522666215896606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,8,64,0,1,float16,fp8,0,0.013744000345468521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,8,64,0,1,fp8,fp8,0,0.026672000686327618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,40,64,128,1,float16,float16,0,0.01232533281048139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,40,64,0,1,float16,float16,0,0.01268799975514412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,40,64,128,1,float16,fp8,0,0.012527999778588613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,40,64,128,1,fp8,fp8,0,0.02222399910291036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,40,64,0,1,float16,fp8,0,0.01267733300725619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,40,64,0,1,fp8,fp8,0,0.021903999149799347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,1,64,0,1,float16,fp8,0,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,1,64,128,1,float16,float16,0,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,1,64,0,1,float16,float16,0,0.012437333663304647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,1,64,128,1,float16,fp8,0,0.013269333789745966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,2,64,128,1,fp8,fp8,0,0.022448000808556873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,1,64,128,1,fp8,fp8,0,0.021984001000722248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,1,64,0,1,fp8,fp8,0,0.022384000321229298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,2,64,128,1,float16,float16,0,0.012330666184425354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,2,64,0,1,float16,float16,0,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,2,64,128,1,float16,fp8,0,0.01328533391157786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,2,64,0,1,float16,fp8,0,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,2,64,0,1,fp8,fp8,0,0.0227360005180041
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,4,64,128,1,float16,float16,0,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,4,64,0,1,float16,float16,0,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,4,64,128,1,float16,fp8,0,0.013269333789745966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,4,64,128,1,fp8,fp8,0,0.021546666820844013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,4,64,0,1,float16,fp8,0,0.013242666920026144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,4,64,0,1,fp8,fp8,0,0.02203733225663503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,8,64,128,1,float16,float16,0,0.012602667013804117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,8,64,0,1,float16,float16,0,0.012698666503032049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,8,64,128,1,float16,fp8,0,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,8,64,128,1,fp8,fp8,0,0.02252800017595291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,8,64,0,1,float16,fp8,0,0.013237333546082178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,8,64,0,1,fp8,fp8,0,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,40,64,128,1,float16,float16,0,0.01198400060335795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,40,64,0,1,float16,float16,0,0.011674666156371435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,40,64,128,1,float16,fp8,0,0.012015999605258306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,40,64,128,1,fp8,fp8,0,0.018650667121013004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,40,64,0,1,float16,fp8,0,0.012096000214417776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,40,64,0,1,fp8,fp8,0,0.019381333142518997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,1,64,0,1,fp8,fp8,0,0.018672000616788864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,1,64,128,1,float16,float16,0,0.011866666376590729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,1,64,0,1,float16,float16,0,0.011509332805871964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,1,64,128,1,float16,fp8,0,0.012383999923865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,1,64,128,1,fp8,fp8,0,0.018768000106016796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,1,64,0,1,float16,fp8,0,0.012282667060693106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,2,64,128,1,float16,float16,0,0.011663999408483505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,2,64,0,1,float16,float16,0,0.012383999923865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,2,64,128,1,float16,fp8,0,0.012213333199421564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,2,64,128,1,fp8,fp8,0,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,2,64,0,1,float16,fp8,0,0.012154666086037954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,2,64,0,1,fp8,fp8,0,0.01850133389234543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,4,64,128,1,float16,float16,0,0.011898666620254517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,4,64,0,1,float16,float16,0,0.012367999802033106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,4,64,128,1,float16,fp8,0,0.01249066616098086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,4,64,128,1,fp8,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,4,64,0,1,float16,fp8,0,0.012202666451533636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,4,64,0,1,fp8,fp8,0,0.019808000574509304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,8,64,128,1,float16,float16,0,0.01166933278242747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,8,64,0,1,float16,float16,0,0.012096000214417776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,40,64,0,1,float16,float16,0,0.011402666568756104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,8,64,128,1,float16,fp8,0,0.012367999802033106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,8,64,128,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,8,64,0,1,float16,fp8,0,0.012639999389648438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,8,64,0,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,40,64,128,1,float16,float16,0,0.011648000528415045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,40,64,128,1,float16,fp8,0,0.011519999553759893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,40,64,128,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,40,64,0,1,float16,fp8,0,0.011968000481526056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,40,64,0,1,fp8,fp8,0,0.018394666413466137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,1,64,128,1,float16,float16,0,0.011866666376590729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,1,64,0,1,float16,float16,0,0.011957333733638128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,1,64,128,1,float16,fp8,0,0.012047999848922094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,1,64,128,1,fp8,fp8,0,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,1,64,0,1,float16,fp8,0,0.011722666521867117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,1,64,0,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,2,64,128,1,float16,float16,0,0.012069333344697952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,2,64,0,1,float16,float16,0,0.012106666962305704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,2,64,128,1,float16,fp8,0,0.01221866657336553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,2,64,128,1,fp8,fp8,0,0.018383999665578205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,2,64,0,1,float16,fp8,0,0.012186666329701742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,2,64,0,1,fp8,fp8,0,0.018325333793958027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,4,64,128,1,float16,float16,0,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,4,64,0,1,float16,float16,0,0.011749333391586939
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,4,64,128,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,4,64,128,1,fp8,fp8,0,0.018533332894245785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,8,64,128,1,float16,fp8,0,0.012058666596810022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,4,64,0,1,float16,fp8,0,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,4,64,0,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,8,64,128,1,float16,float16,0,0.011957333733638128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,8,64,0,1,float16,float16,0,0.01129066695769628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,8,64,128,1,fp8,fp8,0,0.018485333770513535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,8,64,0,1,float16,fp8,0,0.01259200026591619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,8,64,0,1,fp8,fp8,0,0.018533332894245785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,1,64,128,1,float16,float16,0,2.81987730662028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,1,64,128,1,float16,fp8,0,2.775845209757487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,1,64,128,1,fp8,fp8,0,3.6550881067911782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,2,64,128,1,float16,float16,0,2.8557812372843423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,2,64,128,1,float16,fp8,0,2.812037467956543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,2,64,128,1,fp8,fp8,0,3.6973867416381836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,4,64,128,1,float16,float16,0,2.863722801208496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,4,64,128,1,float16,fp8,0,2.8322505950927734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,1,64,0,1,fp8,fp8,0,17.186368306477863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,1,64,0,1,float16,float16,0,18.900848388671875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,1,64,0,1,float16,fp8,0,19.30568567911784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,4,64,128,1,fp8,fp8,0,3.7218240102132163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,2,64,0,1,float16,float16,0,19.105973561604817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,8,64,128,1,float16,float16,0,2.9165919621785483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,2,64,0,1,float16,fp8,0,19.213179270426433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,2,64,0,1,fp8,fp8,0,17.260133107503254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,8,64,128,1,float16,fp8,0,2.9061333338419595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,8,64,128,1,fp8,fp8,0,3.7945334116617837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,4,64,0,1,float16,float16,0,19.295088450113933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,32,64,128,1,float16,float16,0,1.602176030476888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,32,64,128,1,float16,fp8,0,1.58951997756958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,32,64,128,1,fp8,fp8,0,2.1063626607259116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,4,64,0,1,fp8,fp8,0,17.2738774617513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,4,64,0,1,float16,fp8,0,19.309632619222004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,32,64,0,1,float16,float16,0,9.800933202107748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,1,64,128,1,float16,float16,0,1.4338720639546711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,8,64,0,1,float16,float16,0,19.182708740234375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,1,64,128,1,float16,fp8,0,1.4157546361287434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,32,64,0,1,fp8,fp8,0,8.983029047648111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,32,64,0,1,float16,fp8,0,9.87118403116862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,1,64,128,1,fp8,fp8,0,1.8793333371480305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,8,64,0,1,fp8,fp8,0,17.324724833170574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,2,64,128,1,float16,float16,0,1.4598666826883953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,8,64,0,1,float16,fp8,0,19.477279663085938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,2,64,128,1,float16,fp8,0,1.4303466478983562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,2,64,128,1,fp8,fp8,0,1.9068640073140461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,4,64,128,1,float16,float16,0,1.459232012430827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,1,64,0,1,float16,float16,0,9.6189333597819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,4,64,128,1,float16,fp8,0,1.4438613255818684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,1,64,0,1,float16,fp8,0,9.574581146240234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,4,64,128,1,fp8,fp8,0,1.9302453994750977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,1,64,0,1,fp8,fp8,0,8.737594604492188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,8,64,128,1,float16,float16,0,1.482741355895996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,2,64,0,1,float16,float16,0,9.70798428853353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,2,64,0,1,fp8,fp8,0,8.77177619934082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,2,64,0,1,float16,fp8,0,9.664437611897787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,8,64,128,1,float16,fp8,0,1.4687520662943523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,4,64,0,1,float16,float16,0,9.707743962605795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,8,64,128,1,fp8,fp8,0,1.9570506413777669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,32,64,128,1,float16,float16,0,0.8341120084126791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,32,64,128,1,float16,fp8,0,0.8490986824035645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,32,64,128,1,fp8,fp8,0,1.1043893496195476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,4,64,0,1,fp8,fp8,0,8.792240142822266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,4,64,0,1,float16,fp8,0,9.525744120279947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,1,64,128,1,float16,float16,0,0.7857279777526855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,8,64,0,1,float16,float16,0,9.654666900634766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,32,64,0,1,float16,float16,0,4.972554524739583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,1,64,128,1,float16,fp8,0,0.774458646774292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,32,64,0,1,fp8,fp8,0,4.547130584716797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,1,64,128,1,fp8,fp8,0,1.012346665064494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,32,64,0,1,float16,fp8,0,5.011989275614421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,8,64,0,1,fp8,fp8,0,8.824090957641602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,8,64,0,1,float16,fp8,0,9.60972785949707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,2,64,128,1,float16,float16,0,0.7815093199412028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,2,64,128,1,float16,fp8,0,0.7766559918721517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,1,64,0,1,float16,float16,0,4.885130564371745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,2,64,128,1,fp8,fp8,0,1.0139306386311848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,1,64,0,1,float16,fp8,0,4.975210825602214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,4,64,128,1,float16,float16,0,0.7912106513977051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,1,64,0,1,fp8,fp8,0,4.434005419413249
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,4,64,128,1,float16,fp8,0,0.7800959746042887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,4,64,128,1,fp8,fp8,0,1.0255786577860515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,2,64,0,1,float16,float16,0,4.936741193135579
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,2,64,0,1,fp8,fp8,0,4.44264539082845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,8,64,128,1,float16,float16,0,0.7959093252817789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,2,64,0,1,float16,fp8,0,4.896725336710612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,8,64,128,1,float16,fp8,0,0.7900853157043457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,8,64,128,1,fp8,fp8,0,1.0254080295562744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,4,64,0,1,float16,float16,0,4.91373856862386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,32,64,128,1,float16,float16,0,0.5532799959182739
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,4,64,0,1,fp8,fp8,0,4.44429874420166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,32,64,128,1,float16,fp8,0,0.553551991780599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,4,64,0,1,float16,fp8,0,4.931445439656575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,32,64,128,1,fp8,fp8,0,0.6723732948303223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,8,64,0,1,float16,float16,0,4.946949323018392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,32,64,0,1,float16,float16,0,2.620053291320801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,1,64,128,1,float16,float16,0,0.553061326344808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,8,64,0,1,fp8,fp8,0,4.497359911600749
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,8,64,0,1,float16,fp8,0,4.920207977294922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,1,64,128,1,float16,fp8,0,0.5502986510594686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,32,64,0,1,float16,fp8,0,2.617919921875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,32,64,0,1,fp8,fp8,0,2.4311680793762207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,1,64,128,1,fp8,fp8,0,0.6691093444824219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,2,64,128,1,float16,float16,0,0.5501919984817505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,1,64,0,1,float16,float16,0,2.640063921610514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,2,64,128,1,float16,fp8,0,0.5536426703135172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,2,64,128,1,fp8,fp8,0,0.6694080034891764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,1,64,0,1,float16,fp8,0,2.636192003885905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,1,64,0,1,fp8,fp8,0,2.4472373326619468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,2,64,0,1,float16,float16,0,2.6151466369628906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,4,64,128,1,float16,float16,0,0.5529546737670898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,4,64,128,1,float16,fp8,0,0.5509599844614664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,2,64,0,1,float16,fp8,0,2.6157867113749185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,2,64,0,1,fp8,fp8,0,2.4371946652730307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,4,64,128,1,fp8,fp8,0,0.6737866401672363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,8,64,128,1,float16,float16,0,0.5511573155721029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,4,64,0,1,float16,float16,0,2.6361494064331055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,8,64,128,1,float16,fp8,0,0.5527626673380533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,4,64,0,1,float16,fp8,0,2.6163786252339682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,4,64,0,1,fp8,fp8,0,2.4182186126708984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,8,64,128,1,fp8,fp8,0,0.6707893212636312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,8,64,0,1,float16,float16,0,2.6174186070760093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,1,64,128,1,float16,float16,0,2.1018880208333335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,8,64,0,1,float16,fp8,0,2.637872060139974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,8,64,0,1,fp8,fp8,0,2.4149600664774575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,1,64,128,1,float16,fp8,0,2.056608041127523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,1,64,128,1,fp8,fp8,0,2.7689812978108725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,2,64,128,1,float16,float16,0,2.1416160265604653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,2,64,128,1,float16,fp8,0,2.098954677581787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,2,64,128,1,fp8,fp8,0,2.8089065551757812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,4,64,128,1,float16,float16,0,2.1429386138916016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,1,64,0,1,float16,float16,0,11.075050354003906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,4,64,128,1,float16,fp8,0,2.1182986895243325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,1,64,0,1,fp8,fp8,0,10.033760070800781
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,1,64,0,1,float16,fp8,0,10.967600504557291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,2,64,0,1,float16,float16,0,11.293956756591797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,4,64,128,1,fp8,fp8,0,2.8162558873494468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,8,64,128,1,float16,float16,0,2.190586725870768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,2,64,0,1,float16,fp8,0,11.180623372395834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,2,64,0,1,fp8,fp8,0,10.080256144205729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,8,64,128,1,float16,fp8,0,2.172522703806559
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,8,64,128,1,fp8,fp8,0,2.8523359298706055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,4,64,0,1,float16,float16,0,11.132474263509115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,32,64,128,1,float16,float16,0,1.183077335357666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,32,64,128,1,float16,fp8,0,1.192032019297282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,32,64,128,1,fp8,fp8,0,1.5576106707255046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,4,64,0,1,fp8,fp8,0,10.098709106445312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,4,64,0,1,float16,fp8,0,11.021748860677084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,1,64,128,1,float16,float16,0,1.0761653582255046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,32,64,0,1,float16,float16,0,5.763381322224935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,1,64,128,1,float16,fp8,0,1.0557013352711995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,8,64,0,1,float16,float16,0,11.339071909586588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,1,64,128,1,fp8,fp8,0,1.4148640632629395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,8,64,0,1,fp8,fp8,0,10.131930669148764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,32,64,0,1,fp8,fp8,0,5.231157302856445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,32,64,0,1,float16,fp8,0,5.694677352905273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,8,64,0,1,float16,fp8,0,11.16823959350586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,2,64,128,1,float16,float16,0,1.0902079741160076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,2,64,128,1,float16,fp8,0,1.0650346279144287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,1,64,0,1,float16,float16,0,5.567850748697917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,2,64,128,1,fp8,fp8,0,1.4446934064229329
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,4,64,128,1,float16,float16,0,1.0862346490224202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,1,64,0,1,float16,fp8,0,5.640815734863281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,1,64,0,1,fp8,fp8,0,5.118554751078288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,4,64,128,1,float16,fp8,0,1.0867359638214111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,4,64,128,1,fp8,fp8,0,1.4268426895141602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,2,64,0,1,float16,float16,0,5.604490915934245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,2,64,0,1,float16,fp8,0,5.594271977742513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,8,64,128,1,float16,float16,0,1.1112693150838215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,2,64,0,1,fp8,fp8,0,5.1298828125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,8,64,128,1,float16,fp8,0,1.0952106316884358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,4,64,0,1,float16,float16,0,5.57421875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,8,64,128,1,fp8,fp8,0,1.4570666948954265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,32,64,128,1,float16,float16,0,0.6322453419367472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,4,64,0,1,fp8,fp8,0,5.140549341837565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,32,64,128,1,float16,fp8,0,0.6425280173619589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,4,64,0,1,float16,fp8,0,5.6825815836588545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,32,64,128,1,fp8,fp8,0,0.8352106412251791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,8,64,0,1,float16,float16,0,5.650346755981445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,32,64,0,1,float16,float16,0,2.9002345403035483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,1,64,128,1,float16,float16,0,0.5949440002441406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,1,64,128,1,float16,fp8,0,0.5853066841761271
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,8,64,0,1,float16,fp8,0,5.6139678955078125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,8,64,0,1,fp8,fp8,0,5.114975929260254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,1,64,128,1,fp8,fp8,0,0.7680533727010092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,32,64,0,1,float16,fp8,0,2.9187679290771484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,32,64,0,1,fp8,fp8,0,2.7135254542032876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,1,64,0,1,float16,float16,0,2.865621248881022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,2,64,128,1,float16,float16,0,0.6025013526280721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,2,64,128,1,float16,fp8,0,0.5887519915898641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,1,64,0,1,fp8,fp8,0,2.638042608896891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,2,64,128,1,fp8,fp8,0,0.7716586589813232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,1,64,0,1,float16,fp8,0,2.8589226404825845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,4,64,128,1,float16,float16,0,0.603488008181254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,2,64,0,1,float16,float16,0,2.8450291951497397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,4,64,128,1,float16,fp8,0,0.5923360188802084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,4,64,128,1,fp8,fp8,0,0.7747413317362467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,2,64,0,1,float16,fp8,0,2.83896541595459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,2,64,0,1,fp8,fp8,0,2.625823974609375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,8,64,128,1,float16,float16,0,0.6039466857910156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,4,64,0,1,float16,float16,0,2.8473545710245767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,8,64,128,1,float16,fp8,0,0.5943040053049723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,4,64,0,1,float16,fp8,0,2.8991146087646484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,8,64,128,1,fp8,fp8,0,0.7797066370646158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,4,64,0,1,fp8,fp8,0,2.6271626154581704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,32,64,128,1,float16,float16,0,0.41913068294525146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,8,64,0,1,float16,float16,0,2.875455856323242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,32,64,128,1,float16,fp8,0,0.4225813150405884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,32,64,128,1,fp8,fp8,0,0.5099946657816569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,8,64,0,1,float16,fp8,0,2.872591972351074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,32,64,0,1,float16,float16,0,1.587626616160075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,8,64,0,1,fp8,fp8,0,2.6363253593444824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,1,64,128,1,float16,float16,0,0.42104001839955646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,32,64,0,1,float16,fp8,0,1.5733920733133953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,1,64,128,1,float16,fp8,0,0.4199093182881673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,32,64,0,1,fp8,fp8,0,1.4081172943115234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,1,64,0,1,float16,float16,0,1.577114741007487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,1,64,128,1,fp8,fp8,0,0.5110400120417277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,1,64,0,1,float16,fp8,0,1.5687947273254395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,2,64,128,1,float16,float16,0,0.4196586608886719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,1,64,0,1,fp8,fp8,0,1.4081226984659831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,2,64,128,1,float16,fp8,0,0.42078932126363117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,2,64,128,1,fp8,fp8,0,0.5087626775105795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,2,64,0,1,float16,float16,0,1.574277400970459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,4,64,128,1,float16,float16,0,0.41978665192921955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,2,64,0,1,float16,fp8,0,1.5879732767740886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,4,64,128,1,float16,fp8,0,0.4200213352839152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,2,64,0,1,fp8,fp8,0,1.3899466196695964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,4,64,128,1,fp8,fp8,0,0.5082346598307291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,4,64,0,1,float16,float16,0,1.5870025952657063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,8,64,128,1,float16,float16,0,0.4203093449274699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,4,64,0,1,float16,fp8,0,1.5767466227213542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,8,64,128,1,float16,fp8,0,0.419866681098938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,4,64,0,1,fp8,fp8,0,1.3989599545796711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,8,64,128,1,fp8,fp8,0,0.5086666742960612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,8,64,0,1,float16,float16,0,1.5796373685201008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,8,64,0,1,float16,fp8,0,1.5760480562845867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,8,64,0,1,fp8,fp8,0,1.4080479939778645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,1,64,128,1,float16,float16,0,1.7340000470479329
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,1,64,128,1,float16,fp8,0,1.7054452896118164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,1,64,128,1,fp8,fp8,0,2.284634590148926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,2,64,128,1,float16,float16,0,1.7671945889790852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,2,64,128,1,float16,fp8,0,1.7487306594848633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,2,64,128,1,fp8,fp8,0,2.318079948425293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,4,64,128,1,float16,float16,0,1.7839466730753581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,1,64,0,1,float16,float16,0,7.8863786061604815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,1,64,0,1,fp8,fp8,0,7.214597066243489
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,1,64,0,1,float16,fp8,0,7.792298634847005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,4,64,128,1,float16,fp8,0,1.7620320320129395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,2,64,0,1,float16,float16,0,7.979589462280273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,4,64,128,1,fp8,fp8,0,2.3495465914408364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,2,64,0,1,fp8,fp8,0,7.197717030843099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,2,64,0,1,float16,fp8,0,7.9155839284261065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,8,64,128,1,float16,float16,0,1.808677355448405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,8,64,128,1,float16,fp8,0,1.801370620727539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,4,64,0,1,float16,float16,0,7.99504025777181
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,8,64,128,1,fp8,fp8,0,2.3607254028320312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,32,64,128,1,float16,float16,0,0.981274684270223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,32,64,128,1,float16,fp8,0,0.9814720153808594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,4,64,0,1,fp8,fp8,0,7.273162841796875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,32,64,128,1,fp8,fp8,0,1.3090933163960774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,4,64,0,1,float16,fp8,0,7.982314427693685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,8,64,0,1,float16,float16,0,8.008085250854492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,32,64,0,1,float16,float16,0,4.142565409342448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,1,64,128,1,float16,float16,0,0.8993173440297445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,8,64,0,1,fp8,fp8,0,7.249807993570964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,8,64,0,1,float16,fp8,0,8.009749094645182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,1,64,128,1,float16,fp8,0,0.8887893358866373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,32,64,0,1,float16,fp8,0,4.104720115661621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,1,64,128,1,fp8,fp8,0,1.1820000012715657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,32,64,0,1,fp8,fp8,0,3.7703145345052085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,1,64,0,1,float16,float16,0,3.995786666870117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,2,64,128,1,float16,float16,0,0.918399969736735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,2,64,128,1,float16,fp8,0,0.8956906795501709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,2,64,128,1,fp8,fp8,0,1.1909440358479817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,1,64,0,1,float16,fp8,0,3.9605706532796225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,4,64,128,1,float16,float16,0,0.9124746322631836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,1,64,0,1,fp8,fp8,0,3.6299734115600586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,4,64,128,1,float16,fp8,0,0.9009813467661539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,2,64,0,1,float16,float16,0,4.050085385640462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,4,64,128,1,fp8,fp8,0,1.1976373195648193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,2,64,0,1,float16,fp8,0,3.966613451639811
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,2,64,0,1,fp8,fp8,0,3.677290598551432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,8,64,128,1,float16,float16,0,0.9221973419189453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,4,64,0,1,float16,float16,0,4.0204213460286455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,8,64,128,1,float16,fp8,0,0.913477341334025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,4,64,0,1,fp8,fp8,0,3.6482985814412436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,8,64,128,1,fp8,fp8,0,1.2145439783732097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,32,64,128,1,float16,float16,0,0.5336693525314331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,4,64,0,1,float16,fp8,0,4.043162663777669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,32,64,128,1,float16,fp8,0,0.5355573495229086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,8,64,0,1,float16,float16,0,4.020607948303223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,32,64,128,1,fp8,fp8,0,0.7025386492411295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,32,64,0,1,float16,float16,0,2.0871893564860025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,1,64,128,1,float16,float16,0,0.49513065814971924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,8,64,0,1,fp8,fp8,0,3.6682348251342773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,8,64,0,1,float16,fp8,0,4.045605341593425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,1,64,128,1,float16,fp8,0,0.49117334683736164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,32,64,0,1,float16,fp8,0,2.0965919494628906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,32,64,0,1,fp8,fp8,0,1.9629546801249187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,1,64,128,1,fp8,fp8,0,0.6414719820022583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,1,64,0,1,float16,float16,0,2.0614399909973145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,2,64,128,1,float16,float16,0,0.5006933212280273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,2,64,128,1,float16,fp8,0,0.49394134680430096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,1,64,0,1,float16,fp8,0,2.0381387074788413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,1,64,0,1,fp8,fp8,0,1.9013333320617676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,2,64,128,1,fp8,fp8,0,0.6446293195088705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,4,64,128,1,float16,float16,0,0.5033493439356486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,2,64,0,1,float16,float16,0,2.060090700785319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,4,64,128,1,float16,fp8,0,0.4978613456090291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,4,64,128,1,fp8,fp8,0,0.6464320023854574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,2,64,0,1,float16,fp8,0,2.054405371348063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,2,64,0,1,fp8,fp8,0,1.8956906000773113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,4,64,0,1,float16,float16,0,2.0455573399861655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,8,64,128,1,float16,float16,0,0.5087999900182089
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,8,64,128,1,float16,fp8,0,0.5037386814753214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,4,64,0,1,float16,fp8,0,2.0598559379577637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,4,64,0,1,fp8,fp8,0,1.9096959431966145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,8,64,128,1,fp8,fp8,0,0.6543999910354614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,8,64,0,1,float16,float16,0,2.053925355275472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,32,64,128,1,float16,float16,0,0.3550453186035156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,8,64,0,1,float16,fp8,0,2.048101266225179
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,32,64,128,1,fp8,fp8,0,0.43162135283152264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,32,64,128,1,float16,fp8,0,0.35370667775472003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,8,64,0,1,fp8,fp8,0,1.9274400075276692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,32,64,0,1,float16,float16,0,1.156325340270996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,1,64,128,1,float16,float16,0,0.35448535283406574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,32,64,0,1,float16,fp8,0,1.1500213146209717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,1,64,128,1,float16,fp8,0,0.35231999556223553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,32,64,0,1,fp8,fp8,0,1.0202986399332683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,1,64,128,1,fp8,fp8,0,0.43016000588734943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,1,64,0,1,float16,float16,0,1.151594638824463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,2,64,128,1,float16,float16,0,0.3544960021972656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,1,64,0,1,float16,fp8,0,1.1610079606374104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,1,64,0,1,fp8,fp8,0,1.0240853627522786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,2,64,128,1,float16,fp8,0,0.35233068466186523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,2,64,128,1,fp8,fp8,0,0.4322293202082316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,2,64,0,1,float16,float16,0,1.1483360131581624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,2,64,0,1,float16,fp8,0,1.1513386567433674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,4,64,128,1,float16,float16,0,0.3564000129699707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,2,64,0,1,fp8,fp8,0,1.0192906856536865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,4,64,128,1,float16,fp8,0,0.3540746768315633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,4,64,128,1,fp8,fp8,0,0.42918399969736737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,4,64,0,1,fp8,fp8,0,1.0197813510894775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,4,64,0,1,float16,float16,0,1.1595253149668376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,8,64,128,1,float16,float16,0,0.3534986575444539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,4,64,0,1,float16,fp8,0,1.1607413291931152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,8,64,128,1,float16,fp8,0,0.3545866807301839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,8,64,0,1,float16,fp8,0,1.1542293230692546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,8,64,128,1,fp8,fp8,0,0.4308106501897176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,8,64,0,1,float16,float16,0,1.1531093120574951
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,8,64,0,1,fp8,fp8,0,1.0195199648539226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,1,64,128,1,float16,float16,0,2.7648852666219077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,1,64,128,1,float16,fp8,0,2.720149358113607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,1,64,128,1,fp8,fp8,0,3.5879999796549478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,2,64,128,1,float16,float16,0,2.8012587229410806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,2,64,128,1,float16,fp8,0,2.789722760518392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,2,64,128,1,fp8,fp8,0,3.664127985636393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,4,64,128,1,float16,float16,0,2.838677406311035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,1,64,0,1,float16,float16,0,10.413466771443685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,1,64,0,1,fp8,fp8,0,9.47439448038737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,1,64,0,1,float16,fp8,0,10.423088073730469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,2,64,0,1,float16,float16,0,10.531829198201498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,4,64,128,1,float16,fp8,0,2.810842514038086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,2,64,0,1,fp8,fp8,0,9.521562576293945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,4,64,128,1,fp8,fp8,0,3.6969385147094727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,2,64,0,1,float16,fp8,0,10.334869384765625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,8,64,128,1,float16,float16,0,2.885279973347982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,8,64,128,1,float16,fp8,0,2.8359413146972656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,4,64,0,1,float16,float16,0,10.56006940205892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,8,64,128,1,fp8,fp8,0,3.741840044657389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,32,64,128,1,float16,float16,0,1.553925355275472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,4,64,0,1,fp8,fp8,0,9.625354766845703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,32,64,128,1,float16,fp8,0,1.534549395243327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,4,64,0,1,float16,fp8,0,10.545647939046225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,32,64,128,1,fp8,fp8,0,2.0243733723958335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,1,64,128,1,float16,float16,0,1.3865119616190593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,8,64,0,1,float16,float16,0,10.594554901123047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,32,64,0,1,float16,float16,0,5.462501525878906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,8,64,0,1,fp8,fp8,0,9.602426528930664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,1,64,128,1,float16,fp8,0,1.3629333178202312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,8,64,0,1,float16,fp8,0,10.413482666015625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,32,64,0,1,fp8,fp8,0,4.972922643025716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,32,64,0,1,float16,fp8,0,5.3919626871744795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,1,64,128,1,fp8,fp8,0,1.8369867006937664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,2,64,128,1,float16,float16,0,1.4088586171468098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,2,64,128,1,float16,fp8,0,1.3782505989074707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,2,64,128,1,fp8,fp8,0,1.8626880645751953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,1,64,0,1,float16,float16,0,5.322170575459798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,1,64,0,1,fp8,fp8,0,4.764357248942058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,4,64,128,1,float16,float16,0,1.406549294789632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,1,64,0,1,float16,fp8,0,5.270394643147786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,4,64,128,1,float16,fp8,0,1.3990027109781902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,2,64,0,1,float16,float16,0,5.20469856262207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,4,64,128,1,fp8,fp8,0,1.862709363301595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,2,64,0,1,fp8,fp8,0,4.792101224263509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,2,64,0,1,float16,fp8,0,5.2762346267700195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,8,64,128,1,float16,float16,0,1.4345386823018391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,8,64,128,1,float16,fp8,0,1.4095946947733562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,4,64,0,1,float16,float16,0,5.271141370137532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,8,64,128,1,fp8,fp8,0,1.9071839650472004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,4,64,0,1,fp8,fp8,0,4.84446398417155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,4,64,0,1,float16,fp8,0,5.234229405721028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,32,64,128,1,float16,float16,0,0.7876959641774496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,32,64,128,1,float16,fp8,0,0.7949173450469971
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,8,64,0,1,float16,float16,0,5.341925303141276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,32,64,128,1,fp8,fp8,0,1.0467413266499836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,32,64,0,1,float16,float16,0,2.70686403910319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,1,64,128,1,float16,float16,0,0.733568032582601
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,8,64,0,1,float16,fp8,0,5.301786740620931
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,8,64,0,1,fp8,fp8,0,4.833200136820476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,32,64,0,1,float16,fp8,0,2.713258743286133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,1,64,128,1,float16,fp8,0,0.7168800036112467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,32,64,0,1,fp8,fp8,0,2.538431962331136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,1,64,128,1,fp8,fp8,0,0.952128012975057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,1,64,0,1,float16,float16,0,2.6478613217671714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,2,64,128,1,float16,float16,0,0.7305119832356771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,2,64,128,1,float16,fp8,0,0.7155946890513102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,2,64,128,1,fp8,fp8,0,0.9575680096944174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,1,64,0,1,fp8,fp8,0,2.435530662536621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,1,64,0,1,float16,fp8,0,2.6378773053487143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,4,64,128,1,float16,float16,0,0.7294133504231771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,2,64,0,1,float16,float16,0,2.696549415588379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,4,64,128,1,float16,fp8,0,0.7181013425191244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,2,64,0,1,float16,fp8,0,2.6486239433288574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,2,64,0,1,fp8,fp8,0,2.456362724304199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,4,64,128,1,fp8,fp8,0,0.9644800027211508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,8,64,128,1,float16,float16,0,0.7357866764068604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,4,64,0,1,float16,float16,0,2.6692959467569985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,8,64,128,1,float16,fp8,0,0.7333866755167643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,8,64,128,1,fp8,fp8,0,0.9744160175323486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,32,64,128,1,float16,float16,0,0.4297120173772176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,4,64,0,1,float16,fp8,0,2.6235040028889975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,4,64,0,1,fp8,fp8,0,2.461498737335205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,8,64,0,1,float16,float16,0,2.6673971811930337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,32,64,128,1,float16,fp8,0,0.4377760092417399
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,32,64,0,1,float16,float16,0,1.403509298960368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,32,64,128,1,fp8,fp8,0,0.5712800025939941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,32,64,0,1,float16,fp8,0,1.40993070602417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,8,64,0,1,fp8,fp8,0,2.452613353729248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,8,64,0,1,float16,fp8,0,2.6188000043233237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,1,64,128,1,float16,float16,0,0.40462398529052734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,32,64,0,1,fp8,fp8,0,1.33351469039917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,1,64,128,1,float16,fp8,0,0.39628799756368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,1,64,0,1,float16,float16,0,1.3838879267374675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,1,64,128,1,fp8,fp8,0,0.5261226495107015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,1,64,0,1,float16,fp8,0,1.3694079717000325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,1,64,0,1,fp8,fp8,0,1.2763893604278564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,2,64,128,1,float16,float16,0,0.40174400806427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,2,64,128,1,float16,fp8,0,0.39872535069783527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,2,64,128,1,fp8,fp8,0,0.5212693214416504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,2,64,0,1,float16,float16,0,1.393130620320638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,4,64,128,1,float16,float16,0,0.40513066450754803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,2,64,0,1,float16,fp8,0,1.3709492683410645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,2,64,0,1,fp8,fp8,0,1.2861440181732178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,4,64,128,1,float16,fp8,0,0.39857598145802814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,4,64,128,1,fp8,fp8,0,0.5281440019607544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,4,64,0,1,float16,float16,0,1.3803253173828125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,8,64,128,1,float16,float16,0,0.40983466307322186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,4,64,0,1,float16,fp8,0,1.375717322031657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,4,64,0,1,fp8,fp8,0,1.2770666281382244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,8,64,128,1,float16,fp8,0,0.40640532970428467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,8,64,128,1,fp8,fp8,0,0.527946670850118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,8,64,0,1,float16,float16,0,1.3828585942586262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,32,64,128,1,float16,float16,0,0.28911999861399335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,8,64,0,1,fp8,fp8,0,1.2849653561909993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,8,64,0,1,float16,fp8,0,1.3804480234781902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,32,64,0,1,float16,fp8,0,0.7898186842600504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,1,64,128,1,float16,float16,0,0.28888533512751263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,32,64,0,1,fp8,fp8,0,0.7010239760080973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,32,64,128,1,float16,fp8,0,0.28853867451349896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,32,64,0,1,float16,float16,0,0.7946613629659017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,32,64,128,1,fp8,fp8,0,0.3527573347091675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,1,64,128,1,float16,fp8,0,0.28758400678634644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,1,64,0,1,float16,float16,0,0.7944160302480062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,1,64,128,1,fp8,fp8,0,0.3511039813359578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,1,64,0,1,float16,fp8,0,0.8007040023803711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,1,64,0,1,fp8,fp8,0,0.701802651087443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,2,64,128,1,float16,float16,0,0.2870560089747111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,2,64,128,1,float16,fp8,0,0.2882879972457886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,2,64,0,1,float16,float16,0,0.7951680024464926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,2,64,128,1,fp8,fp8,0,0.35257065296173096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,2,64,0,1,float16,fp8,0,0.7938346862792969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,4,64,128,1,float16,float16,0,0.28920533259709674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,2,64,0,1,fp8,fp8,0,0.6988106568654379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,4,64,0,1,fp8,fp8,0,0.7002240022023519
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,4,64,128,1,float16,fp8,0,0.28751466671625775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,4,64,0,1,float16,float16,0,0.7918293476104736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,4,64,128,1,fp8,fp8,0,0.35206401348114014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,4,64,0,1,float16,fp8,0,0.7938880125681559
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,8,64,128,1,float16,float16,0,0.28801600138346356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,8,64,0,1,float16,float16,0,0.7994240125020345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,8,64,128,1,float16,fp8,0,0.2885813315709432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,8,64,128,1,fp8,fp8,0,0.35288532574971515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,8,64,0,1,float16,fp8,0,0.7941493193308512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,8,64,0,1,fp8,fp8,0,0.7022240161895752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,1,64,128,1,float16,float16,0,2.0509866078694663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,1,64,128,1,float16,fp8,0,2.0139946937561035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,1,64,128,1,fp8,fp8,0,2.7056961059570312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,2,64,128,1,float16,float16,0,2.096463998158773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,2,64,128,1,float16,fp8,0,2.0745439529418945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,1,64,0,1,float16,float16,0,6.215237299601237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,2,64,128,1,fp8,fp8,0,2.7531468073527017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,1,64,0,1,fp8,fp8,0,5.6670347849528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,1,64,0,1,float16,fp8,0,6.096096038818359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,2,64,0,1,float16,float16,0,6.291765213012695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,4,64,128,1,float16,float16,0,2.127994696299235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,4,64,128,1,float16,fp8,0,2.091562589009603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,2,64,0,1,float16,fp8,0,6.310618718465169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,4,64,128,1,fp8,fp8,0,2.760469436645508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,2,64,0,1,fp8,fp8,0,5.6756642659505205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,8,64,128,1,float16,float16,0,2.142687956492106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,4,64,0,1,float16,float16,0,6.2121016184488935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,8,64,128,1,float16,fp8,0,2.1277440388997397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,8,64,128,1,fp8,fp8,0,2.80291748046875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,4,64,0,1,fp8,fp8,0,5.766192118326823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,4,64,0,1,float16,fp8,0,6.256682713826497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,32,64,128,1,float16,float16,0,1.1554880142211914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,32,64,128,1,float16,fp8,0,1.1436693668365479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,32,64,128,1,fp8,fp8,0,1.5168639818827312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,32,64,0,1,float16,float16,0,3.220143953959147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,8,64,0,1,float16,float16,0,6.346415837605794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,8,64,0,1,float16,fp8,0,6.311642964680989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,1,64,128,1,float16,float16,0,1.034821351369222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,8,64,0,1,fp8,fp8,0,5.772799809773763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,1,64,128,1,float16,fp8,0,1.017093340555827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,32,64,0,1,fp8,fp8,0,3.033562660217285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,32,64,0,1,float16,fp8,0,3.260314623514811
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,1,64,128,1,fp8,fp8,0,1.3671092987060547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,1,64,0,1,float16,float16,0,3.0753653844197593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,2,64,128,1,float16,float16,0,1.0418293476104736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,2,64,128,1,float16,fp8,0,1.031551996866862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,2,64,128,1,fp8,fp8,0,1.3854187329610188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,1,64,0,1,float16,fp8,0,3.0841547648111978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,1,64,0,1,fp8,fp8,0,2.8562987645467124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,4,64,128,1,float16,float16,0,1.046341339747111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,2,64,0,1,float16,float16,0,3.0855468114217124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,4,64,128,1,float16,fp8,0,1.0394133726755779
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,2,64,0,1,float16,fp8,0,3.084730784098307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,4,64,128,1,fp8,fp8,0,1.3912426630655925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,2,64,0,1,fp8,fp8,0,2.865637461344401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,4,64,0,1,float16,float16,0,3.1574185689290366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,4,64,0,1,float16,fp8,0,3.1092745463053384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,4,64,0,1,fp8,fp8,0,2.881381352742513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,8,64,128,1,float16,float16,0,1.0594186782836914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,8,64,128,1,float16,fp8,0,1.0556639830271404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,8,64,128,1,fp8,fp8,0,1.4121813774108887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,32,64,128,1,float16,float16,0,0.5997706651687622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,8,64,0,1,float16,float16,0,3.135893185933431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,32,64,128,1,float16,fp8,0,0.6022293170293173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,32,64,0,1,float16,float16,0,1.6215839385986328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,32,64,128,1,fp8,fp8,0,0.7902186711629232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,8,64,0,1,float16,fp8,0,3.0862932205200195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,8,64,0,1,fp8,fp8,0,2.932901382446289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,32,64,0,1,float16,fp8,0,1.646176020304362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,1,64,128,1,float16,float16,0,0.5535253286361694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,32,64,0,1,fp8,fp8,0,1.5550187428792317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,1,64,0,1,float16,float16,0,1.5835572878519695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,1,64,128,1,float16,fp8,0,0.5379679997762045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,1,64,128,1,fp8,fp8,0,0.7221386432647705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,2,64,128,1,float16,float16,0,0.5504853328069051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,1,64,0,1,float16,fp8,0,1.577328046162923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,2,64,128,1,float16,fp8,0,0.5456106662750244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,1,64,0,1,fp8,fp8,0,1.488917350769043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,2,64,0,1,float16,float16,0,1.603530724843343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,2,64,128,1,fp8,fp8,0,0.7195359865824381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,2,64,0,1,fp8,fp8,0,1.4764426549275715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,4,64,128,1,float16,fp8,0,0.5485493342081705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,2,64,0,1,float16,fp8,0,1.5760265986124675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,4,64,128,1,float16,float16,0,0.5525973240534464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,4,64,128,1,fp8,fp8,0,0.7258880138397217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,4,64,0,1,float16,float16,0,1.5885920524597168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,8,64,128,1,float16,float16,0,0.558357318242391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,4,64,0,1,float16,fp8,0,1.5818880399068196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,8,64,128,1,float16,fp8,0,0.5516693194707235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,4,64,0,1,fp8,fp8,0,1.4688746134440105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,8,64,128,1,fp8,fp8,0,0.7338720162709554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,8,64,0,1,float16,float16,0,1.6074612935384114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,32,64,128,1,float16,float16,0,0.3264266649881999
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,32,64,128,1,float16,fp8,0,0.3315626581509908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,8,64,0,1,float16,fp8,0,1.5903840065002441
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,32,64,0,1,float16,float16,0,0.8654452959696451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,1,64,128,1,float16,float16,0,0.3083626627922058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,8,64,0,1,fp8,fp8,0,1.4891200065612793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,32,64,128,1,fp8,fp8,0,0.4331466754277547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,32,64,0,1,float16,fp8,0,0.8658026854197184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,32,64,0,1,fp8,fp8,0,0.7859413623809814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,1,64,128,1,float16,fp8,0,0.3041386604309082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,1,64,0,1,float16,float16,0,0.8377119700113932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,1,64,128,1,fp8,fp8,0,0.4019893407821655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,1,64,0,1,float16,fp8,0,0.8341173330942789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,1,64,0,1,fp8,fp8,0,0.7512906392415365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,2,64,128,1,float16,float16,0,0.30716800689697266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,2,64,128,1,float16,fp8,0,0.30636799335479736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,2,64,0,1,float16,float16,0,0.8396746317545573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,2,64,128,1,fp8,fp8,0,0.4014879862467448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,2,64,0,1,float16,fp8,0,0.836031993230184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,4,64,128,1,float16,float16,0,0.31036800146102905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,4,64,128,1,fp8,fp8,0,0.40320531527201336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,2,64,0,1,fp8,fp8,0,0.759493350982666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,4,64,0,1,float16,float16,0,0.8424479961395264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,4,64,128,1,float16,fp8,0,0.30523733297983807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,4,64,0,1,float16,fp8,0,0.8442613283793131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,4,64,0,1,fp8,fp8,0,0.7545973459879557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,8,64,128,1,fp8,fp8,0,0.40731199582417804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,8,64,128,1,float16,float16,0,0.3123679955800374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,8,64,128,1,float16,fp8,0,0.3107146620750427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,8,64,0,1,float16,float16,0,0.8450506528218588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,32,64,128,1,float16,float16,0,0.22244266668955484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,8,64,0,1,float16,fp8,0,0.8403893311818441
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,32,64,0,1,float16,float16,0,0.47388267517089844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,8,64,0,1,fp8,fp8,0,0.7625760237375895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,32,64,128,1,float16,fp8,0,0.22405334313710532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,32,64,128,1,fp8,fp8,0,0.2733866572380066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,32,64,0,1,fp8,fp8,0,0.4405173460642497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,32,64,0,1,float16,fp8,0,0.4742773373921712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,1,64,128,1,float16,float16,0,0.22161600987116495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,1,64,0,1,float16,float16,0,0.4707466761271159
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,1,64,0,1,fp8,fp8,0,0.4407466650009155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,2,64,128,1,float16,float16,0,0.22155733903249106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,1,64,128,1,float16,fp8,0,0.22267200549443564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,1,64,128,1,fp8,fp8,0,0.27190399169921875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,1,64,0,1,float16,fp8,0,0.47202134132385254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,2,64,0,1,float16,float16,0,0.46996267636617023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,2,64,128,1,float16,fp8,0,0.22190399964650473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,2,64,128,1,fp8,fp8,0,0.27153066794077557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,2,64,0,1,float16,fp8,0,0.47022398312886554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,2,64,0,1,fp8,fp8,0,0.44275200366973877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,4,64,128,1,float16,float16,0,0.222543994585673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,4,64,128,1,float16,fp8,0,0.22291199366251627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,4,64,0,1,float16,float16,0,0.4718559980392456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,4,64,128,1,fp8,fp8,0,0.2731893261273702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,4,64,0,1,float16,fp8,0,0.471343994140625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,4,64,0,1,fp8,fp8,0,0.4408426682154338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,8,64,128,1,float16,float16,0,0.22217067082722983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,8,64,0,1,float16,float16,0,0.4729439814885457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,8,64,128,1,float16,fp8,0,0.22338666518529257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,8,64,128,1,fp8,fp8,0,0.27261332670847577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,8,64,0,1,float16,fp8,0,0.47164801756540936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,8,64,0,1,fp8,fp8,0,0.4426453510920207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,1,64,128,1,float16,float16,0,2.714357376098633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,1,64,128,1,float16,fp8,0,2.675919850667318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,1,64,128,1,fp8,fp8,0,3.5611359278361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,2,64,128,1,float16,float16,0,2.7751200993855796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,1,64,0,1,float16,float16,0,6.064383824666341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,2,64,128,1,float16,fp8,0,2.743098576863607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,1,64,0,1,fp8,fp8,0,5.561557133992513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,1,64,0,1,float16,fp8,0,6.093701044718425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,2,64,128,1,fp8,fp8,0,3.6061013539632163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,2,64,0,1,float16,float16,0,6.113440195719401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,4,64,128,1,float16,float16,0,2.810725212097168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,4,64,128,1,float16,fp8,0,2.7758026123046875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,2,64,0,1,fp8,fp8,0,5.650608062744141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,2,64,0,1,float16,fp8,0,6.0784962972005205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,4,64,128,1,fp8,fp8,0,3.653125445048014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,8,64,128,1,float16,float16,0,2.853893280029297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,4,64,0,1,float16,float16,0,6.1343841552734375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,4,64,0,1,fp8,fp8,0,5.725077311197917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,4,64,0,1,float16,fp8,0,6.1438242594401045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,8,64,128,1,float16,fp8,0,2.8097972869873047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,8,64,128,1,fp8,fp8,0,3.7026240030924478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,32,64,128,1,float16,float16,0,1.542234738667806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,32,64,128,1,float16,fp8,0,1.5236053466796875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,8,64,0,1,float16,float16,0,6.157583872477214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,32,64,128,1,fp8,fp8,0,1.9964213371276855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,32,64,0,1,float16,float16,0,3.2074667612711587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,8,64,0,1,float16,fp8,0,6.235488255818685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,1,64,128,1,float16,float16,0,1.3536267280578613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,32,64,0,1,float16,fp8,0,3.1840426127115884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,8,64,0,1,fp8,fp8,0,5.711642583211263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,1,64,128,1,float16,fp8,0,1.3224159876505535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,32,64,0,1,fp8,fp8,0,3.0393813451131186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,1,64,128,1,fp8,fp8,0,1.7971733411153157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,1,64,0,1,float16,float16,0,3.017519950866699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,2,64,128,1,float16,float16,0,1.3786932627360027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,1,64,0,1,float16,fp8,0,2.9952214558919272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,2,64,128,1,float16,fp8,0,1.3514080047607422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,1,64,0,1,fp8,fp8,0,2.8190720876057944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,2,64,128,1,fp8,fp8,0,1.8221279780069988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,2,64,0,1,float16,float16,0,3.012666702270508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,4,64,128,1,float16,float16,0,1.388362725575765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,2,64,0,1,float16,fp8,0,3.022869427998861
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,2,64,0,1,fp8,fp8,0,2.8579734166463218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,4,64,128,1,float16,fp8,0,1.360746701558431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,4,64,128,1,fp8,fp8,0,1.8376693725585938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,4,64,0,1,float16,float16,0,3.0629920959472656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,8,64,128,1,float16,float16,0,1.4068214098612468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,8,64,128,1,float16,fp8,0,1.3918879826863606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,4,64,0,1,fp8,fp8,0,2.8463786443074546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,4,64,0,1,float16,fp8,0,3.0073493321736655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,8,64,128,1,fp8,fp8,0,1.8538026809692383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,32,64,128,1,float16,float16,0,0.7657919724782308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,8,64,0,1,float16,float16,0,3.0827948252360025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,32,64,128,1,float16,fp8,0,0.7626667022705078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,32,64,0,1,float16,float16,0,1.5936106046040852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,8,64,0,1,float16,fp8,0,3.0797707239786782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,32,64,128,1,fp8,fp8,0,1.0076746940612793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,8,64,0,1,fp8,fp8,0,2.897189458211263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,1,64,128,1,float16,float16,0,0.7003946304321289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,32,64,0,1,float16,fp8,0,1.6006293296813965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,32,64,0,1,fp8,fp8,0,1.5289440155029297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,1,64,128,1,fp8,fp8,0,0.9114399751027426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,1,64,0,1,float16,float16,0,1.5305172602335613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,2,64,128,1,float16,float16,0,0.6993333498636881
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,1,64,0,1,float16,fp8,0,1.530186653137207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,1,64,128,1,float16,fp8,0,0.6876373291015625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,1,64,0,1,fp8,fp8,0,1.4368267059326172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,2,64,128,1,fp8,fp8,0,0.9177652994791666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,2,64,128,1,float16,fp8,0,0.6865386962890625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,2,64,0,1,float16,float16,0,1.5346666971842449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,2,64,0,1,float16,fp8,0,1.5252000490824382
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,4,64,128,1,float16,float16,0,0.7056960264841715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,4,64,128,1,float16,fp8,0,0.6928106943766276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,2,64,0,1,fp8,fp8,0,1.4407572746276855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,4,64,0,1,float16,float16,0,1.550048033396403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,4,64,128,1,fp8,fp8,0,0.9224426746368408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,8,64,128,1,float16,float16,0,0.7124799887339274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,4,64,0,1,float16,fp8,0,1.5311946868896484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,4,64,0,1,fp8,fp8,0,1.4396905899047852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,8,64,128,1,float16,fp8,0,0.6969866752624512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,8,64,128,1,fp8,fp8,0,0.9442773660024008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,8,64,0,1,float16,float16,0,1.5460586547851562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,32,64,128,1,float16,float16,0,0.4036159912745158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,32,64,128,1,float16,fp8,0,0.40619198481241864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,32,64,0,1,float16,float16,0,0.8328479925791422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,8,64,0,1,float16,fp8,0,1.5369280179341633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,8,64,0,1,fp8,fp8,0,1.4592053095499675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,32,64,128,1,fp8,fp8,0,0.5349226792653402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,32,64,0,1,float16,fp8,0,0.8377866744995117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,32,64,0,1,fp8,fp8,0,0.7974080244700114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,1,64,128,1,float16,float16,0,0.37510399023691815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,1,64,128,1,float16,fp8,0,0.36794666449228924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,1,64,0,1,fp8,fp8,0,0.7466186682383219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,1,64,0,1,float16,float16,0,0.7996160189310709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,1,64,128,1,fp8,fp8,0,0.48811201254526776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,1,64,0,1,float16,fp8,0,0.7943253517150879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,2,64,128,1,float16,float16,0,0.37677331765492755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,2,64,128,1,float16,fp8,0,0.3720266819000244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,2,64,0,1,float16,float16,0,0.8012906710306803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,2,64,128,1,fp8,fp8,0,0.49133865038553876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,2,64,0,1,float16,fp8,0,0.7949066956837972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,2,64,0,1,fp8,fp8,0,0.7502240339914957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,4,64,128,1,float16,float16,0,0.3781973520914714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,4,64,128,1,float16,fp8,0,0.3731253147125244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,4,64,0,1,float16,fp8,0,0.8009119828542074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,4,64,0,1,float16,float16,0,0.8031626542409261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,4,64,128,1,fp8,fp8,0,0.49586665630340576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,8,64,128,1,float16,float16,0,0.3784960110982259
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,4,64,0,1,fp8,fp8,0,0.7521866957346598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,8,64,128,1,float16,fp8,0,0.37703998883565265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,8,64,0,1,float16,float16,0,0.8066346645355225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,8,64,128,1,fp8,fp8,0,0.5021173159281412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,8,64,0,1,float16,fp8,0,0.8024319807688395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,32,64,128,1,float16,float16,0,0.226800004641215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,8,64,0,1,fp8,fp8,0,0.7582560380299886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,32,64,0,1,float16,float16,0,0.4458560148874919
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,32,64,128,1,float16,fp8,0,0.23068799575169882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,1,64,0,1,float16,float16,0,0.4371253252029419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,32,64,128,1,fp8,fp8,0,0.30155734221140545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,32,64,0,1,float16,fp8,0,0.4522240161895752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,1,64,0,1,fp8,fp8,0,0.3909440040588379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,32,64,0,1,fp8,fp8,0,0.4100639820098877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,1,64,128,1,float16,float16,0,0.21266667048136392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,1,64,128,1,float16,fp8,0,0.20907733837763467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,1,64,128,1,fp8,fp8,0,0.277344008286794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,1,64,0,1,float16,fp8,0,0.43330665429433185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,2,64,128,1,float16,float16,0,0.21374932924906412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,2,64,0,1,float16,float16,0,0.43450133005777997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,2,64,128,1,float16,fp8,0,0.21150932709376016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,2,64,128,1,fp8,fp8,0,0.28108266989390057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,2,64,0,1,float16,fp8,0,0.4333386818567912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,4,64,128,1,fp8,fp8,0,0.27967466910680133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,2,64,0,1,fp8,fp8,0,0.3882186810175578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,4,64,128,1,float16,float16,0,0.2140000065167745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,4,64,0,1,float16,float16,0,0.4368319908777873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,4,64,128,1,float16,fp8,0,0.2122933268547058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,8,64,0,1,float16,float16,0,0.43862398465474445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,4,64,0,1,float16,fp8,0,0.43672533830006915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,4,64,0,1,fp8,fp8,0,0.3906986713409424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,8,64,128,1,float16,float16,0,0.2160373330116272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,8,64,128,1,float16,fp8,0,0.21374932924906412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,8,64,128,1,fp8,fp8,0,0.2825919985771179
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,8,64,0,1,float16,fp8,0,0.43827199935913086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,8,64,0,1,fp8,fp8,0,0.39410134156545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,32,64,128,1,float16,float16,0,0.15834133823712668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,32,64,0,1,float16,float16,0,0.2577280004819234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,32,64,128,1,float16,fp8,0,0.1579253375530243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,32,64,128,1,fp8,fp8,0,0.19516799847284952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,32,64,0,1,float16,fp8,0,0.25700799624125165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,32,64,0,1,fp8,fp8,0,0.24186132351557413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,1,64,128,1,float16,float16,0,0.15495999654134116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,1,64,0,1,float16,float16,0,0.2550613284111023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,1,64,128,1,float16,fp8,0,0.15448533495267233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,1,64,128,1,fp8,fp8,0,0.18620800971984863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,1,64,0,1,float16,fp8,0,0.2544959982236226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,1,64,0,1,fp8,fp8,0,0.24008532365163168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,2,64,128,1,float16,float16,0,0.15495466192563376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,2,64,0,1,float16,float16,0,0.25433599948883057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,2,64,128,1,float16,fp8,0,0.1564586659272512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,2,64,128,1,fp8,fp8,0,0.18890132506688437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,2,64,0,1,float16,fp8,0,0.2542720039685567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,2,64,0,1,fp8,fp8,0,0.2405280073483785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,4,64,128,1,float16,float16,0,0.15573867162068686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,4,64,0,1,float16,float16,0,0.25464532772699994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,4,64,128,1,float16,fp8,0,0.15545599659283957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,4,64,128,1,fp8,fp8,0,0.19273600975672403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,4,64,0,1,float16,fp8,0,0.2552799979845683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,4,64,0,1,fp8,fp8,0,0.24145066738128662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,8,64,128,1,float16,float16,0,0.15667200088500977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,8,64,0,1,float16,float16,0,0.2560960054397583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,8,64,128,1,float16,fp8,0,0.15591466426849365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,8,64,128,1,fp8,fp8,0,0.19330666462580362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,8,64,0,1,float16,fp8,0,0.25593600670496625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,8,64,0,1,fp8,fp8,0,0.24222399791081747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,1,64,128,1,float16,float16,0,2.0117600758870444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,1,64,0,1,float16,float16,0,3.7411254247029624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,1,64,128,1,float16,fp8,0,1.979541301727295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,1,64,128,1,fp8,fp8,0,2.6558292706807456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,2,64,128,1,float16,float16,0,2.0564640363057456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,1,64,0,1,float16,fp8,0,3.704970677693685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,1,64,0,1,fp8,fp8,0,3.457882563273112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,2,64,128,1,float16,fp8,0,2.017925262451172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,2,64,128,1,fp8,fp8,0,2.7255306243896484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,2,64,0,1,float16,float16,0,3.8230454126993814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,4,64,128,1,float16,float16,0,2.068357308705648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,2,64,0,1,float16,fp8,0,3.7264906565348306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,2,64,0,1,fp8,fp8,0,3.5310932795206704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,4,64,128,1,float16,fp8,0,2.036095937093099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,4,64,0,1,float16,float16,0,3.815349260965983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,4,64,128,1,fp8,fp8,0,2.695749282836914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,8,64,128,1,float16,float16,0,2.093850612640381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,4,64,0,1,float16,fp8,0,3.775360107421875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,4,64,0,1,fp8,fp8,0,3.5539306004842124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,8,64,128,1,float16,fp8,0,2.0790346463521323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,8,64,128,1,fp8,fp8,0,2.739434560139974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,32,64,128,1,float16,float16,0,1.137168010075887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,8,64,0,1,float16,float16,0,3.809424082438151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,32,64,128,1,float16,fp8,0,1.1280319690704346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,8,64,0,1,float16,fp8,0,3.789210637410482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,32,64,128,1,fp8,fp8,0,1.4980799357096355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,32,64,0,1,float16,float16,0,1.998095989227295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,8,64,0,1,fp8,fp8,0,3.5613012313842773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,1,64,128,1,float16,float16,0,1.0154399871826172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,32,64,0,1,float16,fp8,0,1.9699145952860515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,32,64,0,1,fp8,fp8,0,1.904047966003418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,1,64,128,1,float16,fp8,0,0.9948639869689941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,1,64,0,1,float16,float16,0,1.8716799418131511
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,1,64,128,1,fp8,fp8,0,1.3335572878519695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,1,64,0,1,float16,fp8,0,1.8518719673156738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,2,64,128,1,float16,float16,0,1.023792028427124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,1,64,0,1,fp8,fp8,0,1.7363573710123699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,2,64,128,1,float16,fp8,0,1.0020426909128826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,2,64,0,1,float16,float16,0,1.8654932975769043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,2,64,128,1,fp8,fp8,0,1.3482507069905598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,2,64,0,1,float16,fp8,0,1.8510133425394695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,2,64,0,1,fp8,fp8,0,1.7601866722106934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,4,64,128,1,float16,float16,0,1.0280799865722656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,4,64,128,1,float16,fp8,0,1.0083946386973064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,4,64,0,1,float16,float16,0,1.8870399792989094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,4,64,128,1,fp8,fp8,0,1.351626714070638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,8,64,128,1,float16,float16,0,1.0320693651835124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,4,64,0,1,float16,fp8,0,1.8555413881937664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,4,64,0,1,fp8,fp8,0,1.7722667058308919
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,8,64,128,1,float16,fp8,0,1.0268212954203289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,8,64,0,1,float16,float16,0,1.8889600435892742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,8,64,128,1,fp8,fp8,0,1.3700106938680012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,32,64,128,1,float16,float16,0,0.574282685915629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,32,64,128,1,float16,fp8,0,0.578165332476298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,8,64,0,1,float16,fp8,0,1.8719360033671062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,32,64,0,1,float16,float16,0,1.0023252964019775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,8,64,0,1,fp8,fp8,0,1.7821280161539714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,32,64,128,1,fp8,fp8,0,0.7668266296386719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,32,64,0,1,float16,fp8,0,1.0140746434529622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,1,64,128,1,float16,float16,0,0.5242933432261149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,32,64,0,1,fp8,fp8,0,0.9628000259399414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,1,64,128,1,float16,fp8,0,0.5165493488311768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,1,64,0,1,float16,float16,0,0.9570079644521078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,1,64,128,1,fp8,fp8,0,0.6900266806284586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,1,64,0,1,float16,fp8,0,0.9482613404591879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,1,64,0,1,fp8,fp8,0,0.8986240228017172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,2,64,128,1,float16,float16,0,0.5307679971059164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,2,64,0,1,float16,float16,0,0.9614986578623453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,2,64,128,1,float16,fp8,0,0.5204266707102457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,2,64,128,1,fp8,fp8,0,0.6945333480834961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,2,64,0,1,float16,fp8,0,0.9562933444976807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,2,64,0,1,fp8,fp8,0,0.8979146480560303
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,4,64,128,1,float16,float16,0,0.5311946471532186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,4,64,128,1,float16,fp8,0,0.5258506536483765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,4,64,0,1,float16,float16,0,0.9697706699371338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,4,64,128,1,fp8,fp8,0,0.6990986665089926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,4,64,0,1,float16,fp8,0,0.956010659535726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,8,64,128,1,float16,float16,0,0.5353120168050131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,4,64,0,1,fp8,fp8,0,0.9006826877593994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,8,64,0,1,float16,float16,0,0.9663200378417969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,8,64,128,1,float16,fp8,0,0.5324639876683553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,8,64,128,1,fp8,fp8,0,0.7110719680786133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,32,64,128,1,float16,float16,0,0.3089279929796855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,8,64,0,1,float16,fp8,0,0.9674879709879557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,8,64,0,1,fp8,fp8,0,0.9119306405385336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,32,64,0,1,float16,float16,0,0.5268053213755289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,32,64,128,1,float16,fp8,0,0.3110613425572713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,32,64,128,1,fp8,fp8,0,0.41097601254781085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,32,64,0,1,float16,fp8,0,0.5341600179672241
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,32,64,0,1,fp8,fp8,0,0.4965440034866333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,1,64,128,1,float16,float16,0,0.28641066948572796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,1,64,0,1,float16,float16,0,0.5060746669769287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,1,64,128,1,float16,fp8,0,0.2791200081507365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,1,64,128,1,fp8,fp8,0,0.37457601229349774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,1,64,0,1,float16,fp8,0,0.5027253230412801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,1,64,0,1,fp8,fp8,0,0.46278401215871173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,2,64,128,1,float16,float16,0,0.2863466739654541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,2,64,0,1,float16,float16,0,0.5120213429133097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,2,64,128,1,float16,fp8,0,0.2809973359107971
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,2,64,128,1,fp8,fp8,0,0.3766453266143799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,2,64,0,1,float16,fp8,0,0.5018453200658163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,2,64,0,1,fp8,fp8,0,0.4628533522288005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,4,64,128,1,float16,float16,0,0.28893333673477173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,4,64,128,1,float16,fp8,0,0.2850773334503174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,4,64,0,1,float16,float16,0,0.510591983795166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,4,64,128,1,fp8,fp8,0,0.37949331601460773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,4,64,0,1,float16,fp8,0,0.5051946640014648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,4,64,0,1,fp8,fp8,0,0.46143468221028644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,8,64,128,1,float16,float16,0,0.2913600007692973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,8,64,0,1,float16,float16,0,0.5146239995956421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,8,64,0,1,fp8,fp8,0,0.4666293462117513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,8,64,128,1,float16,fp8,0,0.28964799642562866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,8,64,128,1,fp8,fp8,0,0.3832533359527588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,8,64,0,1,float16,fp8,0,0.510095993677775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,32,64,0,1,float16,float16,0,0.2756906747817993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,32,64,128,1,float16,float16,0,0.17633066574732462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,32,64,128,1,float16,fp8,0,0.1797013282775879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,32,64,128,1,fp8,fp8,0,0.23641065756479898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,32,64,0,1,float16,fp8,0,0.2802613377571106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,32,64,0,1,fp8,fp8,0,0.2690773407618205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,1,64,128,1,float16,float16,0,0.1641759971777598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,1,64,0,1,float16,float16,0,0.26523200670878094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,1,64,128,1,float16,fp8,0,0.16195733348528543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,1,64,128,1,fp8,fp8,0,0.21687465906143188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,1,64,0,1,float16,fp8,0,0.26346667607625324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,1,64,0,1,fp8,fp8,0,0.25177067518234253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,2,64,128,1,float16,float16,0,0.16458666324615479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,2,64,0,1,float16,float16,0,0.26579733689626056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,2,64,128,1,float16,fp8,0,0.16338133811950684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,2,64,128,1,fp8,fp8,0,0.21701333920160928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,2,64,0,1,float16,fp8,0,0.2626933256785075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,2,64,0,1,fp8,fp8,0,0.2509066661198934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,4,64,128,1,float16,float16,0,0.16565866271654764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,4,64,128,1,float16,fp8,0,0.16366400321324667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,4,64,0,1,float16,float16,0,0.26583999395370483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,4,64,128,1,fp8,fp8,0,0.21873599290847778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,4,64,0,1,float16,fp8,0,0.2640053431193034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,4,64,0,1,fp8,fp8,0,0.2531893253326416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,8,64,128,1,float16,float16,0,0.16631999611854553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,8,64,0,1,float16,float16,0,0.2675039966901143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,8,64,128,1,float16,fp8,0,0.16768000523249307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,8,64,128,1,fp8,fp8,0,0.219925324122111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,8,64,0,1,float16,fp8,0,0.26816533009211224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,8,64,0,1,fp8,fp8,0,0.2564319968223572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,32,64,128,1,float16,float16,0,0.12490133444468181
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,32,64,0,1,float16,float16,0,0.1727786660194397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,32,64,128,1,float16,fp8,0,0.12412800391515096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,32,64,128,1,fp8,fp8,0,0.15558933218320212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,32,64,0,1,float16,fp8,0,0.1735573410987854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,32,64,0,1,fp8,fp8,0,0.16338666280110678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,1,64,128,1,float16,float16,0,0.12138133247693379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,1,64,0,1,float16,float16,0,0.17006399234135947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,1,64,128,1,float16,fp8,0,0.12130666772524516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,1,64,128,1,fp8,fp8,0,0.14408000310262045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,1,64,0,1,float16,fp8,0,0.16894400119781494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,1,64,0,1,fp8,fp8,0,0.16150933504104614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,2,64,128,1,float16,float16,0,0.12521066268285116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,2,64,0,1,float16,float16,0,0.16889599959055582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,2,64,128,1,float16,fp8,0,0.12153066198031108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,4,64,128,1,float16,fp8,0,0.12165866295496623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,2,64,128,1,fp8,fp8,0,0.14896000425020853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,2,64,0,1,float16,fp8,0,0.1696853240331014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,2,64,0,1,fp8,fp8,0,0.16181866327921549
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,4,64,128,1,float16,float16,0,0.12069333593050639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,4,64,0,1,float16,float16,0,0.16990933815638223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,4,64,128,1,fp8,fp8,0,0.14717866977055868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,4,64,0,1,float16,fp8,0,0.17065600554148355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,4,64,0,1,fp8,fp8,0,0.16215999921162924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,8,64,128,1,float16,float16,0,0.12196266651153564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,8,64,0,1,float16,float16,0,0.17082667350769043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,8,64,128,1,float16,fp8,0,0.12195733189582825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,8,64,128,1,fp8,fp8,0,0.15313067038853964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,8,64,0,1,float16,fp8,0,0.17084799210230509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,8,64,0,1,fp8,fp8,0,0.16219733158747354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,1,64,128,1,float16,float16,0,2.6869331995646157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,1,64,128,1,float16,fp8,0,2.6392265955607095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,1,64,0,1,float16,float16,0,3.9093119303385415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,1,64,128,1,fp8,fp8,0,3.457610766092936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,2,64,128,1,float16,float16,0,2.653781255086263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,1,64,0,1,fp8,fp8,0,3.621210734049479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,1,64,0,1,float16,fp8,0,3.873546600341797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,2,64,0,1,float16,float16,0,3.90396785736084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,2,64,128,1,float16,fp8,0,2.6399787267049155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,2,64,128,1,fp8,fp8,0,3.5504531860351562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,4,64,128,1,float16,float16,0,2.6895198822021484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,2,64,0,1,fp8,fp8,0,3.680357297261556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,2,64,0,1,float16,fp8,0,3.8897972106933594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,4,64,128,1,float16,fp8,0,2.6700318654378257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,4,64,0,1,float16,float16,0,3.9186986287434897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,4,64,128,1,fp8,fp8,0,3.566608111063639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,4,64,0,1,float16,fp8,0,3.908773422241211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,8,64,128,1,float16,float16,0,2.7387253443400064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,4,64,0,1,fp8,fp8,0,3.7017974853515625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,8,64,128,1,float16,fp8,0,2.7130346298217773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,8,64,0,1,float16,float16,0,4.002367973327637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,8,64,128,1,fp8,fp8,0,3.5809494654337564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,32,64,128,1,float16,float16,0,1.5004266103108723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,32,64,128,1,float16,fp8,0,1.4849173227945964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,8,64,0,1,float16,fp8,0,3.969162623087565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,32,64,128,1,fp8,fp8,0,1.957765261332194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,32,64,0,1,float16,float16,0,2.120021343231201
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,32,64,0,1,float16,fp8,0,2.1227146784464517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,8,64,0,1,fp8,fp8,0,3.743743896484375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,1,64,128,1,float16,float16,0,1.3240533669789631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,32,64,0,1,fp8,fp8,0,2.043743928273519
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,1,64,128,1,float16,fp8,0,1.3115519682566326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,1,64,0,1,float16,float16,0,1.9354133605957031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,1,64,128,1,fp8,fp8,0,1.731061299641927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,1,64,0,1,float16,fp8,0,1.913658618927002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,2,64,128,1,float16,float16,0,1.3370614051818848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,1,64,0,1,fp8,fp8,0,1.8294399579366047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,2,64,128,1,float16,fp8,0,1.3177119890848796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,2,64,0,1,float16,float16,0,1.9537332852681477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,2,64,128,1,fp8,fp8,0,1.7743733723958333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,2,64,0,1,float16,fp8,0,1.927029291788737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,4,64,128,1,float16,float16,0,1.3433173497517903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,2,64,0,1,fp8,fp8,0,1.8273599942525227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,4,64,128,1,float16,fp8,0,1.3397386868794758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,4,64,0,1,float16,float16,0,1.9618293444315593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,4,64,128,1,fp8,fp8,0,1.7595252990722656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,4,64,0,1,float16,fp8,0,1.94378662109375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,8,64,128,1,float16,float16,0,1.3544692993164062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,4,64,0,1,fp8,fp8,0,1.860581398010254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,8,64,128,1,float16,fp8,0,1.342890739440918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,8,64,0,1,float16,float16,0,1.9725066820780437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,32,64,128,1,float16,float16,0,0.7517066796620687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,8,64,128,1,fp8,fp8,0,1.798682689666748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,32,64,128,1,float16,fp8,0,0.7503573099772135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,32,64,0,1,float16,float16,0,1.0601812998453777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,8,64,0,1,float16,fp8,0,1.962490717569987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,8,64,0,1,fp8,fp8,0,1.8615946769714355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,32,64,128,1,fp8,fp8,0,0.9803199768066406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,1,64,128,1,float16,float16,0,0.6813546816507975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,1,64,128,1,float16,fp8,0,0.6621973514556885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,32,64,0,1,float16,fp8,0,1.059007962544759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,32,64,0,1,fp8,fp8,0,1.0184906323750813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,1,64,0,1,float16,float16,0,0.994208017985026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,1,64,128,1,fp8,fp8,0,0.8923253218332926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,1,64,0,1,float16,fp8,0,0.9756426811218262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,1,64,0,1,fp8,fp8,0,0.923354705174764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,2,64,128,1,float16,float16,0,0.6807466348012289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,2,64,0,1,float16,float16,0,0.988917350769043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,2,64,128,1,float16,fp8,0,0.668842633565267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,2,64,128,1,fp8,fp8,0,0.8946293195088705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,2,64,0,1,float16,fp8,0,0.9865972995758057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,2,64,0,1,fp8,fp8,0,0.9294880231221517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,4,64,128,1,float16,float16,0,0.6828052997589111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,4,64,0,1,float16,float16,0,1.0012853145599365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,4,64,128,1,float16,fp8,0,0.6749493281046549
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,4,64,128,1,fp8,fp8,0,0.8961493174235026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,4,64,0,1,float16,fp8,0,0.9825546741485596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,8,64,128,1,float16,float16,0,0.6930027008056641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,4,64,0,1,fp8,fp8,0,0.9343679745992025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,8,64,0,1,float16,float16,0,1.0052213668823242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,8,64,128,1,float16,fp8,0,0.6838506857554117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,8,64,128,1,fp8,fp8,0,0.9095786412556967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,32,64,128,1,float16,float16,0,0.3893226782480876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,8,64,0,1,float16,fp8,0,1.0021386941274006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,8,64,0,1,fp8,fp8,0,0.946074644724528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,32,64,0,1,float16,float16,0,0.5511893431345621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,32,64,128,1,float16,fp8,0,0.39283732573191327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,32,64,128,1,fp8,fp8,0,0.5189173221588135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,32,64,0,1,float16,fp8,0,0.5516266822814941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,32,64,0,1,fp8,fp8,0,0.5287893215815226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,1,64,128,1,float16,float16,0,0.3564586639404297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,1,64,0,1,float16,float16,0,0.5174026489257812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,1,64,128,1,float16,fp8,0,0.3500426610310872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,1,64,128,1,fp8,fp8,0,0.4713919957478841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,1,64,0,1,float16,fp8,0,0.5101226568222046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,1,64,0,1,fp8,fp8,0,0.4841173489888509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,2,64,128,1,float16,float16,0,0.35893865426381427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,2,64,0,1,float16,float16,0,0.5190720160802206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,2,64,128,1,float16,fp8,0,0.3537386655807495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,2,64,128,1,fp8,fp8,0,0.4729653199513753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,2,64,0,1,float16,fp8,0,0.5108160177866617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,2,64,0,1,fp8,fp8,0,0.48927466074625653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,4,64,128,1,float16,float16,0,0.3598346710205078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,4,64,0,1,fp8,fp8,0,0.48972801367441815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,4,64,0,1,float16,float16,0,0.5222026507059733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,4,64,128,1,float16,fp8,0,0.3563946485519409
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,4,64,128,1,fp8,fp8,0,0.4748426675796509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,4,64,0,1,float16,fp8,0,0.5149866739908854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,8,64,128,1,float16,float16,0,0.36345601081848145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,8,64,0,1,float16,float16,0,0.5262453158696493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,8,64,128,1,float16,fp8,0,0.362064003944397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,8,64,128,1,fp8,fp8,0,0.48199466864267987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,8,64,0,1,float16,fp8,0,0.5235679944356283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,8,64,0,1,fp8,fp8,0,0.4951680103937785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,32,64,128,1,float16,float16,0,0.2127573291460673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,32,64,0,1,float16,float16,0,0.2941333254178365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,32,64,128,1,float16,fp8,0,0.21748799085617065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,32,64,128,1,fp8,fp8,0,0.2842613259951274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,32,64,0,1,float16,fp8,0,0.29923200607299805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,32,64,0,1,fp8,fp8,0,0.2771786650021871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,1,64,128,1,float16,float16,0,0.19584532578786215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,1,64,0,1,float16,float16,0,0.2783573269844055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,1,64,128,1,float16,fp8,0,0.19267199436823526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,1,64,128,1,fp8,fp8,0,0.2619626720746358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,1,64,0,1,float16,fp8,0,0.2759946584701538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,1,64,0,1,fp8,fp8,0,0.25439999500910443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,2,64,128,1,float16,float16,0,0.19814932346343994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,2,64,0,1,float16,fp8,0,0.2787626584370931
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,2,64,0,1,float16,float16,0,0.2801706592241923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,2,64,128,1,float16,fp8,0,0.19403733809789023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,2,64,128,1,fp8,fp8,0,0.2616479992866516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,2,64,0,1,fp8,fp8,0,0.2551893393198649
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,4,64,128,1,float16,float16,0,0.2002506653467814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,4,64,0,1,float16,float16,0,0.2840213378270467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,4,64,128,1,float16,fp8,0,0.19777067502339682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,4,64,128,1,fp8,fp8,0,0.2616533239682515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,4,64,0,1,float16,fp8,0,0.2793440024058024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,4,64,0,1,fp8,fp8,0,0.25603200991948444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,8,64,128,1,fp8,fp8,0,0.26766933997472125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,8,64,128,1,float16,float16,0,0.20147200425465903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,8,64,0,1,float16,float16,0,0.2860479950904846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,8,64,128,1,float16,fp8,0,0.20029334227244058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,8,64,0,1,float16,fp8,0,0.28216532866160077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,32,64,128,1,float16,float16,0,0.12472533186276753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,8,64,0,1,fp8,fp8,0,0.25918400287628174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,32,64,0,1,float16,float16,0,0.15851199626922607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,32,64,128,1,float16,fp8,0,0.12759466965993246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,32,64,128,1,fp8,fp8,0,0.16709866126378378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,32,64,0,1,float16,fp8,0,0.1604213317235311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,32,64,0,1,fp8,fp8,0,0.15549866358439127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,1,64,128,1,float16,float16,0,0.11519466837247212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,1,64,0,1,float16,float16,0,0.14803733428319296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,1,64,128,1,float16,fp8,0,0.11485866705576579
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,1,64,128,1,fp8,fp8,0,0.14867200454076132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,1,64,0,1,float16,fp8,0,0.14853866895039877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,1,64,0,1,fp8,fp8,0,0.14204266667366028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,2,64,128,1,float16,float16,0,0.11642666657765706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,2,64,0,1,float16,float16,0,0.1491200029850006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,2,64,128,1,float16,fp8,0,0.11508267124493916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,2,64,128,1,fp8,fp8,0,0.14619200428326926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,2,64,0,1,float16,fp8,0,0.14789866407712302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,2,64,0,1,fp8,fp8,0,0.14261333147684732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,4,64,128,1,float16,float16,0,0.11643200119336446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,4,64,0,1,float16,float16,0,0.14959466457366943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,4,64,128,1,float16,fp8,0,0.11717866857846577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,8,64,128,1,float16,fp8,0,0.11780266960461934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,4,64,128,1,fp8,fp8,0,0.15357333421707153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,4,64,0,1,float16,fp8,0,0.14883733789126077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,4,64,0,1,fp8,fp8,0,0.14260799686113992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,8,64,128,1,float16,float16,0,0.11803733309110005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,8,64,0,1,float16,float16,0,0.1513706644376119
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,8,64,128,1,fp8,fp8,0,0.15819199879964194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,32,64,128,1,fp8,fp8,0,0.11504000425338745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,8,64,0,1,float16,fp8,0,0.15124266346295676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,8,64,0,1,fp8,fp8,0,0.14733333388964334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,32,64,128,1,float16,float16,0,0.09091732899347942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,32,64,0,1,float16,float16,0,0.10440533359845479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,1,64,128,1,float16,fp8,0,0.0881119966506958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,32,64,128,1,float16,fp8,0,0.08986133337020874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,32,64,0,1,float16,fp8,0,0.10452266534169515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,32,64,0,1,fp8,fp8,0,0.10109866658846538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,1,64,128,1,float16,float16,0,0.08805867036183675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,1,64,0,1,float16,float16,0,0.10328533252080281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,1,64,128,1,fp8,fp8,0,0.10434133807818095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,1,64,0,1,float16,fp8,0,0.10288000106811523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,1,64,0,1,fp8,fp8,0,0.09925333658854167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,2,64,128,1,float16,float16,0,0.08683733145395915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,2,64,0,1,float16,float16,0,0.10269866387049358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,2,64,128,1,float16,fp8,0,0.08780266841252644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,2,64,128,1,fp8,fp8,0,0.10473066568374634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,2,64,0,1,float16,fp8,0,0.1032373309135437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,2,64,0,1,fp8,fp8,0,0.09920000036557515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,4,64,128,1,float16,float16,0,0.08694400389989217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,4,64,0,1,float16,float16,0,0.10274666547775269
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,4,64,128,1,float16,fp8,0,0.08707200487454732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,4,64,128,1,fp8,fp8,0,0.10452266534169515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,4,64,0,1,float16,fp8,0,0.10262933373451233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,4,64,0,1,fp8,fp8,0,0.09897599617640178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,8,64,128,1,float16,float16,0,0.08840533097585042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,8,64,0,1,float16,float16,0,0.10331733028093974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,8,64,128,1,float16,fp8,0,0.08731733759244283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,8,64,128,1,fp8,fp8,0,0.10379733641942342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,8,64,0,1,float16,fp8,0,0.10202133655548096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,8,64,0,1,fp8,fp8,0,0.09962667028109233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,1,64,128,1,float16,float16,0,1.9821492830912273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,1,64,0,1,float16,float16,0,2.531482696533203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,1,64,128,1,float16,fp8,0,1.9578933715820312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,1,64,128,1,fp8,fp8,0,2.5844853719075522
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,1,64,0,1,float16,fp8,0,2.528010686238607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,2,64,128,1,float16,float16,0,1.9789867401123047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,1,64,0,1,fp8,fp8,0,2.3513333002726235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,2,64,128,1,float16,fp8,0,1.9500800768534343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,2,64,0,1,float16,float16,0,2.5567092895507812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,2,64,128,1,fp8,fp8,0,2.5970986684163413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,2,64,0,1,float16,fp8,0,2.533173402150472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,4,64,128,1,float16,float16,0,1.9910826683044434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,2,64,0,1,fp8,fp8,0,2.371333281199137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,4,64,128,1,float16,fp8,0,1.962959925333659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,4,64,0,1,float16,float16,0,2.5553332964579263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,4,64,128,1,fp8,fp8,0,2.619589328765869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,4,64,0,1,float16,fp8,0,2.5333919525146484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,8,64,128,1,float16,float16,0,2.019829273223877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,4,64,0,1,fp8,fp8,0,2.405120054880778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,8,64,128,1,float16,fp8,0,1.9981013933817546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,8,64,0,1,float16,float16,0,2.5860800743103027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,32,64,128,1,float16,float16,0,1.113807996114095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,8,64,128,1,fp8,fp8,0,2.6768906911214194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,8,64,0,1,float16,fp8,0,2.5782292683919272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,32,64,0,1,float16,float16,0,1.411450703938802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,32,64,128,1,float16,fp8,0,1.1039520104726155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,8,64,0,1,fp8,fp8,0,2.4601972897847495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,32,64,128,1,fp8,fp8,0,1.455359935760498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,32,64,0,1,float16,fp8,0,1.3892000516255696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,1,64,128,1,float16,float16,0,0.9879519939422607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,32,64,0,1,fp8,fp8,0,1.3426666259765625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,1,64,128,1,float16,fp8,0,0.9691946506500244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,1,64,0,1,float16,float16,0,1.2847893238067627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,1,64,128,1,fp8,fp8,0,1.2984533309936523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,1,64,0,1,float16,fp8,0,1.25328532854716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,1,64,0,1,fp8,fp8,0,1.1807573636372883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,2,64,128,1,float16,float16,0,0.9974026679992676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,2,64,128,1,float16,fp8,0,0.9821226596832275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,2,64,0,1,float16,float16,0,1.281002680460612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,2,64,128,1,fp8,fp8,0,1.3223360379536946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,2,64,0,1,fp8,fp8,0,1.1947999795277913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,4,64,128,1,float16,float16,0,0.9998613198598226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,2,64,0,1,float16,fp8,0,1.2674612998962402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,4,64,0,1,float16,float16,0,1.2856427033742268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,4,64,128,1,float16,fp8,0,0.9897813002268473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,4,64,128,1,fp8,fp8,0,1.3173173268636067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,4,64,0,1,float16,fp8,0,1.2820106347401936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,4,64,0,1,fp8,fp8,0,1.2032746473948162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,8,64,128,1,float16,float16,0,1.0136106808980305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,8,64,128,1,float16,fp8,0,1.006160020828247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,8,64,0,1,float16,float16,0,1.2928746541341145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,8,64,128,1,fp8,fp8,0,1.3392213185628254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,8,64,0,1,fp8,fp8,0,1.2249226570129395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,32,64,128,1,float16,float16,0,0.5681279897689819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,8,64,0,1,float16,fp8,0,1.2901386419932048
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,32,64,0,1,float16,float16,0,0.7070826689402262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,32,64,128,1,float16,fp8,0,0.5642720063527426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,32,64,128,1,fp8,fp8,0,0.740330696105957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,32,64,0,1,float16,fp8,0,0.7045546372731527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,32,64,0,1,fp8,fp8,0,0.6795252958933512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,1,64,128,1,float16,float16,0,0.5094079971313477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,1,64,0,1,float16,float16,0,0.657535990079244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,1,64,128,1,float16,fp8,0,0.5016533136367798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,1,64,128,1,fp8,fp8,0,0.6693653265635172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,1,64,0,1,float16,fp8,0,0.643397331237793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,1,64,0,1,fp8,fp8,0,0.6103786627451578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,2,64,128,1,float16,float16,0,0.5105280081431071
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,2,64,0,1,float16,float16,0,0.6566293239593506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,2,64,128,1,float16,fp8,0,0.5057493448257446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,2,64,128,1,fp8,fp8,0,0.6801919937133789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,2,64,0,1,float16,fp8,0,0.6513226826985677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,2,64,0,1,fp8,fp8,0,0.61299200852712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,4,64,128,1,float16,float16,0,0.5143680175145467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,4,64,0,1,float16,float16,0,0.6626666784286499
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,4,64,128,1,float16,fp8,0,0.506330649058024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,8,64,128,1,float16,float16,0,0.5230079889297485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,4,64,128,1,fp8,fp8,0,0.6802612940470377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,4,64,0,1,float16,fp8,0,0.6544320185979208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,4,64,0,1,fp8,fp8,0,0.6210293372472128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,8,64,0,1,float16,float16,0,0.6643999814987183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,8,64,128,1,float16,fp8,0,0.5158506631851196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,8,64,128,1,fp8,fp8,0,0.6895146369934082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,8,64,0,1,float16,fp8,0,0.6578239997227987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,8,64,0,1,fp8,fp8,0,0.6257919867833456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,32,64,128,1,float16,float16,0,0.2985866665840149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,32,64,0,1,float16,float16,0,0.37301866213480633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,32,64,128,1,float16,fp8,0,0.3018133242925008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,32,64,128,1,fp8,fp8,0,0.396565318107605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,32,64,0,1,float16,fp8,0,0.37323200702667236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,32,64,0,1,fp8,fp8,0,0.3518933455149333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,1,64,0,1,float16,fp8,0,0.3441760142644246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,1,64,128,1,float16,float16,0,0.2739786704381307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,2,64,128,1,float16,float16,0,0.272325336933136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,1,64,0,1,float16,float16,0,0.3457653522491455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,1,64,128,1,float16,fp8,0,0.2691306670506795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,1,64,128,1,fp8,fp8,0,0.35947732130686444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,1,64,0,1,fp8,fp8,0,0.315829336643219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,2,64,0,1,float16,float16,0,0.34947200616200763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,4,64,128,1,float16,float16,0,0.27719465891520184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,2,64,128,1,float16,fp8,0,0.26915733019510907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,2,64,128,1,fp8,fp8,0,0.3612746795018514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,4,64,128,1,fp8,fp8,0,0.3656426668167114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,4,64,0,1,float16,fp8,0,0.3479626576105754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,4,64,0,1,fp8,fp8,0,0.32151466608047485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,2,64,0,1,float16,fp8,0,0.34275201956431073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,2,64,0,1,fp8,fp8,0,0.31817599137624103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,4,64,0,1,float16,float16,0,0.3497120141983032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,4,64,128,1,float16,fp8,0,0.2714879910151164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,8,64,128,1,float16,float16,0,0.2771573265393575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,8,64,0,1,fp8,fp8,0,0.3242986599604289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,8,64,0,1,float16,float16,0,0.3532533248265584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,8,64,128,1,float16,fp8,0,0.2770506739616394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,8,64,128,1,fp8,fp8,0,0.36724265416463214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,8,64,0,1,float16,fp8,0,0.3505226771036784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,32,64,128,1,float16,float16,0,0.16461333632469177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,32,64,0,1,float16,float16,0,0.1997013290723165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,32,64,128,1,float16,fp8,0,0.1676479975382487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,32,64,128,1,fp8,fp8,0,0.2230986754099528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,32,64,0,1,float16,fp8,0,0.2014133334159851
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,32,64,0,1,fp8,fp8,0,0.1895680030186971
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,1,64,128,1,float16,float16,0,0.1524853308995565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,1,64,0,1,float16,float16,0,0.17996267477671304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,1,64,128,1,float16,fp8,0,0.14936000108718872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,2,64,0,1,float16,float16,0,0.18075199921925864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,1,64,128,1,fp8,fp8,0,0.20225600401560465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,1,64,0,1,float16,fp8,0,0.17844265699386597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,1,64,0,1,fp8,fp8,0,0.1734666625658671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,2,64,128,1,float16,float16,0,0.15156267086664835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,2,64,128,1,float16,fp8,0,0.1497066617012024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,2,64,128,1,fp8,fp8,0,0.20189332962036133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,2,64,0,1,float16,fp8,0,0.17841599384943643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,2,64,0,1,fp8,fp8,0,0.17405333121617636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,4,64,128,1,float16,float16,0,0.1541973352432251
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,4,64,0,1,float16,float16,0,0.18245333433151245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,4,64,128,1,float16,fp8,0,0.15051199992497763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,4,64,128,1,fp8,fp8,0,0.20409067471822104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,4,64,0,1,float16,fp8,0,0.1788640022277832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,4,64,0,1,fp8,fp8,0,0.17518933614095053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,8,64,128,1,float16,float16,0,0.1564479966958364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,8,64,0,1,float16,float16,0,0.18458133935928345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,8,64,128,1,float16,fp8,0,0.15448533495267233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,8,64,128,1,fp8,fp8,0,0.20651199420293173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,8,64,0,1,float16,fp8,0,0.18211199839909872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,8,64,0,1,fp8,fp8,0,0.17805333932240805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,32,64,128,1,float16,float16,0,0.0995146632194519
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,32,64,0,1,float16,float16,0,0.11205333471298218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,32,64,128,1,float16,fp8,0,0.10046399633089702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,32,64,128,1,fp8,fp8,0,0.13245333234469095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,32,64,0,1,float16,fp8,0,0.11217066645622253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,32,64,0,1,fp8,fp8,0,0.10984533031781514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,1,64,128,1,float16,float16,0,0.08947733044624329
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,1,64,0,1,float16,float16,0,0.10267200072606404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,1,64,128,1,float16,fp8,0,0.0899679958820343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,1,64,128,1,fp8,fp8,0,0.11486933628718059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,1,64,0,1,float16,fp8,0,0.10297066966692607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,1,64,0,1,fp8,fp8,0,0.09910399715105693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,2,64,128,1,float16,float16,0,0.08963732918103536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,2,64,0,1,float16,float16,0,0.10286399722099304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,2,64,128,1,float16,fp8,0,0.08917867143948872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,2,64,128,1,fp8,fp8,0,0.11455999811490376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,2,64,0,1,float16,fp8,0,0.10312533378601074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,2,64,0,1,fp8,fp8,0,0.09920533498128255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,4,64,128,1,float16,float16,0,0.09149332841237386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,4,64,0,1,float16,float16,0,0.1039573351542155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,4,64,128,1,float16,fp8,0,0.0897653301556905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,4,64,128,1,fp8,fp8,0,0.11822932958602905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,4,64,0,1,float16,fp8,0,0.10269332925478618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,8,64,0,1,float16,fp8,0,0.1033066709836324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,4,64,0,1,fp8,fp8,0,0.10038933157920837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,8,64,128,1,float16,float16,0,0.09313600262006123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,8,64,0,1,float16,float16,0,0.1048479974269867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,8,64,128,1,float16,fp8,0,0.0920799970626831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,8,64,128,1,fp8,fp8,0,0.1211893359820048
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,8,64,0,1,fp8,fp8,0,0.1019040048122406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,32,64,128,1,float16,float16,0,0.07157866656780243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,32,64,0,1,float16,float16,0,0.07656533519426982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,32,64,128,1,float16,fp8,0,0.07035733262697856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,32,64,128,1,fp8,fp8,0,0.08603733777999878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,32,64,0,1,float16,fp8,0,0.07753600180149078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,32,64,0,1,fp8,fp8,0,0.07496533294518788
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,1,64,128,1,float16,float16,0,0.06996800005435944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,1,64,0,1,float16,float16,0,0.07504533231258392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,1,64,128,1,fp8,fp8,0,0.08662933111190796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,1,64,128,1,float16,fp8,0,0.06930666665236156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,1,64,0,1,float16,fp8,0,0.07589866717656453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,1,64,0,1,fp8,fp8,0,0.07330666482448578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,2,64,128,1,float16,float16,0,0.07029866675535838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,2,64,0,1,float16,float16,0,0.07572799921035767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,2,64,128,1,float16,fp8,0,0.06981333096822102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,2,64,128,1,fp8,fp8,0,0.0867146650950114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,2,64,0,1,float16,fp8,0,0.075573335091273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,2,64,0,1,fp8,fp8,0,0.07341866691907246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,4,64,128,1,float16,float16,0,0.06987200180689494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,4,64,0,1,float16,float16,0,0.07574399809042613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,4,64,128,1,float16,fp8,0,0.06960533559322357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,4,64,128,1,fp8,fp8,0,0.08737066388130188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,4,64,0,1,float16,fp8,0,0.07604800164699554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,4,64,0,1,fp8,fp8,0,0.07246933380762736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,8,64,128,1,float16,float16,0,0.06914666791756947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,8,64,0,1,float16,float16,0,0.0758240024248759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,8,64,128,1,float16,fp8,0,0.07003200054168701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,8,64,128,1,fp8,fp8,0,0.08349333206812541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,8,64,0,1,float16,fp8,0,0.0758133331934611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,8,64,0,1,fp8,fp8,0,0.0736053337653478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,1,64,128,1,float16,float16,0,2.380341370900472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,1,64,0,1,float16,float16,0,2.7147998809814453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,1,64,128,1,float16,fp8,0,2.3718934059143066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,1,64,0,1,float16,fp8,0,2.705871899922689
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,1,64,0,1,fp8,fp8,0,2.4842185974121094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,1,64,128,1,fp8,fp8,0,3.183978716532389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,2,64,128,1,float16,float16,0,2.4137867291768393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,2,64,128,1,float16,fp8,0,2.385845343271891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,2,64,0,1,float16,float16,0,2.765157381693522
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,2,64,128,1,fp8,fp8,0,3.2092533111572266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,2,64,0,1,fp8,fp8,0,2.5230933825174966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,2,64,0,1,float16,fp8,0,2.7259254455566406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,4,64,128,1,float16,float16,0,2.443823973337809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,4,64,0,1,float16,float16,0,2.7959092458089194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,4,64,128,1,float16,fp8,0,2.4138827323913574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,4,64,128,1,fp8,fp8,0,3.2676426569620767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,4,64,0,1,float16,fp8,0,2.74510924021403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,4,64,0,1,fp8,fp8,0,2.5324479738871255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,8,64,128,1,float16,float16,0,2.4851786295572915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,8,64,128,1,float16,fp8,0,2.4712533950805664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,8,64,0,1,float16,float16,0,2.8508265813191733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,8,64,128,1,fp8,fp8,0,3.303567886352539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,8,64,0,1,float16,fp8,0,2.8047145207722983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,32,64,128,1,float16,float16,0,1.3635412851969402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,32,64,0,1,float16,float16,0,1.5266879399617512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,32,64,128,1,float16,fp8,0,1.333253304163615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,8,64,0,1,fp8,fp8,0,2.6355627377827964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,32,64,128,1,fp8,fp8,0,1.7316853205362956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,32,64,0,1,float16,fp8,0,1.5018399556477864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,1,64,128,1,float16,float16,0,1.1968426704406738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,32,64,0,1,fp8,fp8,0,1.372879981994629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,1,64,0,1,float16,float16,0,1.3646346728007
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,1,64,128,1,float16,fp8,0,1.198789358139038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,1,64,128,1,fp8,fp8,0,1.6012852986653645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,1,64,0,1,float16,fp8,0,1.3754773139953613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,1,64,0,1,fp8,fp8,0,1.2512693405151367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,2,64,128,1,float16,float16,0,1.2059840361277263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,2,64,0,1,float16,float16,0,1.3742613792419434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,2,64,128,1,float16,fp8,0,1.20469864209493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,2,64,128,1,fp8,fp8,0,1.6097920735677083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,2,64,0,1,float16,fp8,0,1.3708213170369465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,2,64,0,1,fp8,fp8,0,1.2594880263010662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,4,64,128,1,float16,float16,0,1.2231573263804119
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,4,64,0,1,float16,float16,0,1.3851733207702637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,4,64,128,1,float16,fp8,0,1.2109013398488362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,4,64,128,1,fp8,fp8,0,1.6409707069396973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,4,64,0,1,float16,fp8,0,1.3737813631693523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,4,64,0,1,fp8,fp8,0,1.2703200181325276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,8,64,128,1,float16,float16,0,1.2524800300598145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,8,64,0,1,float16,float16,0,1.4231947263081868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,8,64,128,1,float16,fp8,0,1.249066670735677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,8,64,128,1,fp8,fp8,0,1.6659626960754395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,32,64,128,1,float16,float16,0,0.685861349105835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,8,64,0,1,fp8,fp8,0,1.3099413712819417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,8,64,0,1,float16,fp8,0,1.3991519610087078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,32,64,0,1,float16,float16,0,0.7711093425750732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,32,64,128,1,float16,fp8,0,0.6759146849314371
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,32,64,128,1,fp8,fp8,0,0.8796586990356445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,32,64,0,1,float16,fp8,0,0.7692373593648275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,32,64,0,1,fp8,fp8,0,0.6978346506754557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,1,64,128,1,float16,float16,0,0.6131999890009562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,1,64,0,1,float16,float16,0,0.6968479951222738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,1,64,128,1,float16,fp8,0,0.6128106514612833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,1,64,128,1,fp8,fp8,0,0.8161386648813883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,1,64,0,1,float16,fp8,0,0.6978507041931152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,1,64,0,1,fp8,fp8,0,0.6354399919509888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,2,64,128,1,float16,float16,0,0.6228479941685995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,2,64,0,1,float16,float16,0,0.7040853500366211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,2,64,128,1,float16,fp8,0,0.6170239845911661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,2,64,128,1,fp8,fp8,0,0.818885326385498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,2,64,0,1,float16,fp8,0,0.7010773022969564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,2,64,0,1,fp8,fp8,0,0.6372960011164347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,4,64,128,1,float16,float16,0,0.6205706596374512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,4,64,0,1,float16,float16,0,0.7065280278523763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,4,64,128,1,float16,fp8,0,0.6229759852091471
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,4,64,128,1,fp8,fp8,0,0.8248106638590494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,8,64,0,1,float16,float16,0,0.7088159720102946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,4,64,0,1,float16,fp8,0,0.7033013502756754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,4,64,0,1,fp8,fp8,0,0.6434186697006226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,8,64,128,1,float16,float16,0,0.6270080010096232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,8,64,128,1,float16,fp8,0,0.6264319817225138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,8,64,128,1,fp8,fp8,0,0.8422186374664307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,8,64,0,1,float16,fp8,0,0.7130186557769775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,32,64,128,1,float16,float16,0,0.3582506577173869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,8,64,0,1,fp8,fp8,0,0.6609119971593221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,32,64,0,1,fp8,fp8,0,0.35891199111938477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,32,64,0,1,float16,float16,0,0.40273598829905194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,1,64,128,1,float16,float16,0,0.32149332761764526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,32,64,128,1,float16,fp8,0,0.35340265432993573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,32,64,128,1,fp8,fp8,0,0.4580853382746379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,1,64,0,1,fp8,fp8,0,0.3338773250579834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,32,64,0,1,float16,fp8,0,0.396229346593221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,1,64,0,1,float16,float16,0,0.3647679885228475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,1,64,128,1,float16,fp8,0,0.32441065708796185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,1,64,128,1,fp8,fp8,0,0.4270399808883667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,1,64,0,1,float16,fp8,0,0.3664533297220866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,2,64,128,1,float16,float16,0,0.32274667421976727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,2,64,0,1,float16,float16,0,0.3658986488978068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,2,64,128,1,float16,fp8,0,0.3242186705271403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,2,64,128,1,fp8,fp8,0,0.429967999458313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,2,64,0,1,float16,fp8,0,0.3657173315684001
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,2,64,0,1,fp8,fp8,0,0.3349706729253133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,4,64,128,1,float16,float16,0,0.3246240019798279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,4,64,0,1,float16,float16,0,0.36934932072957355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,4,64,128,1,float16,fp8,0,0.3251146674156189
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,4,64,128,1,fp8,fp8,0,0.43410666783650714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,4,64,0,1,float16,fp8,0,0.3693813482920329
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,4,64,0,1,fp8,fp8,0,0.3349706729253133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,8,64,128,1,float16,float16,0,0.32866134246190387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,8,64,0,1,float16,float16,0,0.3715039889017741
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,8,64,128,1,float16,fp8,0,0.32918934027353924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,8,64,128,1,fp8,fp8,0,0.43212799231211346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,8,64,0,1,float16,fp8,0,0.3742613395055135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,32,64,128,1,float16,float16,0,0.19365866978963217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,8,64,0,1,fp8,fp8,0,0.3377813498179118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,32,64,0,1,float16,float16,0,0.2172693411509196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,1,64,128,1,float16,float16,0,0.17350933949152628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,32,64,128,1,float16,fp8,0,0.19177599747975668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,32,64,128,1,fp8,fp8,0,0.2456159989039103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,32,64,0,1,float16,fp8,0,0.21455466747283936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,1,64,0,1,float16,fp8,0,0.19633599122365317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,32,64,0,1,fp8,fp8,0,0.18829333782196045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,1,64,0,1,float16,float16,0,0.19597333669662476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,1,64,128,1,float16,fp8,0,0.1745013395945231
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,1,64,128,1,fp8,fp8,0,0.23156267404556274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,1,64,0,1,fp8,fp8,0,0.17515732844670615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,2,64,128,1,float16,float16,0,0.1735573410987854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,2,64,0,1,float16,float16,0,0.19593065977096558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,2,64,128,1,float16,fp8,0,0.1747200091679891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,2,64,128,1,fp8,fp8,0,0.232640008131663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,2,64,0,1,float16,fp8,0,0.1978399952252706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,2,64,0,1,fp8,fp8,0,0.173418660958608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,4,64,128,1,float16,float16,0,0.17659199237823486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,4,64,0,1,fp8,fp8,0,0.1758133371671041
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,4,64,0,1,float16,float16,0,0.1981119910875956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,4,64,128,1,float16,fp8,0,0.17540266116460165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,4,64,128,1,fp8,fp8,0,0.2342026631037394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,4,64,0,1,float16,fp8,0,0.19885865847269693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,8,64,128,1,float16,float16,0,0.17780266205469766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,8,64,0,1,float16,float16,0,0.20203200976053873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,8,64,128,1,float16,fp8,0,0.17896000544230142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,8,64,128,1,fp8,fp8,0,0.23544534047444662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,8,64,0,1,float16,fp8,0,0.201855997244517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,8,64,0,1,fp8,fp8,0,0.17583467562993368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,32,64,128,1,float16,float16,0,0.11079999804496765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,32,64,0,1,float16,float16,0,0.11533332864443462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,32,64,128,1,float16,fp8,0,0.10865066448847453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,32,64,128,1,fp8,fp8,0,0.1435413360595703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,32,64,0,1,float16,fp8,0,0.11429333686828613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,32,64,0,1,fp8,fp8,0,0.10418132940928142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,1,64,128,1,float16,float16,0,0.0978666643301646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,1,64,0,1,float16,float16,0,0.10318400462468465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,1,64,128,1,float16,fp8,0,0.09820266564687093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,1,64,128,1,fp8,fp8,0,0.13125866651535034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,1,64,0,1,float16,fp8,0,0.10253866513570149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,1,64,0,1,fp8,fp8,0,0.09558932979901631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,2,64,128,1,float16,float16,0,0.09820800026257832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,2,64,0,1,float16,float16,0,0.10365333159764607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,2,64,128,1,float16,fp8,0,0.09898666540781657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,4,64,128,1,float16,fp8,0,0.09968533118565877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,2,64,128,1,fp8,fp8,0,0.13086400429407755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,2,64,0,1,float16,fp8,0,0.10441600282986958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,2,64,0,1,fp8,fp8,0,0.09576533238093059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,4,64,128,1,float16,float16,0,0.0981760025024414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,4,64,0,1,float16,float16,0,0.10522133111953735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,4,64,128,1,fp8,fp8,0,0.1325493355592092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,4,64,0,1,float16,fp8,0,0.10601066549619038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,4,64,0,1,fp8,fp8,0,0.09680533409118652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,8,64,128,1,float16,float16,0,0.10072533289591472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,8,64,0,1,float16,float16,0,0.10761599739392598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,8,64,128,1,float16,fp8,0,0.10140800476074219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,8,64,128,1,fp8,fp8,0,0.13397866487503052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,8,64,0,1,float16,fp8,0,0.10749866565068562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,8,64,0,1,fp8,fp8,0,0.09784533580144246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,32,64,128,1,float16,float16,0,0.064410666624705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,32,64,0,1,float16,float16,0,0.06634133557478587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,32,64,128,1,float16,fp8,0,0.06450133522351582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,32,64,128,1,fp8,fp8,0,0.0858026643594106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,32,64,0,1,float16,fp8,0,0.06602666775385539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,32,64,0,1,fp8,fp8,0,0.06151466568311056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,1,64,128,1,float16,float16,0,0.05933333436648051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,1,64,0,1,float16,float16,0,0.060309335589408875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,1,64,128,1,float16,fp8,0,0.059077332417170204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,1,64,128,1,fp8,fp8,0,0.07630933324495952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,1,64,0,1,float16,fp8,0,0.061199997862180076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,1,64,0,1,fp8,fp8,0,0.056736002365748085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,2,64,128,1,float16,float16,0,0.06033066908518473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,2,64,0,1,float16,float16,0,0.060309335589408875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,2,64,128,1,float16,fp8,0,0.0599839985370636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,2,64,128,1,fp8,fp8,0,0.07564799984296162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,2,64,0,1,float16,fp8,0,0.06070933242638906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,2,64,0,1,fp8,fp8,0,0.05615466833114624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,4,64,128,1,float16,float16,0,0.06014933188756307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,4,64,0,1,float16,float16,0,0.06195733447869619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,4,64,128,1,float16,fp8,0,0.060458665092786155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,4,64,128,1,fp8,fp8,0,0.07577600081761678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,4,64,0,1,float16,fp8,0,0.06205866734186808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,4,64,0,1,fp8,fp8,0,0.055946667989095054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,8,64,128,1,float16,float16,0,0.06021333237489065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,8,64,0,1,float16,float16,0,0.06261866788069408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,8,64,128,1,float16,fp8,0,0.061103999614715576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,8,64,128,1,fp8,fp8,0,0.07672533392906189
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,8,64,0,1,float16,fp8,0,0.06261333326498668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,8,64,0,1,fp8,fp8,0,0.056618665655454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,32,64,128,1,float16,float16,0,0.04153066625197729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,32,64,0,1,float16,float16,0,0.04031466692686081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,32,64,128,1,float16,fp8,0,0.04199466605981191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,32,64,128,1,fp8,fp8,0,0.05249066650867462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,32,64,0,1,float16,fp8,0,0.04091733445723852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,32,64,0,1,fp8,fp8,0,0.03695466617743174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,1,64,128,1,float16,float16,0,0.03995199998219808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,1,64,0,1,float16,float16,0,0.038191998998324074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,1,64,128,1,float16,fp8,0,0.040021332601706185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,1,64,128,1,fp8,fp8,0,0.05115733544031779
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,1,64,0,1,float16,fp8,0,0.03862400104602178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,2,64,128,1,fp8,fp8,0,0.050623998045921326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,1,64,0,1,fp8,fp8,0,0.036661334335803986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,2,64,128,1,float16,float16,0,0.040133332212766014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,2,64,0,1,float16,float16,0,0.03807466725508372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,2,64,128,1,float16,fp8,0,0.04058666775623957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,2,64,0,1,float16,fp8,0,0.03845866769552231
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,2,64,0,1,fp8,fp8,0,0.03707200040419897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,4,64,128,1,float16,float16,0,0.040021332601706185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,4,64,0,1,float16,float16,0,0.03886399914820989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,4,64,128,1,float16,fp8,0,0.039962666730086006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,4,64,128,1,fp8,fp8,0,0.0513866643110911
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,4,64,0,1,float16,fp8,0,0.03878933439652125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,4,64,0,1,fp8,fp8,0,0.03665599972009659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,8,64,0,1,fp8,fp8,0,0.03670933345953623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,8,64,128,1,float16,float16,0,0.040549332896868386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,8,64,0,1,float16,float16,0,0.03878933439652125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,8,64,128,1,float16,fp8,0,0.041189332803090416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,8,64,128,1,fp8,fp8,0,0.051589335004488625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,8,64,0,1,float16,fp8,0,0.03854399919509888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,1,64,128,1,float16,float16,0,2.238709290822347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,1,64,0,1,float16,float16,0,2.2323946952819824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,1,64,128,1,float16,fp8,0,2.232858657836914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,1,64,0,1,float16,fp8,0,2.220309257507324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,1,64,0,1,fp8,fp8,0,2.002288023630778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,1,64,128,1,fp8,fp8,0,2.9985812505086265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,2,64,0,1,float16,float16,0,2.2438666025797525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,2,64,128,1,float16,float16,0,2.280229409535726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,2,64,128,1,float16,fp8,0,2.255610624949137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,2,64,0,1,float16,fp8,0,2.2375893592834473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,2,64,128,1,fp8,fp8,0,3.040752092997233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,2,64,0,1,fp8,fp8,0,2.035893281300863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,4,64,128,1,float16,float16,0,2.3076799710591636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,4,64,128,1,float16,fp8,0,2.2764479319254556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,4,64,0,1,float16,float16,0,2.292773405710856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,4,64,128,1,fp8,fp8,0,3.0609280268351235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,4,64,0,1,float16,fp8,0,2.262890656789144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,4,64,0,1,fp8,fp8,0,2.0552852948506675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,8,64,128,1,float16,float16,0,2.363248030344645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,8,64,0,1,float16,float16,0,2.3427093823750815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,8,64,128,1,float16,fp8,0,2.3477333386739097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,32,64,128,1,float16,float16,0,1.3013919989267986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,32,64,0,1,float16,float16,0,1.291866699854533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,8,64,0,1,float16,fp8,0,2.3371413548787436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,32,64,128,1,float16,fp8,0,1.2731040318806965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,8,64,128,1,fp8,fp8,0,3.117589314778646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,8,64,0,1,fp8,fp8,0,2.138495922088623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,32,64,128,1,fp8,fp8,0,1.630176067352295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,32,64,0,1,float16,fp8,0,1.2546026706695557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,32,64,0,1,fp8,fp8,0,1.1282560030619304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,1,64,128,1,float16,float16,0,1.1320052941640217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,1,64,0,1,float16,float16,0,1.1217973232269287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,1,64,128,1,float16,fp8,0,1.1383893489837646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,1,64,128,1,fp8,fp8,0,1.5067787170410156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,1,64,0,1,fp8,fp8,0,1.0074453353881836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,1,64,0,1,float16,fp8,0,1.1184746424357097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,2,64,128,1,float16,float16,0,1.1426293055216472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,2,64,0,1,float16,float16,0,1.1272160212198894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,2,64,128,1,float16,fp8,0,1.1394879817962646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,2,64,0,1,float16,fp8,0,1.1321333249409993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,2,64,128,1,fp8,fp8,0,1.5138452847798665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,2,64,0,1,fp8,fp8,0,1.016144037246704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,4,64,128,1,float16,float16,0,1.1502346992492676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,4,64,128,1,float16,fp8,0,1.147216002146403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,4,64,0,1,float16,float16,0,1.1410986582438152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,4,64,0,1,float16,fp8,0,1.1327626705169678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,4,64,128,1,fp8,fp8,0,1.5255413055419922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,4,64,0,1,fp8,fp8,0,1.0275200208028157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,8,64,0,1,float16,float16,0,1.1683039665222168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,8,64,128,1,float16,float16,0,1.1776800155639648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,8,64,128,1,float16,fp8,0,1.1682506402333577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,8,64,0,1,float16,fp8,0,1.1596852938334148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,8,64,128,1,fp8,fp8,0,1.5606452624003093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,8,64,0,1,fp8,fp8,0,1.0602933565775554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,32,64,128,1,float16,float16,0,0.6568906704584757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,32,64,0,1,float16,float16,0,0.6467093229293823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,32,64,128,1,float16,fp8,0,0.6429866552352905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,1,64,0,1,float16,float16,0,0.5710879961649576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,32,64,128,1,fp8,fp8,0,0.833301305770874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,32,64,0,1,float16,fp8,0,0.6352800130844116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,32,64,0,1,fp8,fp8,0,0.5774986743927002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,1,64,128,1,float16,float16,0,0.5811253388722738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,1,64,128,1,float16,fp8,0,0.5810933510462443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,1,64,0,1,float16,fp8,0,0.5688960154851278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,1,64,128,1,fp8,fp8,0,0.7734666665395101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,1,64,0,1,fp8,fp8,0,0.514304002126058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,2,64,128,1,float16,float16,0,0.5867306788762411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,2,64,0,1,float16,float16,0,0.5747520128885905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,2,64,128,1,float16,fp8,0,0.5864906708399454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,2,64,128,1,fp8,fp8,0,0.7740159829457601
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,2,64,0,1,float16,fp8,0,0.5728053251902262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,2,64,0,1,fp8,fp8,0,0.5178826649983724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,4,64,128,1,float16,float16,0,0.5894346634546915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,4,64,0,1,float16,float16,0,0.5799093246459961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,4,64,128,1,float16,fp8,0,0.588207999865214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,4,64,128,1,fp8,fp8,0,0.7806293169657389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,4,64,0,1,float16,fp8,0,0.5780693292617798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,4,64,0,1,fp8,fp8,0,0.5213866631189982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,8,64,128,1,float16,float16,0,0.5966879924138387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,8,64,128,1,fp8,fp8,0,0.7894240220387777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,8,64,0,1,float16,fp8,0,0.5854133367538452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,8,64,0,1,float16,float16,0,0.5848906834920248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,8,64,128,1,float16,fp8,0,0.5951786835988363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,8,64,0,1,fp8,fp8,0,0.5313226779301962
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,32,64,128,1,float16,float16,0,0.3438346783320109
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,32,64,0,1,fp8,fp8,0,0.2966559926668803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,32,64,0,1,float16,float16,0,0.3370453516642253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,32,64,128,1,float16,fp8,0,0.3359146515528361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,32,64,128,1,fp8,fp8,0,0.43414398034413654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,32,64,0,1,float16,fp8,0,0.33082665999730426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,1,64,0,1,float16,float16,0,0.29851200183232623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,1,64,128,1,float16,float16,0,0.3061013420422872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,1,64,128,1,float16,fp8,0,0.3036746581395467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,1,64,128,1,fp8,fp8,0,0.4050453503926595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,1,64,0,1,float16,fp8,0,0.29903467496236164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,1,64,0,1,fp8,fp8,0,0.26844799518585205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,2,64,128,1,float16,float16,0,0.3086293339729309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,4,64,128,1,float16,float16,0,0.31060800949732464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,2,64,0,1,float16,float16,0,0.30002667506535846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,2,64,128,1,float16,fp8,0,0.30762133995691937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,2,64,128,1,fp8,fp8,0,0.4077226718266805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,2,64,0,1,float16,fp8,0,0.29993067185084027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,2,64,0,1,fp8,fp8,0,0.2711413304011027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,4,64,0,1,float16,float16,0,0.30129067103068036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,4,64,128,1,float16,fp8,0,0.3093600074450175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,4,64,128,1,fp8,fp8,0,0.4108533461888631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,4,64,0,1,float16,fp8,0,0.30241600672403973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,4,64,0,1,fp8,fp8,0,0.2752319971720378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,8,64,128,1,float16,float16,0,0.31546666224797565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,8,64,0,1,float16,float16,0,0.3049599925676982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,32,64,0,1,float16,float16,0,0.18018666903177896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,8,64,128,1,float16,fp8,0,0.31224000453948975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,8,64,128,1,fp8,fp8,0,0.40944000085194904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,8,64,0,1,float16,fp8,0,0.305786669254303
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,32,64,0,1,fp8,fp8,0,0.15686399737993875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,8,64,0,1,fp8,fp8,0,0.27590399980545044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,32,64,128,1,float16,float16,0,0.18566399812698364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,32,64,128,1,float16,fp8,0,0.18483734130859375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,32,64,128,1,fp8,fp8,0,0.23128533363342285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,32,64,0,1,float16,fp8,0,0.17969600359598795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,1,64,128,1,float16,float16,0,0.1653546690940857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,1,64,0,1,float16,float16,0,0.16037866473197937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,1,64,128,1,float16,fp8,0,0.16592533389727274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,1,64,128,1,fp8,fp8,0,0.22102399667104086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,1,64,0,1,float16,fp8,0,0.16126400232315063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,1,64,0,1,fp8,fp8,0,0.14562666416168213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,2,64,128,1,float16,float16,0,0.16526933511098227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,2,64,0,1,float16,float16,0,0.16154666741689047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,2,64,128,1,float16,fp8,0,0.16632533073425293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,2,64,128,1,fp8,fp8,0,0.22272533178329468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,2,64,0,1,float16,fp8,0,0.16155733664830527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,2,64,0,1,fp8,fp8,0,0.14497599999109903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,4,64,128,1,float16,float16,0,0.16647467017173767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,4,64,0,1,float16,float16,0,0.1622880001862844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,4,64,128,1,float16,fp8,0,0.16767466068267822
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,4,64,128,1,fp8,fp8,0,0.22155733903249106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,4,64,0,1,float16,fp8,0,0.16177067160606384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,4,64,0,1,fp8,fp8,0,0.1465226709842682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,8,64,128,1,float16,float16,0,0.16904000441233316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,8,64,0,1,float16,float16,0,0.166101336479187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,8,64,128,1,float16,fp8,0,0.16944533586502075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,8,64,128,1,fp8,fp8,0,0.223306675752004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,8,64,0,1,float16,fp8,0,0.16436266899108887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,8,64,0,1,fp8,fp8,0,0.14833066860834757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,32,64,128,1,float16,float16,0,0.10503466924031575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,32,64,0,1,float16,float16,0,0.10005866487820943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,32,64,128,1,float16,fp8,0,0.10457066694895427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,32,64,128,1,fp8,fp8,0,0.13520532846450806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,32,64,0,1,float16,fp8,0,0.09858666857083638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,32,64,0,1,fp8,fp8,0,0.09038399656613667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,1,64,0,1,fp8,fp8,0,0.08055466910203297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,1,64,128,1,float16,float16,0,0.09262399872144063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,1,64,0,1,float16,float16,0,0.08797333637873332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,1,64,128,1,float16,fp8,0,0.09328533212343852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,1,64,128,1,fp8,fp8,0,0.12363200386365254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,1,64,0,1,float16,fp8,0,0.08781333764394124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,2,64,128,1,float16,float16,0,0.09274666508038838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,2,64,0,1,float16,float16,0,0.08972799777984619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,4,64,0,1,float16,float16,0,0.08965866764386494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,2,64,128,1,float16,fp8,0,0.0946720043818156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,2,64,128,1,fp8,fp8,0,0.12160000205039978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,2,64,0,1,float16,fp8,0,0.08842666943868001
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,2,64,0,1,fp8,fp8,0,0.08045866588751475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,4,64,128,1,float16,float16,0,0.09545066952705383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,4,64,128,1,float16,fp8,0,0.09411199887593587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,4,64,128,1,fp8,fp8,0,0.12733866771062216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,4,64,0,1,float16,fp8,0,0.08983467022577922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,4,64,0,1,fp8,fp8,0,0.08182399968306224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,8,64,128,1,float16,float16,0,0.09610133369763692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,8,64,0,1,float16,float16,0,0.09103999535242717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,8,64,128,1,float16,fp8,0,0.0965333382288615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,8,64,128,1,fp8,fp8,0,0.12745599945386252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,8,64,0,1,float16,fp8,0,0.09177600344022115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,8,64,0,1,fp8,fp8,0,0.08486933509508769
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,32,64,128,1,float16,float16,0,0.062080000837643944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,32,64,0,1,float16,float16,0,0.057445332407951355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,32,64,128,1,float16,fp8,0,0.06140799820423126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,32,64,128,1,fp8,fp8,0,0.08428800106048584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,32,64,0,1,float16,fp8,0,0.05657599866390228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,32,64,0,1,fp8,fp8,0,0.05351999898751577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,1,64,0,1,float16,fp8,0,0.05253866811593374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,1,64,128,1,float16,float16,0,0.05638400216897329
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,1,64,0,1,float16,float16,0,0.05258666475613912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,1,64,128,1,float16,fp8,0,0.05640000104904175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,1,64,128,1,fp8,fp8,0,0.07356800138950348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,1,64,0,1,fp8,fp8,0,0.047168001532554626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,2,64,128,1,float16,float16,0,0.05665599803129832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,2,64,0,1,float16,float16,0,0.052373334765434265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,2,64,128,1,float16,fp8,0,0.05654400090376536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,2,64,128,1,fp8,fp8,0,0.07384000221888225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,4,64,0,1,float16,fp8,0,0.05295999844868978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,2,64,0,1,float16,fp8,0,0.05243200063705444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,2,64,0,1,fp8,fp8,0,0.04753600060939789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,4,64,128,1,float16,float16,0,0.05639466643333435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,4,64,128,1,float16,fp8,0,0.05706666906674703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,4,64,0,1,float16,float16,0,0.053445334235827126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,4,64,128,1,fp8,fp8,0,0.07438399891058604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,4,64,0,1,fp8,fp8,0,0.048656001687049866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,8,64,128,1,float16,float16,0,0.05779733260472616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,8,64,0,1,float16,float16,0,0.05346666773160299
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,8,64,128,1,float16,fp8,0,0.05850133299827576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,8,64,128,1,fp8,fp8,0,0.07399466633796692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,8,64,0,1,float16,fp8,0,0.053685332338015236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,8,64,0,1,fp8,fp8,0,0.04931733508904775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,32,64,128,1,float16,float16,0,0.040976000328858696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,32,64,0,1,float16,float16,0,0.03482666611671448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,32,64,128,1,float16,fp8,0,0.04125866790612539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,32,64,128,1,fp8,fp8,0,0.0507893313964208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,32,64,0,1,float16,fp8,0,0.03487999985615412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,32,64,0,1,fp8,fp8,0,0.031680000325044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,1,64,128,1,float16,float16,0,0.03912533322970072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,1,64,0,1,float16,float16,0,0.03366933266321818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,1,64,128,1,float16,fp8,0,0.039503999054431915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,1,64,128,1,fp8,fp8,0,0.050250664353370667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,2,64,128,1,fp8,fp8,0,0.05073066552480062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,1,64,0,1,float16,fp8,0,0.033701332906881966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,1,64,0,1,fp8,fp8,0,0.030832000076770782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,2,64,128,1,float16,float16,0,0.039162665605545044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,2,64,0,1,float16,float16,0,0.03230933348337809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,4,64,128,1,float16,fp8,0,0.03958400090535482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,2,64,128,1,float16,fp8,0,0.03979733337958654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,4,64,0,1,fp8,fp8,0,0.031109333038330078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,2,64,0,1,float16,fp8,0,0.03316800047953924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,2,64,0,1,fp8,fp8,0,0.031066666046778362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,4,64,128,1,float16,float16,0,0.03980266551176707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,4,64,0,1,float16,float16,0,0.033530667424201965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,4,64,128,1,fp8,fp8,0,0.05019199848175049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,4,64,0,1,float16,fp8,0,0.03277866790692011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,8,64,128,1,float16,float16,0,0.04036800066630045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,8,64,0,1,float16,float16,0,0.033802665770053864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,8,64,128,1,float16,fp8,0,0.040634666879971824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,8,64,128,1,fp8,fp8,0,0.051029334465662636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,8,64,0,1,float16,fp8,0,0.034341332813103996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,8,64,0,1,fp8,fp8,0,0.03219733387231827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,32,64,128,1,float16,float16,0,0.027850667635599773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,32,64,0,1,float16,float16,0,0.024645333488782246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,32,64,128,1,float16,fp8,0,0.02811199923356374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,32,64,128,1,fp8,fp8,0,0.03419200082619985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,32,64,0,1,float16,fp8,0,0.02601066728432973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,32,64,0,1,fp8,fp8,0,0.024570666253566742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,1,64,128,1,float16,float16,0,0.026474667092164356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,1,64,0,1,float16,float16,0,0.024304000039895374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,1,64,128,1,float16,fp8,0,0.027034667630990345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,1,64,128,1,fp8,fp8,0,0.03291733314593633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,1,64,0,1,float16,fp8,0,0.02478933334350586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,1,64,0,1,fp8,fp8,0,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,2,64,128,1,float16,float16,0,0.026389333109060924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,2,64,0,1,float16,float16,0,0.02420266717672348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,2,64,128,1,float16,fp8,0,0.026885333160559338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,2,64,128,1,fp8,fp8,0,0.033088001112143196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,2,64,0,1,float16,fp8,0,0.024421334266662598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,2,64,0,1,fp8,fp8,0,0.02293866624434789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,4,64,128,1,float16,float16,0,0.02698666602373123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,4,64,0,1,float16,float16,0,0.024234667420387268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,4,64,128,1,float16,fp8,0,0.027061333258946735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,4,64,128,1,fp8,fp8,0,0.034186666210492454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,8,64,128,1,fp8,fp8,0,0.03392533212900162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,4,64,0,1,float16,fp8,0,0.024885334074497223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,4,64,0,1,fp8,fp8,0,0.023685333629449207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,8,64,128,1,float16,float16,0,0.027493332823117573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,8,64,0,1,float16,float16,0,0.024165332317352295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,8,64,128,1,float16,fp8,0,0.027461332579453785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,8,64,0,1,float16,fp8,0,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,8,64,0,1,fp8,fp8,0,0.024469333390394848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,1,64,128,1,float16,float16,0,0.9684426784515381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,1,64,0,1,float16,float16,0,0.9422986507415771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,1,64,128,1,float16,fp8,0,0.9667893250783285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,1,64,0,1,float16,fp8,0,0.9399627049763998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,1,64,128,1,fp8,fp8,0,1.3060213724772136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,1,64,0,1,fp8,fp8,0,0.8642880121866862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,2,64,128,1,float16,float16,0,0.9883573055267334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,2,64,0,1,float16,float16,0,0.9561119874318441
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,2,64,128,1,float16,fp8,0,0.984549363454183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,2,64,0,1,float16,fp8,0,0.9567893346150717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,2,64,128,1,fp8,fp8,0,1.3161760171254475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,2,64,0,1,fp8,fp8,0,0.8741172949473063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,4,64,128,1,float16,float16,0,0.9895679950714111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,4,64,0,1,float16,float16,0,0.961850643157959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,4,64,128,1,float16,fp8,0,0.98581329981486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,4,64,0,1,float16,fp8,0,0.9566346804300944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,4,64,0,1,fp8,fp8,0,0.8863200346628824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,4,64,128,1,fp8,fp8,0,1.3319679896036785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,8,64,128,1,float16,float16,0,1.0061279932657878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,8,64,0,1,float16,float16,0,0.9786400000254313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,8,64,128,1,float16,fp8,0,0.9982293446858724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,32,64,128,1,float16,float16,0,0.5718506574630737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,8,64,128,1,fp8,fp8,0,1.3645812670389812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,8,64,0,1,float16,fp8,0,0.974618673324585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,8,64,0,1,fp8,fp8,0,0.9165493647257487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,32,64,0,1,float16,float16,0,0.5630880196889242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,32,64,128,1,float16,fp8,0,0.5587413311004639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,32,64,128,1,fp8,fp8,0,0.7176960309346517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,32,64,0,1,float16,fp8,0,0.5528853336970011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,32,64,0,1,fp8,fp8,0,0.4950079917907715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,1,64,128,1,float16,float16,0,0.49509867032368976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,1,64,0,1,float16,float16,0,0.4809066851933797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,1,64,128,1,float16,fp8,0,0.4951893488566081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,1,64,128,1,fp8,fp8,0,0.6661333243052164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,1,64,0,1,float16,fp8,0,0.4817546606063843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,1,64,0,1,fp8,fp8,0,0.44548265139261883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,2,64,128,1,float16,float16,0,0.5011359850565592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,2,64,0,1,float16,float16,0,0.48959465821584064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,2,64,128,1,float16,fp8,0,0.4997440179189046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,2,64,128,1,fp8,fp8,0,0.6672159830729166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,2,64,0,1,float16,fp8,0,0.4912639856338501
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,2,64,0,1,fp8,fp8,0,0.449290672938029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,4,64,128,1,float16,float16,0,0.5025066534678141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,4,64,0,1,float16,float16,0,0.4923146565755208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,4,64,128,1,float16,fp8,0,0.5022453467051188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,8,64,0,1,float16,float16,0,0.5008266766866049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,4,64,0,1,float16,fp8,0,0.49083201090494794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,4,64,128,1,fp8,fp8,0,0.6741066773732504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,4,64,0,1,fp8,fp8,0,0.45005865891774494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,8,64,128,1,float16,float16,0,0.5134613513946533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,8,64,128,1,float16,fp8,0,0.5093653202056885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,8,64,128,1,fp8,fp8,0,0.683194637298584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,8,64,0,1,float16,fp8,0,0.4959520101547241
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,8,64,0,1,fp8,fp8,0,0.4591253201166789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,32,64,128,1,float16,float16,0,0.2998986641565959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,32,64,0,1,float16,float16,0,0.296506663163503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,32,64,128,1,float16,fp8,0,0.29385600487391156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,32,64,128,1,fp8,fp8,0,0.3685386578241984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,32,64,0,1,float16,fp8,0,0.2911093235015869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,32,64,0,1,fp8,fp8,0,0.2558133403460185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,1,64,128,1,float16,float16,0,0.2582506736119588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,1,64,0,1,float16,float16,0,0.2543413241704305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,1,64,128,1,float16,fp8,0,0.2590986688931783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,1,64,128,1,fp8,fp8,0,0.3446773290634155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,1,64,0,1,float16,fp8,0,0.2511039972305298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,1,64,0,1,fp8,fp8,0,0.23433599869410196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,2,64,128,1,float16,float16,0,0.26136000951131183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,2,64,0,1,float16,float16,0,0.25538132588068646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,2,64,128,1,float16,fp8,0,0.2616106669108073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,2,64,128,1,fp8,fp8,0,0.3454933166503906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,2,64,0,1,float16,fp8,0,0.25432533025741577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,2,64,0,1,fp8,fp8,0,0.23516800006230673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,4,64,128,1,float16,float16,0,0.26317334175109863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,4,64,0,1,float16,float16,0,0.2585653265317281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,4,64,128,1,float16,fp8,0,0.2624053359031677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,8,64,128,1,float16,fp8,0,0.2672586639722188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,4,64,128,1,fp8,fp8,0,0.34980801741282147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,8,64,128,1,fp8,fp8,0,0.35093335310618085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,4,64,0,1,float16,fp8,0,0.2572266658147176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,4,64,0,1,fp8,fp8,0,0.2379146615664164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,8,64,128,1,float16,float16,0,0.2679520050684611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,8,64,0,1,float16,float16,0,0.2617866595586141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,8,64,0,1,float16,fp8,0,0.260917325814565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,8,64,0,1,fp8,fp8,0,0.23864533503850302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,32,64,128,1,float16,float16,0,0.16416533788045248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,32,64,0,1,float16,float16,0,0.16156267126401266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,32,64,128,1,float16,fp8,0,0.161381334066391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,32,64,128,1,fp8,fp8,0,0.19452800353368124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,32,64,0,1,float16,fp8,0,0.15956800182660422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,32,64,0,1,fp8,fp8,0,0.1389173368612925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,1,64,128,1,float16,float16,0,0.1369493305683136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,1,64,0,1,float16,float16,0,0.13318399588267008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,1,64,128,1,float16,fp8,0,0.13714667161305746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,2,64,128,1,float16,fp8,0,0.1374559998512268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,1,64,128,1,fp8,fp8,0,0.18255466222763062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,1,64,0,1,float16,fp8,0,0.13370666901270548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,1,64,0,1,fp8,fp8,0,0.12924266854921976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,2,64,128,1,float16,float16,0,0.13749866684277853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,2,64,0,1,float16,float16,0,0.13450666268666586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,2,64,128,1,fp8,fp8,0,0.18357867002487183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,2,64,0,1,float16,fp8,0,0.13529599706331888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,2,64,0,1,fp8,fp8,0,0.12803733348846436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,4,64,128,1,float16,float16,0,0.13938132921854654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,4,64,0,1,float16,float16,0,0.1353386640548706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,4,64,128,1,float16,fp8,0,0.13986133535703024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,8,64,128,1,float16,fp8,0,0.1420960028966268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,4,64,128,1,fp8,fp8,0,0.18302400906880698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,4,64,0,1,float16,fp8,0,0.1362879971663157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,4,64,0,1,fp8,fp8,0,0.13125333189964294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,8,64,128,1,float16,float16,0,0.14231999715169272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,8,64,0,1,float16,float16,0,0.1404800017674764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,8,64,128,1,fp8,fp8,0,0.1850773294766744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,8,64,0,1,float16,fp8,0,0.1388266682624817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,8,64,0,1,fp8,fp8,0,0.13169067104657492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,32,64,0,1,float16,fp8,0,0.08790399630864461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,32,64,128,1,float16,float16,0,0.08955732981363933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,32,64,0,1,float16,float16,0,0.08982400099436443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,32,64,128,1,float16,fp8,0,0.08829333384831746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,32,64,128,1,fp8,fp8,0,0.10850133498509724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,32,64,0,1,fp8,fp8,0,0.08077333370844524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,1,64,128,1,float16,float16,0,0.07319466769695282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,1,64,0,1,float16,float16,0,0.07222933570543925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,1,64,128,1,float16,fp8,0,0.07313066720962524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,1,64,128,1,fp8,fp8,0,0.09878399968147278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,1,64,0,1,float16,fp8,0,0.07249600191911061
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,1,64,0,1,fp8,fp8,0,0.07306666672229767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,2,64,128,1,float16,float16,0,0.07391466697057088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,4,64,128,1,float16,float16,0,0.07480533421039581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,2,64,0,1,float16,float16,0,0.07177599767843883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,2,64,128,1,float16,fp8,0,0.07462933162848155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,2,64,128,1,fp8,fp8,0,0.09894933303197224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,2,64,0,1,fp8,fp8,0,0.07195733487606049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,2,64,0,1,float16,fp8,0,0.07352533439795177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,4,64,0,1,float16,float16,0,0.07364266614119212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,4,64,128,1,float16,fp8,0,0.07503466804822286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,4,64,128,1,fp8,fp8,0,0.10160533587137859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,4,64,0,1,float16,fp8,0,0.07322666545708974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,4,64,0,1,fp8,fp8,0,0.07381333410739899
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,8,64,128,1,float16,float16,0,0.07631466786066692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,8,64,0,1,float16,float16,0,0.07519466678301494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,8,64,128,1,float16,fp8,0,0.07688533266385396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,8,64,128,1,fp8,fp8,0,0.10240532954533894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,8,64,0,1,float16,fp8,0,0.07523199915885925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,8,64,0,1,fp8,fp8,0,0.07508266468842824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,32,64,128,1,float16,float16,0,0.04679466784000397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,32,64,0,1,float16,float16,0,0.04748799900213877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,1,64,0,1,float16,float16,0,0.041690667470296226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,32,64,128,1,float16,fp8,0,0.04666133224964142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,32,64,128,1,fp8,fp8,0,0.06286400059858958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,32,64,0,1,float16,fp8,0,0.046709333856900535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,32,64,0,1,fp8,fp8,0,0.04953599969546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,1,64,128,1,float16,float16,0,0.04182933270931244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,1,64,128,1,float16,fp8,0,0.041519999504089355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,1,64,128,1,fp8,fp8,0,0.05644266804059347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,1,64,0,1,float16,fp8,0,0.04144533226887385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,1,64,0,1,fp8,fp8,0,0.042992000778516136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,2,64,128,1,float16,float16,0,0.04221333563327789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,2,64,0,1,float16,float16,0,0.04218133290608724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,2,64,128,1,float16,fp8,0,0.042352000872294106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,2,64,128,1,fp8,fp8,0,0.05618133147557577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,2,64,0,1,float16,fp8,0,0.041738669077555336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,2,64,0,1,fp8,fp8,0,0.04366933306058248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,4,64,128,1,float16,float16,0,0.04251733422279358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,4,64,0,1,float16,float16,0,0.041706666350364685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,4,64,128,1,float16,fp8,0,0.04261333247025808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,4,64,128,1,fp8,fp8,0,0.05669333537419637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,4,64,0,1,float16,fp8,0,0.04221866528193156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,4,64,0,1,fp8,fp8,0,0.04500266909599304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,8,64,128,1,float16,float16,0,0.042837331692377724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,8,64,0,1,float16,float16,0,0.04299733539422353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,8,64,128,1,float16,fp8,0,0.04311466713746389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,8,64,128,1,fp8,fp8,0,0.05648000041643778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,8,64,0,1,float16,fp8,0,0.042591998974482216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,8,64,0,1,fp8,fp8,0,0.043935999274253845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,32,64,128,1,float16,float16,0,0.03251733382542928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,32,64,0,1,float16,float16,0,0.032698666055997215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,32,64,128,1,float16,fp8,0,0.032357332607110344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,1,64,128,1,float16,fp8,0,0.03035733352104823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,1,64,128,1,fp8,fp8,0,0.03510399907827377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,32,64,128,1,fp8,fp8,0,0.03655466685692469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,32,64,0,1,float16,fp8,0,0.03238933285077413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,32,64,0,1,fp8,fp8,0,0.029274667302767437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,1,64,128,1,float16,float16,0,0.029520000020662945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,1,64,0,1,float16,float16,0,0.03030933439731598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,1,64,0,1,float16,fp8,0,0.02977066735426585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,1,64,0,1,fp8,fp8,0,0.028512001037597656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,2,64,128,1,float16,float16,0,0.030565333863099415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,2,64,0,1,float16,float16,0,0.030042665700117748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,2,64,128,1,float16,fp8,0,0.031034665803114574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,2,64,128,1,fp8,fp8,0,0.034287999073664345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,2,64,0,1,float16,fp8,0,0.029781334102153778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,2,64,0,1,fp8,fp8,0,0.02900800108909607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,4,64,128,1,float16,float16,0,0.030069333811601002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,4,64,0,1,float16,float16,0,0.03062933435042699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,4,64,128,1,float16,fp8,0,0.030789333085219067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,4,64,128,1,fp8,fp8,0,0.035605333745479584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,4,64,0,1,float16,fp8,0,0.03073599934577942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,4,64,0,1,fp8,fp8,0,0.028170667588710785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,8,64,128,1,float16,float16,0,0.030245333909988403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,8,64,0,1,float16,float16,0,0.031162666777769726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,32,64,128,1,float16,fp8,0,0.021685334543387096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,8,64,128,1,float16,fp8,0,0.030586667358875275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,8,64,128,1,fp8,fp8,0,0.03571200122435888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,8,64,0,1,float16,fp8,0,0.030586667358875275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,8,64,0,1,fp8,fp8,0,0.02920000006755193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,32,64,128,1,float16,float16,0,0.02165333429972331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,32,64,0,1,float16,float16,0,0.020581333587567013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,32,64,128,1,fp8,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,32,64,0,1,float16,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,32,64,0,1,fp8,fp8,0,0.021802666286627453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,1,64,128,1,float16,float16,0,0.019823999454577763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,1,64,0,1,float16,float16,0,0.020186666399240494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,1,64,128,1,float16,fp8,0,0.020400000115235645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,1,64,128,1,fp8,fp8,0,0.024192000428835552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,1,64,0,1,float16,fp8,0,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,1,64,0,1,fp8,fp8,0,0.020202666521072388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,2,64,128,1,float16,float16,0,0.020330666253964107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,2,64,0,1,fp8,fp8,0,0.0204373337328434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,4,64,128,1,float16,float16,0,0.019866666446129482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,2,64,0,1,float16,float16,0,0.019845332950353622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,2,64,128,1,float16,fp8,0,0.0205226664741834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,2,64,128,1,fp8,fp8,0,0.025029333929220837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,2,64,0,1,float16,fp8,0,0.0204373337328434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,4,64,0,1,float16,float16,0,0.02027733375628789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,4,64,128,1,float16,fp8,0,0.020021333048741024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,4,64,128,1,fp8,fp8,0,0.0240639994541804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,4,64,0,1,float16,fp8,0,0.020282667130231857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,4,64,0,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,8,64,128,1,float16,float16,0,0.020666666328907013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,8,64,0,1,float16,float16,0,0.020346666375796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,8,64,128,1,float16,fp8,0,0.02123733361562093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,32,64,128,1,float16,fp8,0,0.018426666657129925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,8,64,128,1,fp8,fp8,0,0.024847999215126038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,8,64,0,1,float16,fp8,0,0.019866666446129482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,8,64,0,1,fp8,fp8,0,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,32,64,128,1,float16,float16,0,0.017968000223239262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,32,64,0,1,float16,float16,0,0.01781333362062772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,32,64,128,1,fp8,fp8,0,0.023408000667889912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,32,64,0,1,float16,fp8,0,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,32,64,0,1,fp8,fp8,0,0.019978666057189304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,1,64,128,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,1,64,0,1,float16,float16,0,0.016165333489576977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,1,64,128,1,float16,fp8,0,0.017429333180189133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,1,64,128,1,fp8,fp8,0,0.02271466702222824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,1,64,0,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,1,64,0,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,2,64,128,1,float16,float16,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,2,64,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,2,64,128,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,2,64,128,1,fp8,fp8,0,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,2,64,0,1,float16,fp8,0,0.01748266691962878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,2,64,0,1,fp8,fp8,0,0.019621333728233974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,4,64,128,1,float16,float16,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,4,64,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,4,64,128,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,4,64,128,1,fp8,fp8,0,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,4,64,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,4,64,0,1,fp8,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,8,64,128,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,8,64,0,1,float16,float16,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,8,64,128,1,float16,fp8,0,0.01754133279124896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,8,64,128,1,fp8,fp8,0,0.02298133323589961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,8,64,0,1,float16,fp8,0,0.017418666432301205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,1,64,0,1,float16,fp8,0,0.3580746650695801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,8,64,0,1,fp8,fp8,0,0.019941333681344986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,1,64,128,1,float16,float16,0,0.35677866141001385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,1,64,0,1,float16,float16,0,0.35742934544881183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,1,64,128,1,float16,fp8,0,0.35683735211690265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,1,64,128,1,fp8,fp8,0,0.4389013449350993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,1,64,0,1,fp8,fp8,0,0.4400906562805176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,2,64,128,1,float16,float16,0,0.36662399768829346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,2,64,0,1,float16,float16,0,0.36604801813761395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,2,64,128,1,float16,fp8,0,0.3654719988505046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,2,64,128,1,fp8,fp8,0,0.44421335061391193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,2,64,0,1,float16,fp8,0,0.3652533292770386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,2,64,0,1,fp8,fp8,0,0.44097598393758136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,4,64,128,1,float16,float16,0,0.36635732650756836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,4,64,0,1,float16,float16,0,0.36793065071105957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,4,64,128,1,float16,fp8,0,0.3648800055185954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,4,64,128,1,fp8,fp8,0,0.4440586566925049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,4,64,0,1,fp8,fp8,0,0.4432160059611003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,4,64,0,1,float16,fp8,0,0.36536534627278644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,8,64,128,1,float16,float16,0,0.37597334384918213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,8,64,0,1,float16,float16,0,0.37329598267873126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,8,64,128,1,float16,fp8,0,0.3731573422749837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,8,64,128,1,fp8,fp8,0,0.46564265092213947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,32,64,128,1,fp8,fp8,0,0.25542932748794556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,8,64,0,1,float16,fp8,0,0.37442131837209064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,32,64,128,1,float16,float16,0,0.2292906641960144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,8,64,0,1,fp8,fp8,0,0.46634666124979657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,32,64,0,1,float16,float16,0,0.229695995648702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,32,64,128,1,float16,fp8,0,0.22321067253748575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,32,64,0,1,float16,fp8,0,0.2230400045712789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,32,64,0,1,fp8,fp8,0,0.25570666790008545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,1,64,128,1,float16,float16,0,0.1865440011024475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,1,64,0,1,float16,float16,0,0.18910932540893555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,1,64,128,1,float16,fp8,0,0.18638400236765543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,1,64,128,1,fp8,fp8,0,0.2310826579729716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,1,64,0,1,float16,fp8,0,0.18688533703486124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,1,64,0,1,fp8,fp8,0,0.23137599229812622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,2,64,128,1,float16,float16,0,0.18921067317326865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,2,64,0,1,float16,float16,0,0.19031467040379843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,2,64,128,1,float16,fp8,0,0.18888533115386963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,2,64,128,1,fp8,fp8,0,0.23397332429885864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,2,64,0,1,float16,fp8,0,0.19002666076024374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,2,64,0,1,fp8,fp8,0,0.23438400030136108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,4,64,128,1,float16,float16,0,0.1920586625734965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,4,64,0,1,float16,float16,0,0.19160000483194986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,4,64,128,1,float16,fp8,0,0.19147199392318726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,4,64,128,1,fp8,fp8,0,0.2330346703529358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,4,64,0,1,float16,fp8,0,0.19227200746536255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,4,64,0,1,fp8,fp8,0,0.23338133096694946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,8,64,128,1,float16,float16,0,0.19670399030049643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,8,64,0,1,float16,float16,0,0.197269340356191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,8,64,128,1,float16,fp8,0,0.19726399580637613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,8,64,128,1,fp8,fp8,0,0.2384000023206075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,8,64,0,1,float16,fp8,0,0.19549866517384848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,8,64,0,1,fp8,fp8,0,0.23805334170659384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,32,64,128,1,float16,float16,0,0.12569600343704224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,32,64,0,1,float16,float16,0,0.1255519986152649
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,32,64,128,1,float16,fp8,0,0.1239359974861145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,32,64,128,1,fp8,fp8,0,0.13851199547449747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,32,64,0,1,fp8,fp8,0,0.13782399892807007
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,32,64,0,1,float16,fp8,0,0.12319466471672058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,1,64,128,1,float16,float16,0,0.10050666332244873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,1,64,0,1,float16,float16,0,0.10133866469065349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,1,64,128,1,float16,fp8,0,0.10083733002344768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,1,64,128,1,fp8,fp8,0,0.12718400359153748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,1,64,0,1,float16,fp8,0,0.10124267141024272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,1,64,0,1,fp8,fp8,0,0.1269653340180715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,2,64,128,1,float16,float16,0,0.10099200407663982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,2,64,0,1,float16,float16,0,0.10204799969991048
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,2,64,128,1,float16,fp8,0,0.10140267014503479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,2,64,128,1,fp8,fp8,0,0.12781332929929098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,2,64,0,1,float16,fp8,0,0.10146133104960124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,2,64,0,1,fp8,fp8,0,0.12777066230773926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,4,64,0,1,fp8,fp8,0,0.12914666533470154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,4,64,128,1,float16,float16,0,0.10301867127418518
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,4,64,0,1,float16,float16,0,0.10297600428263347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,4,64,128,1,float16,fp8,0,0.10254399975140889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,4,64,128,1,fp8,fp8,0,0.12909332911173502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,4,64,0,1,float16,fp8,0,0.10336533188819885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,8,64,128,1,float16,float16,0,0.10621333122253418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,8,64,0,1,float16,float16,0,0.10621866583824158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,8,64,128,1,float16,fp8,0,0.10667199889818828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,8,64,128,1,fp8,fp8,0,0.13065600395202637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,32,64,128,1,fp8,fp8,0,0.08098133405049641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,8,64,0,1,float16,fp8,0,0.10563733180363973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,8,64,0,1,fp8,fp8,0,0.12998933593432108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,32,64,128,1,float16,float16,0,0.07114666700363159
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,32,64,0,1,float16,float16,0,0.0705813318490982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,32,64,128,1,float16,fp8,0,0.06901866694291432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,32,64,0,1,float16,fp8,0,0.06926399966080983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,32,64,0,1,fp8,fp8,0,0.08090133468310039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,1,64,128,1,float16,float16,0,0.05486933390299479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,1,64,0,1,float16,float16,0,0.05527466535568237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,1,64,128,1,float16,fp8,0,0.0551146666208903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,1,64,128,1,fp8,fp8,0,0.07163199782371521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,1,64,0,1,float16,fp8,0,0.055013333757718406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,1,64,0,1,fp8,fp8,0,0.07095466554164886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,2,64,128,1,float16,float16,0,0.056133334835370384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,2,64,0,1,float16,float16,0,0.05566399792830149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,2,64,128,1,float16,fp8,0,0.05560533205668131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,2,64,0,1,float16,fp8,0,0.054858664671579994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,2,64,128,1,fp8,fp8,0,0.0720000018676122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,2,64,0,1,fp8,fp8,0,0.07228266696135204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,4,64,128,1,float16,float16,0,0.05646933118502299
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,4,64,0,1,float16,float16,0,0.05650666852792104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,4,64,128,1,float16,fp8,0,0.05637866755326589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,4,64,128,1,fp8,fp8,0,0.07293866574764252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,4,64,0,1,float16,fp8,0,0.056608001391092934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,4,64,0,1,fp8,fp8,0,0.07272000114123027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,8,64,128,1,float16,float16,0,0.05885866781075796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,8,64,0,1,float16,float16,0,0.05880000193913778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,8,64,128,1,float16,fp8,0,0.05890133480230967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,8,64,128,1,fp8,fp8,0,0.0747573326031367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,8,64,0,1,float16,fp8,0,0.05879466732343038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,8,64,0,1,fp8,fp8,0,0.07526933153470357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,32,64,128,1,float16,float16,0,0.037685332198937736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,1,64,128,1,float16,float16,0,0.033402666449546814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,32,64,0,1,float16,float16,0,0.03756800045569738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,32,64,128,1,float16,fp8,0,0.037802666425704956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,32,64,128,1,fp8,fp8,0,0.04853333532810211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,32,64,0,1,float16,fp8,0,0.03705599904060364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,32,64,0,1,fp8,fp8,0,0.04828266799449921
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,1,64,0,1,float16,float16,0,0.033439998825391136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,1,64,128,1,float16,fp8,0,0.033173332611719765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,1,64,128,1,fp8,fp8,0,0.04357333481311798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,1,64,0,1,float16,fp8,0,0.03266666581233343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,1,64,0,1,fp8,fp8,0,0.043194666504859924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,2,64,128,1,float16,float16,0,0.032960000137488045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,2,64,0,1,float16,float16,0,0.03307733436425527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,2,64,128,1,float16,fp8,0,0.033157333731651306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,2,64,128,1,fp8,fp8,0,0.04350399971008301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,2,64,0,1,float16,fp8,0,0.03349866718053818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,2,64,0,1,fp8,fp8,0,0.04341333111127218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,4,64,128,1,float16,float16,0,0.033088001112143196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,4,64,0,1,float16,float16,0,0.03310399999221166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,8,64,128,1,float16,fp8,0,0.03389866650104523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,4,64,128,1,float16,fp8,0,0.03373866776625315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,4,64,128,1,fp8,fp8,0,0.04322666426499685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,4,64,0,1,float16,fp8,0,0.03332266708215078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,4,64,0,1,fp8,fp8,0,0.04387199878692627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,8,64,128,1,float16,float16,0,0.03425599883000056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,32,64,0,1,float16,float16,0,0.024432001014550526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,8,64,0,1,float16,float16,0,0.034474665919939675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,8,64,128,1,fp8,fp8,0,0.04408533374468485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,8,64,0,1,float16,fp8,0,0.03348266581694285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,8,64,0,1,fp8,fp8,0,0.04390400151411692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,32,64,128,1,float16,float16,0,0.024304000039895374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,32,64,128,1,float16,fp8,0,0.02463999887307485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,32,64,128,1,fp8,fp8,0,0.029450667401154835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,32,64,0,1,float16,fp8,0,0.024336000283559162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,32,64,0,1,fp8,fp8,0,0.02914133419593175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,1,64,128,1,float16,float16,0,0.02181333303451538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,1,64,0,1,fp8,fp8,0,0.028160000840822857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,1,64,0,1,float16,float16,0,0.022416000564893086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,1,64,128,1,float16,fp8,0,0.02349333216746648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,1,64,128,1,fp8,fp8,0,0.028218666712443035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,1,64,0,1,float16,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,2,64,128,1,float16,float16,0,0.022709332406520844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,2,64,0,1,float16,float16,0,0.02216533323129018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,2,64,128,1,float16,fp8,0,0.022890667120615642
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,2,64,128,1,fp8,fp8,0,0.028042666614055634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,2,64,0,1,float16,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,2,64,0,1,fp8,fp8,0,0.02826666583617528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,4,64,128,1,float16,float16,0,0.02346666653951009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,4,64,0,1,float16,float16,0,0.023210667073726654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,8,64,128,1,float16,fp8,0,0.023941333095232647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,4,64,128,1,float16,fp8,0,0.022618666291236877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,4,64,128,1,fp8,fp8,0,0.0283146674434344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,4,64,0,1,float16,fp8,0,0.023706667125225067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,4,64,0,1,fp8,fp8,0,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,8,64,128,1,float16,float16,0,0.02345066765944163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,8,64,0,1,float16,float16,0,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,8,64,128,1,fp8,fp8,0,0.02903999884923299
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,8,64,0,1,float16,fp8,0,0.023898666103680927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,8,64,0,1,fp8,fp8,0,0.028815999627113342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,32,64,128,1,float16,float16,0,0.018453333526849747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,32,64,0,1,float16,float16,0,0.018191999445358913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,32,64,128,1,float16,fp8,0,0.018650667121013004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,1,64,128,1,fp8,fp8,0,0.020687999824682873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,32,64,128,1,fp8,fp8,0,0.021744000415007275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,32,64,0,1,float16,fp8,0,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,32,64,0,1,fp8,fp8,0,0.02187199890613556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,1,64,128,1,float16,float16,0,0.01754133279124896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,1,64,0,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,1,64,128,1,float16,fp8,0,0.01826133330663045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,1,64,0,1,float16,fp8,0,0.017994667092959087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,1,64,0,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,2,64,128,1,float16,float16,0,0.017514667163292568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,2,64,0,1,float16,float16,0,0.018005333840847015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,2,64,128,1,float16,fp8,0,0.018181333939234417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,2,64,128,1,fp8,fp8,0,0.020901332298914593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,2,64,0,1,float16,fp8,0,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,2,64,0,1,fp8,fp8,0,0.02067199970285098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,8,64,128,1,float16,float16,0,0.017349333812793095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,4,64,128,1,float16,float16,0,0.01791999985774358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,4,64,0,1,float16,float16,0,0.017994667092959087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,4,64,128,1,float16,fp8,0,0.01794133335351944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,4,64,128,1,fp8,fp8,0,0.02146666745344798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,4,64,0,1,float16,fp8,0,0.018239999810854595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,4,64,0,1,fp8,fp8,0,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,8,64,0,1,float16,float16,0,0.017840000490347546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,8,64,128,1,float16,fp8,0,0.017711999515692394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,8,64,128,1,fp8,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,8,64,0,1,float16,fp8,0,0.0183999997874101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,8,64,0,1,fp8,fp8,0,0.021525333325068157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,1,64,128,1,float16,float16,0,0.014741333822409311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,32,64,128,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,32,64,0,1,float16,float16,0,0.014783999572197596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,32,64,128,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,32,64,128,1,fp8,fp8,0,0.019381333142518997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,1,64,0,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,32,64,0,1,float16,fp8,0,0.014485333114862442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,32,64,0,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,1,64,0,1,float16,float16,0,0.014544000228246054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,1,64,128,1,float16,fp8,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,2,64,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,1,64,128,1,fp8,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,1,64,0,1,fp8,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,2,64,128,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,2,64,0,1,float16,float16,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,2,64,128,1,float16,fp8,0,0.01461333284775416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,2,64,128,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,2,64,0,1,fp8,fp8,0,0.01942933350801468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,4,64,128,1,float16,float16,0,0.01470400020480156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,4,64,0,1,float16,float16,0,0.014783999572197596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,4,64,128,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,4,64,128,1,fp8,fp8,0,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,4,64,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,4,64,0,1,fp8,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,8,64,128,1,float16,float16,0,0.014373333503802618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,8,64,0,1,float16,float16,0,0.014560000350077948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,8,64,128,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,8,64,128,1,fp8,fp8,0,0.019744000087181728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,8,64,0,1,float16,fp8,0,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,8,64,0,1,fp8,fp8,0,0.01974933346112569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,32,64,128,1,float16,float16,0,0.013493333011865616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,32,64,0,1,float16,float16,0,0.01322666679819425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,32,64,128,1,float16,fp8,0,0.013653332988421122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,32,64,128,1,fp8,fp8,0,0.018650667121013004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,32,64,0,1,float16,fp8,0,0.014015999933083853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,32,64,0,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,1,64,128,1,float16,float16,0,0.013594667116800943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,1,64,0,1,float16,float16,0,0.013690666606028875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,1,64,128,1,float16,fp8,0,0.013888000200192133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,1,64,128,1,fp8,fp8,0,0.018432000031073887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,1,64,0,1,float16,fp8,0,0.013866666704416275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,1,64,0,1,fp8,fp8,0,0.018800000349680584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,2,64,128,1,float16,float16,0,0.013823999712864557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,2,64,0,1,float16,float16,0,0.013605333864688873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,2,64,128,1,float16,fp8,0,0.014069333672523499
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,2,64,128,1,fp8,fp8,0,0.0183999997874101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,2,64,0,1,float16,fp8,0,0.014085333794355392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,2,64,0,1,fp8,fp8,0,0.018751999984184902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,4,64,128,1,float16,float16,0,0.013845333208640417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,4,64,0,1,float16,float16,0,0.013850666582584381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,4,64,128,1,float16,fp8,0,0.013872000078360239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,8,64,0,1,float16,float16,0,0.0138026662170887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,4,64,128,1,fp8,fp8,0,0.018272000054518383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,4,64,0,1,float16,fp8,0,0.013893333574136099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,4,64,0,1,fp8,fp8,0,0.01844800015290578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,8,64,128,1,float16,float16,0,0.013834666460752487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,8,64,128,1,float16,fp8,0,0.014096000542243322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,8,64,128,1,fp8,fp8,0,0.018570666511853535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,8,64,0,1,float16,fp8,0,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,8,64,0,1,fp8,fp8,0,0.018415999909241993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,1,64,128,1,fp8,fp8,0,0.31141867240269977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,1,64,128,1,float16,float16,0,0.17267733812332153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,1,64,0,1,float16,float16,0,0.17151999473571777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,1,64,128,1,float16,fp8,0,0.17102932929992676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,1,64,0,1,float16,fp8,0,0.17202667395273843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,1,64,0,1,fp8,fp8,0,0.3118026653925578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,2,64,128,1,float16,float16,0,0.1755839983622233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,2,64,0,1,float16,float16,0,0.1763520042101542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,2,64,128,1,float16,fp8,0,0.1755573352177938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,2,64,128,1,fp8,fp8,0,0.31194667021433514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,2,64,0,1,float16,fp8,0,0.1750453313191732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,2,64,0,1,fp8,fp8,0,0.31329600016276044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,4,64,128,1,float16,float16,0,0.1776906649271647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,4,64,0,1,float16,float16,0,0.1764693260192871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,4,64,128,1,float16,fp8,0,0.1758026679356893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,4,64,128,1,fp8,fp8,0,0.31381332874298096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,4,64,0,1,float16,fp8,0,0.1763733426729838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,4,64,0,1,fp8,fp8,0,0.3115573326746623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,8,64,128,1,float16,float16,0,0.18055466810862222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,8,64,0,1,float16,float16,0,0.18064000209172568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,8,64,128,1,float16,fp8,0,0.18004266421000162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,8,64,128,1,fp8,fp8,0,0.32126933336257935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,8,64,0,1,float16,fp8,0,0.18044267098108926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,8,64,0,1,fp8,fp8,0,0.32124267021814984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,32,64,128,1,float16,float16,0,0.11822932958602905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,32,64,128,1,float16,fp8,0,0.1139359970887502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,32,64,0,1,float16,float16,0,0.11915733416875203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,32,64,128,1,fp8,fp8,0,0.17856534322102866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,32,64,0,1,float16,fp8,0,0.1160640021165212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,32,64,0,1,fp8,fp8,0,0.1792959968249003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,1,64,128,1,float16,float16,0,0.09309867024421692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,1,64,0,1,float16,float16,0,0.09406933188438416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,2,64,0,1,float16,float16,0,0.09359467029571533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,1,64,128,1,float16,fp8,0,0.09318932890892029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,1,64,128,1,fp8,fp8,0,0.16716800133387247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,1,64,0,1,float16,fp8,0,0.09320533275604248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,1,64,0,1,fp8,fp8,0,0.16615999738375345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,2,64,128,1,float16,float16,0,0.09390933314959209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,2,64,128,1,float16,fp8,0,0.09397866328557332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,2,64,0,1,float16,fp8,0,0.09422399600346883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,2,64,128,1,fp8,fp8,0,0.16766933600107828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,2,64,0,1,fp8,fp8,0,0.16857600212097168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,4,64,128,1,float16,float16,0,0.09525866309801738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,4,64,0,1,float16,float16,0,0.09490133325258891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,8,64,0,1,float16,float16,0,0.09947199622790019
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,4,64,128,1,float16,fp8,0,0.09521599610646565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,4,64,128,1,fp8,fp8,0,0.16765866676966348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,4,64,0,1,float16,fp8,0,0.09513599673906963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,4,64,0,1,fp8,fp8,0,0.16823999087015787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,8,64,128,1,float16,float16,0,0.09968533118565877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,8,64,128,1,float16,fp8,0,0.09828799962997437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,8,64,0,1,float16,fp8,0,0.09960533181826274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,8,64,128,1,fp8,fp8,0,0.17052799463272095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,8,64,0,1,fp8,fp8,0,0.16966933012008667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,32,64,128,1,float16,float16,0,0.06591466565926869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,1,64,128,1,float16,float16,0,0.051183998584747314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,32,64,0,1,float16,float16,0,0.0658079981803894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,32,64,128,1,float16,fp8,0,0.06453333298365276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,32,64,128,1,fp8,fp8,0,0.10109866658846538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,32,64,0,1,float16,fp8,0,0.06355200211207072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,32,64,0,1,fp8,fp8,0,0.10100266337394714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,1,64,0,1,float16,float16,0,0.05102399984995524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,1,64,128,1,float16,fp8,0,0.05183466772238413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,1,64,128,1,fp8,fp8,0,0.09084799885749817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,1,64,0,1,float16,fp8,0,0.05206400156021118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,1,64,0,1,fp8,fp8,0,0.0906933347384135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,2,64,128,1,float16,float16,0,0.05193600058555603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,2,64,0,1,float16,float16,0,0.05153599878152212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,2,64,128,1,float16,fp8,0,0.05208533505598704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,2,64,128,1,fp8,fp8,0,0.09304533402125041
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,2,64,0,1,float16,fp8,0,0.05266133447488149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,2,64,0,1,fp8,fp8,0,0.09225599964459737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,4,64,128,1,float16,float16,0,0.05314666529496511
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,4,64,0,1,float16,float16,0,0.052986666560173035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,4,64,128,1,float16,fp8,0,0.05285866558551788
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,4,64,128,1,fp8,fp8,0,0.09284800291061401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,4,64,0,1,float16,fp8,0,0.05260799825191498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,4,64,0,1,fp8,fp8,0,0.09304533402125041
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,8,64,128,1,float16,float16,0,0.05454933146635691
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,8,64,0,1,fp8,fp8,0,0.09589866797129314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,8,64,0,1,float16,float16,0,0.05433600147565206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,8,64,128,1,float16,fp8,0,0.05468266705671946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,8,64,128,1,fp8,fp8,0,0.09692266583442688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,8,64,0,1,float16,fp8,0,0.05380799869696299
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,32,64,128,1,float16,float16,0,0.03549866626660029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,32,64,0,1,float16,float16,0,0.0367999995748202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,32,64,128,1,float16,fp8,0,0.03561066587766012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,32,64,128,1,fp8,fp8,0,0.058933332562446594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,32,64,0,1,float16,fp8,0,0.03514666606982549
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,32,64,0,1,fp8,fp8,0,0.05927466849486033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,1,64,128,1,float16,float16,0,0.031104000906149547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,1,64,0,1,float16,float16,0,0.03183999905983607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,1,64,128,1,float16,fp8,0,0.03189333279927572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,1,64,128,1,fp8,fp8,0,0.054234668612480164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,1,64,0,1,float16,fp8,0,0.03234666585922241
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,1,64,0,1,fp8,fp8,0,0.053541332483291626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,2,64,128,1,float16,float16,0,0.031856000423431396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,2,64,0,1,float16,float16,0,0.03159466634194056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,2,64,128,1,float16,fp8,0,0.0320266659061114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,2,64,128,1,fp8,fp8,0,0.053818667928377785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,2,64,0,1,float16,fp8,0,0.032255999743938446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,2,64,0,1,fp8,fp8,0,0.054005334774653115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,4,64,128,1,float16,float16,0,0.03196266790231069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,4,64,0,1,float16,float16,0,0.031983998914559685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,4,64,128,1,float16,fp8,0,0.03179733455181122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,4,64,128,1,fp8,fp8,0,0.054010664423306785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,4,64,0,1,float16,fp8,0,0.03233066697915395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,4,64,0,1,fp8,fp8,0,0.054154664278030396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,8,64,128,1,float16,float16,0,0.03289066751797994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,8,64,0,1,float16,float16,0,0.03265066693226496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,8,64,128,1,float16,fp8,0,0.03285333265860876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,8,64,128,1,fp8,fp8,0,0.053690666953722634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,8,64,0,1,float16,fp8,0,0.0329066663980484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,8,64,0,1,fp8,fp8,0,0.054330666859944664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,32,64,128,1,float16,float16,0,0.021957332889238994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,32,64,0,1,float16,float16,0,0.022117334107557934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,32,64,128,1,float16,fp8,0,0.022074667116006214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,32,64,128,1,fp8,fp8,0,0.03510933369398117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,32,64,0,1,float16,fp8,0,0.021920000513394673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,32,64,0,1,fp8,fp8,0,0.034346667428811394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,1,64,128,1,float16,float16,0,0.02075733368595441
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,1,64,0,1,float16,float16,0,0.02083733429511388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,1,64,128,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,1,64,128,1,fp8,fp8,0,0.03385066737731298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,2,64,128,1,fp8,fp8,0,0.03378133227427801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,1,64,0,1,float16,fp8,0,0.020928000410397846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,1,64,0,1,fp8,fp8,0,0.034272000193595886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,2,64,128,1,float16,float16,0,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,2,64,0,1,float16,float16,0,0.020703999946514767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,2,64,128,1,float16,fp8,0,0.020831999679406483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,2,64,0,1,float16,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,2,64,0,1,fp8,fp8,0,0.03395200024048487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,4,64,128,1,float16,float16,0,0.021365332106749218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,4,64,0,1,float16,float16,0,0.021221332252025604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,4,64,128,1,float16,fp8,0,0.021541332205136616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,4,64,128,1,fp8,fp8,0,0.03470933437347412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,4,64,0,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,4,64,0,1,fp8,fp8,0,0.03443733354409536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,8,64,128,1,float16,float16,0,0.02165866643190384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,8,64,0,1,float16,float16,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,8,64,128,1,float16,fp8,0,0.021903999149799347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,8,64,128,1,fp8,fp8,0,0.03498133271932602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,8,64,0,1,float16,fp8,0,0.021727999051411945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,8,64,0,1,fp8,fp8,0,0.034976000587145485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,32,64,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,32,64,0,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,32,64,128,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,32,64,128,1,fp8,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,32,64,0,1,float16,fp8,0,0.01757866640885671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,32,64,0,1,fp8,fp8,0,0.025557334224383037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,1,64,128,1,float16,float16,0,0.016362667083740234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,1,64,0,1,float16,float16,0,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,1,64,128,1,float16,fp8,0,0.01643199970324834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,1,64,128,1,fp8,fp8,0,0.02443733314673106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,1,64,0,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,1,64,0,1,fp8,fp8,0,0.02463999887307485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,2,64,0,1,fp8,fp8,0,0.024336000283559162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,2,64,128,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,2,64,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,2,64,128,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,2,64,128,1,fp8,fp8,0,0.02473066747188568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,2,64,0,1,float16,fp8,0,0.016282666474580765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,4,64,128,1,float16,float16,0,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,4,64,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,4,64,128,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,4,64,128,1,fp8,fp8,0,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,4,64,0,1,float16,fp8,0,0.01749333366751671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,4,64,0,1,fp8,fp8,0,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,8,64,128,1,float16,float16,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,8,64,0,1,float16,float16,0,0.016250666230916977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,8,64,128,1,float16,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,32,64,128,1,float16,fp8,0,0.014250667144854864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,8,64,128,1,fp8,fp8,0,0.025199999411900837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,8,64,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,8,64,0,1,fp8,fp8,0,0.02499199906984965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,32,64,128,1,float16,float16,0,0.014197333405415217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,32,64,0,1,float16,float16,0,0.014271999398867289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,32,64,128,1,fp8,fp8,0,0.01998399943113327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,32,64,0,1,float16,fp8,0,0.014277332772811254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,32,64,0,1,fp8,fp8,0,0.020394666741291683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,1,64,128,1,float16,float16,0,0.013562666873137156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,1,64,0,1,float16,float16,0,0.013845333208640417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,1,64,128,1,float16,fp8,0,0.018357332795858383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,1,64,128,1,fp8,fp8,0,0.02032533288002014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,1,64,0,1,float16,fp8,0,0.013610667238632837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,1,64,0,1,fp8,fp8,0,0.019653332730134327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,2,64,128,1,float16,float16,0,0.014127999544143677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,2,64,0,1,float16,float16,0,0.013834666460752487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,2,64,128,1,float16,fp8,0,0.014149333039919535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,2,64,128,1,fp8,fp8,0,0.020266667008399963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,2,64,0,1,float16,fp8,0,0.014197333405415217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,2,64,0,1,fp8,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,4,64,128,1,float16,float16,0,0.014117332796255747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,4,64,0,1,float16,float16,0,0.014069333672523499
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,4,64,128,1,float16,fp8,0,0.014287999520699183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,4,64,128,1,fp8,fp8,0,0.019930666933457058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,4,64,0,1,float16,fp8,0,0.014373333503802618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,4,64,0,1,fp8,fp8,0,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,8,64,128,1,float16,float16,0,0.013397333522637686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,8,64,0,1,float16,float16,0,0.014181333283583323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,8,64,128,1,float16,fp8,0,0.01440000037352244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,8,64,128,1,fp8,fp8,0,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,8,64,0,1,float16,fp8,0,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,32,64,128,1,float16,float16,0,0.01239466667175293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,8,64,0,1,fp8,fp8,0,0.020186666399240494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,32,64,0,1,float16,float16,0,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,32,64,128,1,float16,fp8,0,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,32,64,128,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,32,64,0,1,float16,fp8,0,0.013141332815090815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,32,64,0,1,fp8,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,1,64,128,1,float16,float16,0,0.012661332885424295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,1,64,0,1,float16,float16,0,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,1,64,128,1,float16,fp8,0,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,1,64,128,1,fp8,fp8,0,0.01878400022784869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,1,64,0,1,float16,fp8,0,0.013199999928474426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,1,64,0,1,fp8,fp8,0,0.018640000373125076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,2,64,128,1,float16,float16,0,0.013242666920026144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,2,64,0,1,float16,float16,0,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,2,64,128,1,float16,fp8,0,0.01341333364446958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,2,64,128,1,fp8,fp8,0,0.018543999642133713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,2,64,0,1,float16,fp8,0,0.013232000172138214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,2,64,0,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,4,64,128,1,float16,float16,0,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,4,64,0,1,float16,float16,0,0.012576000144084295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,4,64,128,1,float16,fp8,0,0.01360000049074491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,4,64,128,1,fp8,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,4,64,0,1,float16,fp8,0,0.01314666618903478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,4,64,0,1,fp8,fp8,0,0.018522666146357853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,8,64,128,1,float16,float16,0,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,8,64,0,1,float16,float16,0,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,8,64,128,1,float16,fp8,0,0.01322666679819425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,8,64,128,1,fp8,fp8,0,0.018543999642133713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,8,64,0,1,float16,fp8,0,0.01322666679819425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,8,64,0,1,fp8,fp8,0,0.018687999496857326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,32,64,128,1,float16,float16,0,0.012357333054145178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,32,64,0,1,float16,float16,0,0.011519999553759893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,32,64,128,1,float16,fp8,0,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,32,64,128,1,fp8,fp8,0,0.018800000349680584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,32,64,0,1,float16,fp8,0,0.012517333030700684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,32,64,0,1,fp8,fp8,0,0.018458666900793713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,1,64,128,1,float16,float16,0,0.012213333199421564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,1,64,0,1,float16,float16,0,0.011909333368142446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,1,64,128,1,float16,fp8,0,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,1,64,128,1,fp8,fp8,0,0.01770666614174843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,1,64,0,1,float16,fp8,0,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,2,64,0,1,float16,fp8,0,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,1,64,0,1,fp8,fp8,0,0.01863466699918111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,2,64,128,1,float16,float16,0,0.01249066616098086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,2,64,0,1,float16,float16,0,0.012181332955757776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,2,64,128,1,float16,fp8,0,0.012645332763592402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,2,64,128,1,fp8,fp8,0,0.01793066660563151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,2,64,0,1,fp8,fp8,0,0.017887999614079792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,4,64,128,1,float16,float16,0,0.012250666817029318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,4,64,0,1,float16,float16,0,0.0124746672809124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,4,64,128,1,float16,fp8,0,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,4,64,128,1,fp8,fp8,0,0.018373332917690277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,4,64,0,1,float16,fp8,0,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,4,64,0,1,fp8,fp8,0,0.018005333840847015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,8,64,128,1,float16,float16,0,0.011786667009194693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,8,64,0,1,float16,float16,0,0.012005332857370377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,8,64,128,1,float16,fp8,0,0.012682666381200155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,8,64,128,1,fp8,fp8,0,0.018266666680574417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,8,64,0,1,float16,fp8,0,0.01246400053302447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,8,64,0,1,fp8,fp8,0,0.01846933364868164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,1,64,128,1,float16,float16,0,0.1060693363348643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,1,64,0,1,float16,float16,0,0.1058186690012614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,1,64,128,1,float16,fp8,0,0.10441600282986958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,1,64,128,1,fp8,fp8,0,0.2603893280029297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,1,64,0,1,float16,fp8,0,0.10587199529012044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,1,64,0,1,fp8,fp8,0,0.2609333395957947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,2,64,128,1,float16,float16,0,0.10631466905275981
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,2,64,0,1,float16,float16,0,0.10685867071151733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,2,64,128,1,float16,fp8,0,0.10631466905275981
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,2,64,0,1,float16,fp8,0,0.1060640017191569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,2,64,128,1,fp8,fp8,0,0.2594719926516215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,2,64,0,1,fp8,fp8,0,0.2608106732368469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,4,64,128,1,float16,float16,0,0.10762133200963338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,4,64,0,1,float16,float16,0,0.10790399710337321
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,4,64,128,1,float16,fp8,0,0.10572266578674316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,4,64,0,1,float16,fp8,0,0.10590400298436482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,4,64,128,1,fp8,fp8,0,0.260368009408315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,8,64,128,1,float16,float16,0,0.10967999696731567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,4,64,0,1,fp8,fp8,0,0.26153600215911865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,8,64,0,1,float16,float16,0,0.11045333743095398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,8,64,128,1,float16,fp8,0,0.1090880036354065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,8,64,0,1,float16,fp8,0,0.10891200105349223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,32,64,0,1,float16,float16,0,0.0691840002934138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,32,64,128,1,float16,fp8,0,0.0674186646938324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,32,64,128,1,fp8,fp8,0,0.14987732966740927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,8,64,128,1,fp8,fp8,0,0.2635733286539714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,32,64,128,1,float16,float16,0,0.06811200082302094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,8,64,0,1,fp8,fp8,0,0.2640586694081624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,32,64,0,1,float16,fp8,0,0.06665066878000896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,32,64,0,1,fp8,fp8,0,0.14803733428319296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,1,64,128,1,float16,float16,0,0.056847999493281044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,1,64,0,1,float16,float16,0,0.05770133435726166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,1,64,128,1,float16,fp8,0,0.05705066521962484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,1,64,128,1,fp8,fp8,0,0.13743999600410461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,1,64,0,1,float16,fp8,0,0.056618665655454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,1,64,0,1,fp8,fp8,0,0.13713600238164267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,2,64,128,1,float16,float16,0,0.05758399764696757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,2,64,0,1,float16,float16,0,0.05786666770776113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,2,64,128,1,float16,fp8,0,0.0582239975531896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,2,64,128,1,fp8,fp8,0,0.1383573313554128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,2,64,0,1,float16,fp8,0,0.05734399954477946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,2,64,0,1,fp8,fp8,0,0.13828266660372415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,4,64,128,1,float16,float16,0,0.05724266668160757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,8,64,128,1,float16,float16,0,0.06015466650327047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,4,64,0,1,float16,float16,0,0.05816533168156942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,4,64,128,1,float16,fp8,0,0.05831466615200043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,4,64,128,1,fp8,fp8,0,0.140474667151769
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,8,64,0,1,float16,fp8,0,0.059690664211908974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,4,64,0,1,float16,fp8,0,0.05820266902446747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,4,64,0,1,fp8,fp8,0,0.1411146620909373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,32,64,0,1,float16,float16,0,0.03640000025431315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,8,64,0,1,float16,float16,0,0.059936001896858215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,8,64,128,1,float16,fp8,0,0.05937600135803223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,8,64,128,1,fp8,fp8,0,0.14265066385269165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,8,64,0,1,fp8,fp8,0,0.1430346667766571
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,32,64,128,1,float16,float16,0,0.03666666646798452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,32,64,128,1,float16,fp8,0,0.03572266548871994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,32,64,128,1,fp8,fp8,0,0.08296533425649007
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,32,64,0,1,float16,fp8,0,0.03604800005753835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,1,64,0,1,float16,fp8,0,0.033610666791598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,32,64,0,1,fp8,fp8,0,0.08305599788824718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,1,64,128,1,float16,float16,0,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,1,64,0,1,float16,float16,0,0.03330666571855545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,1,64,128,1,float16,fp8,0,0.03366933266321818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,1,64,128,1,fp8,fp8,0,0.07871999839941661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,1,64,0,1,fp8,fp8,0,0.07732800145943959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,2,64,128,1,float16,float16,0,0.03411199897527695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,2,64,0,1,float16,float16,0,0.033439998825391136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,2,64,128,1,float16,fp8,0,0.03386666625738144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,2,64,128,1,fp8,fp8,0,0.07662400106589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,2,64,0,1,float16,fp8,0,0.0340639998515447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,2,64,0,1,fp8,fp8,0,0.07824533184369405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,4,64,128,1,float16,float16,0,0.03370666752258936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,4,64,0,1,float16,float16,0,0.03316266586383184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,4,64,128,1,float16,fp8,0,0.03410666684309641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,4,64,128,1,fp8,fp8,0,0.0769706666469574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,4,64,0,1,float16,fp8,0,0.03392533212900162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,4,64,0,1,fp8,fp8,0,0.07702399790287018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,8,64,128,1,float16,float16,0,0.03459733227888743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,32,64,128,1,float16,float16,0,0.022991999983787537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,8,64,0,1,float16,float16,0,0.03489066660404205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,8,64,128,1,float16,fp8,0,0.03430933256944021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,8,64,128,1,fp8,fp8,0,0.07810133198897044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,8,64,0,1,float16,fp8,0,0.03451200077931086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,32,64,0,1,fp8,fp8,0,0.04811733464399973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,1,64,128,1,float16,float16,0,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,8,64,0,1,fp8,fp8,0,0.07806399961312611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,32,64,0,1,float16,float16,0,0.022709332406520844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,32,64,128,1,float16,fp8,0,0.02275199939807256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,32,64,128,1,fp8,fp8,0,0.0476746658484141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,32,64,0,1,float16,fp8,0,0.02310933421055476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,1,64,0,1,float16,float16,0,0.02203733225663503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,1,64,128,1,float16,fp8,0,0.022064000368118286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,1,64,128,1,fp8,fp8,0,0.04611733555793762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,1,64,0,1,float16,fp8,0,0.0220266655087471
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,2,64,0,1,float16,fp8,0,0.022405333817005157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,1,64,0,1,fp8,fp8,0,0.04590400060017904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,2,64,128,1,float16,float16,0,0.021984001000722248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,2,64,0,1,float16,float16,0,0.021829334398110706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,2,64,128,1,float16,fp8,0,0.02235200007756551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,2,64,128,1,fp8,fp8,0,0.046037331223487854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,2,64,0,1,fp8,fp8,0,0.04637866715590159
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,4,64,128,1,float16,float16,0,0.022677332162857056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,4,64,0,1,float16,float16,0,0.02276266614596049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,4,64,128,1,float16,fp8,0,0.022015998760859173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,4,64,128,1,fp8,fp8,0,0.04713066418965658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,4,64,0,1,float16,fp8,0,0.02231466770172119
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,4,64,0,1,fp8,fp8,0,0.0469813346862793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,8,64,128,1,float16,float16,0,0.02266666789849599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,8,64,0,1,float16,float16,0,0.022175999979178112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,8,64,128,1,float16,fp8,0,0.02310933421055476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,8,64,128,1,fp8,fp8,0,0.047610665361086525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,8,64,0,1,float16,fp8,0,0.02223466585079829
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,32,64,128,1,fp8,fp8,0,0.03166399896144867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,8,64,0,1,fp8,fp8,0,0.04734933376312256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,32,64,128,1,float16,float16,0,0.016021333634853363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,32,64,0,1,float16,float16,0,0.016714667280515034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,32,64,128,1,float16,fp8,0,0.016234666109085083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,32,64,0,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,32,64,0,1,fp8,fp8,0,0.030784000953038532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,1,64,128,1,float16,float16,0,0.01598400001724561
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,1,64,0,1,float16,float16,0,0.015696000307798386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,1,64,128,1,float16,fp8,0,0.015626666446526844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,1,64,128,1,fp8,fp8,0,0.030389333764712017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,1,64,0,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,1,64,0,1,fp8,fp8,0,0.03050133337577184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,2,64,128,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,2,64,0,1,float16,float16,0,0.015791999797026317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,2,64,128,1,float16,fp8,0,0.01611199975013733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,2,64,128,1,fp8,fp8,0,0.030037333567937214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,2,64,0,1,float16,fp8,0,0.01605333387851715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,2,64,0,1,fp8,fp8,0,0.030005333324273426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,4,64,128,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,4,64,0,1,float16,float16,0,0.015765332927306492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,4,64,128,1,float16,fp8,0,0.016202667107184727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,4,64,128,1,fp8,fp8,0,0.030666666726271313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,4,64,0,1,float16,fp8,0,0.01632000009218852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,4,64,0,1,fp8,fp8,0,0.030789333085219067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,8,64,128,1,float16,float16,0,0.016048000504573185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,8,64,0,1,float16,float16,0,0.015722667177518208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,8,64,128,1,float16,fp8,0,0.015824000040690105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,8,64,128,1,fp8,fp8,0,0.03105599929889043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,32,64,0,1,float16,fp8,0,0.01358933374285698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,8,64,0,1,float16,fp8,0,0.01581866666674614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,8,64,0,1,fp8,fp8,0,0.031034665803114574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,32,64,128,1,float16,float16,0,0.013738666971524557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,32,64,0,1,float16,float16,0,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,32,64,128,1,float16,fp8,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,32,64,128,1,fp8,fp8,0,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,32,64,0,1,fp8,fp8,0,0.02330133318901062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,1,64,128,1,float16,float16,0,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,2,64,128,1,float16,float16,0,0.013386666774749756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,1,64,0,1,float16,float16,0,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,1,64,128,1,float16,fp8,0,0.01368533323208491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,1,64,128,1,fp8,fp8,0,0.02277333289384842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,1,64,0,1,float16,fp8,0,0.013786666095256805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,1,64,0,1,fp8,fp8,0,0.02271999915440877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,2,64,0,1,float16,float16,0,0.013573333621025085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,2,64,128,1,float16,fp8,0,0.013487999637921652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,2,64,128,1,fp8,fp8,0,0.022672000030676525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,2,64,0,1,float16,fp8,0,0.013631999492645264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,2,64,0,1,fp8,fp8,0,0.022789334257443745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,4,64,128,1,float16,float16,0,0.013482666263977686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,4,64,0,1,float16,float16,0,0.013370666652917862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,4,64,128,1,float16,fp8,0,0.013893333574136099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,4,64,128,1,fp8,fp8,0,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,4,64,0,1,float16,fp8,0,0.013343999783198038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,4,64,0,1,fp8,fp8,0,0.022848000129063923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,8,64,128,1,float16,float16,0,0.01333333303531011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,8,64,0,1,float16,float16,0,0.013855999956528345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,8,64,128,1,float16,fp8,0,0.013861333330472311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,8,64,128,1,fp8,fp8,0,0.02295999974012375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,8,64,0,1,float16,fp8,0,0.013690666606028875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,8,64,0,1,fp8,fp8,0,0.0232640008131663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,32,64,128,1,float16,float16,0,0.011312000453472137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,32,64,0,1,float16,float16,0,0.012298667182525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,32,64,128,1,float16,fp8,0,0.012154666086037954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,32,64,128,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,1,64,0,1,float16,fp8,0,0.012309333930412928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,1,64,0,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,32,64,0,1,float16,fp8,0,0.01231466606259346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,32,64,0,1,fp8,fp8,0,0.0185759998857975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,1,64,128,1,float16,float16,0,0.012069333344697952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,1,64,0,1,float16,float16,0,0.012293333808581034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,1,64,128,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,1,64,128,1,fp8,fp8,0,0.01883200059334437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,2,64,128,1,float16,float16,0,0.01232533281048139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,2,64,0,1,float16,float16,0,0.01232533281048139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,2,64,128,1,float16,fp8,0,0.012479999413092932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,2,64,128,1,fp8,fp8,0,0.018543999642133713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,2,64,0,1,float16,fp8,0,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,2,64,0,1,fp8,fp8,0,0.018858666221300762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,4,64,128,1,float16,float16,0,0.01156266654531161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,4,64,0,1,float16,float16,0,0.0124746672809124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,4,64,128,1,float16,fp8,0,0.012527999778588613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,8,64,128,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,4,64,128,1,fp8,fp8,0,0.01883200059334437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,4,64,0,1,float16,fp8,0,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,32,64,128,1,float16,float16,0,0.011493333925803503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,4,64,0,1,fp8,fp8,0,0.018730666488409042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,8,64,128,1,float16,float16,0,0.012213333199421564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,8,64,0,1,float16,float16,0,0.012437333663304647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,8,64,128,1,float16,fp8,0,0.012400000045696894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,8,64,0,1,float16,fp8,0,0.012586666891972223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,8,64,0,1,fp8,fp8,0,0.018538666268189747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,32,64,0,1,float16,float16,0,0.011674666156371435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,32,64,128,1,float16,fp8,0,0.012106666962305704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,32,64,128,1,fp8,fp8,0,0.018229333062966663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,32,64,0,1,float16,fp8,0,0.011749333391586939
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,32,64,0,1,fp8,fp8,0,0.018250666558742523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,1,64,0,1,fp8,fp8,0,0.018207999567190807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,1,64,128,1,float16,float16,0,0.011567999919255575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,2,64,0,1,float16,float16,0,0.01190399999419848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,1,64,0,1,float16,float16,0,0.011600000162919363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,1,64,128,1,float16,fp8,0,0.012341332932313284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,1,64,128,1,fp8,fp8,0,0.018383999665578205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,1,64,0,1,float16,fp8,0,0.012549333274364471
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,2,64,0,1,fp8,fp8,0,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,2,64,128,1,float16,float16,0,0.011999999483426413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,2,64,128,1,float16,fp8,0,0.012234666695197424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,2,64,128,1,fp8,fp8,0,0.0183999997874101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,2,64,0,1,float16,fp8,0,0.012319999436537424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,4,64,128,1,float16,float16,0,0.011893333246310553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,4,64,0,1,float16,float16,0,0.012234666695197424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,4,64,128,1,float16,fp8,0,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,4,64,128,1,fp8,fp8,0,0.01850133389234543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,4,64,0,1,float16,fp8,0,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,4,64,0,1,fp8,fp8,0,0.018346666047970455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,8,64,128,1,float16,float16,0,0.012245333443085352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,8,64,0,1,float16,float16,0,0.011877333124478659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,8,64,128,1,float16,fp8,0,0.012714666624863943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,8,64,128,1,fp8,fp8,0,0.01848000039656957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,8,64,0,1,float16,fp8,0,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,32,64,0,1,fp8,fp8,0,0.01860800012946129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,1,64,128,1,float16,float16,0,0.011999999483426413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,8,64,0,1,fp8,fp8,0,0.018629333625237148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,32,64,128,1,float16,float16,0,0.011994666109482447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,32,64,0,1,float16,float16,0,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,32,64,128,1,float16,fp8,0,0.01227733368674914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,32,64,128,1,fp8,fp8,0,0.018602666755517323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,32,64,0,1,float16,fp8,0,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,1,64,0,1,float16,float16,0,0.011695999652147293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,1,64,128,1,float16,fp8,0,0.012261333564917246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,2,64,128,1,float16,fp8,0,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,1,64,128,1,fp8,fp8,0,0.018687999496857326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,2,64,0,1,float16,fp8,0,0.012213333199421564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,1,64,0,1,float16,fp8,0,0.012293333808581034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,1,64,0,1,fp8,fp8,0,0.017727999637524288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,2,64,128,1,float16,float16,0,0.01219733307758967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,2,64,0,1,float16,float16,0,0.012085333466529846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,2,64,128,1,fp8,fp8,0,0.01823466643691063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,2,64,0,1,fp8,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,4,64,128,1,float16,float16,0,0.01209066684047381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,4,64,0,1,float16,float16,0,0.011855999628702799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,4,64,128,1,float16,fp8,0,0.012133333832025528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,8,64,128,1,float16,fp8,0,0.012298667182525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,4,64,128,1,fp8,fp8,0,0.017685333887736004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,4,64,0,1,float16,fp8,0,0.012543999900420507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,4,64,0,1,fp8,fp8,0,0.018277333428462345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,8,64,128,1,float16,float16,0,0.011994666109482447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,8,64,0,1,float16,float16,0,0.01210133358836174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,8,64,128,1,fp8,fp8,0,0.018677332748969395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,8,64,0,1,float16,fp8,0,0.012266666938861212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,8,64,0,1,fp8,fp8,0,0.018432000031073887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,1,64,128,1,float16,float16,0,0.07730133334795634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,1,64,0,1,float16,float16,0,0.07690133154392242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,1,64,128,1,float16,fp8,0,0.07718933125336964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,1,64,128,1,fp8,fp8,0,0.231605331103007
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,1,64,0,1,float16,fp8,0,0.07673066854476929
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,1,64,0,1,fp8,fp8,0,0.23292799790700278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,2,64,0,1,fp8,fp8,0,0.231605331103007
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,2,64,128,1,float16,float16,0,0.0784800002972285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,2,64,0,1,float16,float16,0,0.07825600107510884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,2,64,128,1,float16,fp8,0,0.07707733412583669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,2,64,128,1,fp8,fp8,0,0.23167999585469565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,2,64,0,1,float16,fp8,0,0.0775733341773351
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,4,64,128,1,float16,float16,0,0.07818133135636647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,4,64,0,1,float16,float16,0,0.07869333525498708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,4,64,128,1,float16,fp8,0,0.07825066645940144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,4,64,128,1,fp8,fp8,0,0.23573867479960123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,4,64,0,1,float16,fp8,0,0.07808533310890198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,4,64,0,1,fp8,fp8,0,0.234607994556427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,8,64,128,1,float16,float16,0,0.07961600025494893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,8,64,0,1,float16,float16,0,0.07980266710122426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,8,64,128,1,float16,fp8,0,0.07957866787910461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,8,64,128,1,fp8,fp8,0,0.23681066433588663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,8,64,0,1,float16,fp8,0,0.07995200157165527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,8,64,0,1,fp8,fp8,0,0.2379253307978312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,32,64,128,1,float16,float16,0,0.045253331462542214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,32,64,0,1,float16,float16,0,0.045194665590922035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,32,64,128,1,float16,fp8,0,0.04390933116277059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,32,64,128,1,fp8,fp8,0,0.1306933363278707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,32,64,0,1,float16,fp8,0,0.04452799757321676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,32,64,0,1,fp8,fp8,0,0.1295413374900818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,1,64,128,1,float16,float16,0,0.043621331453323364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,1,64,0,1,float16,float16,0,0.04399466514587402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,1,64,128,1,float16,fp8,0,0.04401599864164988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,1,64,128,1,fp8,fp8,0,0.12457066774368286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,1,64,0,1,float16,fp8,0,0.04374399781227112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,1,64,0,1,fp8,fp8,0,0.12556800246238708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,2,64,128,1,float16,float16,0,0.04385066529115041
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,2,64,0,1,float16,float16,0,0.0439626673857371
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,2,64,128,1,float16,fp8,0,0.04370133578777313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,2,64,128,1,fp8,fp8,0,0.1251253286997477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,2,64,0,1,float16,fp8,0,0.043621331453323364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,2,64,0,1,fp8,fp8,0,0.12469866871833801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,4,64,128,1,float16,float16,0,0.04398933549722036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,4,64,0,1,float16,float16,0,0.0440586656332016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,8,64,0,1,float16,float16,0,0.044218664367993675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,8,64,128,1,float16,fp8,0,0.04430399835109711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,4,64,128,1,float16,fp8,0,0.04409599800904592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,4,64,128,1,fp8,fp8,0,0.12468266487121582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,4,64,0,1,float16,fp8,0,0.04364799956480662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,4,64,0,1,fp8,fp8,0,0.12552533547083536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,32,64,128,1,float16,float16,0,0.027263998985290527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,8,64,128,1,float16,float16,0,0.04427733520666758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,8,64,128,1,fp8,fp8,0,0.12556800246238708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,8,64,0,1,float16,fp8,0,0.04413333535194397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,32,64,0,1,float16,fp8,0,0.02701866626739502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,8,64,0,1,fp8,fp8,0,0.12566933035850525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,32,64,0,1,float16,float16,0,0.026895999908447266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,32,64,128,1,float16,fp8,0,0.027050666511058807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,32,64,128,1,fp8,fp8,0,0.07256000240643819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,32,64,0,1,fp8,fp8,0,0.07154666880766551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,1,64,128,1,float16,float16,0,0.026869334280490875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,1,64,0,1,float16,float16,0,0.026863999664783478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,1,64,128,1,float16,fp8,0,0.02738133321205775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,1,64,128,1,fp8,fp8,0,0.07165866593519847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,1,64,0,1,float16,fp8,0,0.027050666511058807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,1,64,0,1,fp8,fp8,0,0.0718506673971812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,2,64,128,1,float16,float16,0,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,2,64,0,1,float16,float16,0,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,2,64,128,1,float16,fp8,0,0.027621333797772724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,2,64,128,1,fp8,fp8,0,0.07134933272997539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,2,64,0,1,float16,fp8,0,0.02756800005833308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,2,64,0,1,fp8,fp8,0,0.07180800040562947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,4,64,128,1,float16,float16,0,0.02759466568628947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,4,64,0,1,float16,float16,0,0.02773866554101308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,4,64,128,1,float16,fp8,0,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,4,64,128,1,fp8,fp8,0,0.07171733180681865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,4,64,0,1,float16,fp8,0,0.02804800122976303
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,4,64,0,1,fp8,fp8,0,0.07149866720040639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,8,64,128,1,float16,float16,0,0.02771199991305669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,8,64,0,1,float16,float16,0,0.028005334238211315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,8,64,128,1,float16,fp8,0,0.027855999767780304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,32,64,128,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,8,64,128,1,fp8,fp8,0,0.07189333438873291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,8,64,0,1,float16,fp8,0,0.027632000545660656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,8,64,0,1,fp8,fp8,0,0.0717386653025945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,32,64,128,1,float16,float16,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,32,64,0,1,float16,float16,0,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,32,64,128,1,fp8,fp8,0,0.04452266792456309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,32,64,0,1,float16,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,32,64,0,1,fp8,fp8,0,0.04433600107828776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,1,64,128,1,float16,float16,0,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,1,64,0,1,float16,float16,0,0.01844266677896182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,1,64,128,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,1,64,128,1,fp8,fp8,0,0.043194666504859924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,1,64,0,1,float16,fp8,0,0.018757333358128864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,1,64,0,1,fp8,fp8,0,0.04357333481311798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,2,64,128,1,float16,float16,0,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,2,64,0,1,float16,float16,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,2,64,128,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,2,64,128,1,fp8,fp8,0,0.04307200014591217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,2,64,0,1,float16,fp8,0,0.01878400022784869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,2,64,0,1,fp8,fp8,0,0.04308799902598063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,4,64,128,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,4,64,0,1,float16,float16,0,0.01874133323629697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,4,64,128,1,float16,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,4,64,128,1,fp8,fp8,0,0.043951998154322304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,4,64,0,1,float16,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,4,64,0,1,fp8,fp8,0,0.04334933559099833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,8,64,128,1,float16,float16,0,0.01882133384545644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,8,64,0,1,float16,float16,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,8,64,128,1,float16,fp8,0,0.01974933346112569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,8,64,128,1,fp8,fp8,0,0.043578664461771645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,8,64,0,1,float16,fp8,0,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,8,64,0,1,fp8,fp8,0,0.04397333165009817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,32,64,128,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,32,64,0,1,float16,float16,0,0.014064000298579534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,32,64,128,1,float16,fp8,0,0.014618666221698126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,32,64,128,1,fp8,fp8,0,0.030213333666324615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,32,64,0,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,32,64,0,1,fp8,fp8,0,0.029658667743206024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,1,64,128,1,float16,float16,0,0.01461333284775416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,1,64,0,1,float16,float16,0,0.014671999961137772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,1,64,128,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,1,64,128,1,fp8,fp8,0,0.02871999889612198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,1,64,0,1,float16,fp8,0,0.014698666830857595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,2,64,0,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,1,64,0,1,fp8,fp8,0,0.029525332152843475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,2,64,128,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,2,64,0,1,float16,float16,0,0.014592000593741735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,2,64,128,1,float16,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,2,64,128,1,fp8,fp8,0,0.029824001093705494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,2,64,0,1,fp8,fp8,0,0.029146666328112285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,4,64,128,1,float16,float16,0,0.01463466634353002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,4,64,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,4,64,128,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,4,64,128,1,fp8,fp8,0,0.029866665601730347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,4,64,0,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,4,64,0,1,fp8,fp8,0,0.030389333764712017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,8,64,128,1,float16,float16,0,0.01469333345691363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,8,64,0,1,float16,float16,0,0.014416000495354334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,8,64,128,1,float16,fp8,0,0.015381333728631338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,8,64,128,1,fp8,fp8,0,0.030016000072161358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,8,64,0,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,8,64,0,1,fp8,fp8,0,0.030181333422660828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,32,64,128,1,float16,float16,0,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,32,64,0,1,float16,float16,0,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,32,64,128,1,float16,fp8,0,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,32,64,128,1,fp8,fp8,0,0.022240000466505688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,32,64,0,1,float16,fp8,0,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,32,64,0,1,fp8,fp8,0,0.022672000030676525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,1,64,128,1,float16,float16,0,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,1,64,0,1,float16,float16,0,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,1,64,128,1,float16,fp8,0,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,1,64,128,1,fp8,fp8,0,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,1,64,0,1,float16,fp8,0,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,1,64,0,1,fp8,fp8,0,0.023775999744733173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,2,64,128,1,float16,float16,0,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,2,64,0,1,float16,float16,0,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,2,64,128,1,float16,fp8,0,0.013370666652917862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,2,64,128,1,fp8,fp8,0,0.02236266682545344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,2,64,0,1,float16,fp8,0,0.013359999905029932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,2,64,0,1,fp8,fp8,0,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,4,64,128,1,float16,float16,0,0.012442667037248611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,4,64,0,1,float16,float16,0,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,4,64,128,1,float16,fp8,0,0.013317332913478216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,4,64,128,1,fp8,fp8,0,0.022416000564893086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,4,64,0,1,float16,fp8,0,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,4,64,0,1,fp8,fp8,0,0.022778667509555817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,8,64,128,1,float16,float16,0,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,8,64,0,1,float16,float16,0,0.012495999534924826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,8,64,128,1,float16,fp8,0,0.013359999905029932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,8,64,128,1,fp8,fp8,0,0.022175999979178112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,8,64,0,1,float16,fp8,0,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,8,64,0,1,fp8,fp8,0,0.022122666239738464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,32,64,128,1,float16,float16,0,0.01146666705608368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,32,64,0,1,float16,float16,0,0.01190399999419848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,32,64,128,1,float16,fp8,0,0.011429333438475927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,32,64,128,1,fp8,fp8,0,0.018394666413466137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,32,64,0,1,float16,fp8,0,0.012298667182525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,32,64,0,1,fp8,fp8,0,0.018810667097568512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,1,64,128,1,float16,float16,0,0.012117333710193634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,1,64,0,1,float16,float16,0,0.011898666620254517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,1,64,128,1,float16,fp8,0,0.012047999848922094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,1,64,128,1,fp8,fp8,0,0.018735999862353008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,1,64,0,1,float16,fp8,0,0.01179733375708262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,1,64,0,1,fp8,fp8,0,0.018474667022625606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,2,64,128,1,float16,float16,0,0.012122667084137598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,2,64,0,1,float16,float16,0,0.012144000579913458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,2,64,128,1,float16,fp8,0,0.012522666404644648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,2,64,128,1,fp8,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,2,64,0,1,float16,fp8,0,0.012432000289360682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,2,64,0,1,fp8,fp8,0,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,4,64,128,1,float16,float16,0,0.01166933278242747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,4,64,0,1,float16,float16,0,0.01179733375708262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,4,64,128,1,float16,fp8,0,0.012597333639860153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,4,64,128,1,fp8,fp8,0,0.018858666221300762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,4,64,0,1,float16,fp8,0,0.01232533281048139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,4,64,0,1,fp8,fp8,0,0.019578666736682255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,8,64,128,1,float16,float16,0,0.012159999459981918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,8,64,0,1,float16,float16,0,0.011861333002646765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,8,64,128,1,float16,fp8,0,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,8,64,128,1,fp8,fp8,0,0.01841066653529803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,8,64,0,1,float16,fp8,0,0.012565333396196365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,8,64,0,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,32,64,128,1,float16,float16,0,0.011402666568756104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,32,64,0,1,float16,float16,0,0.01192533348997434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,32,64,128,1,float16,fp8,0,0.012373333175977072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,32,64,128,1,fp8,fp8,0,0.018768000106016796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,1,64,128,1,fp8,fp8,0,0.018757333358128864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,32,64,0,1,float16,fp8,0,0.011989332735538483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,32,64,0,1,fp8,fp8,0,0.018288000176350277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,1,64,128,1,float16,float16,0,0.011850666254758835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,1,64,0,1,float16,float16,0,0.011722666521867117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,1,64,128,1,float16,fp8,0,0.011999999483426413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,1,64,0,1,float16,fp8,0,0.012485332787036896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,1,64,0,1,fp8,fp8,0,0.018565333137909572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,2,64,128,1,float16,float16,0,0.011999999483426413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,2,64,0,1,float16,float16,0,0.01156266654531161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,2,64,128,1,float16,fp8,0,0.01210133358836174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,2,64,128,1,fp8,fp8,0,0.018346666047970455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,2,64,0,1,float16,fp8,0,0.012085333466529846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,2,64,0,1,fp8,fp8,0,0.01809599995613098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,4,64,128,1,float16,float16,0,0.011952000359694162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,4,64,0,1,float16,float16,0,0.012063999970753988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,4,64,128,1,float16,fp8,0,0.012170666207869848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,4,64,128,1,fp8,fp8,0,0.018181333939234417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,4,64,0,1,float16,fp8,0,0.011978667229413986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,4,64,0,1,fp8,fp8,0,0.01825599993268649
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,8,64,128,1,float16,float16,0,0.011839999506870905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,8,64,0,1,float16,float16,0,0.01198400060335795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,8,64,128,1,float16,fp8,0,0.012261333564917246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,8,64,128,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,8,64,0,1,float16,fp8,0,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,8,64,0,1,fp8,fp8,0,0.018602666755517323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,32,64,128,1,float16,float16,0,0.011642667154471079
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,32,64,0,1,float16,float16,0,0.011648000528415045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,32,64,128,1,float16,fp8,0,0.011829332758982977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,32,64,128,1,fp8,fp8,0,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,32,64,0,1,float16,fp8,0,0.012202666451533636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,32,64,0,1,fp8,fp8,0,0.01848000039656957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,1,64,128,1,float16,float16,0,0.012047999848922094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,1,64,0,1,float16,float16,0,0.011834666132926941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,2,64,128,1,float16,fp8,0,0.012181332955757776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,1,64,128,1,float16,fp8,0,0.012202666451533636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,1,64,128,1,fp8,fp8,0,0.01858666663368543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,1,64,0,1,float16,fp8,0,0.012250666817029318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,1,64,0,1,fp8,fp8,0,0.018266666680574417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,2,64,128,1,float16,float16,0,0.011941333611806234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,2,64,0,1,float16,float16,0,0.011871999750534693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,2,64,128,1,fp8,fp8,0,0.01803733284274737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,2,64,0,1,float16,fp8,0,0.012186666329701742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,2,64,0,1,fp8,fp8,0,0.01848000039656957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,4,64,128,1,float16,float16,0,0.014159999787807465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,4,64,0,1,float16,float16,0,0.011882666498422623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,4,64,128,1,float16,fp8,0,0.01219733307758967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,4,64,128,1,fp8,fp8,0,0.02090666691462199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,4,64,0,1,float16,fp8,0,0.012479999413092932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,4,64,0,1,fp8,fp8,0,0.01855466639002164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,8,64,128,1,float16,float16,0,0.012128000458081564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,8,64,0,1,float16,float16,0,0.012234666695197424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,8,64,128,1,float16,fp8,0,0.012266666938861212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,8,64,128,1,fp8,fp8,0,0.018496000518401463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,8,64,0,1,float16,fp8,0,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,8,64,0,1,fp8,fp8,0,0.018677332748969395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,1,64,128,1,float16,float16,0,2.153050740559896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,1,64,128,1,float16,fp8,0,2.111392021179199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,1,64,128,1,fp8,fp8,0,2.7979841232299805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,2,64,128,1,float16,float16,0,2.1890293757120767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,2,64,128,1,float16,fp8,0,2.1544960339864097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,2,64,128,1,fp8,fp8,0,2.831109364827474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,4,64,128,1,float16,float16,0,2.1941493352254233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,1,64,0,1,float16,float16,0,14.177579243977865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,1,64,0,1,fp8,fp8,0,12.933354695638021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,4,64,128,1,float16,fp8,0,2.174959977467855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,1,64,0,1,float16,fp8,0,14.322709401448568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,4,64,128,1,fp8,fp8,0,2.822122573852539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,2,64,0,1,float16,float16,0,14.421306610107422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,8,64,128,1,float16,float16,0,2.251797358194987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,2,64,0,1,fp8,fp8,0,12.973258972167969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,2,64,0,1,float16,fp8,0,14.433979034423828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,8,64,128,1,float16,fp8,0,2.223951975504557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,8,64,128,1,fp8,fp8,0,2.879701296488444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,4,64,0,1,float16,float16,0,14.351387023925781
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,24,64,128,1,float16,float16,0,1.2168533007303874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,24,64,128,1,float16,fp8,0,1.2141493161519368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,24,64,128,1,fp8,fp8,0,1.581424077351888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,4,64,0,1,fp8,fp8,0,12.977813720703125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,4,64,0,1,float16,fp8,0,14.53317387898763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,8,64,0,1,float16,float16,0,14.354698181152344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,1,64,128,1,float16,float16,0,1.0977386633555095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,24,64,0,1,float16,float16,0,7.411968231201172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,1,64,128,1,float16,fp8,0,1.0852959950764973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,1,64,128,1,fp8,fp8,0,1.4232373237609863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,8,64,0,1,fp8,fp8,0,13.043455759684244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,24,64,0,1,float16,fp8,0,7.301061630249023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,24,64,0,1,fp8,fp8,0,6.693626403808594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,8,64,0,1,float16,fp8,0,14.467503865559896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,2,64,128,1,float16,float16,0,1.1150986353556316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,2,64,128,1,float16,fp8,0,1.1065759658813477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,2,64,128,1,fp8,fp8,0,1.44870392481486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,1,64,0,1,float16,float16,0,7.381551742553711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,4,64,128,1,float16,float16,0,1.1295039653778076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,1,64,0,1,float16,fp8,0,7.1940962473551435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,4,64,128,1,float16,fp8,0,1.1390026410420735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,1,64,0,1,fp8,fp8,0,6.585584004720052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,4,64,128,1,fp8,fp8,0,1.47379732131958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,2,64,0,1,float16,float16,0,7.287386576334636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,8,64,128,1,float16,float16,0,1.1347306569417317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,2,64,0,1,fp8,fp8,0,6.569002787272136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,8,64,128,1,float16,fp8,0,1.1342079639434814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,2,64,0,1,float16,fp8,0,7.211706797281901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,8,64,128,1,fp8,fp8,0,1.4822400410970051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,4,64,0,1,float16,float16,0,7.302783966064453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,24,64,128,1,float16,float16,0,0.6412906646728516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,24,64,128,1,float16,fp8,0,0.6511679887771606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,4,64,0,1,fp8,fp8,0,6.629701614379883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,4,64,0,1,float16,fp8,0,7.235567728678386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,24,64,128,1,fp8,fp8,0,0.8417173226674398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,1,64,128,1,float16,float16,0,0.602399984995524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,8,64,0,1,float16,float16,0,7.248965581258138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,24,64,0,1,float16,float16,0,3.697845458984375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,1,64,128,1,float16,fp8,0,0.5942506790161133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,8,64,0,1,fp8,fp8,0,6.653210957845052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,8,64,0,1,float16,fp8,0,7.345984141031901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,24,64,0,1,fp8,fp8,0,3.431941350301107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,1,64,128,1,fp8,fp8,0,0.7780906359354655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,24,64,0,1,float16,fp8,0,3.747215906778971
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,1,64,0,1,float16,float16,0,3.8036588033040366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,2,64,128,1,float16,float16,0,0.6006666819254557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,1,64,0,1,float16,fp8,0,3.696768124898275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,2,64,128,1,fp8,fp8,0,0.7813759644826254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,2,64,128,1,float16,fp8,0,0.5929226477940878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,1,64,0,1,fp8,fp8,0,3.4000800450642905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,2,64,0,1,float16,float16,0,3.8530826568603516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,4,64,128,1,float16,float16,0,0.6026613314946493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,2,64,0,1,float16,fp8,0,3.7415520350138345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,2,64,0,1,fp8,fp8,0,3.3865652084350586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,4,64,128,1,float16,fp8,0,0.6036320130030314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,4,64,128,1,fp8,fp8,0,0.776634693145752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,4,64,0,1,fp8,fp8,0,3.393418629964193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,8,64,128,1,float16,float16,0,0.6092586517333984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,8,64,0,1,float16,float16,0,3.7607253392537436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,4,64,0,1,float16,float16,0,3.7031466166178384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,8,64,128,1,float16,fp8,0,0.6127093235651652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,4,64,0,1,float16,fp8,0,3.6762825647989907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,8,64,128,1,fp8,fp8,0,0.7918720245361328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,24,64,128,1,float16,float16,0,0.4235200087229411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,8,64,0,1,float16,fp8,0,3.7182718912760415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,24,64,128,1,float16,fp8,0,0.4262080192565918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,24,64,0,1,float16,float16,0,2.0097173055013022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,8,64,0,1,fp8,fp8,0,3.3770507176717124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,24,64,128,1,fp8,fp8,0,0.5140373309453329
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,1,64,128,1,float16,float16,0,0.42502931753794354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,24,64,0,1,float16,fp8,0,1.9967412948608398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,24,64,0,1,fp8,fp8,0,1.7570187250773113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,1,64,128,1,float16,fp8,0,0.4247359832127889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,1,64,128,1,fp8,fp8,0,0.5119253396987915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,1,64,0,1,float16,float16,0,1.9838132858276367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,2,64,128,1,float16,float16,0,0.4254453182220459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,2,64,128,1,fp8,fp8,0,0.5111786524454752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,1,64,0,1,fp8,fp8,0,1.768229325612386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,1,64,0,1,float16,fp8,0,2.010122617085775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,2,64,0,1,float16,float16,0,1.9953227043151855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,2,64,128,1,float16,fp8,0,0.4236319859822591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,4,64,128,1,float16,float16,0,0.4244053363800049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,2,64,0,1,float16,fp8,0,1.9843786557515461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,8,64,128,1,float16,float16,0,0.4238400061925252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,4,64,0,1,float16,fp8,0,1.9962239265441895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,2,64,0,1,fp8,fp8,0,1.7580746014912922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,4,64,0,1,float16,float16,0,1.9992586771647136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,4,64,128,1,float16,fp8,0,0.42313599586486816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,4,64,128,1,fp8,fp8,0,0.512165347735087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,4,64,0,1,fp8,fp8,0,1.7593919436136882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,8,64,128,1,float16,fp8,0,0.42450133959452313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,8,64,0,1,float16,float16,0,1.9833173751831055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,8,64,128,1,fp8,fp8,0,0.5108960072199503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,8,64,0,1,float16,fp8,0,1.996938705444336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,8,64,0,1,fp8,fp8,0,1.7595787048339844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,1,64,128,1,float16,float16,0,1.601962725321452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,1,64,128,1,float16,fp8,0,1.5763146082560222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,1,64,128,1,fp8,fp8,0,2.082570711771647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,2,64,128,1,float16,float16,0,1.6338987350463867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,1,64,0,1,float16,float16,0,8.423999786376953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,1,64,0,1,fp8,fp8,0,7.557727813720703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,2,64,128,1,float16,fp8,0,1.615829308827718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,2,64,128,1,fp8,fp8,0,2.10428794225057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,1,64,0,1,float16,fp8,0,8.241002400716146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,2,64,0,1,float16,float16,0,8.41860262552897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,4,64,128,1,float16,float16,0,1.6642773946126301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,2,64,0,1,fp8,fp8,0,7.630341211954753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,4,64,128,1,float16,fp8,0,1.6315306027730305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,2,64,0,1,float16,fp8,0,8.509520212809244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,4,64,128,1,fp8,fp8,0,2.118735949198405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,8,64,128,1,float16,float16,0,1.6814133326212566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,4,64,0,1,float16,float16,0,8.372986475626627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,8,64,128,1,float16,fp8,0,1.6629865964253743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,24,64,128,1,float16,float16,0,0.9044480323791504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,8,64,128,1,fp8,fp8,0,2.170426686604818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,24,64,128,1,float16,fp8,0,0.90829865137736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,4,64,0,1,fp8,fp8,0,7.636362711588542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,24,64,128,1,fp8,fp8,0,1.1903253396352131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,4,64,0,1,float16,fp8,0,8.33084805806478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,24,64,0,1,float16,float16,0,4.407541275024414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,8,64,0,1,float16,float16,0,8.435434977213541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,1,64,128,1,float16,float16,0,0.8317866325378418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,8,64,0,1,fp8,fp8,0,7.636026382446289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,8,64,0,1,float16,fp8,0,8.430389404296875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,1,64,128,1,fp8,fp8,0,1.0810720125834148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,24,64,0,1,float16,fp8,0,4.330154736836751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,1,64,128,1,float16,fp8,0,0.8150613307952881
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,24,64,0,1,fp8,fp8,0,3.953770637512207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,1,64,0,1,float16,float16,0,4.244341214497884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,2,64,128,1,float16,float16,0,0.8366186618804932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,1,64,0,1,float16,fp8,0,4.3778025309244795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,2,64,128,1,float16,fp8,0,0.8246933619181315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,1,64,0,1,fp8,fp8,0,3.857349395751953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,2,64,128,1,fp8,fp8,0,1.0889333089192708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,4,64,128,1,float16,fp8,0,0.8256106376647949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,2,64,0,1,float16,float16,0,4.313002586364746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,4,64,128,1,float16,float16,0,0.8352800210316976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,2,64,0,1,float16,fp8,0,4.284751892089844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,2,64,0,1,fp8,fp8,0,3.8544480005900064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,4,64,128,1,fp8,fp8,0,1.0924586455027263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,4,64,0,1,float16,float16,0,4.310517311096191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,4,64,0,1,float16,fp8,0,4.289999961853027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,8,64,128,1,float16,float16,0,0.8470986684163412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,4,64,0,1,fp8,fp8,0,3.845856030782064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,8,64,128,1,float16,fp8,0,0.8506506284077963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,8,64,128,1,fp8,fp8,0,1.1066559950510662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,24,64,128,1,float16,float16,0,0.4857706626256307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,8,64,0,1,float16,float16,0,4.2350772221883135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,24,64,128,1,float16,fp8,0,0.48981865247090656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,24,64,128,1,fp8,fp8,0,0.6371839841206869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,24,64,0,1,float16,float16,0,2.196986675262451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,8,64,0,1,float16,fp8,0,4.261690775553386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,8,64,0,1,fp8,fp8,0,3.9053014119466147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,1,64,128,1,float16,float16,0,0.4578506549199422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,1,64,128,1,fp8,fp8,0,0.5868800083796183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,24,64,0,1,float16,fp8,0,2.1837120056152344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,24,64,0,1,fp8,fp8,0,2.061936060587565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,1,64,128,1,float16,fp8,0,0.4493013223012288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,1,64,0,1,float16,float16,0,2.1643946965535483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,1,64,0,1,float16,fp8,0,2.1748639742533364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,2,64,128,1,float16,float16,0,0.46036799748738605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,2,64,128,1,float16,fp8,0,0.4506666660308838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,1,64,0,1,fp8,fp8,0,1.9887040456136067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,2,64,128,1,fp8,fp8,0,0.5862773259480795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,2,64,0,1,float16,float16,0,2.1673332850138345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,4,64,128,1,float16,float16,0,0.4643733501434326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,2,64,0,1,float16,fp8,0,2.1626133918762207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,2,64,0,1,fp8,fp8,0,1.9857333501180012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,4,64,128,1,float16,fp8,0,0.4556373357772827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,4,64,128,1,fp8,fp8,0,0.5897599856058756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,4,64,0,1,float16,float16,0,2.1547093391418457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,4,64,0,1,float16,fp8,0,2.1662185986836753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,8,64,128,1,float16,float16,0,0.46485865116119385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,8,64,128,1,float16,fp8,0,0.46279998620351154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,8,64,0,1,fp8,fp8,0,2.000154654184977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,4,64,0,1,fp8,fp8,0,2.004965305328369
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,8,64,128,1,fp8,fp8,0,0.5988800128300985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,8,64,0,1,float16,float16,0,2.1737759908040366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,8,64,0,1,float16,fp8,0,2.1563733418782554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,24,64,128,1,float16,float16,0,0.32517866293589276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,24,64,0,1,float16,float16,0,1.207034667332967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,24,64,128,1,float16,fp8,0,0.32605334122975665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,24,64,128,1,fp8,fp8,0,0.3952426513036092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,24,64,0,1,float16,fp8,0,1.2072319984436035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,1,64,128,1,float16,float16,0,0.3234399954477946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,24,64,0,1,fp8,fp8,0,1.0575306415557861
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,1,64,128,1,float16,fp8,0,0.32493333021799725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,1,64,128,1,fp8,fp8,0,0.39450132846832275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,1,64,0,1,float16,float16,0,1.2104053497314453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,1,64,0,1,float16,fp8,0,1.2153706550598145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,2,64,128,1,float16,float16,0,0.324074665705363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,1,64,0,1,fp8,fp8,0,1.0566240151723225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,2,64,128,1,fp8,fp8,0,0.39264531930287677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,2,64,128,1,float16,fp8,0,0.32518933216730755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,2,64,0,1,float16,float16,0,1.2085973421732585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,2,64,0,1,float16,fp8,0,1.2119253476460774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,2,64,0,1,fp8,fp8,0,1.0567626953125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,4,64,128,1,float16,float16,0,0.32479466994603473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,4,64,0,1,float16,float16,0,1.205514669418335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,4,64,128,1,float16,fp8,0,0.3242400089899699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,4,64,128,1,fp8,fp8,0,0.39878400166829425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,4,64,0,1,float16,fp8,0,1.2098346551259358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,8,64,128,1,float16,float16,0,0.3245333234469096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,4,64,0,1,fp8,fp8,0,1.0564853350321453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,8,64,128,1,float16,fp8,0,0.3255680004755656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,8,64,0,1,float16,float16,0,1.2101066907246907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,8,64,128,1,fp8,fp8,0,0.39240535100301105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,8,64,0,1,float16,fp8,0,1.2071893215179443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,8,64,0,1,fp8,fp8,0,1.060202678044637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,1,64,128,1,float16,float16,0,1.3283039728800456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,1,64,128,1,float16,fp8,0,1.3056213061014812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,1,64,128,1,fp8,fp8,0,1.7369599342346191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,2,64,128,1,float16,float16,0,1.3554986317952473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,2,64,128,1,float16,fp8,0,1.3394346237182617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,2,64,128,1,fp8,fp8,0,1.7573173840840657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,1,64,0,1,float16,float16,0,5.994117101033528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,4,64,128,1,float16,float16,0,1.367685317993164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,1,64,0,1,fp8,fp8,0,5.391786575317383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,1,64,0,1,float16,fp8,0,6.037338892618815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,2,64,0,1,float16,float16,0,6.054981231689453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,4,64,128,1,float16,fp8,0,1.3693067232767742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,4,64,128,1,fp8,fp8,0,1.774629275004069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,2,64,0,1,fp8,fp8,0,5.427120208740234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,2,64,0,1,float16,fp8,0,6.013754526774089
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,8,64,128,1,float16,float16,0,1.4010987281799316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,4,64,0,1,float16,float16,0,5.955130894978841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,8,64,128,1,float16,fp8,0,1.3793333371480305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,8,64,128,1,fp8,fp8,0,1.8020906448364258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,24,64,128,1,float16,float16,0,0.7528533140818278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,4,64,0,1,float16,fp8,0,5.942197163899739
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,4,64,0,1,fp8,fp8,0,5.4792531331380205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,24,64,128,1,float16,fp8,0,0.7540640036265055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,8,64,0,1,float16,float16,0,6.024426778157552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,24,64,128,1,fp8,fp8,0,0.9937012990315756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,24,64,0,1,float16,float16,0,3.046191851298014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,8,64,0,1,fp8,fp8,0,5.514213562011719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,1,64,128,1,float16,fp8,0,0.6789920330047607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,8,64,0,1,float16,fp8,0,6.004330952962239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,1,64,128,1,float16,float16,0,0.6912639935811361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,24,64,0,1,float16,fp8,0,3.0485973358154297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,24,64,0,1,fp8,fp8,0,2.834143956502279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,1,64,0,1,float16,float16,0,3.0162026087443032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,1,64,128,1,fp8,fp8,0,0.8937919934590658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,2,64,128,1,float16,float16,0,0.6996906598409017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,2,64,128,1,float16,fp8,0,0.6845813592274984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,1,64,0,1,float16,fp8,0,3.0003414154052734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,1,64,0,1,fp8,fp8,0,2.7444159189860025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,2,64,128,1,fp8,fp8,0,0.9011572996775309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,2,64,0,1,float16,fp8,0,3.0073067347208657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,4,64,128,1,float16,float16,0,0.7043680349985758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,2,64,0,1,float16,float16,0,2.994469324747721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,4,64,128,1,float16,fp8,0,0.6956586837768555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,2,64,0,1,fp8,fp8,0,2.754255930582682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,4,64,0,1,float16,float16,0,2.983173370361328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,4,64,128,1,fp8,fp8,0,0.9084106286366781
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,8,64,128,1,float16,float16,0,0.7110293706258138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,4,64,0,1,float16,fp8,0,3.025066693623861
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,8,64,128,1,float16,fp8,0,0.7078773180643717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,4,64,0,1,fp8,fp8,0,2.783386548360189
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,8,64,128,1,fp8,fp8,0,0.9265279769897461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,8,64,0,1,float16,float16,0,3.025343894958496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,8,64,0,1,float16,fp8,0,3.0288960138956704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,24,64,128,1,float16,float16,0,0.4101066589355469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,24,64,128,1,float16,fp8,0,0.41444798310597736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,8,64,0,1,fp8,fp8,0,2.776639938354492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,24,64,0,1,float16,float16,0,1.604159990946452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,24,64,128,1,fp8,fp8,0,0.5382453203201294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,24,64,0,1,float16,fp8,0,1.5877493222554524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,1,64,128,1,float16,float16,0,0.3863733212153117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,24,64,0,1,fp8,fp8,0,1.4331146876017253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,1,64,0,1,float16,float16,0,1.5598613421122234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,1,64,128,1,float16,fp8,0,0.3794986804326375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,1,64,128,1,fp8,fp8,0,0.49318401018778485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,1,64,0,1,float16,fp8,0,1.555616060892741
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,2,64,128,1,float16,float16,0,0.3877280155817668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,1,64,0,1,fp8,fp8,0,1.3800212542215984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,2,64,128,1,float16,fp8,0,0.3815679947535197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,2,64,0,1,float16,float16,0,1.5499040285746257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,2,64,128,1,fp8,fp8,0,0.4961386521657308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,2,64,0,1,float16,fp8,0,1.5574560165405273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,4,64,128,1,float16,float16,0,0.3898186683654785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,2,64,0,1,fp8,fp8,0,1.388271967569987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,4,64,128,1,float16,fp8,0,0.38517332077026367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,4,64,128,1,fp8,fp8,0,0.49883735179901123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,4,64,0,1,float16,float16,0,1.563978672027588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,4,64,0,1,float16,fp8,0,1.5608000755310059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,8,64,128,1,float16,float16,0,0.3930879831314087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,4,64,0,1,fp8,fp8,0,1.3899626731872559
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,8,64,128,1,float16,fp8,0,0.39071468512217206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,8,64,128,1,fp8,fp8,0,0.5043786764144897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,24,64,128,1,float16,fp8,0,0.2755840023358663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,8,64,0,1,float16,float16,0,1.5699520111083984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,24,64,128,1,float16,float16,0,0.2757973273595174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,8,64,0,1,float16,fp8,0,1.5685173670450847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,8,64,0,1,fp8,fp8,0,1.3969279925028484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,24,64,0,1,float16,float16,0,0.8363573551177979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,24,64,128,1,fp8,fp8,0,0.33586132526397705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,1,64,128,1,float16,float16,0,0.2739466627438863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,24,64,0,1,fp8,fp8,0,0.7749333381652832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,24,64,0,1,float16,fp8,0,0.8366666634877523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,1,64,0,1,float16,float16,0,0.8412213325500488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,1,64,128,1,float16,fp8,0,0.2747146685918172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,1,64,128,1,fp8,fp8,0,0.3339039882024129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,1,64,0,1,float16,fp8,0,0.8371573289235433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,2,64,128,1,float16,float16,0,0.2749493320782979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,1,64,0,1,fp8,fp8,0,0.7673172950744629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,2,64,0,1,float16,float16,0,0.8376586437225342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,2,64,128,1,float16,fp8,0,0.2742026646931966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,2,64,0,1,fp8,fp8,0,0.7708693345387777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,2,64,128,1,fp8,fp8,0,0.33579198519388836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,2,64,0,1,float16,fp8,0,0.8371679782867432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,4,64,128,1,float16,float16,0,0.27499733368555707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,4,64,128,1,float16,fp8,0,0.2749759952227275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,4,64,0,1,float16,float16,0,0.83842666943868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,4,64,128,1,fp8,fp8,0,0.333957314491272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,4,64,0,1,float16,fp8,0,0.8373599847157797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,8,64,128,1,float16,float16,0,0.27509866158167523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,4,64,0,1,fp8,fp8,0,0.7711520195007324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,8,64,0,1,float16,fp8,0,0.8354986508687338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,8,64,128,1,float16,fp8,0,0.2749013304710388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,8,64,128,1,fp8,fp8,0,0.3335253397623698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,8,64,0,1,float16,float16,0,0.8366933663686117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,8,64,0,1,fp8,fp8,0,0.7727200190226237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,1,64,128,1,float16,float16,0,2.1035893758138022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,1,64,128,1,float16,fp8,0,2.072986602783203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,1,64,128,1,fp8,fp8,0,2.7301012674967446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,2,64,128,1,float16,float16,0,2.1431733767191568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,2,64,128,1,float16,fp8,0,2.122682730356852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,2,64,128,1,fp8,fp8,0,2.7549972534179688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,1,64,0,1,float16,float16,0,7.915568033854167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,1,64,0,1,fp8,fp8,0,7.120543797810872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,4,64,128,1,float16,float16,0,2.1634079615275064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,1,64,0,1,float16,fp8,0,7.9182078043619795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,2,64,0,1,float16,float16,0,8.07248560587565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,4,64,128,1,float16,fp8,0,2.164501349131266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,2,64,0,1,float16,fp8,0,7.89027214050293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,4,64,128,1,fp8,fp8,0,2.7789653142293296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,2,64,0,1,fp8,fp8,0,7.153877258300781
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,8,64,128,1,float16,float16,0,2.217290719350179
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,4,64,0,1,float16,float16,0,7.93724250793457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,8,64,128,1,float16,fp8,0,2.208672046661377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,8,64,128,1,fp8,fp8,0,2.8181918462117515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,24,64,128,1,float16,float16,0,1.1965760389963787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,4,64,0,1,float16,fp8,0,7.944570541381836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,24,64,128,1,float16,fp8,0,1.170192003250122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,4,64,0,1,fp8,fp8,0,7.243333180745442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,24,64,128,1,fp8,fp8,0,1.545471986134847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,24,64,0,1,float16,float16,0,4.093008041381836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,8,64,0,1,float16,float16,0,8.022554397583008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,1,64,128,1,float16,float16,0,1.0586293538411458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,8,64,0,1,float16,fp8,0,7.952277501424153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,8,64,0,1,fp8,fp8,0,7.2973175048828125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,1,64,128,1,float16,fp8,0,1.0524266560872395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,24,64,0,1,fp8,fp8,0,3.7410561243693032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,24,64,0,1,float16,fp8,0,4.106922785441081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,1,64,0,1,float16,float16,0,3.971258799235026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,2,64,128,1,float16,float16,0,1.0750400225321453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,1,64,128,1,fp8,fp8,0,1.3944533665974934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,2,64,128,1,float16,fp8,0,1.060655991236369
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,1,64,0,1,float16,fp8,0,3.9430507024129233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,1,64,0,1,fp8,fp8,0,3.619562784830729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,2,64,128,1,fp8,fp8,0,1.407914638519287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,2,64,0,1,float16,float16,0,3.978245417277018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,4,64,128,1,float16,float16,0,1.0900800228118896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,2,64,0,1,float16,fp8,0,3.953136126200358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,2,64,0,1,fp8,fp8,0,3.618405342102051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,4,64,128,1,float16,fp8,0,1.080624024073283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,4,64,128,1,fp8,fp8,0,1.4225066502888997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,4,64,0,1,float16,float16,0,3.929840087890625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,8,64,128,1,float16,float16,0,1.1093013286590576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,8,64,128,1,float16,fp8,0,1.1023786862691243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,4,64,0,1,float16,fp8,0,3.99509334564209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,4,64,0,1,fp8,fp8,0,3.652895927429199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,8,64,128,1,fp8,fp8,0,1.4406879742940266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,8,64,0,1,float16,float16,0,4.029354731241862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,24,64,128,1,float16,float16,0,0.6016159852345785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,24,64,128,1,float16,fp8,0,0.6105653444925944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,24,64,128,1,fp8,fp8,0,0.7930346330006918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,24,64,0,1,float16,float16,0,2.06113068262736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,8,64,0,1,float16,fp8,0,3.9713172912597656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,8,64,0,1,fp8,fp8,0,3.6710453033447266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,24,64,0,1,float16,fp8,0,2.0639626185099282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,1,64,128,1,float16,float16,0,0.5599466562271118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,24,64,0,1,fp8,fp8,0,1.9164586067199707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,1,64,128,1,float16,fp8,0,0.5534293254216512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,1,64,128,1,fp8,fp8,0,0.7274506886800131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,1,64,0,1,float16,float16,0,1.9986079533894856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,2,64,128,1,float16,float16,0,0.5631733338038126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,1,64,0,1,float16,fp8,0,1.9957440694173176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,2,64,128,1,float16,fp8,0,0.551695982615153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,1,64,0,1,fp8,fp8,0,1.852783997853597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,2,64,128,1,fp8,fp8,0,0.7318933010101318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,2,64,0,1,float16,float16,0,2.0091733932495117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,4,64,128,1,float16,float16,0,0.5633120139439901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,2,64,0,1,float16,fp8,0,1.992693265279134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,4,64,128,1,float16,fp8,0,0.5615679820378622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,2,64,0,1,fp8,fp8,0,1.8561760584513347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,4,64,128,1,fp8,fp8,0,0.7297759850819906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,4,64,0,1,float16,float16,0,2.0120479265848794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,4,64,0,1,float16,fp8,0,2.0041653315226235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,8,64,128,1,float16,float16,0,0.5696106751759847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,8,64,128,1,float16,fp8,0,0.5680053234100342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,4,64,0,1,fp8,fp8,0,1.8488960266113281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,8,64,0,1,float16,float16,0,2.016650676727295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,8,64,128,1,fp8,fp8,0,0.7491412957509359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,24,64,128,1,float16,float16,0,0.33212266365687054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,8,64,0,1,float16,fp8,0,2.00218137105306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,8,64,0,1,fp8,fp8,0,1.8674933115641277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,24,64,0,1,float16,float16,0,1.071295976638794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,24,64,128,1,float16,fp8,0,0.3378346761067708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,24,64,128,1,fp8,fp8,0,0.4348906675974528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,1,64,128,1,float16,float16,0,0.312063992023468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,24,64,0,1,float16,fp8,0,1.0771413644154866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,24,64,0,1,fp8,fp8,0,0.9672693411509196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,1,64,128,1,float16,fp8,0,0.3096426725387573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,1,64,128,1,fp8,fp8,0,0.4018239974975586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,1,64,0,1,float16,float16,0,1.0474080244700115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,1,64,0,1,float16,fp8,0,1.0535413424173992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,1,64,0,1,fp8,fp8,0,0.9301973183949789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,2,64,128,1,float16,float16,0,0.3146880070368449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,2,64,0,1,float16,float16,0,1.0538933277130127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,2,64,128,1,float16,fp8,0,0.31107733647028607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,2,64,128,1,fp8,fp8,0,0.4057600100835164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,2,64,0,1,float16,fp8,0,1.0522613525390625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,4,64,128,1,float16,float16,0,0.3160159985224406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,2,64,0,1,fp8,fp8,0,0.9274133046468099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,4,64,128,1,float16,fp8,0,0.3122239907582601
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,4,64,0,1,float16,float16,0,1.0582506656646729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,4,64,128,1,fp8,fp8,0,0.4077119827270508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,4,64,0,1,float16,fp8,0,1.0610346794128418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,8,64,128,1,float16,float16,0,0.31754134098688763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,4,64,0,1,fp8,fp8,0,0.9373439947764078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,8,64,128,1,float16,fp8,0,0.317194660504659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,8,64,0,1,float16,float16,0,1.0674080053965251
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,24,64,128,1,float16,fp8,0,0.22637865940729776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,8,64,128,1,fp8,fp8,0,0.41276800632476807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,8,64,0,1,float16,fp8,0,1.0592640240987141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,24,64,128,1,float16,float16,0,0.22652800877888998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,8,64,0,1,fp8,fp8,0,0.9418400128682455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,24,64,0,1,float16,float16,0,0.5766880114873251
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,24,64,128,1,fp8,fp8,0,0.27553067604700726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,24,64,0,1,float16,fp8,0,0.5791786511739095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,24,64,0,1,fp8,fp8,0,0.5341546535491943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,1,64,128,1,float16,float16,0,0.22431466976801553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,1,64,128,1,float16,fp8,0,0.22606933116912842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,1,64,0,1,fp8,fp8,0,0.5311413208643595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,1,64,0,1,float16,float16,0,0.5746933221817017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,1,64,128,1,fp8,fp8,0,0.27365867296854657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,1,64,0,1,float16,fp8,0,0.5742666721343994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,2,64,128,1,float16,float16,0,0.22442134221394858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,2,64,128,1,float16,fp8,0,0.22515199581782022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,2,64,0,1,float16,float16,0,0.5760053396224976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,2,64,128,1,fp8,fp8,0,0.2755733331044515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,2,64,0,1,float16,fp8,0,0.5746186574300131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,2,64,0,1,fp8,fp8,0,0.531269351641337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,4,64,128,1,float16,float16,0,0.22497065862019858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,4,64,128,1,float16,fp8,0,0.22533865769704184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,4,64,0,1,float16,float16,0,0.5745706558227539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,4,64,128,1,fp8,fp8,0,0.27391467491785687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,4,64,0,1,float16,fp8,0,0.5802666743596395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,4,64,0,1,fp8,fp8,0,0.5363680124282837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,8,64,128,1,float16,float16,0,0.22565333048502603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,8,64,0,1,float16,float16,0,0.575381318728129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,8,64,128,1,float16,fp8,0,0.22576000293095908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,1,64,128,1,float16,float16,0,1.5778133074442546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,8,64,128,1,fp8,fp8,0,0.2748746673266093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,8,64,0,1,float16,fp8,0,0.5746399958928426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,8,64,0,1,fp8,fp8,0,0.5326026678085327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,1,64,128,1,float16,fp8,0,1.5448479652404785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,1,64,128,1,fp8,fp8,0,2.0378986994425454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,1,64,0,1,float16,float16,0,4.642640113830566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,2,64,128,1,float16,float16,0,1.6072853406270344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,2,64,128,1,float16,fp8,0,1.5904746055603027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,2,64,128,1,fp8,fp8,0,2.074181397755941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,1,64,0,1,fp8,fp8,0,4.262607892354329
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,1,64,0,1,float16,fp8,0,4.6701812744140625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,2,64,0,1,float16,float16,0,4.777957280476888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,4,64,128,1,float16,float16,0,1.6234720547993977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,4,64,128,1,float16,fp8,0,1.6099680264790852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,2,64,0,1,float16,fp8,0,4.678789456685384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,2,64,0,1,fp8,fp8,0,4.296106656392415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,4,64,128,1,fp8,fp8,0,2.1164533297220864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,4,64,0,1,float16,float16,0,4.795056025187175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,8,64,128,1,float16,float16,0,1.653429349263509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,4,64,0,1,fp8,fp8,0,4.312197367350261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,8,64,128,1,float16,fp8,0,1.6371946334838867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,4,64,0,1,float16,fp8,0,4.7209014892578125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,8,64,128,1,fp8,fp8,0,2.1486454010009766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,24,64,128,1,float16,float16,0,0.8814666271209717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,8,64,0,1,float16,float16,0,4.709669431050618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,24,64,128,1,float16,fp8,0,0.8813333511352539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,24,64,128,1,fp8,fp8,0,1.1568373044331868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,24,64,0,1,float16,float16,0,2.423290729522705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,8,64,0,1,fp8,fp8,0,4.359008153279622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,8,64,0,1,float16,fp8,0,4.75767453511556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,24,64,0,1,float16,fp8,0,2.4127999941507974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,1,64,128,1,float16,float16,0,0.795039971669515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,1,64,128,1,float16,fp8,0,0.78111465771993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,1,64,0,1,float16,float16,0,2.3592480023701987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,24,64,0,1,fp8,fp8,0,2.2759626706441245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,1,64,128,1,fp8,fp8,0,1.0381333033243816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,2,64,128,1,float16,fp8,0,0.7906453609466553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,1,64,0,1,float16,fp8,0,2.345242659250895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,1,64,0,1,fp8,fp8,0,2.1674720446268716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,2,64,128,1,float16,float16,0,0.8013652960459391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,2,64,0,1,float16,float16,0,2.341957410176595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,2,64,128,1,fp8,fp8,0,1.0453120072682698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,2,64,0,1,float16,fp8,0,2.319690704345703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,4,64,128,1,float16,float16,0,0.806826670964559
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,2,64,0,1,fp8,fp8,0,2.162773291269938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,4,64,128,1,float16,fp8,0,0.7954880396525065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,8,64,128,1,float16,float16,0,0.8139146963755289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,4,64,128,1,fp8,fp8,0,1.0554186503092449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,4,64,0,1,float16,float16,0,2.3530453046162925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,4,64,0,1,float16,fp8,0,2.3603572845458984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,4,64,0,1,fp8,fp8,0,2.1757920583089194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,8,64,128,1,float16,fp8,0,0.8112106323242188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,8,64,0,1,float16,float16,0,2.3646346728006997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,8,64,128,1,fp8,fp8,0,1.0766560236612956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,24,64,128,1,float16,float16,0,0.45629334449768066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,8,64,0,1,float16,fp8,0,2.3515146573384604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,8,64,0,1,fp8,fp8,0,2.191786607106527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,24,64,128,1,float16,fp8,0,0.4617760181427002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,24,64,0,1,float16,float16,0,1.2498026688893635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,24,64,128,1,fp8,fp8,0,0.5994133154551188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,24,64,0,1,float16,fp8,0,1.2529333432515461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,1,64,128,1,float16,float16,0,0.4241600036621094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,24,64,0,1,fp8,fp8,0,1.166767994562785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,1,64,128,1,float16,fp8,0,0.4163413445154826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,1,64,0,1,float16,float16,0,1.2259626388549805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,2,64,128,1,float16,fp8,0,0.420693318049113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,2,64,0,1,float16,float16,0,1.217029333114624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,2,64,128,1,fp8,fp8,0,0.5527146657307943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,1,64,128,1,fp8,fp8,0,0.5500320196151733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,1,64,0,1,float16,fp8,0,1.2112053235371907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,2,64,128,1,float16,float16,0,0.42785600821177167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,1,64,0,1,fp8,fp8,0,1.1155680020650227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,2,64,0,1,float16,fp8,0,1.2090293566385906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,4,64,128,1,float16,float16,0,0.42905600865681964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,2,64,0,1,fp8,fp8,0,1.1189546585083008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,4,64,128,1,float16,fp8,0,0.4238133430480957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,4,64,0,1,float16,float16,0,1.2186986605326335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,4,64,128,1,fp8,fp8,0,0.5597333510716757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,4,64,0,1,float16,fp8,0,1.212154706319173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,4,64,0,1,fp8,fp8,0,1.1226879755655925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,8,64,128,1,float16,float16,0,0.4345066547393799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,24,64,128,1,float16,float16,0,0.255295991897583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,8,64,128,1,float16,fp8,0,0.43209067980448407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,8,64,0,1,float16,float16,0,1.225061337153117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,8,64,128,1,fp8,fp8,0,0.5658346811930338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,8,64,0,1,float16,fp8,0,1.219930648803711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,8,64,0,1,fp8,fp8,0,1.139408032099406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,24,64,0,1,float16,float16,0,0.6608746846516927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,24,64,128,1,float16,fp8,0,0.25967466831207275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,24,64,128,1,fp8,fp8,0,0.33774932225545246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,24,64,0,1,float16,fp8,0,0.6649706761042277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,24,64,0,1,fp8,fp8,0,0.5944426854451498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,1,64,128,1,float16,float16,0,0.23928000529607138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,1,64,0,1,float16,float16,0,0.6474719842274984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,1,64,128,1,float16,fp8,0,0.2388746738433838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,1,64,128,1,fp8,fp8,0,0.3111039996147156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,1,64,0,1,float16,fp8,0,0.644373337427775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,1,64,0,1,fp8,fp8,0,0.5731306473414103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,2,64,128,1,float16,float16,0,0.24091200033823648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,2,64,0,1,float16,float16,0,0.6510986487070719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,2,64,128,1,float16,fp8,0,0.2384106715520223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,2,64,128,1,fp8,fp8,0,0.31285866101582843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,2,64,0,1,float16,fp8,0,0.6507306496302286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,2,64,0,1,fp8,fp8,0,0.5716373523076376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,4,64,128,1,float16,float16,0,0.243669331073761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,4,64,0,1,float16,float16,0,0.6504640181859335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,4,64,128,1,float16,fp8,0,0.23995200792948404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,4,64,128,1,fp8,fp8,0,0.31385600566864014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,4,64,0,1,float16,fp8,0,0.6484213272730509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,4,64,0,1,fp8,fp8,0,0.5752053260803223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,8,64,0,1,fp8,fp8,0,0.5766880114873251
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,8,64,128,1,float16,float16,0,0.2450666626294454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,24,64,128,1,float16,fp8,0,0.17656532923380533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,8,64,0,1,float16,float16,0,0.6585973501205444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,8,64,128,1,float16,fp8,0,0.24386133750279745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,8,64,128,1,fp8,fp8,0,0.31948800881703693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,8,64,0,1,float16,fp8,0,0.6540213425954183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,24,64,128,1,float16,float16,0,0.17642132441202799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,24,64,0,1,float16,float16,0,0.36471466223398846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,24,64,128,1,fp8,fp8,0,0.21726399660110474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,24,64,0,1,float16,fp8,0,0.3648746808369954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,24,64,0,1,fp8,fp8,0,0.3397013346354167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,1,64,128,1,float16,float16,0,0.17345066865285239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,1,64,0,1,float16,float16,0,0.3605813185373942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,1,64,128,1,float16,fp8,0,0.17438934246699014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,1,64,128,1,fp8,fp8,0,0.21040000518163046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,1,64,0,1,float16,fp8,0,0.3633333444595337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,1,64,0,1,fp8,fp8,0,0.3375093142191569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,2,64,128,1,float16,float16,0,0.1741973360379537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,2,64,0,1,float16,float16,0,0.36294933160146076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,2,64,128,1,float16,fp8,0,0.17427200078964233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,2,64,128,1,fp8,fp8,0,0.21384533246358237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,2,64,0,1,float16,fp8,0,0.3608693281809489
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,4,64,128,1,fp8,fp8,0,0.21412267287572226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,2,64,0,1,fp8,fp8,0,0.3378239870071411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,4,64,128,1,float16,float16,0,0.1753013332684835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,4,64,0,1,float16,float16,0,0.36243732770284015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,4,64,128,1,float16,fp8,0,0.17455466588338217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,4,64,0,1,float16,fp8,0,0.3617440064748128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,4,64,0,1,fp8,fp8,0,0.3375360171000163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,8,64,128,1,float16,float16,0,0.17464532454808554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,8,64,0,1,fp8,fp8,0,0.33749866485595703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,8,64,0,1,float16,float16,0,0.3645973205566406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,8,64,128,1,float16,fp8,0,0.1753066579500834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,8,64,128,1,fp8,fp8,0,0.21618666251500449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,8,64,0,1,float16,fp8,0,0.3628480037053426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,1,64,0,1,float16,float16,0,4.609312057495117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,1,64,128,1,float16,float16,0,2.079871972401937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,1,64,128,1,float16,fp8,0,2.052741368611654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,1,64,128,1,fp8,fp8,0,2.6828746795654297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,2,64,128,1,float16,fp8,0,2.1262879371643066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,1,64,0,1,float16,fp8,0,4.5575253168741865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,1,64,0,1,fp8,fp8,0,4.208154678344727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,2,64,128,1,float16,float16,0,2.1342239379882812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,2,64,0,1,float16,float16,0,4.604954719543457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,2,64,128,1,fp8,fp8,0,2.726703961690267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,4,64,128,1,float16,float16,0,2.1433493296305337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,2,64,0,1,float16,fp8,0,4.709477424621582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,2,64,0,1,fp8,fp8,0,4.260442733764648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,4,64,128,1,float16,fp8,0,2.1246347427368164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,4,64,0,1,float16,float16,0,4.652528127034505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,4,64,128,1,fp8,fp8,0,2.765391985575358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,8,64,128,1,float16,float16,0,2.185317357381185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,4,64,0,1,float16,fp8,0,4.60372257232666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,4,64,0,1,fp8,fp8,0,4.259903907775879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,8,64,128,1,float16,fp8,0,2.166869322458903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,8,64,128,1,fp8,fp8,0,2.796559969584147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,24,64,128,1,float16,float16,0,1.1751893361409504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,8,64,0,1,float16,float16,0,4.7639360427856445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,24,64,0,1,float16,float16,0,2.4277706146240234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,8,64,0,1,float16,fp8,0,4.6827999750773115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,8,64,0,1,fp8,fp8,0,4.350565274556478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,24,64,0,1,fp8,fp8,0,2.274522622426351
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,24,64,128,1,float16,fp8,0,1.15829332669576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,24,64,128,1,fp8,fp8,0,1.5072852770487468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,24,64,0,1,float16,fp8,0,2.4048852920532227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,1,64,128,1,float16,float16,0,1.0424426396687825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,1,64,128,1,float16,fp8,0,1.0235146681467693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,1,64,0,1,float16,float16,0,2.2793280283610025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,1,64,0,1,float16,fp8,0,2.2669173876444497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,2,64,0,1,float16,float16,0,2.3007787068684897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,1,64,128,1,fp8,fp8,0,1.3553333282470703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,2,64,128,1,float16,float16,0,1.0534133116404216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,1,64,0,1,fp8,fp8,0,2.11079470316569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,2,64,128,1,float16,fp8,0,1.0384693145751953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,2,64,128,1,fp8,fp8,0,1.3820746739705403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,2,64,0,1,float16,fp8,0,2.2795626322428384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,2,64,0,1,fp8,fp8,0,2.138746738433838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,4,64,128,1,float16,float16,0,1.0608266989390056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,4,64,0,1,float16,float16,0,2.322453339894613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,4,64,128,1,float16,fp8,0,1.0504266421000164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,4,64,128,1,fp8,fp8,0,1.390336036682129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,4,64,0,1,float16,fp8,0,2.302549362182617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,8,64,128,1,float16,float16,0,1.0782612959543865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,8,64,128,1,float16,fp8,0,1.0697973569234211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,4,64,0,1,fp8,fp8,0,2.1575892766316733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,8,64,0,1,float16,float16,0,2.342288017272949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,8,64,128,1,fp8,fp8,0,1.4255199432373047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,24,64,128,1,float16,float16,0,0.5866346756617228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,24,64,128,1,float16,fp8,0,0.5880000193913778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,8,64,0,1,float16,fp8,0,2.3160160382588706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,24,64,0,1,float16,float16,0,1.213215986887614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,8,64,0,1,fp8,fp8,0,2.179807980855306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,24,64,128,1,fp8,fp8,0,0.7683946291605631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,24,64,0,1,float16,fp8,0,1.2272106806437175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,24,64,0,1,fp8,fp8,0,1.1545120080312092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,1,64,128,1,float16,float16,0,0.5365226666132609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,1,64,0,1,float16,float16,0,1.173103968302409
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,1,64,128,1,float16,fp8,0,0.5252799987792969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,1,64,128,1,fp8,fp8,0,0.6980533599853516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,1,64,0,1,float16,fp8,0,1.1698346932729085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,2,64,128,1,float16,float16,0,0.5404800176620483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,1,64,0,1,fp8,fp8,0,1.0873386859893799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,2,64,0,1,float16,float16,0,1.173360029856364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,2,64,128,1,float16,fp8,0,0.5312266747156779
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,2,64,128,1,fp8,fp8,0,0.7001333236694336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,4,64,128,1,float16,float16,0,0.5424799919128418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,2,64,0,1,float16,fp8,0,1.1661492983500164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,2,64,0,1,fp8,fp8,0,1.091930627822876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,4,64,128,1,float16,fp8,0,0.5349546670913696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,8,64,128,1,float16,float16,0,0.5520266691843668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,4,64,0,1,float16,float16,0,1.184880018234253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,4,64,128,1,fp8,fp8,0,0.7121813297271729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,4,64,0,1,float16,fp8,0,1.1720746358235676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,4,64,0,1,fp8,fp8,0,1.088917334874471
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,8,64,128,1,float16,fp8,0,0.5493013461430868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,8,64,0,1,float16,float16,0,1.1821226278940837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,8,64,128,1,fp8,fp8,0,0.7221386432647705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,8,64,0,1,float16,fp8,0,1.1812427043914795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,24,64,128,1,fp8,fp8,0,0.41526933511098224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,24,64,128,1,float16,float16,0,0.31293867031733197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,24,64,0,1,float16,fp8,0,0.640778660774231
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,8,64,0,1,fp8,fp8,0,1.1101120313008626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,24,64,0,1,float16,float16,0,0.6359306573867798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,24,64,128,1,float16,fp8,0,0.3160373369852702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,1,64,128,1,float16,float16,0,0.29176533222198486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,24,64,0,1,fp8,fp8,0,0.5829813480377197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,1,64,128,1,float16,fp8,0,0.2863146662712097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,1,64,0,1,float16,float16,0,0.6220853328704834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,1,64,128,1,fp8,fp8,0,0.37720000743865967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,1,64,0,1,float16,fp8,0,0.6175359884897867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,1,64,0,1,fp8,fp8,0,0.5505919853846232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,2,64,128,1,float16,float16,0,0.29293866952260333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,2,64,0,1,float16,float16,0,0.61954132715861
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,2,64,128,1,float16,fp8,0,0.2876159946123759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,2,64,128,1,fp8,fp8,0,0.3817760149637858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,2,64,0,1,float16,fp8,0,0.6172320048014323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,2,64,0,1,fp8,fp8,0,0.5519786675771078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,4,64,128,1,float16,float16,0,0.2951786716779073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,4,64,0,1,float16,float16,0,0.6213173468907675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,4,64,128,1,float16,fp8,0,0.28970134258270264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,4,64,128,1,fp8,fp8,0,0.38388800621032715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,4,64,0,1,float16,fp8,0,0.6172906557718912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,8,64,128,1,float16,float16,0,0.29993067185084027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,4,64,0,1,fp8,fp8,0,0.5584959983825684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,8,64,0,1,float16,float16,0,0.6217973232269287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,8,64,128,1,float16,fp8,0,0.2972106734911601
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,8,64,128,1,fp8,fp8,0,0.38889066378275555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,8,64,0,1,float16,fp8,0,0.6182613372802734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,24,64,128,1,float16,float16,0,0.1779413421948751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,8,64,0,1,fp8,fp8,0,0.5634133418401083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,24,64,0,1,float16,float16,0,0.32920000950495404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,24,64,128,1,float16,fp8,0,0.1816426714261373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,24,64,128,1,fp8,fp8,0,0.23591466744740805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,24,64,0,1,float16,fp8,0,0.3340959946314494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,24,64,0,1,fp8,fp8,0,0.3131519953409831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,1,64,128,1,float16,float16,0,0.16671999295552573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,1,64,0,1,float16,float16,0,0.3174720009167989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,1,64,128,1,float16,fp8,0,0.16674667596817017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,2,64,128,1,float16,fp8,0,0.16715733210245767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,1,64,128,1,fp8,fp8,0,0.21818133195241293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,1,64,0,1,float16,fp8,0,0.3178826570510864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,1,64,0,1,fp8,fp8,0,0.296453336874644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,2,64,128,1,float16,float16,0,0.16847467422485352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,2,64,0,1,float16,float16,0,0.31782400608062744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,2,64,128,1,fp8,fp8,0,0.21940267086029053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,2,64,0,1,float16,fp8,0,0.31825600067774457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,2,64,0,1,fp8,fp8,0,0.2962026596069336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,4,64,128,1,float16,float16,0,0.16943466663360596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,4,64,0,1,float16,float16,0,0.32081600030263263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,4,64,128,1,float16,fp8,0,0.1686240037282308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,4,64,128,1,fp8,fp8,0,0.22259199619293213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,4,64,0,1,float16,fp8,0,0.31778132915496826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,4,64,0,1,fp8,fp8,0,0.2993066708246867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,8,64,128,1,float16,float16,0,0.172106663386027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,8,64,0,1,float16,float16,0,0.32286399602890015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,8,64,128,1,float16,fp8,0,0.17231466372807822
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,8,64,128,1,fp8,fp8,0,0.2255679965019226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,8,64,0,1,float16,fp8,0,0.32049065828323364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,8,64,0,1,fp8,fp8,0,0.3035573363304138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,24,64,128,1,float16,float16,0,0.1267733375231425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,24,64,0,1,float16,float16,0,0.20004800955454508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,24,64,128,1,float16,fp8,0,0.12686399618784586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,24,64,128,1,fp8,fp8,0,0.1585813363393148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,24,64,0,1,float16,fp8,0,0.20047465960184732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,24,64,0,1,fp8,fp8,0,0.18925867478052774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,1,64,128,1,float16,float16,0,0.12263466914494832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,1,64,0,1,float16,float16,0,0.1977120041847229
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,1,64,128,1,float16,fp8,0,0.12224533160527547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,1,64,128,1,fp8,fp8,0,0.1455573340257009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,1,64,0,1,float16,fp8,0,0.197434663772583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,1,64,0,1,fp8,fp8,0,0.1851253310839335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,2,64,128,1,float16,float16,0,0.12288533647855122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,2,64,0,1,float16,float16,0,0.19708265860875449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,2,64,128,1,float16,fp8,0,0.12268799543380737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,2,64,128,1,fp8,fp8,0,0.14546133081118265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,2,64,0,1,float16,fp8,0,0.19692800442377725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,2,64,0,1,fp8,fp8,0,0.1859253247578939
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,4,64,128,1,float16,float16,0,0.12257066369056702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,4,64,0,1,float16,float16,0,0.19755200544993082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,4,64,128,1,float16,fp8,0,0.12390399972597758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,4,64,128,1,fp8,fp8,0,0.15069866180419922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,4,64,0,1,float16,fp8,0,0.19825067122777304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,4,64,0,1,fp8,fp8,0,0.18835733334223428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,8,64,128,1,float16,float16,0,0.12405866384506226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,8,64,0,1,float16,float16,0,0.19910933574040732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,8,64,128,1,float16,fp8,0,0.12478933731714885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,8,64,128,1,fp8,fp8,0,0.1548479994138082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,8,64,0,1,float16,fp8,0,0.1993173360824585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,8,64,0,1,fp8,fp8,0,0.1869706710179647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,1,64,128,1,float16,float16,0,1.5403146743774414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,1,64,128,1,float16,fp8,0,1.5112959543863933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,1,64,0,1,float16,float16,0,2.823989232381185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,1,64,128,1,fp8,fp8,0,2.010144074757894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,2,64,128,1,float16,float16,0,1.5721813837687175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,1,64,0,1,float16,fp8,0,2.7901172637939453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,1,64,0,1,fp8,fp8,0,2.6213706334431968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,2,64,128,1,float16,fp8,0,1.5445547103881836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,2,64,0,1,float16,float16,0,2.8504533767700195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,2,64,128,1,fp8,fp8,0,2.039034684499105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,4,64,128,1,float16,float16,0,1.5863946278889973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,2,64,0,1,float16,fp8,0,2.841205279032389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,2,64,0,1,fp8,fp8,0,2.648709297180176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,4,64,128,1,float16,fp8,0,1.560197353363037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,4,64,128,1,fp8,fp8,0,2.0569067001342773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,4,64,0,1,float16,float16,0,2.8796908060709634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,4,64,0,1,float16,fp8,0,2.8638827006022134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,8,64,128,1,float16,float16,0,1.6128320693969727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,4,64,0,1,fp8,fp8,0,2.67137082417806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,8,64,128,1,float16,fp8,0,1.6145119667053223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,8,64,0,1,float16,float16,0,2.891317367553711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,24,64,128,1,float16,float16,0,0.8773919741312662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,8,64,128,1,fp8,fp8,0,2.0897653897603354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,8,64,0,1,float16,fp8,0,2.882394790649414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,8,64,0,1,fp8,fp8,0,2.7155946095784507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,24,64,128,1,float16,fp8,0,0.862015962600708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,24,64,0,1,float16,float16,0,1.519386609395345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,24,64,128,1,fp8,fp8,0,1.1365493138631184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,1,64,128,1,float16,float16,0,0.7717599868774414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,24,64,0,1,float16,fp8,0,1.503434658050537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,24,64,0,1,fp8,fp8,0,1.4354079564412434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,1,64,128,1,float16,fp8,0,0.7666400273640951
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,1,64,0,1,float16,float16,0,1.4207626978556316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,2,64,128,1,float16,fp8,0,0.7737279733022054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,1,64,128,1,fp8,fp8,0,1.0121440092722576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,1,64,0,1,float16,fp8,0,1.4077280362447102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,2,64,128,1,float16,float16,0,0.781493345896403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,1,64,0,1,fp8,fp8,0,1.3205760320027669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,2,64,0,1,float16,float16,0,1.4262666702270508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,2,64,128,1,fp8,fp8,0,1.0272586345672607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,2,64,0,1,float16,fp8,0,1.4171199798583984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,2,64,0,1,fp8,fp8,0,1.325050671895345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,4,64,128,1,float16,float16,0,0.7885973453521729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,4,64,128,1,float16,fp8,0,0.7753600279490153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,4,64,0,1,float16,float16,0,1.4314346313476562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,4,64,128,1,fp8,fp8,0,1.0367733637491863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,4,64,0,1,float16,fp8,0,1.423375924428304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,8,64,128,1,float16,float16,0,0.8005066712697347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,4,64,0,1,fp8,fp8,0,1.3416479428609211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,8,64,128,1,float16,fp8,0,0.7959573268890381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,8,64,0,1,fp8,fp8,0,1.3654133478800456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,8,64,0,1,float16,float16,0,1.4449599583943684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,8,64,128,1,fp8,fp8,0,1.0508106549580891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,24,64,128,1,float16,float16,0,0.4437013467152913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,8,64,0,1,float16,fp8,0,1.4333866437276204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,24,64,0,1,float16,float16,0,0.7702399889628092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,24,64,128,1,float16,fp8,0,0.4480266571044922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,24,64,128,1,fp8,fp8,0,0.5844266812006632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,24,64,0,1,float16,fp8,0,0.7776799996693929
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,24,64,0,1,fp8,fp8,0,0.7330026626586914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,1,64,128,1,float16,float16,0,0.40459732214609784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,1,64,128,1,float16,fp8,0,0.4011840025583903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,1,64,0,1,float16,float16,0,0.7376800378163656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,1,64,128,1,fp8,fp8,0,0.5295519828796387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,1,64,0,1,float16,fp8,0,0.7341492970784506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,1,64,0,1,fp8,fp8,0,0.6831519603729248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,2,64,128,1,float16,float16,0,0.40908801555633545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,2,64,0,1,float16,float16,0,0.74125870068868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,2,64,128,1,float16,fp8,0,0.40299201011657715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,2,64,128,1,fp8,fp8,0,0.5341066519419352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,2,64,0,1,float16,fp8,0,0.7331519921620687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,2,64,0,1,fp8,fp8,0,0.6843732992808024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,4,64,128,1,float16,float16,0,0.41447468598683673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,4,64,128,1,float16,fp8,0,0.41025598843892414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,4,64,0,1,float16,float16,0,0.7424853642781576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,4,64,128,1,fp8,fp8,0,0.5417919953664144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,4,64,0,1,float16,fp8,0,0.7350239753723145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,4,64,0,1,fp8,fp8,0,0.688256025314331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,8,64,128,1,float16,float16,0,0.4181813398996989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,8,64,0,1,float16,float16,0,0.7466453711191813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,8,64,128,1,float16,fp8,0,0.4174026648203532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,8,64,128,1,fp8,fp8,0,0.5513279835383097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,8,64,0,1,float16,fp8,0,0.7475679715474447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,24,64,128,1,float16,float16,0,0.24012267589569092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,8,64,0,1,fp8,fp8,0,0.6946612993876139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,24,64,0,1,float16,float16,0,0.41098666191101074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,24,64,128,1,float16,fp8,0,0.24390933911005655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,24,64,128,1,fp8,fp8,0,0.32130134105682373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,24,64,0,1,float16,fp8,0,0.4143306811650594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,24,64,0,1,fp8,fp8,0,0.3774506648381551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,1,64,128,1,float16,float16,0,0.22276800870895386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,1,64,0,1,float16,float16,0,0.39230934778849286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,1,64,128,1,float16,fp8,0,0.2194826602935791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,1,64,128,1,fp8,fp8,0,0.2932479977607727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,1,64,0,1,float16,fp8,0,0.3922773202260335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,2,64,128,1,fp8,fp8,0,0.2929973403612773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,1,64,0,1,fp8,fp8,0,0.35234665870666504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,2,64,128,1,float16,float16,0,0.22369066874186197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,2,64,0,1,float16,float16,0,0.3944213390350342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,2,64,128,1,float16,fp8,0,0.22082134087880453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,2,64,0,1,float16,fp8,0,0.3919573227564494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,2,64,0,1,fp8,fp8,0,0.35461334387461346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,4,64,128,1,float16,float16,0,0.22678399085998535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,4,64,0,1,float16,float16,0,0.3978773355484009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,8,64,0,1,float16,float16,0,0.40241066614786786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,4,64,128,1,float16,fp8,0,0.22431999444961548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,4,64,128,1,fp8,fp8,0,0.296725332736969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,4,64,0,1,float16,fp8,0,0.3949120044708252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,4,64,0,1,fp8,fp8,0,0.35284264882405597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,8,64,128,1,float16,float16,0,0.2285919984181722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,8,64,128,1,float16,fp8,0,0.22849599520365396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,8,64,128,1,fp8,fp8,0,0.301744004090627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,8,64,0,1,float16,fp8,0,0.4020853439966838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,8,64,0,1,fp8,fp8,0,0.361786683400472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,24,64,128,1,float16,float16,0,0.14065600434939066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,24,64,0,1,float16,float16,0,0.21423467000325522
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,24,64,128,1,float16,fp8,0,0.14268267154693604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,24,64,128,1,fp8,fp8,0,0.18660799662272134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,24,64,0,1,float16,fp8,0,0.21674132347106934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,24,64,0,1,fp8,fp8,0,0.2066719929377238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,1,64,128,1,float16,float16,0,0.13082666198412576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,1,64,0,1,float16,float16,0,0.2076693375905355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,1,64,128,1,float16,fp8,0,0.13058132926623026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,1,64,128,1,fp8,fp8,0,0.17055465777715048
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,1,64,0,1,float16,fp8,0,0.20428800582885742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,1,64,0,1,fp8,fp8,0,0.19395732879638672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,2,64,128,1,float16,float16,0,0.13083199659983316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,2,64,0,1,float16,float16,0,0.20577067136764526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,2,64,128,1,float16,fp8,0,0.13130666812260947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,2,64,128,1,fp8,fp8,0,0.17292799552281699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,2,64,0,1,float16,fp8,0,0.20548800627390543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,2,64,0,1,fp8,fp8,0,0.19363200664520264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,4,64,128,1,float16,float16,0,0.13167466719945273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,4,64,0,1,float16,float16,0,0.20651733875274658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,4,64,128,1,float16,fp8,0,0.13159466783205667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,4,64,128,1,fp8,fp8,0,0.17442667484283447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,4,64,0,1,float16,fp8,0,0.20696000258127847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,4,64,0,1,fp8,fp8,0,0.19766932725906372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,8,64,128,1,float16,float16,0,0.13424000144004822
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,8,64,0,1,float16,float16,0,0.20878400405248007
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,8,64,128,1,float16,fp8,0,0.13379733761151633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,8,64,128,1,fp8,fp8,0,0.17727466424306235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,8,64,0,1,float16,fp8,0,0.20938666661580405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,8,64,0,1,fp8,fp8,0,0.19874666134516397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,24,64,128,1,float16,float16,0,0.10086400310198466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,24,64,0,1,float16,float16,0,0.13780267039934793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,24,64,128,1,float16,fp8,0,0.1004960040251414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,24,64,128,1,fp8,fp8,0,0.12446399529774983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,24,64,0,1,float16,fp8,0,0.13637866576512656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,24,64,0,1,fp8,fp8,0,0.12917332847913107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,1,64,128,1,float16,float16,0,0.09709333380063374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,1,64,0,1,float16,float16,0,0.1344266633192698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,1,64,128,1,float16,fp8,0,0.09801066915194194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,1,64,128,1,fp8,fp8,0,0.11556266744931538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,1,64,0,1,float16,fp8,0,0.1341759959856669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,1,64,0,1,fp8,fp8,0,0.12707733114560446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,2,64,128,1,float16,float16,0,0.09770666559537251
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,2,64,0,1,float16,float16,0,0.13478933771451315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,2,64,128,1,float16,fp8,0,0.09760000308354695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,2,64,128,1,fp8,fp8,0,0.11592533191045125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,2,64,0,1,float16,fp8,0,0.13499733805656433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,2,64,0,1,fp8,fp8,0,0.1272479991118113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,4,64,128,1,float16,float16,0,0.09733333190282185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,4,64,0,1,float16,float16,0,0.1338986655076345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,4,64,128,1,float16,fp8,0,0.09718933701515198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,4,64,128,1,fp8,fp8,0,0.11572266618410747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,4,64,0,1,float16,fp8,0,0.13457600275675455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,4,64,0,1,fp8,fp8,0,0.12718933820724487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,8,64,128,1,float16,float16,0,0.09803199768066406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,8,64,0,1,float16,float16,0,0.1351146697998047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,8,64,128,1,float16,fp8,0,0.0976639986038208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,8,64,128,1,fp8,fp8,0,0.11567466457684834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,8,64,0,1,float16,fp8,0,0.13430933157602945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,8,64,0,1,fp8,fp8,0,0.12801067034403482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,1,64,128,1,float16,float16,0,2.053376038869222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,1,64,128,1,float16,fp8,0,2.027989387512207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,1,64,0,1,float16,fp8,0,2.9576905568440757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,1,64,0,1,float16,float16,0,2.9787467320760093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,1,64,128,1,fp8,fp8,0,2.6272853215535483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,2,64,128,1,float16,float16,0,2.033877372741699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,1,64,0,1,fp8,fp8,0,2.7209227879842124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,2,64,128,1,float16,fp8,0,2.0141654014587402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,2,64,0,1,float16,float16,0,2.972976048787435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,2,64,128,1,fp8,fp8,0,2.6761013666788735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,2,64,0,1,float16,fp8,0,2.957285245259603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,4,64,128,1,float16,float16,0,2.0570507049560547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,2,64,0,1,fp8,fp8,0,2.7940905888875327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,4,64,128,1,float16,fp8,0,2.0341973304748535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,4,64,0,1,float16,float16,0,2.99344539642334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,4,64,128,1,fp8,fp8,0,2.679562568664551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,8,64,128,1,float16,float16,0,2.0938560167948403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,4,64,0,1,float16,fp8,0,2.9731038411458335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,4,64,0,1,fp8,fp8,0,2.7925707499186196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,8,64,0,1,float16,float16,0,3.03819211324056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,8,64,128,1,float16,fp8,0,2.090005397796631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,24,64,128,1,float16,float16,0,1.147157351175944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,24,64,0,1,float16,float16,0,1.6150026321411133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,8,64,128,1,fp8,fp8,0,2.750282605489095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,8,64,0,1,float16,fp8,0,3.0157814025878906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,8,64,0,1,fp8,fp8,0,2.854522705078125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,24,64,128,1,float16,fp8,0,1.1399306456247966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,24,64,128,1,fp8,fp8,0,1.4941600163777669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,24,64,0,1,fp8,fp8,0,1.5380053520202637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,1,64,128,1,float16,float16,0,1.0161226590474446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,24,64,0,1,float16,fp8,0,1.6114293734232585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,1,64,0,1,float16,float16,0,1.486394723256429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,1,64,0,1,float16,fp8,0,1.4663200378417969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,2,64,128,1,float16,float16,0,1.0297333399454753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,1,64,128,1,float16,fp8,0,0.9940160115559896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,1,64,128,1,fp8,fp8,0,1.3173173268636067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,1,64,0,1,fp8,fp8,0,1.3685439427693684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,2,64,128,1,float16,fp8,0,1.0126826763153076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,2,64,128,1,fp8,fp8,0,1.3281760215759277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,2,64,0,1,float16,float16,0,1.501157283782959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,2,64,0,1,float16,fp8,0,1.4823360443115234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,4,64,0,1,float16,float16,0,1.5111680030822754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,2,64,0,1,fp8,fp8,0,1.3799840609232585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,4,64,128,1,float16,float16,0,1.0284053484598796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,4,64,128,1,float16,fp8,0,1.0216533342997234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,4,64,128,1,fp8,fp8,0,1.3430453936258953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,4,64,0,1,float16,fp8,0,1.4900906880696614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,4,64,0,1,fp8,fp8,0,1.400602658589681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,8,64,128,1,float16,float16,0,1.0543253421783447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,8,64,128,1,float16,fp8,0,1.045408010482788
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,8,64,0,1,fp8,fp8,0,1.4323840141296387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,8,64,0,1,float16,float16,0,1.5235679944356282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,8,64,128,1,fp8,fp8,0,1.3747092882792156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,8,64,0,1,float16,fp8,0,1.51473601659139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,24,64,128,1,float16,float16,0,0.5766826470692953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,24,64,0,1,float16,float16,0,0.8184266885121664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,24,64,128,1,float16,fp8,0,0.5777813196182251
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,24,64,128,1,fp8,fp8,0,0.7502346833546957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,24,64,0,1,float16,fp8,0,0.8147520224253336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,24,64,0,1,fp8,fp8,0,0.778165340423584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,1,64,128,1,float16,float16,0,0.5194613138834635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,1,64,128,1,float16,fp8,0,0.5129546721776327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,1,64,0,1,float16,float16,0,0.7585493723551432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,1,64,128,1,fp8,fp8,0,0.6827946503957113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,1,64,0,1,float16,fp8,0,0.7539520263671875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,1,64,0,1,fp8,fp8,0,0.7003146807352701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,2,64,128,1,float16,float16,0,0.5258346796035767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,2,64,0,1,float16,float16,0,0.7693119843800863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,2,64,128,1,float16,fp8,0,0.5177386601765951
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,2,64,128,1,fp8,fp8,0,0.684005339940389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,2,64,0,1,float16,fp8,0,0.7554559707641602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,2,64,0,1,fp8,fp8,0,0.7107253074645996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,4,64,128,1,float16,float16,0,0.531274676322937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,4,64,0,1,fp8,fp8,0,0.711903969446818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,8,64,128,1,float16,float16,0,0.5372960170110067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,4,64,0,1,float16,float16,0,0.7672533194224039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,4,64,128,1,float16,fp8,0,0.524346669514974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,4,64,128,1,fp8,fp8,0,0.6946400006612142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,4,64,0,1,float16,fp8,0,0.757322629292806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,8,64,0,1,float16,float16,0,0.7789386908213297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,8,64,128,1,float16,fp8,0,0.5348000129063925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,8,64,128,1,fp8,fp8,0,0.7029226620992025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,8,64,0,1,float16,fp8,0,0.7718186378479004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,8,64,0,1,fp8,fp8,0,0.7252586682637533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,24,64,128,1,float16,float16,0,0.30400000015894574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,24,64,0,1,float16,float16,0,0.4270346562067668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,24,64,128,1,float16,fp8,0,0.3044106761614482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,24,64,128,1,fp8,fp8,0,0.4012639919916789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,24,64,0,1,float16,fp8,0,0.42984533309936523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,24,64,0,1,fp8,fp8,0,0.3965280055999756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,1,64,128,1,float16,float16,0,0.2781706651051839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,1,64,0,1,float16,float16,0,0.3999253511428833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,1,64,128,1,float16,fp8,0,0.27462400992711383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,1,64,128,1,fp8,fp8,0,0.3661493460337321
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,1,64,0,1,float16,fp8,0,0.39454933007558185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,1,64,0,1,fp8,fp8,0,0.3630933364232381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,2,64,128,1,float16,float16,0,0.28119999170303345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,2,64,0,1,float16,float16,0,0.4035786787668864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,2,64,128,1,float16,fp8,0,0.27531200647354126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,2,64,128,1,fp8,fp8,0,0.36627201239267987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,2,64,0,1,float16,fp8,0,0.4011146624883016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,2,64,0,1,fp8,fp8,0,0.3654400110244751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,4,64,128,1,float16,float16,0,0.28254934151967365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,4,64,0,1,float16,float16,0,0.4071040153503418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,4,64,128,1,float16,fp8,0,0.27958933512369794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,4,64,128,1,fp8,fp8,0,0.3694933255513509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,4,64,0,1,float16,fp8,0,0.4026879866917928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,4,64,0,1,fp8,fp8,0,0.36618133385976154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,8,64,128,1,float16,float16,0,0.2860959966977437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,8,64,0,1,float16,float16,0,0.4069066842397054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,8,64,128,1,float16,fp8,0,0.2866080005963643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,8,64,128,1,fp8,fp8,0,0.37645332018534344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,8,64,0,1,float16,fp8,0,0.40769068400065106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,8,64,0,1,fp8,fp8,0,0.3726133505503337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,24,64,128,1,float16,float16,0,0.16744534174601236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,24,64,0,1,float16,float16,0,0.22528000672658285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,24,64,128,1,float16,fp8,0,0.17041067282358804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,24,64,128,1,fp8,fp8,0,0.22489066918691
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,24,64,0,1,float16,fp8,0,0.22764267524083456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,24,64,0,1,fp8,fp8,0,0.21474667390187582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,1,64,128,1,float16,float16,0,0.15524799625078836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,1,64,0,1,fp8,fp8,0,0.1965013345082601
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,1,64,0,1,float16,float16,0,0.2069973349571228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,1,64,128,1,float16,fp8,0,0.1532533367474874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,1,64,128,1,fp8,fp8,0,0.2062399983406067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,1,64,0,1,float16,fp8,0,0.20684266090393066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,2,64,128,1,float16,float16,0,0.15606932838757834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,2,64,0,1,float16,float16,0,0.20919466018676758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,2,64,128,1,float16,fp8,0,0.15436266859372458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,2,64,128,1,fp8,fp8,0,0.20615466435750326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,2,64,0,1,float16,fp8,0,0.20694400866826376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,2,64,0,1,fp8,fp8,0,0.19705599546432495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,4,64,128,1,float16,float16,0,0.15819199879964194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,4,64,0,1,float16,float16,0,0.2098026672999064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,4,64,128,1,float16,fp8,0,0.1548746625582377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,4,64,128,1,fp8,fp8,0,0.21037866671880087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,4,64,0,1,float16,fp8,0,0.20683199167251587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,4,64,0,1,fp8,fp8,0,0.20000000794728598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,8,64,128,1,float16,float16,0,0.15969600280125937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,8,64,0,1,float16,float16,0,0.21346133947372437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,8,64,128,1,float16,fp8,0,0.1600053310394287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,8,64,128,1,fp8,fp8,0,0.21278399229049683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,8,64,0,1,float16,fp8,0,0.21210134029388428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,8,64,0,1,fp8,fp8,0,0.202239990234375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,24,64,128,1,float16,float16,0,0.10195733110109965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,24,64,0,1,float16,float16,0,0.12504532933235168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,24,64,128,1,float16,fp8,0,0.10300266742706299
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,24,64,128,1,fp8,fp8,0,0.13528533776601157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,24,64,0,1,float16,fp8,0,0.12643200159072876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,1,64,0,1,fp8,fp8,0,0.11156800389289856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,24,64,0,1,fp8,fp8,0,0.1227839986483256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,2,64,128,1,float16,float16,0,0.09248000383377075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,1,64,128,1,float16,float16,0,0.09319999814033508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,1,64,0,1,float16,float16,0,0.11726933717727661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,1,64,128,1,float16,fp8,0,0.09283733367919922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,1,64,128,1,fp8,fp8,0,0.11691199739774068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,1,64,0,1,float16,fp8,0,0.11705600221951802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,2,64,0,1,float16,float16,0,0.11889599760373433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,2,64,128,1,float16,fp8,0,0.09238400061925252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,2,64,128,1,fp8,fp8,0,0.12412800391515096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,2,64,0,1,float16,fp8,0,0.11757866541544597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,2,64,0,1,fp8,fp8,0,0.11018666625022888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,4,64,128,1,float16,float16,0,0.09373866518338521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,4,64,0,1,float16,float16,0,0.11819733182589214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,4,64,128,1,float16,fp8,0,0.09245866537094116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,4,64,128,1,fp8,fp8,0,0.12084800004959106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,4,64,0,1,float16,fp8,0,0.11805333693822224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,4,64,0,1,fp8,fp8,0,0.11337066690127055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,8,64,128,1,float16,float16,0,0.09589333335558574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,8,64,0,1,float16,float16,0,0.11940800150235494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,8,64,128,1,float16,fp8,0,0.09618133306503296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,8,64,128,1,fp8,fp8,0,0.12542933225631714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,8,64,0,1,float16,fp8,0,0.11942399541536967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,8,64,0,1,fp8,fp8,0,0.1160640021165212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,24,64,128,1,float16,float16,0,0.07099199791749318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,24,64,0,1,float16,float16,0,0.08416533470153809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,24,64,128,1,float16,fp8,0,0.07129066685835521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,24,64,128,1,fp8,fp8,0,0.0869813362757365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,1,64,128,1,float16,fp8,0,0.07056533296902974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,24,64,0,1,float16,fp8,0,0.0842026670773824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,24,64,0,1,fp8,fp8,0,0.0803413341442744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,1,64,128,1,float16,float16,0,0.07073600093523662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,1,64,0,1,float16,float16,0,0.08370666702588399
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,1,64,128,1,fp8,fp8,0,0.0884320040543874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,2,64,128,1,fp8,fp8,0,0.08833600083986919
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,2,64,0,1,float16,fp8,0,0.08417600393295288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,1,64,0,1,float16,fp8,0,0.08307200173536937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,1,64,0,1,fp8,fp8,0,0.07889600098133087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,2,64,128,1,float16,float16,0,0.07042666773001353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,2,64,0,1,float16,float16,0,0.08371733625729878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,2,64,128,1,float16,fp8,0,0.07092800239721934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,2,64,0,1,fp8,fp8,0,0.08006399869918823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,4,64,128,1,float16,float16,0,0.07112533350785573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,4,64,0,1,float16,float16,0,0.08393067121505737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,4,64,128,1,float16,fp8,0,0.07086400190989177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,4,64,128,1,fp8,fp8,0,0.08805867036183675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,8,64,128,1,fp8,fp8,0,0.08865066369374593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,4,64,0,1,float16,fp8,0,0.08334933718045552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,4,64,0,1,fp8,fp8,0,0.07981333136558533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,8,64,128,1,float16,float16,0,0.07083199918270111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,8,64,0,1,float16,float16,0,0.08386133114496867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,8,64,128,1,float16,fp8,0,0.07087466617425282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,8,64,0,1,float16,fp8,0,0.08309866487979889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,8,64,0,1,fp8,fp8,0,0.07993599772453308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,1,64,128,1,float16,float16,0,1.5280319849650066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,1,64,128,1,float16,fp8,0,1.5083093643188477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,1,64,0,1,float16,float16,0,1.9477814038594563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,1,64,0,1,float16,fp8,0,1.9277173678080242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,1,64,128,1,fp8,fp8,0,1.954319953918457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,1,64,0,1,fp8,fp8,0,1.7714026769002278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,2,64,128,1,float16,float16,0,1.5181973775227864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,2,64,128,1,float16,fp8,0,1.5045119921366374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,2,64,0,1,float16,float16,0,1.9589866002400715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,2,64,0,1,float16,fp8,0,1.9260266621907551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,2,64,128,1,fp8,fp8,0,1.9758453369140625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,4,64,128,1,float16,float16,0,1.536293347676595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,2,64,0,1,fp8,fp8,0,1.8181440035502117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,4,64,128,1,fp8,fp8,0,2.0056479771931968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,4,64,128,1,float16,fp8,0,1.5234826405843098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,4,64,0,1,float16,float16,0,1.962234656016032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,4,64,0,1,float16,fp8,0,1.9456373850504558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,4,64,0,1,fp8,fp8,0,1.8254186312357585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,8,64,128,1,float16,float16,0,1.5720267295837402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,8,64,128,1,float16,fp8,0,1.5629919370015461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,8,64,0,1,float16,float16,0,2.0127894083658853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,24,64,128,1,float16,float16,0,0.8564480145772299
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,8,64,128,1,fp8,fp8,0,2.0483627319335938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,8,64,0,1,float16,fp8,0,1.9826292991638184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,24,64,0,1,float16,float16,0,1.0788319905598958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,8,64,0,1,fp8,fp8,0,1.8692213694254558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,24,64,128,1,float16,fp8,0,0.8530773321787516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,24,64,128,1,fp8,fp8,0,1.1118133068084717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,1,64,128,1,float16,float16,0,0.7603733539581299
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,24,64,0,1,float16,fp8,0,1.065951983133952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,24,64,0,1,fp8,fp8,0,1.0206879774729412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,1,64,128,1,float16,fp8,0,0.7473440170288086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,1,64,0,1,float16,float16,0,0.9838666915893555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,1,64,128,1,fp8,fp8,0,0.9919573465983073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,2,64,128,1,float16,float16,0,0.768666664759318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,1,64,0,1,fp8,fp8,0,0.8956106503804525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,1,64,0,1,float16,fp8,0,0.9659466743469238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,2,64,128,1,float16,fp8,0,0.7571039994557699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,2,64,0,1,float16,float16,0,0.9927573204040527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,2,64,128,1,fp8,fp8,0,1.0014399687449138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,2,64,0,1,float16,fp8,0,0.9802133242289225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,2,64,0,1,fp8,fp8,0,0.9064373175303141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,4,64,128,1,float16,float16,0,0.7767253716786703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,4,64,0,1,float16,float16,0,0.9945013523101807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,4,64,0,1,float16,fp8,0,0.9868746598561605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,4,64,128,1,float16,fp8,0,0.7674880027770996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,4,64,0,1,fp8,fp8,0,0.9221440156300863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,4,64,128,1,fp8,fp8,0,1.0136480331420898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,8,64,128,1,float16,float16,0,0.7912267049153646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,8,64,128,1,float16,fp8,0,0.7870506445566813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,8,64,0,1,float16,float16,0,1.0067520141601562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,8,64,128,1,fp8,fp8,0,1.030618667602539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,8,64,0,1,float16,fp8,0,1.0044053395589192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,24,64,128,1,float16,float16,0,0.4384053150812785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,8,64,0,1,fp8,fp8,0,0.938202699025472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,24,64,0,1,float16,float16,0,0.5500373442967733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,1,64,0,1,float16,float16,0,0.5088266531626383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,24,64,128,1,float16,fp8,0,0.43621333440144855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,24,64,128,1,fp8,fp8,0,0.5687093337376913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,24,64,0,1,float16,fp8,0,0.546288013458252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,24,64,0,1,fp8,fp8,0,0.5179413159688314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,1,64,128,1,float16,float16,0,0.39527467886606854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,1,64,128,1,float16,fp8,0,0.3885600169499715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,1,64,128,1,fp8,fp8,0,0.516432007153829
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,1,64,0,1,float16,fp8,0,0.5011253356933594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,1,64,0,1,fp8,fp8,0,0.46512532234191895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,2,64,128,1,float16,float16,0,0.39764265219370526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,2,64,0,1,float16,float16,0,0.5076106786727905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,2,64,128,1,float16,fp8,0,0.39393067359924316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,2,64,128,1,fp8,fp8,0,0.5216586589813232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,2,64,0,1,float16,fp8,0,0.5026506582895914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,2,64,0,1,fp8,fp8,0,0.4726826747258504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,4,64,128,1,float16,float16,0,0.4025706847508748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,4,64,0,1,float16,float16,0,0.5131040016810099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,4,64,128,1,float16,fp8,0,0.39792001247406006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,4,64,128,1,fp8,fp8,0,0.5243733326594034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,4,64,0,1,float16,fp8,0,0.5069599946339926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,4,64,0,1,fp8,fp8,0,0.4759840170542399
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,8,64,128,1,float16,float16,0,0.40940264860788983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,8,64,0,1,float16,float16,0,0.5213653246561686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,8,64,128,1,float16,fp8,0,0.4081386725107829
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,8,64,128,1,fp8,fp8,0,0.5344213247299194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,8,64,0,1,float16,fp8,0,0.5168533325195312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,8,64,0,1,fp8,fp8,0,0.4841759999593099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,1,64,128,1,float16,float16,0,0.21335999170939127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,24,64,128,1,float16,float16,0,0.234442671140035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,24,64,0,1,float16,float16,0,0.29127466678619385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,24,64,128,1,float16,fp8,0,0.2347253362337748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,24,64,128,1,fp8,fp8,0,0.30849067370096844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,24,64,0,1,float16,fp8,0,0.29318400224049884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,24,64,0,1,fp8,fp8,0,0.2703146735827128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,1,64,0,1,float16,float16,0,0.2713013291358948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,1,64,128,1,float16,fp8,0,0.21019200483957926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,1,64,128,1,fp8,fp8,0,0.28179200490315753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,1,64,0,1,float16,fp8,0,0.2677173415819804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,1,64,0,1,fp8,fp8,0,0.24291733900705972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,2,64,128,1,float16,float16,0,0.2156533400217692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,2,64,0,1,float16,float16,0,0.27274133761723834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,2,64,128,1,float16,fp8,0,0.21136534214019775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,2,64,128,1,fp8,fp8,0,0.2841599980990092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,2,64,0,1,float16,fp8,0,0.26921067635218304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,2,64,0,1,fp8,fp8,0,0.24737600485483804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,4,64,128,1,float16,float16,0,0.21704000234603882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,4,64,0,1,float16,float16,0,0.2748426596323649
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,4,64,128,1,float16,fp8,0,0.21549334128697714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,4,64,128,1,fp8,fp8,0,0.28544533252716064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,4,64,0,1,float16,fp8,0,0.27216533819834393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,4,64,0,1,fp8,fp8,0,0.24952000379562378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,8,64,128,1,float16,float16,0,0.2204266587893168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,8,64,0,1,float16,float16,0,0.2786133289337158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,8,64,128,1,float16,fp8,0,0.2198986609776815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,8,64,128,1,fp8,fp8,0,0.2916959921518962
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,8,64,0,1,float16,fp8,0,0.27856000264485675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,8,64,0,1,fp8,fp8,0,0.2534186641375224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,24,64,128,1,float16,float16,0,0.13221866885821024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,24,64,0,1,float16,float16,0,0.15652799606323242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,24,64,128,1,float16,fp8,0,0.1344053347905477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,24,64,128,1,fp8,fp8,0,0.17630932728449503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,24,64,0,1,float16,fp8,0,0.15622933705647787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,24,64,0,1,fp8,fp8,0,0.14949867129325867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,1,64,128,1,float16,float16,0,0.12138133247693379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,1,64,0,1,float16,float16,0,0.14307199915250143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,1,64,128,1,float16,fp8,0,0.1195146640141805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,1,64,128,1,fp8,fp8,0,0.1600053310394287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,1,64,0,1,float16,fp8,0,0.1402346690495809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,1,64,0,1,fp8,fp8,0,0.13457066814104715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,2,64,128,1,float16,float16,0,0.12180266777674358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,2,64,0,1,float16,float16,0,0.1430400013923645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,2,64,128,1,float16,fp8,0,0.12059199810028076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,2,64,128,1,fp8,fp8,0,0.16294933358828226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,2,64,0,1,float16,fp8,0,0.14095999797185263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,2,64,0,1,fp8,fp8,0,0.13453333576520285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,4,64,128,1,float16,float16,0,0.12310399611790974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,8,64,128,1,float16,float16,0,0.12516799569129944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,4,64,0,1,fp8,fp8,0,0.1379200021425883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,4,64,0,1,float16,float16,0,0.14355732997258505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,8,64,128,1,fp8,fp8,0,0.1674399971961975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,4,64,128,1,float16,fp8,0,0.12089600165685017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,4,64,128,1,fp8,fp8,0,0.16445866227149963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,4,64,0,1,float16,fp8,0,0.14300800363222757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,8,64,0,1,float16,float16,0,0.14634133378664652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,8,64,128,1,float16,fp8,0,0.12532800436019897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,8,64,0,1,float16,fp8,0,0.14475199580192566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,8,64,0,1,fp8,fp8,0,0.14073066910107931
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,24,64,128,1,float16,float16,0,0.08052266637484233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,24,64,0,1,float16,float16,0,0.09025599559148152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,24,64,128,1,float16,fp8,0,0.08120533327261607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,24,64,128,1,fp8,fp8,0,0.10997333129247029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,24,64,0,1,float16,fp8,0,0.09085866808891296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,24,64,0,1,fp8,fp8,0,0.09004799524943034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,1,64,128,1,float16,float16,0,0.0735040009021759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,1,64,0,1,float16,float16,0,0.08496000369389851
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,1,64,128,1,float16,fp8,0,0.0739573339621226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,1,64,128,1,fp8,fp8,0,0.09398399790128072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,1,64,0,1,float16,fp8,0,0.08462933699289958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,1,64,0,1,fp8,fp8,0,0.08055466910203297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,2,64,128,1,float16,float16,0,0.07338666419188182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,2,64,0,1,float16,float16,0,0.08571199576059978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,2,64,128,1,float16,fp8,0,0.07382399837176006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,2,64,128,1,fp8,fp8,0,0.09401599566141765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,2,64,0,1,float16,fp8,0,0.08553066849708557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,2,64,0,1,fp8,fp8,0,0.08041599889596303
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,4,64,128,1,float16,float16,0,0.07332266867160797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,4,64,0,1,float16,float16,0,0.08542933066685994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,4,64,128,1,float16,fp8,0,0.07341333230336507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,4,64,128,1,fp8,fp8,0,0.0944160024325053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,4,64,0,1,float16,fp8,0,0.0849120020866394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,4,64,0,1,fp8,fp8,0,0.08045333127180736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,8,64,0,1,fp8,fp8,0,0.08261333405971527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,8,64,128,1,float16,float16,0,0.07481066882610321
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,8,64,0,1,float16,float16,0,0.08573333422342937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,8,64,128,1,float16,fp8,0,0.07407466570536296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,8,64,128,1,fp8,fp8,0,0.09855467081069946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,8,64,0,1,float16,fp8,0,0.08627733588218689
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,24,64,128,1,float16,float16,0,0.05820799867312113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,24,64,0,1,float16,float16,0,0.06402133405208588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,24,64,128,1,float16,fp8,0,0.05807999769846598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,1,64,128,1,float16,fp8,0,0.05776533484458923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,24,64,128,1,fp8,fp8,0,0.06887466708819072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,24,64,0,1,float16,fp8,0,0.06401066482067108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,24,64,0,1,fp8,fp8,0,0.06011733412742615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,1,64,128,1,float16,float16,0,0.05789333085219065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,1,64,0,1,float16,float16,0,0.063701331615448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,2,64,128,1,float16,fp8,0,0.0580266664425532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,2,64,0,1,float16,fp8,0,0.06379733482996623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,1,64,128,1,fp8,fp8,0,0.06789333124955495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,1,64,0,1,float16,fp8,0,0.06359466910362244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,1,64,0,1,fp8,fp8,0,0.06022400160630544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,2,64,128,1,float16,float16,0,0.0577706644932429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,2,64,0,1,float16,float16,0,0.0635040005048116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,2,64,128,1,fp8,fp8,0,0.06811733543872833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,2,64,0,1,fp8,fp8,0,0.060191998879114784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,4,64,128,1,float16,float16,0,0.058037335673967995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,4,64,0,1,float16,float16,0,0.0632533331712087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,4,64,128,1,float16,fp8,0,0.05798399945100149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,4,64,128,1,fp8,fp8,0,0.06804800033569336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,4,64,0,1,float16,fp8,0,0.063701331615448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,4,64,0,1,fp8,fp8,0,0.06005866825580597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,8,64,0,1,fp8,fp8,0,0.060506666700045265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,8,64,128,1,float16,float16,0,0.05841066439946493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,1,64,128,1,float16,float16,0,1.8496853510538738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,8,64,0,1,float16,float16,0,0.06358399987220764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,8,64,128,1,float16,fp8,0,0.058330665032068886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,8,64,128,1,fp8,fp8,0,0.06824000179767609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,8,64,0,1,float16,fp8,0,0.06363200147946675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,1,64,0,1,float16,float16,0,2.100154717763265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,1,64,128,1,float16,fp8,0,1.8396906852722168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,1,64,128,1,fp8,fp8,0,2.446565310160319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,1,64,0,1,float16,fp8,0,2.0983306566874185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,1,64,0,1,fp8,fp8,0,1.9026293754577637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,2,64,128,1,float16,float16,0,1.8641972541809082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,2,64,0,1,float16,float16,0,2.1259306271870932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,2,64,128,1,float16,fp8,0,1.8602186838785808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,2,64,128,1,fp8,fp8,0,2.4730025927225747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,2,64,0,1,fp8,fp8,0,1.9455413818359375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,2,64,0,1,float16,fp8,0,2.121888001759847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,4,64,128,1,float16,float16,0,1.9080746968587239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,4,64,0,1,float16,float16,0,2.1572532653808594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,4,64,128,1,float16,fp8,0,1.9177385965983074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,4,64,128,1,fp8,fp8,0,2.5796000162760415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,4,64,0,1,float16,fp8,0,2.1634666124979653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,8,64,128,1,float16,float16,0,1.9479680061340332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,4,64,0,1,fp8,fp8,0,2.0431092580159507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,8,64,0,1,float16,float16,0,2.202768007914225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,8,64,128,1,float16,fp8,0,1.9407307306925456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,8,64,128,1,fp8,fp8,0,2.555583953857422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,8,64,0,1,float16,fp8,0,2.2128960291544595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,8,64,0,1,fp8,fp8,0,2.0323146184285483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,24,64,128,1,float16,float16,0,1.0235146681467693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,1,64,128,1,float16,float16,0,0.9375786781311035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,24,64,0,1,float16,float16,0,1.1429973443349202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,24,64,128,1,float16,fp8,0,1.0065813064575195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,24,64,128,1,fp8,fp8,0,1.3144480387369792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,24,64,0,1,float16,fp8,0,1.1319146951039631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,24,64,0,1,fp8,fp8,0,1.0399359862009685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,1,64,0,1,float16,float16,0,1.0598773161570232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,1,64,128,1,float16,fp8,0,0.9312160015106201
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,1,64,128,1,fp8,fp8,0,1.2380106449127197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,1,64,0,1,float16,fp8,0,1.0599253177642822
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,1,64,0,1,fp8,fp8,0,0.9658133188883463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,2,64,128,1,float16,float16,0,0.9437066713968912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,2,64,0,1,float16,float16,0,1.067733367284139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,2,64,128,1,float16,fp8,0,0.943178653717041
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,2,64,0,1,float16,fp8,0,1.0698400338490803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,2,64,128,1,fp8,fp8,0,1.2534613609313965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,2,64,0,1,fp8,fp8,0,0.9815573692321777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,4,64,128,1,float16,float16,0,0.9616320133209229
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,4,64,0,1,float16,float16,0,1.0924800237019856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,4,64,128,1,float16,fp8,0,0.9638933340708414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,4,64,128,1,fp8,fp8,0,1.2957706451416016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,4,64,0,1,float16,fp8,0,1.0881120363871257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,4,64,0,1,fp8,fp8,0,1.023909330368042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,8,64,128,1,float16,fp8,0,0.9697706699371338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,8,64,128,1,float16,float16,0,0.9681599934895834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,8,64,0,1,float16,float16,0,1.0999360084533691
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,8,64,128,1,fp8,fp8,0,1.287882645924886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,24,64,128,1,float16,float16,0,0.5230026642481486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,8,64,0,1,float16,fp8,0,1.0998186270395915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,8,64,0,1,fp8,fp8,0,1.0132799943288167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,24,64,128,1,float16,fp8,0,0.5168533325195312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,24,64,0,1,float16,float16,0,0.5846666495005289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,1,64,0,1,float16,float16,0,0.5454826752344767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,24,64,128,1,fp8,fp8,0,0.6795360247294108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,24,64,0,1,float16,fp8,0,0.583461324373881
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,24,64,0,1,fp8,fp8,0,0.5319360097249349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,1,64,128,1,float16,float16,0,0.4806613524754842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,1,64,128,1,float16,fp8,0,0.481663982073466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,1,64,128,1,fp8,fp8,0,0.6344053347905477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,1,64,0,1,float16,fp8,0,0.5424000024795532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,1,64,0,1,fp8,fp8,0,0.49669865767161053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,2,64,128,1,float16,float16,0,0.48501332600911456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,2,64,0,1,float16,float16,0,0.5511519908905029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,2,64,128,1,float16,fp8,0,0.4846133391062419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,2,64,128,1,fp8,fp8,0,0.6410186688105265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,2,64,0,1,float16,fp8,0,0.5484053293863932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,2,64,0,1,fp8,fp8,0,0.49945068359375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,4,64,128,1,float16,float16,0,0.49299200375874835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,4,64,0,1,float16,float16,0,0.5607893466949463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,4,64,128,1,float16,fp8,0,0.4936319986979167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,4,64,128,1,fp8,fp8,0,0.6658240159352621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,4,64,0,1,float16,fp8,0,0.5591040054957072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,4,64,0,1,fp8,fp8,0,0.520250678062439
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,8,64,128,1,float16,float16,0,0.4956586758295695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,8,64,0,1,float16,float16,0,0.5589333375295004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,24,64,0,1,float16,float16,0,0.3095466693242391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,8,64,128,1,float16,fp8,0,0.4973546663920085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,8,64,128,1,fp8,fp8,0,0.6621013482411703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,8,64,0,1,float16,fp8,0,0.5629760026931763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,24,64,128,1,float16,float16,0,0.27750933170318604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,8,64,0,1,fp8,fp8,0,0.5189599990844727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,24,64,128,1,float16,fp8,0,0.27432533105214435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,24,64,128,1,fp8,fp8,0,0.3526080052057902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,24,64,0,1,float16,fp8,0,0.30717867612838745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,24,64,0,1,fp8,fp8,0,0.27079999446868896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,1,64,128,1,float16,float16,0,0.252074658870697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,1,64,0,1,float16,float16,0,0.28814399242401123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,1,64,128,1,float16,fp8,0,0.2521066665649414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,1,64,128,1,fp8,fp8,0,0.3369226853052775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,1,64,0,1,float16,fp8,0,0.2847893238067627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,1,64,0,1,fp8,fp8,0,0.25279466311136883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,2,64,128,1,float16,float16,0,0.25417067607243854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,2,64,0,1,float16,float16,0,0.28775999943415326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,2,64,128,1,float16,fp8,0,0.2557706634203593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,2,64,128,1,fp8,fp8,0,0.33842666943868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,2,64,0,1,float16,fp8,0,0.28732800483703613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,2,64,0,1,fp8,fp8,0,0.2556160092353821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,4,64,128,1,float16,float16,0,0.25867732365926105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,4,64,0,1,float16,float16,0,0.29203200340270996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,4,64,128,1,float16,fp8,0,0.25987199942270917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,4,64,128,1,fp8,fp8,0,0.344218651453654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,4,64,0,1,float16,fp8,0,0.2923520008722941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,4,64,0,1,fp8,fp8,0,0.26190932591756183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,8,64,128,1,float16,float16,0,0.260042667388916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,8,64,0,1,float16,float16,0,0.29316266377766925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,8,64,128,1,float16,fp8,0,0.25921066602071124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,8,64,128,1,fp8,fp8,0,0.3445599873860677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,8,64,0,1,float16,fp8,0,0.2945493261019389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,8,64,0,1,fp8,fp8,0,0.26181334257125854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,24,64,128,1,float16,float16,0,0.15398400028546652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,24,64,0,1,float16,float16,0,0.16697067022323608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,24,64,128,1,float16,fp8,0,0.15204800168673197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,24,64,128,1,fp8,fp8,0,0.19497066736221313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,24,64,0,1,float16,fp8,0,0.16566399733225504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,24,64,0,1,fp8,fp8,0,0.1465013325214386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,1,64,128,1,float16,float16,0,0.1393173336982727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,1,64,0,1,float16,float16,0,0.15127467115720114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,1,64,128,1,float16,fp8,0,0.13985600074132284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,1,64,128,1,fp8,fp8,0,0.1846026579538981
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,1,64,0,1,float16,fp8,0,0.15064533551534018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,1,64,0,1,fp8,fp8,0,0.13860799868901572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,2,64,128,1,float16,float16,0,0.14010666807492575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,2,64,0,1,float16,float16,0,0.15288000305493674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,2,64,128,1,float16,fp8,0,0.14099199573198953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,2,64,128,1,fp8,fp8,0,0.1875306765238444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,2,64,0,1,float16,fp8,0,0.1527733306090037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,2,64,0,1,fp8,fp8,0,0.139082670211792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,4,64,128,1,float16,float16,0,0.14379733800888062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,4,64,0,1,float16,float16,0,0.15544533729553223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,4,64,128,1,float16,fp8,0,0.14485866824785867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,4,64,128,1,fp8,fp8,0,0.19020267327626547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,4,64,0,1,float16,fp8,0,0.15618667006492615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,4,64,0,1,fp8,fp8,0,0.14140799641609192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,8,64,128,1,float16,float16,0,0.14479999740918478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,8,64,0,1,float16,float16,0,0.156031996011734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,8,64,128,1,float16,fp8,0,0.14552533626556396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,8,64,128,1,fp8,fp8,0,0.1904053290685018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,8,64,0,1,fp8,fp8,0,0.14388799667358398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,8,64,0,1,float16,fp8,0,0.1565600037574768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,24,64,128,1,float16,float16,0,0.08975999553998311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,24,64,0,1,float16,float16,0,0.09327466289202373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,24,64,128,1,float16,fp8,0,0.08929066856702168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,24,64,128,1,fp8,fp8,0,0.11514666676521301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,24,64,0,1,float16,fp8,0,0.09240532914797465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,24,64,0,1,fp8,fp8,0,0.08524266878763835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,1,64,128,1,float16,float16,0,0.078015998005867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,1,64,0,1,float16,float16,0,0.08478933572769165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,1,64,128,1,float16,fp8,0,0.07797866563002269
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,1,64,128,1,fp8,fp8,0,0.10249066352844238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,2,64,0,1,float16,fp8,0,0.08524266878763835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,1,64,0,1,float16,fp8,0,0.08416533470153809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,1,64,0,1,fp8,fp8,0,0.07671999931335449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,2,64,128,1,float16,float16,0,0.07947733501593272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,2,64,0,1,float16,float16,0,0.08535466591517131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,2,64,128,1,float16,fp8,0,0.07980266710122426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,2,64,128,1,fp8,fp8,0,0.10470933715502422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,2,64,0,1,fp8,fp8,0,0.07814399898052216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,4,64,128,1,float16,float16,0,0.08088000118732452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,4,64,0,1,float16,float16,0,0.08642666538556416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,4,64,128,1,float16,fp8,0,0.0817440003156662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,4,64,128,1,fp8,fp8,0,0.10757866501808167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,4,64,0,1,float16,fp8,0,0.08737599849700928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,4,64,0,1,fp8,fp8,0,0.07979733248551686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,8,64,128,1,float16,float16,0,0.08221333225568135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,8,64,0,1,float16,float16,0,0.08754666646321614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,8,64,128,1,float16,fp8,0,0.08274133503437042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,8,64,128,1,fp8,fp8,0,0.10966400305430095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,8,64,0,1,float16,fp8,0,0.0886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,8,64,0,1,fp8,fp8,0,0.08065600196520488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,24,64,128,1,float16,float16,0,0.05269333223501841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,24,64,0,1,float16,float16,0,0.054714664816856384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,24,64,128,1,float16,fp8,0,0.0529120018084844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,24,64,128,1,fp8,fp8,0,0.0693333347638448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,24,64,0,1,float16,fp8,0,0.05403733253479004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,24,64,0,1,fp8,fp8,0,0.050800000627835594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,1,64,128,1,float16,float16,0,0.04942933221658071
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,1,64,0,1,float16,float16,0,0.05170666674772898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,1,64,128,1,float16,fp8,0,0.04983466863632202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,1,64,128,1,fp8,fp8,0,0.06666133304437001
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,1,64,0,1,float16,fp8,0,0.051274667183558144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,1,64,0,1,fp8,fp8,0,0.04736533264319102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,2,64,128,1,float16,float16,0,0.050186668833096824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,2,64,0,1,float16,float16,0,0.0518453319867452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,2,64,128,1,float16,fp8,0,0.04964800179004669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,4,64,128,1,fp8,fp8,0,0.06625066697597504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,2,64,128,1,fp8,fp8,0,0.0666240006685257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,2,64,0,1,float16,fp8,0,0.051818668842315674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,2,64,0,1,fp8,fp8,0,0.0492799977461497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,4,64,128,1,float16,float16,0,0.05138133466243744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,4,64,0,1,float16,float16,0,0.05178666611512502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,4,64,128,1,float16,fp8,0,0.05100800096988678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,4,64,0,1,float16,fp8,0,0.05231466889381409
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,4,64,0,1,fp8,fp8,0,0.049733335773150124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,8,64,128,1,float16,float16,0,0.051882664362589516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,8,64,0,1,float16,float16,0,0.0536053329706192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,8,64,128,1,float16,fp8,0,0.05203733344872793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,8,64,128,1,fp8,fp8,0,0.06549866497516632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,8,64,0,1,float16,fp8,0,0.053727999329566956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,8,64,0,1,fp8,fp8,0,0.04897066454092661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,24,64,128,1,float16,float16,0,0.0379573330283165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,24,64,0,1,float16,float16,0,0.03774933268626531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,24,64,128,1,float16,fp8,0,0.03777066618204117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,24,64,128,1,fp8,fp8,0,0.04334400097529093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,24,64,0,1,float16,fp8,0,0.037733333806196846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,1,64,128,1,fp8,fp8,0,0.04256533086299896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,24,64,0,1,fp8,fp8,0,0.035232000052928925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,1,64,128,1,float16,float16,0,0.03851199895143509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,1,64,0,1,float16,float16,0,0.03570666660865148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,1,64,128,1,float16,fp8,0,0.036101333796978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,1,64,0,1,float16,fp8,0,0.03591466695070267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,1,64,0,1,fp8,fp8,0,0.03405333310365677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,2,64,128,1,float16,float16,0,0.03626133253177007
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,2,64,0,1,float16,float16,0,0.035775999228159584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,2,64,128,1,float16,fp8,0,0.0360959991812706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,4,64,0,1,float16,float16,0,0.03656533360481262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,2,64,128,1,fp8,fp8,0,0.04274133344491323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,2,64,0,1,float16,fp8,0,0.03669866671164831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,2,64,0,1,fp8,fp8,0,0.03419733295838038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,4,64,128,1,float16,float16,0,0.03846399982770284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,4,64,128,1,float16,fp8,0,0.03613866617282232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,4,64,128,1,fp8,fp8,0,0.04322666426499685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,4,64,0,1,float16,fp8,0,0.03681066632270813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,4,64,0,1,fp8,fp8,0,0.03522666543722153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,8,64,128,1,float16,float16,0,0.037018666664759316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,8,64,0,1,float16,float16,0,0.037178667883078255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,8,64,128,1,float16,fp8,0,0.03686933219432831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,1,64,128,1,float16,float16,0,1.7466932932535808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,8,64,128,1,fp8,fp8,0,0.04348266621430715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,8,64,0,1,float16,fp8,0,0.03686933219432831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,8,64,0,1,fp8,fp8,0,0.03502399971087774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,1,64,0,1,float16,float16,0,1.7218987147013347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,1,64,128,1,float16,fp8,0,1.7434773445129395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,1,64,0,1,float16,fp8,0,1.7208479245503743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,1,64,128,1,fp8,fp8,0,2.3031999270121255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,1,64,0,1,fp8,fp8,0,1.5485547383626301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,2,64,128,1,float16,float16,0,1.7665972709655762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,2,64,0,1,float16,float16,0,1.7595252990722656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,2,64,128,1,float16,fp8,0,1.7714667320251465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,2,64,128,1,fp8,fp8,0,2.326869328816732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,2,64,0,1,float16,fp8,0,1.7432799339294434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,2,64,0,1,fp8,fp8,0,1.5887200037638347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,4,64,128,1,float16,float16,0,1.816090742746989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,4,64,0,1,float16,float16,0,1.799034595489502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,4,64,128,1,float16,fp8,0,1.8091999689737956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,4,64,0,1,float16,fp8,0,1.7936639785766602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,4,64,128,1,fp8,fp8,0,2.4316959381103516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,4,64,0,1,fp8,fp8,0,1.6607519785563152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,8,64,128,1,float16,float16,0,1.846549352010091
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,8,64,0,1,float16,float16,0,1.834320068359375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,8,64,128,1,float16,fp8,0,1.8535572687784831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,8,64,128,1,fp8,fp8,0,2.41646401087443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,8,64,0,1,float16,fp8,0,1.8316906293233235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,24,64,128,1,float16,float16,0,0.9788213570912679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,24,64,0,1,float16,float16,0,0.9585386912027994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,8,64,0,1,fp8,fp8,0,1.6665172576904297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,24,64,128,1,float16,fp8,0,0.9571626981099447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,24,64,128,1,fp8,fp8,0,1.2422293027242024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,24,64,0,1,float16,fp8,0,0.94541335105896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,24,64,0,1,fp8,fp8,0,0.8570346832275391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,1,64,128,1,float16,float16,0,0.8858826955159506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,1,64,0,1,float16,float16,0,0.8748266696929932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,1,64,128,1,float16,fp8,0,0.8862613042195638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,1,64,128,1,fp8,fp8,0,1.1599093278249104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,1,64,0,1,float16,fp8,0,0.8714133103688558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,1,64,0,1,fp8,fp8,0,0.7791146437327067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,2,64,128,1,float16,float16,0,0.8940746784210205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,2,64,0,1,float16,float16,0,0.882042646408081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,2,64,128,1,float16,fp8,0,0.8927413622538248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,2,64,128,1,fp8,fp8,0,1.18558931350708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,2,64,0,1,float16,fp8,0,0.8787786960601807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,2,64,0,1,fp8,fp8,0,0.7946720123291016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,4,64,0,1,float16,fp8,0,0.9032373428344727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,4,64,128,1,float16,float16,0,0.9150239626566569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,4,64,0,1,float16,float16,0,0.9079679648081461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,4,64,128,1,float16,fp8,0,0.9146773020426432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,4,64,128,1,fp8,fp8,0,1.2217013041178386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,4,64,0,1,fp8,fp8,0,0.8417812983194987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,8,64,128,1,float16,float16,0,0.9175946712493896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,8,64,0,1,float16,float16,0,0.9107999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,8,64,128,1,float16,fp8,0,0.9223466714223226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,8,64,128,1,fp8,fp8,0,1.2176480293273926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,8,64,0,1,float16,fp8,0,0.9124639828999838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,8,64,0,1,fp8,fp8,0,0.8333333333333334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,24,64,128,1,float16,float16,0,0.5017120043436686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,24,64,0,1,float16,float16,0,0.4938933451970418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,24,64,128,1,float16,fp8,0,0.49454398949941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,24,64,128,1,fp8,fp8,0,0.6357866525650024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,24,64,0,1,float16,fp8,0,0.48425066471099854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,24,64,0,1,fp8,fp8,0,0.43644265333811444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,1,64,128,1,float16,float16,0,0.45748265584309894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,1,64,0,1,float16,float16,0,0.44926400979359943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,1,64,128,1,float16,fp8,0,0.45744001865386963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,1,64,128,1,fp8,fp8,0,0.6017920176188151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,1,64,0,1,float16,fp8,0,0.4482986529668172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,1,64,0,1,fp8,fp8,0,0.40115733941396076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,2,64,128,1,float16,float16,0,0.4611999988555908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,2,64,0,1,float16,float16,0,0.4525279998779297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,2,64,128,1,float16,fp8,0,0.46299731731414795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,2,64,128,1,fp8,fp8,0,0.6077333291371664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,2,64,0,1,float16,fp8,0,0.4535573323567708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,2,64,0,1,fp8,fp8,0,0.40747201442718506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,4,64,128,1,float16,float16,0,0.47095998128255206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,4,64,0,1,float16,float16,0,0.462282657623291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,4,64,128,1,float16,fp8,0,0.4710986614227295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,4,64,128,1,fp8,fp8,0,0.6222240130106608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,4,64,0,1,float16,fp8,0,0.4623519976933797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,4,64,0,1,fp8,fp8,0,0.42770667870839435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,8,64,128,1,float16,float16,0,0.47419734795888263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,8,64,0,1,float16,fp8,0,0.4636533260345459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,8,64,0,1,float16,float16,0,0.46353598435719806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,8,64,128,1,float16,fp8,0,0.4725653330485026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,8,64,128,1,fp8,fp8,0,0.6203946669896444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,24,64,128,1,float16,float16,0,0.26765867074330646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,24,64,0,1,float16,fp8,0,0.2564693291982015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,8,64,0,1,fp8,fp8,0,0.42420800526936847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,24,64,0,1,float16,float16,0,0.2603893280029297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,24,64,128,1,float16,fp8,0,0.26414400339126587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,24,64,128,1,fp8,fp8,0,0.33293332656224567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,24,64,0,1,fp8,fp8,0,0.22470933198928833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,1,64,128,1,float16,float16,0,0.2421600023905436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,1,64,0,1,float16,float16,0,0.23549334208170572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,1,64,128,1,float16,fp8,0,0.24276266495386759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,2,64,128,1,float16,fp8,0,0.24313066403071085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,1,64,128,1,fp8,fp8,0,0.3150720000267029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,1,64,0,1,float16,fp8,0,0.23386667172114053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,1,64,0,1,fp8,fp8,0,0.20860799153645834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,2,64,128,1,float16,float16,0,0.24158932765324911
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,4,64,0,1,float16,float16,0,0.24179200331370035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,2,64,0,1,float16,float16,0,0.2379680077234904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,2,64,128,1,fp8,fp8,0,0.319050669670105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,2,64,0,1,float16,fp8,0,0.2369920015335083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,2,64,0,1,fp8,fp8,0,0.21069333950678507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,4,64,128,1,float16,float16,0,0.2466986576716105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,4,64,128,1,float16,fp8,0,0.24713067213694254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,4,64,128,1,fp8,fp8,0,0.3234826723734538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,4,64,0,1,float16,fp8,0,0.240447998046875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,4,64,0,1,fp8,fp8,0,0.21617066860198975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,8,64,128,1,float16,float16,0,0.2481173276901245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,8,64,0,1,float16,float16,0,0.24355733394622803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,8,64,128,1,float16,fp8,0,0.248906672000885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,8,64,128,1,fp8,fp8,0,0.32316267490386963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,8,64,0,1,float16,fp8,0,0.24361066023508707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,8,64,0,1,fp8,fp8,0,0.21478400627772012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,24,64,128,1,float16,float16,0,0.14784533778826395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,24,64,0,1,float16,float16,0,0.14407466848691305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,24,64,128,1,float16,fp8,0,0.14636799693107605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,24,64,128,1,fp8,fp8,0,0.18705600500106812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,24,64,0,1,float16,fp8,0,0.14246400197347006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,24,64,0,1,fp8,fp8,0,0.12481066584587097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,1,64,128,1,float16,float16,0,0.1330880026022593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,1,64,0,1,float16,float16,0,0.12708800037701926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,2,64,0,1,float16,float16,0,0.1295413374900818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,2,64,128,1,float16,fp8,0,0.1349440018335978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,1,64,128,1,float16,fp8,0,0.13411200046539307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,1,64,128,1,fp8,fp8,0,0.17629865805308023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,1,64,0,1,float16,fp8,0,0.1267626682917277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,1,64,0,1,fp8,fp8,0,0.11588799953460693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,2,64,128,1,float16,float16,0,0.13497066497802734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,2,64,128,1,fp8,fp8,0,0.17748266458511353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,2,64,0,1,float16,fp8,0,0.12915733456611633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,4,64,0,1,float16,fp8,0,0.1309333344300588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,2,64,0,1,fp8,fp8,0,0.11738133430480957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,4,64,128,1,float16,float16,0,0.13726933797200522
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,4,64,0,1,float16,float16,0,0.13124799728393555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,4,64,128,1,float16,fp8,0,0.13843733072280884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,4,64,128,1,fp8,fp8,0,0.18085867166519165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,4,64,0,1,fp8,fp8,0,0.11859200398127238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,8,64,0,1,float16,float16,0,0.13313600420951843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,8,64,128,1,float16,float16,0,0.13961066802342734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,8,64,128,1,float16,fp8,0,0.1404266655445099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,8,64,128,1,fp8,fp8,0,0.17947733402252197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,8,64,0,1,float16,fp8,0,0.13225600123405457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,8,64,0,1,fp8,fp8,0,0.1199733316898346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,24,64,128,1,float16,float16,0,0.08754666646321614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,24,64,0,1,float16,float16,0,0.08018133540948232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,24,64,128,1,float16,fp8,0,0.08595200379689534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,24,64,128,1,fp8,fp8,0,0.10934933026631673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,24,64,0,1,float16,fp8,0,0.07978666822115581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,1,64,0,1,float16,fp8,0,0.07162133355935414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,24,64,0,1,fp8,fp8,0,0.07327466706434886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,1,64,128,1,float16,float16,0,0.07513066629568736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,1,64,0,1,float16,float16,0,0.07173333565394084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,1,64,128,1,float16,fp8,0,0.07688533266385396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,1,64,128,1,fp8,fp8,0,0.09691199660301208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,1,64,0,1,fp8,fp8,0,0.06562133133411407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,2,64,128,1,float16,float16,0,0.07716266810894012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,2,64,0,1,float16,float16,0,0.07319466769695282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,2,64,128,1,float16,fp8,0,0.07671999931335449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,2,64,128,1,fp8,fp8,0,0.0997973382472992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,2,64,0,1,float16,fp8,0,0.07236266632874806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,2,64,0,1,fp8,fp8,0,0.06639466683069865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,4,64,128,1,float16,float16,0,0.07925866544246674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,4,64,0,1,float16,float16,0,0.07388799885908763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,4,64,128,1,float16,fp8,0,0.07815466821193695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,8,64,0,1,float16,float16,0,0.07442133128643036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,4,64,128,1,fp8,fp8,0,0.10068800052007039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,4,64,0,1,float16,fp8,0,0.07494933406511943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,4,64,0,1,fp8,fp8,0,0.06743999818960826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,8,64,128,1,float16,float16,0,0.08080000181992848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,8,64,128,1,float16,fp8,0,0.08105066418647766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,8,64,128,1,fp8,fp8,0,0.10467732946077983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,8,64,0,1,float16,fp8,0,0.07491733133792877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,8,64,0,1,fp8,fp8,0,0.06900799771149953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,24,64,128,1,float16,float16,0,0.0525439977645874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,24,64,0,1,float16,float16,0,0.0473280002673467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,24,64,128,1,float16,fp8,0,0.052149335543314614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,1,64,0,1,float16,float16,0,0.04460800190766653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,24,64,128,1,fp8,fp8,0,0.0674186646938324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,24,64,0,1,float16,fp8,0,0.047413334250450134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,24,64,0,1,fp8,fp8,0,0.04355733096599579
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,1,64,128,1,float16,float16,0,0.048810665806134544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,1,64,128,1,float16,fp8,0,0.05268799761931101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,1,64,128,1,fp8,fp8,0,0.06279466549555461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,1,64,0,1,float16,fp8,0,0.045007998744646706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,1,64,0,1,fp8,fp8,0,0.04223466912905375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,2,64,128,1,float16,float16,0,0.04931733508904775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,2,64,0,1,float16,float16,0,0.04465599854787191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,2,64,128,1,float16,fp8,0,0.049626668294270836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,2,64,128,1,fp8,fp8,0,0.06246933341026306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,2,64,0,1,float16,fp8,0,0.044863998889923096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,2,64,0,1,fp8,fp8,0,0.04214933514595032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,4,64,128,1,float16,float16,0,0.05055999755859375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,4,64,0,1,float16,float16,0,0.046351999044418335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,4,64,128,1,float16,fp8,0,0.05056533217430115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,4,64,128,1,fp8,fp8,0,0.06438399851322174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,4,64,0,1,float16,fp8,0,0.04630400240421295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,4,64,0,1,fp8,fp8,0,0.04257600009441376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,8,64,128,1,float16,float16,0,0.051167999704678856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,8,64,0,1,float16,float16,0,0.046629334489504494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,8,64,128,1,float16,fp8,0,0.05106666684150696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,8,64,128,1,fp8,fp8,0,0.06304533282915752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,8,64,0,1,float16,fp8,0,0.04692799846331278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,8,64,0,1,fp8,fp8,0,0.042768001556396484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,24,64,128,1,float16,float16,0,0.03736000011364619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,24,64,0,1,float16,float16,0,0.03454933315515518
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,24,64,128,1,float16,fp8,0,0.0378506655494372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,24,64,128,1,fp8,fp8,0,0.043925335009892784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,24,64,0,1,float16,fp8,0,0.0351946676770846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,24,64,0,1,fp8,fp8,0,0.03242666771014532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,1,64,128,1,float16,float16,0,0.035717333356539406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,1,64,0,1,float16,float16,0,0.033189333975315094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,1,64,128,1,float16,fp8,0,0.036144000788529716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,1,64,128,1,fp8,fp8,0,0.04221866528193156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,1,64,0,1,float16,fp8,0,0.03346133232116699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,1,64,0,1,fp8,fp8,0,0.031770666440327965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,2,64,128,1,float16,float16,0,0.03982933362325033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,2,64,0,1,float16,float16,0,0.03286399940649668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,2,64,128,1,float16,fp8,0,0.03604800005753835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,2,64,128,1,fp8,fp8,0,0.04277333120505015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,2,64,0,1,float16,fp8,0,0.03388266762097677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,2,64,0,1,fp8,fp8,0,0.031104000906149547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,4,64,128,1,float16,float16,0,0.0365280012289683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,4,64,0,1,float16,float16,0,0.03414933383464813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,4,64,128,1,float16,fp8,0,0.03644266724586487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,4,64,128,1,fp8,fp8,0,0.04260799785455068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,4,64,0,1,float16,fp8,0,0.034058667719364166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,4,64,0,1,fp8,fp8,0,0.03205333401759466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,8,64,128,1,float16,float16,0,0.036874666810035706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,24,64,128,1,float16,float16,0,0.027045334378878277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,8,64,0,1,float16,float16,0,0.033914667864640556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,8,64,128,1,float16,fp8,0,0.036714665591716766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,8,64,128,1,fp8,fp8,0,0.04333333174387614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,8,64,0,1,float16,fp8,0,0.03486400097608566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,8,64,0,1,fp8,fp8,0,0.032074667513370514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,24,64,0,1,float16,float16,0,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,1,64,0,1,float16,float16,0,0.02436800052722295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,24,64,128,1,float16,fp8,0,0.027776000400384266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,24,64,128,1,fp8,fp8,0,0.03395200024048487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,24,64,0,1,float16,fp8,0,0.02535466601451238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,24,64,0,1,fp8,fp8,0,0.02386133372783661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,1,64,128,1,float16,float16,0,0.02629333237806956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,1,64,128,1,float16,fp8,0,0.02605866640806198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,1,64,128,1,fp8,fp8,0,0.0329066663980484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,1,64,0,1,float16,fp8,0,0.024645333488782246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,1,64,0,1,fp8,fp8,0,0.02386666586001714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,2,64,128,1,float16,float16,0,0.02644266684850057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,2,64,0,1,float16,float16,0,0.023989332218964893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,2,64,128,1,float16,fp8,0,0.026736001173655193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,2,64,128,1,fp8,fp8,0,0.032645332316557564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,2,64,0,1,float16,fp8,0,0.02425066630045573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,2,64,0,1,fp8,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,4,64,128,1,float16,float16,0,0.026213333010673523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,4,64,0,1,float16,float16,0,0.02407466620206833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,4,64,128,1,float16,fp8,0,0.026730666557947796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,4,64,128,1,fp8,fp8,0,0.03313600023587545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,4,64,0,1,float16,fp8,0,0.024720000723997753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,4,64,0,1,fp8,fp8,0,0.02369600037733714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,8,64,128,1,float16,float16,0,0.026159999271233875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,8,64,0,1,float16,float16,0,0.024688000480333965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,8,64,128,1,float16,fp8,0,0.02677333354949951
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,8,64,128,1,fp8,fp8,0,0.033359999457995095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,8,64,0,1,float16,fp8,0,0.02489600082238515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,8,64,0,1,fp8,fp8,0,0.023434666295846302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,1,64,128,1,float16,float16,0,0.7402826944986979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,1,64,0,1,float16,float16,0,0.7228960196177164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,1,64,128,1,float16,fp8,0,0.7386559645334879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,1,64,128,1,fp8,fp8,0,1.0065066814422607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,1,64,0,1,float16,fp8,0,0.7181173165639242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,1,64,0,1,fp8,fp8,0,0.6671413580576578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,2,64,128,1,float16,float16,0,0.7501653035481771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,2,64,0,1,float16,float16,0,0.7305333614349365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,2,64,128,1,float16,fp8,0,0.7517120043436686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,2,64,128,1,fp8,fp8,0,1.020074685414632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,2,64,0,1,float16,fp8,0,0.7292106946309408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,2,64,0,1,fp8,fp8,0,0.6885973612467448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,4,64,128,1,float16,float16,0,0.7824532985687256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,4,64,0,1,float16,float16,0,0.7602240244547526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,4,64,128,1,float16,fp8,0,0.7751039663950602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,4,64,0,1,float16,fp8,0,0.7559786637624105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,4,64,128,1,fp8,fp8,0,1.0657813549041748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,4,64,0,1,fp8,fp8,0,0.7239253520965576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,8,64,128,1,float16,float16,0,0.7882773081461588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,8,64,0,1,float16,float16,0,0.773535966873169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,8,64,128,1,float16,fp8,0,0.7857546806335449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,8,64,0,1,float16,fp8,0,0.7707253297170004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,8,64,128,1,fp8,fp8,0,1.0618027051289876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,24,64,128,1,float16,float16,0,0.4312693277994792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,8,64,0,1,fp8,fp8,0,0.7234186331431071
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,24,64,0,1,float16,float16,0,0.42561066150665283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,24,64,128,1,float16,fp8,0,0.4283359845479329
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,24,64,128,1,fp8,fp8,0,0.5468266805013021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,24,64,0,1,float16,fp8,0,0.41630399227142334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,24,64,0,1,fp8,fp8,0,0.3753173351287842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,1,64,128,1,float16,float16,0,0.38309868176778156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,1,64,0,1,float16,float16,0,0.37510399023691815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,1,64,128,1,float16,fp8,0,0.38313599427541095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,1,64,128,1,fp8,fp8,0,0.5156053304672241
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,1,64,0,1,float16,fp8,0,0.37194132804870605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,1,64,0,1,fp8,fp8,0,0.343509316444397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,2,64,128,1,float16,float16,0,0.38865065574645996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,2,64,0,1,float16,float16,0,0.37883734703063965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,2,64,128,1,float16,fp8,0,0.3874560197194417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,2,64,128,1,fp8,fp8,0,0.5235146681467692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,2,64,0,1,float16,fp8,0,0.3793066740036011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,2,64,0,1,fp8,fp8,0,0.35339732964833576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,4,64,128,1,float16,float16,0,0.4010293483734131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,4,64,0,1,float16,float16,0,0.39079999923706055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,4,64,128,1,float16,fp8,0,0.399183988571167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,4,64,128,1,fp8,fp8,0,0.5369173288345337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,4,64,0,1,float16,fp8,0,0.3893493413925171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,4,64,0,1,fp8,fp8,0,0.3667626778284709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,8,64,128,1,float16,float16,0,0.40379734834035236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,8,64,0,1,float16,float16,0,0.39684800306955975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,8,64,128,1,float16,fp8,0,0.4031146764755249
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,8,64,128,1,fp8,fp8,0,0.5367946624755859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,8,64,0,1,float16,fp8,0,0.3935626745223999
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,8,64,0,1,fp8,fp8,0,0.36771198113759357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,24,64,128,1,float16,float16,0,0.23197333017985025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,24,64,0,1,float16,float16,0,0.22791999578475952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,24,64,128,1,float16,fp8,0,0.22907199462254843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,24,64,128,1,fp8,fp8,0,0.2845066587130229
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,24,64,0,1,float16,fp8,0,0.2246506611506144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,24,64,0,1,fp8,fp8,0,0.19845867156982422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,1,64,128,1,float16,float16,0,0.200981338818868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,1,64,0,1,float16,float16,0,0.19520533084869385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,1,64,128,1,float16,fp8,0,0.2004586656888326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,1,64,128,1,fp8,fp8,0,0.26759467522303265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,1,64,0,1,float16,fp8,0,0.1950826644897461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,1,64,0,1,fp8,fp8,0,0.1827733318010966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,2,64,128,1,float16,float16,0,0.20338666439056396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,2,64,0,1,fp8,fp8,0,0.1869386633237203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,2,64,0,1,float16,float16,0,0.1974560022354126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,2,64,128,1,float16,fp8,0,0.20267200469970703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,2,64,128,1,fp8,fp8,0,0.26877333720525104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,2,64,0,1,float16,fp8,0,0.19805334011713663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,4,64,128,1,float16,float16,0,0.21077867348988852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,4,64,0,1,float16,float16,0,0.20566932360331217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,4,64,128,1,float16,fp8,0,0.20940266052881876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,4,64,128,1,fp8,fp8,0,0.2754720052083333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,4,64,0,1,float16,fp8,0,0.2039360006650289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,4,64,0,1,fp8,fp8,0,0.1912213365236918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,8,64,128,1,float16,float16,0,0.2121653358141581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,8,64,0,1,float16,float16,0,0.20845866203308105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,8,64,128,1,float16,fp8,0,0.21294933557510376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,8,64,128,1,fp8,fp8,0,0.27664534250895184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,8,64,0,1,float16,fp8,0,0.20762133598327637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,8,64,0,1,fp8,fp8,0,0.1929439902305603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,24,64,128,1,float16,float16,0,0.12984533111254373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,24,64,0,1,float16,float16,0,0.12736533085505167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,24,64,128,1,float16,fp8,0,0.12819733222325644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,24,64,128,1,fp8,fp8,0,0.15242133537928262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,24,64,0,1,float16,fp8,0,0.12737600008646646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,24,64,0,1,fp8,fp8,0,0.11109333237012227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,1,64,128,1,float16,float16,0,0.10782399773597717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,1,64,0,1,float16,float16,0,0.10585066676139832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,1,64,128,1,float16,fp8,0,0.1079253355662028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,1,64,128,1,fp8,fp8,0,0.14479466279347739
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,1,64,0,1,float16,fp8,0,0.1049173374970754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,1,64,0,1,fp8,fp8,0,0.10363200306892395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,2,64,128,1,float16,float16,0,0.10968533158302307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,4,64,128,1,float16,float16,0,0.11301333705584209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,2,64,0,1,float16,float16,0,0.1074133316675822
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,2,64,128,1,float16,fp8,0,0.10925867160161336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,2,64,128,1,fp8,fp8,0,0.1467466652393341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,2,64,0,1,float16,fp8,0,0.10724266370137532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,2,64,0,1,fp8,fp8,0,0.10514666636784871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,4,64,128,1,float16,fp8,0,0.11286933223406474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,4,64,0,1,float16,float16,0,0.11099732915560405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,4,64,128,1,fp8,fp8,0,0.14872533082962036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,4,64,0,1,float16,fp8,0,0.11024533708890279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,4,64,0,1,fp8,fp8,0,0.10669333736101787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,8,64,128,1,float16,float16,0,0.11691733201344807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,8,64,0,1,float16,float16,0,0.11521066228548686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,8,64,128,1,float16,fp8,0,0.11652800440788269
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,8,64,128,1,fp8,fp8,0,0.14776532848676047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,8,64,0,1,float16,fp8,0,0.11499200264612834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,8,64,0,1,fp8,fp8,0,0.10709866881370544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,24,64,128,1,float16,float16,0,0.07419733206431071
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,24,64,0,1,fp8,fp8,0,0.06568000217278798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,24,64,0,1,float16,float16,0,0.07671999931335449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,24,64,128,1,float16,fp8,0,0.07292266686757405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,24,64,128,1,fp8,fp8,0,0.08620799581209819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,24,64,0,1,float16,fp8,0,0.07460799813270569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,1,64,128,1,float16,float16,0,0.06301333506902058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,1,64,0,1,float16,float16,0,0.06414400041103363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,1,64,128,1,float16,fp8,0,0.06359999875227611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,1,64,128,1,fp8,fp8,0,0.07761600116888683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,1,64,0,1,float16,fp8,0,0.0642133355140686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,1,64,0,1,fp8,fp8,0,0.05825066566467285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,2,64,128,1,float16,float16,0,0.06355200211207072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,2,64,128,1,float16,fp8,0,0.06364800035953522
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,2,64,0,1,float16,float16,0,0.06553600231806438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,2,64,128,1,fp8,fp8,0,0.07952000200748444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,2,64,0,1,float16,fp8,0,0.06543999910354614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,2,64,0,1,fp8,fp8,0,0.05907199780146281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,4,64,0,1,fp8,fp8,0,0.06141333281993866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,4,64,128,1,float16,float16,0,0.06537599861621857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,4,64,0,1,float16,float16,0,0.06683733562628429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,4,64,128,1,float16,fp8,0,0.06492266555627187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,4,64,128,1,fp8,fp8,0,0.08159466584523518
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,4,64,0,1,float16,fp8,0,0.06725866595904033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,8,64,128,1,float16,float16,0,0.06730133295059204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,8,64,0,1,float16,float16,0,0.06779733300209045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,8,64,128,1,float16,fp8,0,0.06655466556549072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,24,64,128,1,float16,fp8,0,0.04176533222198486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,8,64,128,1,fp8,fp8,0,0.08348799745241801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,24,64,0,1,float16,fp8,0,0.04145599901676178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,8,64,0,1,float16,fp8,0,0.06798399984836578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,8,64,0,1,fp8,fp8,0,0.06275733311971028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,24,64,128,1,float16,float16,0,0.042352000872294106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,24,64,0,1,float16,float16,0,0.040991999208927155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,24,64,128,1,fp8,fp8,0,0.04961066444714864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,24,64,0,1,fp8,fp8,0,0.039477333426475525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,1,64,128,1,float16,float16,0,0.039317332208156586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,1,64,0,1,float16,float16,0,0.037791999677817024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,1,64,128,1,float16,fp8,0,0.03877866764863332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,1,64,128,1,fp8,fp8,0,0.04664533336957296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,1,64,0,1,float16,fp8,0,0.03839999934037527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,1,64,0,1,fp8,fp8,0,0.03711466739575068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,2,64,128,1,float16,float16,0,0.039520000418027244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,2,64,0,1,float16,float16,0,0.03860799968242645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,2,64,128,1,float16,fp8,0,0.03993066648642222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,2,64,128,1,fp8,fp8,0,0.04780800143877665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,2,64,0,1,float16,fp8,0,0.03830400109291077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,2,64,0,1,fp8,fp8,0,0.03732266773780187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,4,64,128,1,float16,float16,0,0.04038933416207632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,4,64,0,1,float16,float16,0,0.03975466638803482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,8,64,0,1,float16,float16,0,0.03982399900754293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,4,64,128,1,float16,fp8,0,0.040709334115187325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,4,64,128,1,fp8,fp8,0,0.04859733581542969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,4,64,0,1,float16,fp8,0,0.039621333281199135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,4,64,0,1,fp8,fp8,0,0.03851733356714249
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,8,64,128,1,float16,float16,0,0.04063999901215235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,8,64,128,1,float16,fp8,0,0.04079466561476389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,8,64,128,1,fp8,fp8,0,0.0481279989083608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,8,64,0,1,float16,fp8,0,0.04048533240954081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,8,64,0,1,fp8,fp8,0,0.03809600075085958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,24,64,128,1,float16,float16,0,0.03180799881617228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,24,64,0,1,float16,float16,0,0.03133866687615713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,24,64,128,1,float16,fp8,0,0.031717332700888314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,24,64,128,1,fp8,fp8,0,0.037205333511034645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,24,64,0,1,float16,fp8,0,0.03127466638882955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,24,64,0,1,fp8,fp8,0,0.030373332401116688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,1,64,128,1,float16,float16,0,0.029370665550231934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,1,64,0,1,float16,float16,0,0.02881066749493281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,1,64,128,1,float16,fp8,0,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,1,64,128,1,fp8,fp8,0,0.035589332381884255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,1,64,0,1,float16,fp8,0,0.029146666328112285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,1,64,0,1,fp8,fp8,0,0.02916266769170761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,2,64,128,1,float16,float16,0,0.029461334149042766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,2,64,0,1,float16,float16,0,0.02935466667016347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,2,64,128,1,float16,fp8,0,0.029813334345817566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,2,64,128,1,fp8,fp8,0,0.03542399903138479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,2,64,0,1,float16,fp8,0,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,2,64,0,1,fp8,fp8,0,0.028757333755493164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,4,64,128,1,float16,float16,0,0.03030933439731598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,4,64,0,1,float16,float16,0,0.02956266701221466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,4,64,128,1,float16,fp8,0,0.030320001145203907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,4,64,128,1,fp8,fp8,0,0.03633599976698557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,8,64,128,1,fp8,fp8,0,0.03681066632270813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,4,64,0,1,float16,fp8,0,0.029839999973773956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,4,64,0,1,fp8,fp8,0,0.029370665550231934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,8,64,128,1,float16,float16,0,0.030767999589443207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,8,64,0,1,float16,float16,0,0.029872000217437744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,8,64,128,1,float16,fp8,0,0.03065066784620285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,8,64,0,1,float16,fp8,0,0.03036266565322876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,8,64,0,1,fp8,fp8,0,0.029850666721661884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,24,64,128,1,float16,float16,0,0.02048533285657565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,24,64,0,1,float16,float16,0,0.020293333878119785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,24,64,128,1,float16,fp8,0,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,24,64,128,1,fp8,fp8,0,0.024890666206677754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,24,64,0,1,float16,fp8,0,0.02035733312368393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,24,64,0,1,fp8,fp8,0,0.021520001192887623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,1,64,0,1,float16,fp8,0,0.01942933350801468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,1,64,128,1,float16,float16,0,0.019141333798567455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,1,64,0,1,float16,float16,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,1,64,128,1,float16,fp8,0,0.019845332950353622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,1,64,128,1,fp8,fp8,0,0.02480533222357432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,1,64,0,1,fp8,fp8,0,0.020736000190178554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,2,64,128,1,float16,float16,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,2,64,0,1,float16,float16,0,0.019413333386182785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,2,64,128,1,float16,fp8,0,0.019493332753578823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,2,64,128,1,fp8,fp8,0,0.024693332612514496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,2,64,0,1,float16,fp8,0,0.01952533299724261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,2,64,0,1,fp8,fp8,0,0.020938667158285778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,4,64,128,1,float16,float16,0,0.019509332875410717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,4,64,0,1,float16,float16,0,0.01939733326435089
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,4,64,128,1,float16,fp8,0,0.02019199977318446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,4,64,128,1,fp8,fp8,0,0.024693332612514496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,8,64,128,1,fp8,fp8,0,0.02465066562096278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,4,64,0,1,float16,fp8,0,0.019797333826621372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,4,64,0,1,fp8,fp8,0,0.020666666328907013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,8,64,128,1,float16,float16,0,0.020010666300853092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,8,64,0,1,float16,float16,0,0.019573333362738293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,8,64,128,1,float16,fp8,0,0.02027733375628789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,8,64,0,1,float16,fp8,0,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,8,64,0,1,fp8,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,24,64,128,1,float16,float16,0,0.017925333231687546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,24,64,0,1,float16,float16,0,0.017893332988023758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,24,64,128,1,float16,fp8,0,0.01786133274435997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,24,64,128,1,fp8,fp8,0,0.02443733314673106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,24,64,0,1,float16,fp8,0,0.017903999735911686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,24,64,0,1,fp8,fp8,0,0.02060266708334287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,1,64,128,1,float16,float16,0,0.017429333180189133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,1,64,0,1,float16,float16,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,1,64,128,1,float16,fp8,0,0.017642666896184284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,1,64,128,1,fp8,fp8,0,0.023898666103680927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,1,64,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,1,64,0,1,fp8,fp8,0,0.02013333390156428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,2,64,128,1,float16,float16,0,0.01738133281469345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,2,64,0,1,float16,float16,0,0.017386666188637417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,4,64,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,2,64,128,1,float16,fp8,0,0.017653333644072216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,2,64,128,1,fp8,fp8,0,0.02378133436044057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,2,64,0,1,float16,fp8,0,0.017583999782800674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,2,64,0,1,fp8,fp8,0,0.019952000429232914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,4,64,128,1,float16,float16,0,0.017429333180189133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,4,64,128,1,float16,fp8,0,0.017530667285124462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,8,64,128,1,float16,fp8,0,0.017946666727463405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,4,64,128,1,fp8,fp8,0,0.023765332996845245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,4,64,0,1,float16,fp8,0,0.017386666188637417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,4,64,0,1,fp8,fp8,0,0.019621333728233974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,8,64,128,1,float16,float16,0,0.017504000415404636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,8,64,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,8,64,128,1,fp8,fp8,0,0.023984000086784363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,8,64,0,1,float16,fp8,0,0.01786133274435997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,8,64,0,1,fp8,fp8,0,0.020256000260512035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,1,64,128,1,float16,float16,0,0.28035734097162884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,1,64,0,1,float16,float16,0,0.2800533374150594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,1,64,128,1,float16,fp8,0,0.27729066212972003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,1,64,128,1,fp8,fp8,0,0.3359520037968953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,1,64,0,1,float16,fp8,0,0.2773226698239644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,1,64,0,1,fp8,fp8,0,0.33479468027750653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,2,64,128,1,float16,float16,0,0.28572267293930054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,2,64,0,1,float16,float16,0,0.2856053312619527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,2,64,128,1,float16,fp8,0,0.2761066754659017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,2,64,128,1,fp8,fp8,0,0.3436533212661743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,2,64,0,1,float16,fp8,0,0.27688533067703247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,2,64,0,1,fp8,fp8,0,0.33852267265319824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,4,64,128,1,float16,float16,0,0.2932746609052022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,4,64,0,1,float16,float16,0,0.2939893404642741
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,4,64,128,1,float16,fp8,0,0.2990933259328206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,4,64,128,1,fp8,fp8,0,0.37012799580891925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,4,64,0,1,float16,fp8,0,0.2984480063120524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,4,64,0,1,fp8,fp8,0,0.3707520167032878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,8,64,128,1,float16,float16,0,0.2951893409093221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,8,64,0,1,float16,float16,0,0.2941546638806661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,8,64,128,1,float16,fp8,0,0.29356799523035687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,8,64,128,1,fp8,fp8,0,0.36441067854563397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,8,64,0,1,float16,fp8,0,0.29414933919906616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,24,64,128,1,float16,float16,0,0.17575999101003012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,8,64,0,1,fp8,fp8,0,0.36612268288930255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,24,64,0,1,float16,float16,0,0.1759679913520813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,24,64,128,1,float16,fp8,0,0.17177067200342813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,24,64,128,1,fp8,fp8,0,0.19608000914255777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,24,64,0,1,float16,fp8,0,0.1710453430811564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,24,64,0,1,fp8,fp8,0,0.19619200627009073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,1,64,128,1,float16,float16,0,0.14685333768526712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,1,64,0,1,float16,float16,0,0.14645866552988687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,1,64,128,1,float16,fp8,0,0.14757333199183145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,1,64,128,1,fp8,fp8,0,0.17897067467371622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,1,64,0,1,float16,fp8,0,0.14551466703414917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,1,64,0,1,fp8,fp8,0,0.17940266927083334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,2,64,128,1,float16,float16,0,0.14893333117167154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,2,64,0,1,float16,float16,0,0.14904000361760458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,2,64,128,1,float16,fp8,0,0.1474293371041616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,2,64,128,1,fp8,fp8,0,0.17961599429448447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,2,64,0,1,float16,fp8,0,0.14636266231536865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,2,64,0,1,fp8,fp8,0,0.17906665802001953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,4,64,128,1,float16,float16,0,0.1523253321647644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,4,64,0,1,float16,float16,0,0.15252799789110819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,4,64,128,1,float16,fp8,0,0.1527679959932963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,4,64,128,1,fp8,fp8,0,0.19463467597961426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,4,64,0,1,float16,fp8,0,0.15337066849072775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,4,64,0,1,fp8,fp8,0,0.19320533672968546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,8,64,128,1,float16,float16,0,0.15702399611473083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,8,64,0,1,float16,float16,0,0.15646933515866598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,8,64,128,1,float16,fp8,0,0.15491732954978943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,8,64,128,1,fp8,fp8,0,0.19058666626612344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,8,64,0,1,float16,fp8,0,0.1560640037059784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,8,64,0,1,fp8,fp8,0,0.19096000989278158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,24,64,128,1,float16,float16,0,0.0999893347422282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,24,64,0,1,fp8,fp8,0,0.1111199955145518
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,24,64,0,1,float16,float16,0,0.0990826686223348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,24,64,128,1,float16,fp8,0,0.09774933258692424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,24,64,128,1,fp8,fp8,0,0.10922132929166158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,24,64,0,1,float16,fp8,0,0.0969546635945638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,1,64,128,1,float16,float16,0,0.08099199831485748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,1,64,0,1,float16,float16,0,0.08067200084527333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,1,64,128,1,float16,fp8,0,0.08111466467380524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,1,64,128,1,fp8,fp8,0,0.10002133250236511
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,1,64,0,1,float16,fp8,0,0.07946133116881053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,1,64,0,1,fp8,fp8,0,0.09940266609191895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,2,64,128,1,float16,float16,0,0.08258666594823201
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,2,64,0,1,float16,float16,0,0.08228800197442372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,2,64,128,1,float16,fp8,0,0.08141333361466725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,2,64,0,1,float16,fp8,0,0.08226666847864787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,2,64,128,1,fp8,fp8,0,0.1011253297328949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,2,64,0,1,fp8,fp8,0,0.10046399633089702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,4,64,128,1,float16,float16,0,0.08426666259765625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,4,64,0,1,float16,float16,0,0.08475733796755473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,4,64,128,1,float16,fp8,0,0.08513066172599792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,4,64,128,1,fp8,fp8,0,0.10812800129254659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,4,64,0,1,float16,fp8,0,0.08553066849708557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,4,64,0,1,fp8,fp8,0,0.10812266667683919
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,8,64,128,1,float16,float16,0,0.08859200278917949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,8,64,0,1,float16,float16,0,0.08704533179601033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,8,64,128,1,float16,fp8,0,0.08731733759244283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,8,64,128,1,fp8,fp8,0,0.10604266325632732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,8,64,0,1,float16,fp8,0,0.08745599786440532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,8,64,0,1,fp8,fp8,0,0.10708266496658325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,24,64,0,1,fp8,fp8,0,0.06412800153096516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,24,64,128,1,float16,float16,0,0.05844266712665558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,24,64,0,1,float16,float16,0,0.05804799993832906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,24,64,128,1,float16,fp8,0,0.05613866448402405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,24,64,128,1,fp8,fp8,0,0.06388799846172333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,24,64,0,1,float16,fp8,0,0.05608533322811127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,1,64,128,1,float16,float16,0,0.0462719996770223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,1,64,0,1,float16,float16,0,0.04680533210436503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,1,64,128,1,float16,fp8,0,0.04667200148105621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,1,64,128,1,fp8,fp8,0,0.05821333328882853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,1,64,0,1,float16,fp8,0,0.047055999437967934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,1,64,0,1,fp8,fp8,0,0.05811200042565664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,2,64,128,1,float16,float16,0,0.04710400104522705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,2,64,0,1,float16,float16,0,0.04691733419895172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,2,64,128,1,float16,fp8,0,0.04731733103593191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,2,64,128,1,fp8,fp8,0,0.058229332168896995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,2,64,0,1,float16,fp8,0,0.04714666803677877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,4,64,0,1,float16,fp8,0,0.04868799944718679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,2,64,0,1,fp8,fp8,0,0.0584746648867925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,4,64,128,1,float16,float16,0,0.0496319979429245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,4,64,0,1,float16,float16,0,0.049226666490236916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,4,64,128,1,float16,fp8,0,0.04866133133570353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,4,64,128,1,fp8,fp8,0,0.06081599990526835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,4,64,0,1,fp8,fp8,0,0.0609440008799235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,8,64,128,1,float16,float16,0,0.04980266590913137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,8,64,0,1,float16,float16,0,0.05042133231957754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,8,64,128,1,float16,fp8,0,0.04924799998601278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,8,64,128,1,fp8,fp8,0,0.06257600088914235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,8,64,0,1,float16,fp8,0,0.04942933221658071
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,8,64,0,1,fp8,fp8,0,0.061386664708455406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,24,64,128,1,float16,float16,0,0.03054933249950409
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,24,64,0,1,float16,float16,0,0.03141333411137263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,24,64,128,1,float16,fp8,0,0.030778666337331135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,24,64,128,1,fp8,fp8,0,0.0388373335202535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,24,64,0,1,float16,fp8,0,0.030767999589443207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,24,64,0,1,fp8,fp8,0,0.039162665605545044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,1,64,128,1,float16,float16,0,0.02827200045188268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,1,64,0,1,float16,float16,0,0.027744000156720478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,1,64,128,1,float16,fp8,0,0.027679999669392902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,1,64,128,1,fp8,fp8,0,0.036474667489528656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,1,64,0,1,float16,fp8,0,0.028607999285062153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,1,64,0,1,fp8,fp8,0,0.03624533365170161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,2,64,128,1,float16,float16,0,0.029088000456492107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,2,64,0,1,float16,float16,0,0.028607999285062153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,2,64,128,1,float16,fp8,0,0.028751999139785767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,4,64,128,1,float16,fp8,0,0.02938133229811986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,2,64,128,1,fp8,fp8,0,0.03630933413902918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,2,64,0,1,float16,fp8,0,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,2,64,0,1,fp8,fp8,0,0.03740799923737844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,4,64,128,1,float16,float16,0,0.029525332152843475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,4,64,0,1,float16,float16,0,0.029253333806991577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,4,64,128,1,fp8,fp8,0,0.037861332297325134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,4,64,0,1,float16,fp8,0,0.029792000850041706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,4,64,0,1,fp8,fp8,0,0.03832533210515976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,8,64,128,1,float16,float16,0,0.029482667644818623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,8,64,0,1,float16,float16,0,0.029919999341169994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,8,64,128,1,float16,fp8,0,0.029866665601730347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,8,64,128,1,fp8,fp8,0,0.037962667644023895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,8,64,0,1,float16,fp8,0,0.02985599885384242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,8,64,0,1,fp8,fp8,0,0.03841600070397059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,24,64,128,1,float16,float16,0,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,24,64,0,1,float16,float16,0,0.024090667565663654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,24,64,128,1,float16,fp8,0,0.02497600018978119
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,24,64,128,1,fp8,fp8,0,0.02972800036271413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,24,64,0,1,float16,fp8,0,0.02405333270629247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,24,64,0,1,fp8,fp8,0,0.029696000119050343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,1,64,128,1,float16,float16,0,0.022405333817005157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,1,64,0,1,float16,float16,0,0.02254933367172877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,1,64,128,1,float16,fp8,0,0.022298666338125866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,1,64,128,1,fp8,fp8,0,0.02847466617822647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,2,64,128,1,fp8,fp8,0,0.02759466568628947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,1,64,0,1,float16,fp8,0,0.022629333039124806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,1,64,0,1,fp8,fp8,0,0.027962667246659596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,2,64,128,1,float16,float16,0,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,2,64,0,1,float16,float16,0,0.022543999056021374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,2,64,128,1,float16,fp8,0,0.023103999594847362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,2,64,0,1,float16,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,2,64,0,1,fp8,fp8,0,0.0276053324341774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,4,64,128,1,float16,float16,0,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,4,64,0,1,float16,float16,0,0.023402666052182514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,4,64,128,1,float16,fp8,0,0.02351466566324234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,4,64,128,1,fp8,fp8,0,0.02921066681543986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,4,64,0,1,float16,fp8,0,0.023178666830062866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,8,64,128,1,float16,float16,0,0.023200000325838726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,4,64,0,1,fp8,fp8,0,0.028837333122889202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,8,64,0,1,float16,float16,0,0.022704000274340313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,8,64,128,1,float16,fp8,0,0.023503998915354412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,8,64,128,1,fp8,fp8,0,0.029301332930723827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,8,64,0,1,float16,fp8,0,0.023823998868465424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,8,64,0,1,fp8,fp8,0,0.029701332251230877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,24,64,128,1,float16,float16,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,24,64,0,1,float16,float16,0,0.016538667182127636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,24,64,128,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,24,64,128,1,fp8,fp8,0,0.019850666324297588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,24,64,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,24,64,0,1,fp8,fp8,0,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,1,64,128,1,float16,float16,0,0.01648533344268799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,1,64,0,1,float16,float16,0,0.01659199967980385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,1,64,128,1,float16,fp8,0,0.016693333784739178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,1,64,128,1,fp8,fp8,0,0.019359999646743137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,1,64,0,1,float16,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,1,64,0,1,fp8,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,2,64,128,1,float16,float16,0,0.016719999412695568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,2,64,0,1,float16,float16,0,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,2,64,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,2,64,128,1,fp8,fp8,0,0.02040533348917961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,2,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,2,64,0,1,fp8,fp8,0,0.01966399947802226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,4,64,128,1,float16,float16,0,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,4,64,0,1,float16,float16,0,0.016186666985352833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,4,64,128,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,4,64,128,1,fp8,fp8,0,0.02013333390156428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,4,64,0,1,float16,fp8,0,0.016741332908471424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,4,64,0,1,fp8,fp8,0,0.0206133338312308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,8,64,128,1,float16,float16,0,0.016778666526079178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,8,64,0,1,float16,float16,0,0.015872000406185787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,8,64,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,8,64,128,1,fp8,fp8,0,0.019925333559513092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,8,64,0,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,8,64,0,1,fp8,fp8,0,0.020010666300853092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,24,64,128,1,float16,float16,0,0.014602666099866232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,24,64,0,1,float16,float16,0,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,24,64,128,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,24,64,128,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,24,64,0,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,24,64,0,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,1,64,128,1,float16,float16,0,0.014373333503802618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,1,64,0,1,float16,float16,0,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,1,64,128,1,float16,fp8,0,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,1,64,128,1,fp8,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,1,64,0,1,float16,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,1,64,0,1,fp8,fp8,0,0.01945066700379054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,2,64,128,1,float16,float16,0,0.014618666221698126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,2,64,0,1,float16,float16,0,0.014314666390419006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,2,64,128,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,2,64,128,1,fp8,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,2,64,0,1,float16,fp8,0,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,2,64,0,1,fp8,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,4,64,128,1,float16,float16,0,0.014346666634082794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,4,64,0,1,float16,float16,0,0.014389333625634512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,4,64,128,1,float16,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,4,64,128,1,fp8,fp8,0,0.018543999642133713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,4,64,0,1,float16,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,4,64,0,1,fp8,fp8,0,0.01956266661485036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,8,64,128,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,8,64,0,1,float16,float16,0,0.014501333236694336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,8,64,128,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,8,64,128,1,fp8,fp8,0,0.019637333850065868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,8,64,0,1,float16,fp8,0,0.01570133368174235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,8,64,0,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,24,64,128,1,float16,float16,0,0.013818666338920593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,24,64,0,1,float16,float16,0,0.013807999591032663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,24,64,128,1,float16,fp8,0,0.014511999984582266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,24,64,128,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,24,64,0,1,float16,fp8,0,0.01370666672786077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,24,64,0,1,fp8,fp8,0,0.019472000499566395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,1,64,128,1,float16,float16,0,0.01320533330241839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,1,64,0,1,float16,float16,0,0.013674666484196981
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,1,64,128,1,float16,fp8,0,0.013690666606028875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,1,64,128,1,fp8,fp8,0,0.018618666877349217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,1,64,0,1,float16,fp8,0,0.014138666292031607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,1,64,0,1,fp8,fp8,0,0.018288000176350277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,2,64,128,1,float16,float16,0,0.013679999858140945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,2,64,0,1,float16,float16,0,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,2,64,128,1,float16,fp8,0,0.013568000247081121
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,2,64,128,1,fp8,fp8,0,0.018063999712467194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,2,64,0,1,float16,fp8,0,0.014245333770910898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,2,64,0,1,fp8,fp8,0,0.0182239996890227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,4,64,128,1,float16,float16,0,0.013594667116800943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,4,64,0,1,float16,float16,0,0.013408000270525614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,4,64,128,1,float16,fp8,0,0.014192000031471252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,4,64,128,1,fp8,fp8,0,0.017792000124851864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,4,64,0,1,float16,fp8,0,0.013631999492645264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,4,64,0,1,fp8,fp8,0,0.017914666483799618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,8,64,128,1,float16,float16,0,0.013493333011865616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,8,64,0,1,float16,float16,0,0.01381333296497663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,1,64,128,1,float16,float16,0,0.13541866342226663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,8,64,128,1,float16,fp8,0,0.0141546664138635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,8,64,128,1,fp8,fp8,0,0.018538666268189747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,8,64,0,1,float16,fp8,0,0.01394133393963178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,8,64,0,1,fp8,fp8,0,0.017711999515692394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,1,64,0,1,float16,float16,0,0.13591999808947244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,1,64,128,1,float16,fp8,0,0.13582932949066162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,1,64,0,1,float16,fp8,0,0.1360053320725759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,1,64,128,1,fp8,fp8,0,0.24089600642522177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,1,64,0,1,fp8,fp8,0,0.24125866095225015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,2,64,128,1,float16,float16,0,0.13743467132250467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,2,64,0,1,float16,float16,0,0.13763200243314108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,2,64,128,1,float16,fp8,0,0.13523733615875244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,2,64,128,1,fp8,fp8,0,0.23984533548355103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,2,64,0,1,float16,fp8,0,0.1365173359711965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,2,64,0,1,fp8,fp8,0,0.23875200748443604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,4,64,128,1,float16,float16,0,0.14296000202496847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,4,64,0,1,float16,float16,0,0.14292266964912415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,4,64,128,1,float16,fp8,0,0.14346667130788168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,4,64,128,1,fp8,fp8,0,0.2532479961713155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,4,64,0,1,float16,fp8,0,0.14251200358072916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,4,64,0,1,fp8,fp8,0,0.2542933424313863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,8,64,128,1,float16,float16,0,0.14332800110181174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,8,64,0,1,float16,float16,0,0.14331199725468954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,8,64,128,1,float16,fp8,0,0.1425333321094513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,8,64,128,1,fp8,fp8,0,0.25438932577768963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,8,64,0,1,float16,fp8,0,0.1432960033416748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,8,64,0,1,fp8,fp8,0,0.25147199630737305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,24,64,128,1,float16,float16,0,0.09298666318257649
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,24,64,0,1,float16,float16,0,0.09334400296211243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,24,64,128,1,float16,fp8,0,0.09063999851544698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,24,64,128,1,fp8,fp8,0,0.14168000221252441
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,24,64,0,1,float16,fp8,0,0.09050666292508443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,24,64,0,1,fp8,fp8,0,0.14175466696421304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,1,64,128,1,float16,float16,0,0.0754559983809789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,1,64,0,1,float16,float16,0,0.07561066746711731
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,1,64,128,1,float16,fp8,0,0.07543999950091045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,1,64,128,1,fp8,fp8,0,0.13318933049837747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,1,64,0,1,float16,fp8,0,0.07563733557860057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,1,64,0,1,fp8,fp8,0,0.1311360001564026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,2,64,128,1,float16,float16,0,0.07669333120187123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,2,64,0,1,float16,float16,0,0.07696000238259633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,2,64,128,1,float16,fp8,0,0.07517333328723907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,2,64,128,1,fp8,fp8,0,0.13241066535313925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,2,64,0,1,float16,fp8,0,0.07609599828720093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,2,64,0,1,fp8,fp8,0,0.13199466466903687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,4,64,128,1,float16,float16,0,0.08005333443482716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,4,64,0,1,float16,float16,0,0.0803306649128596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,8,64,128,1,float16,float16,0,0.08165866633256276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,8,64,0,1,float16,float16,0,0.0812799980243047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,4,64,128,1,float16,fp8,0,0.08061333497365315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,4,64,128,1,fp8,fp8,0,0.13934933145840964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,4,64,0,1,float16,fp8,0,0.07999466856320699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,4,64,0,1,fp8,fp8,0,0.13876799742380777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,8,64,128,1,float16,fp8,0,0.08197333415349324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,8,64,128,1,fp8,fp8,0,0.13611732920010886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,8,64,0,1,float16,fp8,0,0.08183999856313069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,8,64,0,1,fp8,fp8,0,0.1381013294061025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,24,64,128,1,float16,float16,0,0.05545066793759664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,24,64,0,1,float16,float16,0,0.052943999568621315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,24,64,128,1,float16,fp8,0,0.05108266572157542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,24,64,128,1,fp8,fp8,0,0.08201600114504497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,24,64,0,1,float16,fp8,0,0.050554667909940086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,24,64,0,1,fp8,fp8,0,0.08098133405049641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,1,64,128,1,float16,float16,0,0.043103997906049095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,1,64,0,1,float16,float16,0,0.043247997760772705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,1,64,128,1,float16,fp8,0,0.04340800146261851
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,2,64,128,1,float16,fp8,0,0.043285335103670754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,1,64,128,1,fp8,fp8,0,0.07474666833877563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,1,64,0,1,float16,fp8,0,0.04362666606903076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,1,64,0,1,fp8,fp8,0,0.07411199808120728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,2,64,128,1,float16,float16,0,0.044256001710891724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,2,64,0,1,float16,float16,0,0.043712000052134194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,2,64,128,1,fp8,fp8,0,0.07481066882610321
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,2,64,0,1,float16,fp8,0,0.04358933369318644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,2,64,0,1,fp8,fp8,0,0.0736053337653478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,4,64,128,1,float16,float16,0,0.04483733574549357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,4,64,0,1,float16,float16,0,0.04507199923197428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,4,64,128,1,float16,fp8,0,0.04552533229192098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,4,64,128,1,fp8,fp8,0,0.07632000247637431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,4,64,0,1,float16,fp8,0,0.04543466866016388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,4,64,0,1,fp8,fp8,0,0.07692799965540568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,8,64,128,1,float16,float16,0,0.04572266836961111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,8,64,0,1,float16,float16,0,0.04585599899291992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,8,64,128,1,float16,fp8,0,0.045696000258127846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,8,64,128,1,fp8,fp8,0,0.07902400195598602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,8,64,0,1,float16,fp8,0,0.04611733555793762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,8,64,0,1,fp8,fp8,0,0.07788800199826558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,24,64,128,1,float16,float16,0,0.029365333418051403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,24,64,0,1,float16,float16,0,0.029504001140594482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,24,64,128,1,float16,fp8,0,0.029130667448043823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,24,64,128,1,fp8,fp8,0,0.04761599997679392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,24,64,0,1,float16,fp8,0,0.028933333853880566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,24,64,0,1,fp8,fp8,0,0.04651733239491781
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,1,64,128,1,float16,float16,0,0.02682666728893916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,1,64,0,1,float16,float16,0,0.026917333404223125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,1,64,128,1,float16,fp8,0,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,1,64,128,1,fp8,fp8,0,0.04490133126576742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,1,64,0,1,float16,fp8,0,0.027322667340437572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,1,64,0,1,fp8,fp8,0,0.044480000933011375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,2,64,128,1,float16,float16,0,0.0273333340883255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,2,64,0,1,float16,float16,0,0.027477333943049114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,2,64,128,1,float16,fp8,0,0.02697066714366277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,2,64,128,1,fp8,fp8,0,0.04446400205294291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,2,64,0,1,float16,fp8,0,0.027562665442625683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,2,64,0,1,fp8,fp8,0,0.04509866734345754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,4,64,128,1,float16,float16,0,0.027962667246659596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,4,64,0,1,float16,float16,0,0.027669332921504974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,8,64,128,1,float16,fp8,0,0.028277332584063213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,4,64,128,1,float16,fp8,0,0.028149334092934925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,4,64,128,1,fp8,fp8,0,0.04599999884764353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,4,64,0,1,float16,fp8,0,0.0276853342851003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,4,64,0,1,fp8,fp8,0,0.04526400069395701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,8,64,128,1,float16,float16,0,0.028325334191322327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,8,64,0,1,float16,float16,0,0.02794133375088374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,8,64,128,1,fp8,fp8,0,0.04599999884764353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,8,64,0,1,float16,fp8,0,0.028021333118279774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,8,64,0,1,fp8,fp8,0,0.04568533102671305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,24,64,128,1,float16,float16,0,0.022128000855445862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,24,64,0,1,float16,float16,0,0.022053333620230358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,1,64,0,1,float16,float16,0,0.020629333953062694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,24,64,128,1,float16,fp8,0,0.02218666672706604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,24,64,128,1,fp8,fp8,0,0.032586666444937386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,24,64,0,1,float16,fp8,0,0.022661333282788593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,24,64,0,1,fp8,fp8,0,0.032442666590213776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,1,64,128,1,float16,float16,0,0.020655999581019085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,1,64,128,1,float16,fp8,0,0.020848001043001812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,1,64,128,1,fp8,fp8,0,0.031386665999889374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,1,64,0,1,float16,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,2,64,0,1,fp8,fp8,0,0.031045332551002502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,1,64,0,1,fp8,fp8,0,0.031008000175158184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,2,64,128,1,float16,float16,0,0.02111999938885371
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,2,64,0,1,float16,float16,0,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,2,64,128,1,float16,fp8,0,0.021551998953024547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,2,64,128,1,fp8,fp8,0,0.03128000100453695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,2,64,0,1,float16,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,4,64,128,1,float16,float16,0,0.02123733361562093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,8,64,128,1,float16,float16,0,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,4,64,0,1,float16,float16,0,0.021733333667119343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,4,64,128,1,float16,fp8,0,0.022122666239738464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,4,64,128,1,fp8,fp8,0,0.03215466688076655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,4,64,0,1,float16,fp8,0,0.021589333812395733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,4,64,0,1,fp8,fp8,0,0.03242133309443792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,8,64,0,1,float16,float16,0,0.021664001047611237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,8,64,128,1,float16,fp8,0,0.021920000513394673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,8,64,128,1,fp8,fp8,0,0.03244800120592117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,8,64,0,1,float16,fp8,0,0.02183466653029124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,8,64,0,1,fp8,fp8,0,0.03194133440653483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,24,64,128,1,float16,float16,0,0.01603200038274129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,24,64,0,1,float16,float16,0,0.015919999529918034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,24,64,128,1,float16,fp8,0,0.016208000481128693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,1,64,128,1,float16,fp8,0,0.01605333387851715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,24,64,128,1,fp8,fp8,0,0.024538666009902954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,24,64,0,1,float16,fp8,0,0.016336000214020412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,1,64,0,1,fp8,fp8,0,0.024362665911515553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,24,64,0,1,fp8,fp8,0,0.02535466601451238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,1,64,128,1,float16,float16,0,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,1,64,0,1,float16,float16,0,0.016042667130629223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,2,64,128,1,fp8,fp8,0,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,1,64,128,1,fp8,fp8,0,0.024085332949956257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,1,64,0,1,float16,fp8,0,0.015813333292802174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,2,64,128,1,float16,float16,0,0.015882667154073715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,4,64,0,1,float16,float16,0,0.01591466615597407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,2,64,0,1,float16,float16,0,0.015802666544914246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,2,64,128,1,float16,fp8,0,0.016303999970356624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,2,64,0,1,float16,fp8,0,0.01590399940808614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,2,64,0,1,fp8,fp8,0,0.023951999843120575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,4,64,128,1,float16,float16,0,0.015840000162522
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,4,64,128,1,float16,fp8,0,0.01603200038274129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,4,64,128,1,fp8,fp8,0,0.023904000719388325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,4,64,0,1,float16,fp8,0,0.016261332978804905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,4,64,0,1,fp8,fp8,0,0.024277334411938984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,8,64,128,1,float16,float16,0,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,8,64,0,1,float16,float16,0,0.015439999600251516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,8,64,128,1,float16,fp8,0,0.01603200038274129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,8,64,128,1,fp8,fp8,0,0.024549332757790882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,8,64,0,1,float16,fp8,0,0.01609066625436147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,8,64,0,1,fp8,fp8,0,0.024133334557215374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,24,64,0,1,float16,fp8,0,0.014106666048367819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,24,64,128,1,float16,float16,0,0.013749333719412485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,24,64,0,1,float16,float16,0,0.013946666071812311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,24,64,128,1,float16,fp8,0,0.013898666948080063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,24,64,128,1,fp8,fp8,0,0.019941333681344986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,24,64,0,1,fp8,fp8,0,0.02013333390156428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,1,64,128,1,float16,float16,0,0.013653332988421122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,1,64,0,1,float16,float16,0,0.013717333475748697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,1,64,128,1,float16,fp8,0,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,1,64,128,1,fp8,fp8,0,0.019850666324297588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,1,64,0,1,float16,fp8,0,0.013781332721312841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,1,64,0,1,fp8,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,2,64,128,1,float16,float16,0,0.014314666390419006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,2,64,0,1,float16,float16,0,0.01351999988158544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,2,64,128,1,float16,fp8,0,0.0144213338692983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,2,64,128,1,fp8,fp8,0,0.020213333268960316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,2,64,0,1,float16,fp8,0,0.014117332796255747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,2,64,0,1,fp8,fp8,0,0.019610666980346043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,4,64,128,1,float16,float16,0,0.013669333110253016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,4,64,0,1,float16,float16,0,0.014277332772811254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,4,64,128,1,float16,fp8,0,0.013882666826248169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,4,64,128,1,fp8,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,4,64,0,1,float16,fp8,0,0.014453332871198654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,8,64,128,1,fp8,fp8,0,0.01952533299724261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,4,64,0,1,fp8,fp8,0,0.01978133370478948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,8,64,128,1,float16,float16,0,0.013898666948080063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,8,64,0,1,float16,float16,0,0.01434133326013883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,8,64,128,1,float16,fp8,0,0.014149333039919535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,24,64,0,1,float16,fp8,0,0.013338666409254074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,8,64,0,1,float16,fp8,0,0.014218666901191076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,8,64,0,1,fp8,fp8,0,0.020288000504175823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,24,64,128,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,24,64,0,1,float16,float16,0,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,24,64,128,1,float16,fp8,0,0.013295999417702356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,24,64,128,1,fp8,fp8,0,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,24,64,0,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,1,64,128,1,float16,float16,0,0.012629333883523941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,1,64,0,1,float16,float16,0,0.01240533341964086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,1,64,128,1,float16,fp8,0,0.013461332768201828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,1,64,128,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,1,64,0,1,float16,fp8,0,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,1,64,0,1,fp8,fp8,0,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,2,64,128,1,float16,float16,0,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,2,64,0,1,float16,float16,0,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,2,64,128,1,float16,fp8,0,0.013221333424250284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,2,64,128,1,fp8,fp8,0,0.018757333358128864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,2,64,0,1,float16,fp8,0,0.01322666679819425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,2,64,0,1,fp8,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,4,64,128,1,float16,float16,0,0.012666666259368261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,8,64,128,1,float16,float16,0,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,4,64,0,1,float16,float16,0,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,4,64,128,1,float16,fp8,0,0.013647999614477158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,4,64,128,1,fp8,fp8,0,0.02029866725206375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,4,64,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,4,64,0,1,fp8,fp8,0,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,8,64,0,1,float16,float16,0,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,8,64,128,1,float16,fp8,0,0.013221333424250284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,8,64,128,1,fp8,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,8,64,0,1,float16,fp8,0,0.01333333303531011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,8,64,0,1,fp8,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,24,64,128,1,float16,float16,0,0.012565333396196365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,24,64,0,1,float16,float16,0,0.01267733300725619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,24,64,128,1,float16,fp8,0,0.012602667013804117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,24,64,128,1,fp8,fp8,0,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,24,64,0,1,float16,fp8,0,0.012602667013804117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,24,64,0,1,fp8,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,1,64,128,1,float16,float16,0,0.01221866657336553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,1,64,0,1,float16,float16,0,0.012250666817029318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,1,64,128,1,float16,fp8,0,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,1,64,128,1,fp8,fp8,0,0.01860800012946129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,1,64,0,1,float16,fp8,0,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,1,64,0,1,fp8,fp8,0,0.0182239996890227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,2,64,128,1,float16,float16,0,0.012576000144084295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,2,64,0,1,float16,float16,0,0.012661332885424295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,2,64,128,1,float16,fp8,0,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,2,64,128,1,fp8,fp8,0,0.018288000176350277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,2,64,0,1,float16,fp8,0,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,2,64,0,1,fp8,fp8,0,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,4,64,128,1,float16,float16,0,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,4,64,0,1,float16,float16,0,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,4,64,128,1,float16,fp8,0,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,4,64,128,1,fp8,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,4,64,0,1,float16,fp8,0,0.012666666259368261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,4,64,0,1,fp8,fp8,0,0.018346666047970455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,8,64,128,1,float16,float16,0,0.012437333663304647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,8,64,0,1,float16,float16,0,0.012479999413092932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,8,64,128,1,float16,fp8,0,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,8,64,128,1,fp8,fp8,0,0.018757333358128864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,8,64,0,1,float16,fp8,0,0.013301332791646322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,8,64,0,1,fp8,fp8,0,0.018863999595244724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,1,64,128,1,float16,float16,0,0.08434666196505229
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,1,64,0,1,float16,float16,0,0.08385066191355388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,1,64,128,1,float16,fp8,0,0.08425600330034892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,1,64,128,1,fp8,fp8,0,0.20048532883326212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,1,64,0,1,float16,fp8,0,0.08427733182907104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,1,64,0,1,fp8,fp8,0,0.20105600357055664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,2,64,128,1,float16,float16,0,0.08571199576059978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,2,64,0,1,float16,float16,0,0.08551466464996338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,2,64,128,1,float16,fp8,0,0.08452799916267395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,2,64,128,1,fp8,fp8,0,0.20058133204778036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,2,64,0,1,float16,fp8,0,0.08416000008583069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,2,64,0,1,fp8,fp8,0,0.20158400138219199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,4,64,128,1,float16,float16,0,0.088837335507075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,4,64,0,1,float16,float16,0,0.08850133419036865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,4,64,128,1,float16,fp8,0,0.08851733803749084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,4,64,128,1,fp8,fp8,0,0.20968000094095865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,4,64,0,1,float16,fp8,0,0.08840533097585042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,4,64,0,1,fp8,fp8,0,0.20934933423995972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,8,64,128,1,fp8,fp8,0,0.20801067352294922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,8,64,128,1,float16,float16,0,0.09092266360918681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,8,64,0,1,float16,float16,0,0.09099200367927551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,8,64,128,1,float16,fp8,0,0.09047466516494751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,8,64,0,1,float16,fp8,0,0.0906773308912913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,8,64,0,1,fp8,fp8,0,0.20639467239379883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,24,64,128,1,float16,float16,0,0.056405335664749146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,24,64,0,1,float16,float16,0,0.05532800157864889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,24,64,128,1,float16,fp8,0,0.05332799752553304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,24,64,128,1,fp8,fp8,0,0.11681600411732991
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,24,64,0,1,float16,fp8,0,0.052853330969810486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,24,64,0,1,fp8,fp8,0,0.11771733562151591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,1,64,128,1,float16,float16,0,0.046581332882245384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,1,64,0,1,float16,float16,0,0.04693333307902018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,1,64,128,1,fp8,fp8,0,0.10991467038790385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,1,64,128,1,float16,fp8,0,0.04597333570321401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,1,64,0,1,float16,fp8,0,0.0462666650613149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,1,64,0,1,fp8,fp8,0,0.109333336353302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,2,64,128,1,float16,float16,0,0.047024001677831016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,2,64,0,1,float16,float16,0,0.047082667549451195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,2,64,128,1,float16,fp8,0,0.04684266448020935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,2,64,128,1,fp8,fp8,0,0.1097866694132487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,2,64,0,1,float16,fp8,0,0.046480000019073486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,2,64,0,1,fp8,fp8,0,0.10955733060836792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,4,64,128,1,float16,float16,0,0.04842133323351542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,4,64,0,1,float16,float16,0,0.04779199759165446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,4,64,128,1,float16,fp8,0,0.04853333532810211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,4,64,128,1,fp8,fp8,0,0.11351466178894043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,4,64,0,1,float16,fp8,0,0.047770669062932335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,8,64,0,1,fp8,fp8,0,0.11461866895357768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,4,64,0,1,fp8,fp8,0,0.11238400141398112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,24,64,128,1,float16,float16,0,0.030005333324273426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,8,64,128,1,float16,float16,0,0.049327999353408813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,8,64,0,1,float16,float16,0,0.049365331729253135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,8,64,128,1,float16,fp8,0,0.04933333396911621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,8,64,128,1,fp8,fp8,0,0.11246933539708455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,8,64,0,1,float16,fp8,0,0.04925866425037384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,24,64,0,1,float16,float16,0,0.030437332888444264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,24,64,128,1,float16,fp8,0,0.029845332105954487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,24,64,128,1,fp8,fp8,0,0.0659093310435613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,24,64,0,1,float16,fp8,0,0.02979733298222224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,24,64,0,1,fp8,fp8,0,0.06584533552328746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,1,64,128,1,float16,float16,0,0.02846933404604594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,1,64,0,1,float16,float16,0,0.028543998797734577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,1,64,128,1,float16,fp8,0,0.028517333169778187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,1,64,128,1,fp8,fp8,0,0.06373866895834605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,1,64,0,1,float16,fp8,0,0.02874133239189784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,1,64,0,1,fp8,fp8,0,0.06309866905212402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,2,64,128,1,float16,float16,0,0.028922667105992634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,2,64,0,1,float16,float16,0,0.02908266584078471
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,2,64,128,1,float16,fp8,0,0.0288426677385966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,2,64,128,1,fp8,fp8,0,0.06320533156394958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,2,64,0,1,float16,fp8,0,0.02870933214823405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,2,64,0,1,fp8,fp8,0,0.06387199958165486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,4,64,128,1,float16,float16,0,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,8,64,128,1,float16,float16,0,0.029904000461101532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,4,64,0,1,float16,float16,0,0.029290666182835896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,4,64,128,1,float16,fp8,0,0.029792000850041706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,4,64,128,1,fp8,fp8,0,0.06479999919732411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,4,64,0,1,float16,fp8,0,0.029802667597929638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,4,64,0,1,fp8,fp8,0,0.06509866813818614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,8,64,0,1,float16,float16,0,0.029498666524887085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,8,64,128,1,float16,fp8,0,0.03025600065787633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,8,64,128,1,fp8,fp8,0,0.06513600051403046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,8,64,0,1,float16,fp8,0,0.030005333324273426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,8,64,0,1,fp8,fp8,0,0.06516266862551372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,24,64,128,1,float16,float16,0,0.021744000415007275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,24,64,0,1,float16,float16,0,0.02199999988079071
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,24,64,128,1,float16,fp8,0,0.021877333521842957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,24,64,128,1,fp8,fp8,0,0.04178133110205332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,24,64,0,1,float16,fp8,0,0.021941334009170532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,24,64,0,1,fp8,fp8,0,0.042538667718569435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,1,64,128,1,float16,float16,0,0.020725333442290623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,1,64,0,1,float16,float16,0,0.02072000006834666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,1,64,128,1,float16,fp8,0,0.020842666427294414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,1,64,128,1,fp8,fp8,0,0.04104000081618627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,1,64,0,1,float16,fp8,0,0.02081599955757459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,1,64,0,1,fp8,fp8,0,0.04055466751257578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,2,64,128,1,float16,float16,0,0.02080533280968666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,2,64,0,1,float16,float16,0,0.021221332252025604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,2,64,128,1,float16,fp8,0,0.021477334201335907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,2,64,128,1,fp8,fp8,0,0.041162667175134025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,2,64,0,1,float16,fp8,0,0.020810666183630627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,2,64,0,1,fp8,fp8,0,0.04159999887148539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,4,64,128,1,float16,float16,0,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,4,64,0,1,float16,float16,0,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,4,64,128,1,float16,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,4,64,128,1,fp8,fp8,0,0.041690667470296226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,4,64,0,1,float16,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,4,64,0,1,fp8,fp8,0,0.04128533353408178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,8,64,128,1,float16,float16,0,0.021514666577180225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,8,64,0,1,float16,float16,0,0.02179733415444692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,8,64,128,1,float16,fp8,0,0.02187199890613556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,8,64,128,1,fp8,fp8,0,0.04191466669241587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,8,64,0,1,float16,fp8,0,0.02179733415444692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,8,64,0,1,fp8,fp8,0,0.04135466615358988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,24,64,128,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,24,64,0,1,float16,float16,0,0.015706667055686314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,24,64,128,1,float16,fp8,0,0.015562667200962702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,1,64,128,1,float16,fp8,0,0.015429332852363586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,24,64,128,1,fp8,fp8,0,0.0283146674434344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,24,64,0,1,float16,fp8,0,0.01586666703224182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,24,64,0,1,fp8,fp8,0,0.027989332874615986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,1,64,128,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,1,64,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,1,64,128,1,fp8,fp8,0,0.027242665489514668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,1,64,0,1,float16,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,1,64,0,1,fp8,fp8,0,0.027471999327341717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,2,64,128,1,float16,float16,0,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,2,64,0,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,2,64,128,1,float16,fp8,0,0.015696000307798386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,2,64,128,1,fp8,fp8,0,0.027642667293548584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,2,64,0,1,float16,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,2,64,0,1,fp8,fp8,0,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,4,64,128,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,4,64,0,1,float16,float16,0,0.01540800059835116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,4,64,128,1,float16,fp8,0,0.015498666713635126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,4,64,128,1,fp8,fp8,0,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,4,64,0,1,float16,fp8,0,0.015568000574906668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,4,64,0,1,fp8,fp8,0,0.02775466690460841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,8,64,128,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,8,64,0,1,float16,float16,0,0.015589332828919092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,8,64,128,1,float16,fp8,0,0.015423999478419622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,8,64,128,1,fp8,fp8,0,0.028058665494124096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,8,64,0,1,float16,fp8,0,0.015728000551462173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,8,64,0,1,fp8,fp8,0,0.0273333340883255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,24,64,128,1,float16,float16,0,0.013999999811251959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,24,64,0,1,float16,float16,0,0.014181333283583323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,24,64,128,1,float16,fp8,0,0.014426667243242264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,24,64,128,1,fp8,fp8,0,0.023872000475724537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,24,64,0,1,float16,fp8,0,0.013946666071812311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,24,64,0,1,fp8,fp8,0,0.023941333095232647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,1,64,128,1,float16,float16,0,0.014101333916187286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,1,64,0,1,float16,float16,0,0.013738666971524557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,1,64,128,1,float16,fp8,0,0.014426667243242264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,1,64,128,1,fp8,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,1,64,0,1,float16,fp8,0,0.014602666099866232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,1,64,0,1,fp8,fp8,0,0.02334933231274287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,2,64,128,1,float16,float16,0,0.013669333110253016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,4,64,128,1,float16,float16,0,0.014165333161751429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,2,64,0,1,float16,float16,0,0.013978666315476099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,2,64,128,1,float16,fp8,0,0.014576000471909841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,2,64,128,1,fp8,fp8,0,0.02290133386850357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,2,64,0,1,float16,fp8,0,0.014266667266686758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,2,64,0,1,fp8,fp8,0,0.023344000180562336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,4,64,0,1,float16,float16,0,0.014314666390419006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,4,64,128,1,float16,fp8,0,0.014309333016475042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,4,64,128,1,fp8,fp8,0,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,4,64,0,1,float16,fp8,0,0.014384000251690546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,4,64,0,1,fp8,fp8,0,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,8,64,128,1,float16,float16,0,0.013989333063364029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,8,64,0,1,float16,float16,0,0.013754667093356451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,8,64,128,1,float16,fp8,0,0.014159999787807465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,8,64,128,1,fp8,fp8,0,0.02351466566324234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,8,64,0,1,float16,fp8,0,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,8,64,0,1,fp8,fp8,0,0.023647998770078022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,24,64,128,1,float16,float16,0,0.012181332955757776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,24,64,0,1,float16,float16,0,0.012373333175977072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,24,64,128,1,float16,fp8,0,0.01320533330241839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,24,64,128,1,fp8,fp8,0,0.018751999984184902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,24,64,0,1,float16,fp8,0,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,24,64,0,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,1,64,128,1,float16,float16,0,0.012506666282812754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,1,64,0,1,float16,float16,0,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,1,64,128,1,float16,fp8,0,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,1,64,128,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,1,64,0,1,float16,fp8,0,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,2,64,0,1,float16,fp8,0,0.013274667163689932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,1,64,0,1,fp8,fp8,0,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,2,64,128,1,float16,float16,0,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,2,64,0,1,float16,float16,0,0.012517333030700684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,2,64,128,1,float16,fp8,0,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,2,64,128,1,fp8,fp8,0,0.02213866760333379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,2,64,0,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,4,64,128,1,float16,float16,0,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,4,64,0,1,float16,float16,0,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,4,64,128,1,float16,fp8,0,0.012538666526476542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,4,64,128,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,4,64,0,1,float16,fp8,0,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,4,64,0,1,fp8,fp8,0,0.01883200059334437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,8,64,128,1,float16,float16,0,0.0122079998254776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,8,64,0,1,float16,float16,0,0.01219733307758967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,8,64,128,1,float16,fp8,0,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,8,64,128,1,fp8,fp8,0,0.01959466685851415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,8,64,0,1,float16,fp8,0,0.01292266696691513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,8,64,0,1,fp8,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,24,64,128,1,float16,float16,0,0.01190399999419848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,24,64,0,1,float16,float16,0,0.011887999872366587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,24,64,128,1,float16,fp8,0,0.012186666329701742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,1,64,0,1,float16,float16,0,0.012469333906968435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,1,64,128,1,float16,fp8,0,0.012309333930412928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,24,64,128,1,fp8,fp8,0,0.018613333503405254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,24,64,0,1,float16,fp8,0,0.01239466667175293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,24,64,0,1,fp8,fp8,0,0.018677332748969395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,1,64,128,1,float16,float16,0,0.012240000069141388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,1,64,128,1,fp8,fp8,0,0.018432000031073887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,1,64,0,1,float16,fp8,0,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,1,64,0,1,fp8,fp8,0,0.018383999665578205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,2,64,128,1,float16,float16,0,0.012159999459981918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,4,64,128,1,float16,float16,0,0.012138667205969492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,2,64,0,1,float16,float16,0,0.012069333344697952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,2,64,128,1,float16,fp8,0,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,2,64,128,1,fp8,fp8,0,0.018272000054518383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,2,64,0,1,float16,fp8,0,0.012693333129088083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,2,64,0,1,fp8,fp8,0,0.018751999984184902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,4,64,0,1,float16,float16,0,0.012266666938861212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,4,64,128,1,float16,fp8,0,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,4,64,128,1,fp8,fp8,0,0.018266666680574417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,4,64,0,1,float16,fp8,0,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,4,64,0,1,fp8,fp8,0,0.0183146670460701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,8,64,128,1,float16,float16,0,0.01231466606259346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,8,64,0,1,float16,float16,0,0.012458667159080505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,8,64,128,1,float16,fp8,0,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,8,64,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,8,64,0,1,float16,fp8,0,0.012432000289360682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,8,64,0,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,24,64,128,1,float16,float16,0,0.011722666521867117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,24,64,0,1,float16,float16,0,0.011866666376590729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,24,64,128,1,float16,fp8,0,0.012074666718641916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,24,64,128,1,fp8,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,24,64,0,1,float16,fp8,0,0.011978667229413986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,24,64,0,1,fp8,fp8,0,0.018250666558742523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,1,64,0,1,fp8,fp8,0,0.018330667167901993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,1,64,128,1,float16,float16,0,0.012170666207869848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,1,64,0,1,float16,float16,0,0.012229333321253458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,1,64,128,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,1,64,128,1,fp8,fp8,0,0.01838933303952217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,1,64,0,1,float16,fp8,0,0.012554666648308435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,2,64,128,1,float16,float16,0,0.011877333124478659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,2,64,0,1,float16,float16,0,0.012298667182525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,2,64,128,1,float16,fp8,0,0.01257066677014033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,2,64,128,1,fp8,fp8,0,0.018837332725524902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,2,64,0,1,float16,fp8,0,0.012618667135636011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,2,64,0,1,fp8,fp8,0,0.018085333208243053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,4,64,128,1,float16,float16,0,0.011973333855470022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,4,64,0,1,float16,float16,0,0.011786667009194693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,4,64,128,1,float16,fp8,0,0.012298667182525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,4,64,128,1,fp8,fp8,0,0.018298666924238205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,4,64,0,1,float16,fp8,0,0.012650666137536367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,4,64,0,1,fp8,fp8,0,0.018437333405017853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,8,64,128,1,float16,float16,0,0.012293333808581034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,8,64,0,1,float16,float16,0,0.011909333368142446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,8,64,128,1,float16,fp8,0,0.01259200026591619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,8,64,128,1,fp8,fp8,0,0.018496000518401463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,8,64,0,1,float16,fp8,0,0.012357333054145178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,8,64,0,1,fp8,fp8,0,0.018250666558742523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,1,64,128,1,float16,float16,0,0.06087466577688853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,1,64,0,1,float16,float16,0,0.06085866689682007
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,1,64,128,1,float16,fp8,0,0.06074133515357971
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,1,64,128,1,fp8,fp8,0,0.17781867583592734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,1,64,0,1,float16,fp8,0,0.06057066718737284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,1,64,0,1,fp8,fp8,0,0.17881067593892416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,2,64,128,1,float16,float16,0,0.06192533175150553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,2,64,0,1,float16,float16,0,0.06122666597366333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,2,64,128,1,float16,fp8,0,0.06145066519578298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,2,64,128,1,fp8,fp8,0,0.1800160010655721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,2,64,0,1,float16,fp8,0,0.060933331648508705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,2,64,0,1,fp8,fp8,0,0.17867734034856161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,4,64,128,1,float16,float16,0,0.06320000191529591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,4,64,0,1,float16,float16,0,0.06273599962393443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,4,64,128,1,float16,fp8,0,0.06337066491444905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,4,64,128,1,fp8,fp8,0,0.18239466349283853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,4,64,0,1,float16,fp8,0,0.06273066500822704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,4,64,0,1,fp8,fp8,0,0.18220800161361694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,8,64,128,1,float16,float16,0,0.063701331615448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,8,64,0,1,float16,float16,0,0.0637546678384145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,8,64,128,1,float16,fp8,0,0.0636053333679835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,8,64,128,1,fp8,fp8,0,0.18424532810846964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,8,64,0,1,float16,fp8,0,0.0633653352657954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,8,64,0,1,fp8,fp8,0,0.18320000171661377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,24,64,128,1,float16,float16,0,0.03585600107908249
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,24,64,0,1,float16,float16,0,0.03630933413902918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,24,64,128,1,float16,fp8,0,0.03544000039498011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,24,64,128,1,fp8,fp8,0,0.10393066207567851
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,24,64,0,1,float16,fp8,0,0.03552533437808355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,24,64,0,1,fp8,fp8,0,0.10150933265686035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,1,64,128,1,float16,float16,0,0.03601066768169403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,1,64,0,1,float16,float16,0,0.03589333345492681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,1,64,128,1,float16,fp8,0,0.0354666660229365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,1,64,128,1,fp8,fp8,0,0.09859733780225118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,1,64,0,1,float16,fp8,0,0.03595199932654699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,1,64,0,1,fp8,fp8,0,0.09814932942390442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,2,64,128,1,float16,float16,0,0.03617066641648611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,2,64,0,1,float16,float16,0,0.036389333506425224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,2,64,128,1,float16,fp8,0,0.03605866680542628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,2,64,128,1,fp8,fp8,0,0.09822400410970052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,2,64,0,1,float16,fp8,0,0.0359253336985906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,2,64,0,1,fp8,fp8,0,0.09889066219329834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,4,64,128,1,float16,float16,0,0.03685333331425985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,4,64,0,1,float16,float16,0,0.03681066632270813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,4,64,128,1,float16,fp8,0,0.03716266651948293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,4,64,128,1,fp8,fp8,0,0.10035733381907146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,4,64,0,1,float16,fp8,0,0.03663466622432073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,4,64,0,1,fp8,fp8,0,0.0995199978351593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,8,64,128,1,float16,float16,0,0.037248000502586365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,8,64,0,1,float16,float16,0,0.03717333326737086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,8,64,128,1,float16,fp8,0,0.036943999429543815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,8,64,128,1,fp8,fp8,0,0.10055466492970784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,24,64,128,1,fp8,fp8,0,0.05922133227189382
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,8,64,0,1,float16,fp8,0,0.03723733375469843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,8,64,0,1,fp8,fp8,0,0.0997920036315918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,24,64,128,1,float16,float16,0,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,24,64,0,1,float16,float16,0,0.02495466669400533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,24,64,128,1,float16,fp8,0,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,24,64,0,1,float16,fp8,0,0.025034666061401367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,24,64,0,1,fp8,fp8,0,0.05963733295599619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,1,64,128,1,float16,float16,0,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,1,64,0,1,float16,float16,0,0.024271999796231587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,1,64,128,1,float16,fp8,0,0.02476266771554947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,1,64,128,1,fp8,fp8,0,0.06001600126425425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,2,64,128,1,float16,fp8,0,0.02476799984773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,1,64,0,1,float16,fp8,0,0.024688000480333965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,1,64,0,1,fp8,fp8,0,0.0602453351020813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,2,64,128,1,float16,float16,0,0.024501333634058636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,2,64,0,1,float16,float16,0,0.024522667129834492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,2,64,128,1,fp8,fp8,0,0.05991466840108236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,2,64,0,1,float16,fp8,0,0.024735999604066212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,2,64,0,1,fp8,fp8,0,0.06025599936644236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,4,64,128,1,float16,float16,0,0.025231999655564625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,4,64,0,1,float16,float16,0,0.02518933266401291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,4,64,128,1,float16,fp8,0,0.025263999899228413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,4,64,128,1,fp8,fp8,0,0.0606879989306132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,4,64,0,1,float16,fp8,0,0.025600001215934753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,8,64,128,1,fp8,fp8,0,0.05938666562239329
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,4,64,0,1,fp8,fp8,0,0.06058133145173391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,8,64,128,1,float16,float16,0,0.025360000630219776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,8,64,0,1,float16,float16,0,0.02548266698916753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,8,64,128,1,float16,fp8,0,0.02571200082699458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,8,64,0,1,float16,fp8,0,0.025978667040665943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,8,64,0,1,fp8,fp8,0,0.059765333930651345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,24,64,128,1,float16,float16,0,0.016688000410795212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,24,64,0,1,float16,float16,0,0.016506666938463848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,24,64,128,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,24,64,128,1,fp8,fp8,0,0.03758399933576584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,24,64,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,24,64,0,1,fp8,fp8,0,0.037621334195137024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,1,64,0,1,fp8,fp8,0,0.03660800059636434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,1,64,128,1,float16,float16,0,0.016437333077192307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,1,64,0,1,float16,float16,0,0.016410666207472484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,1,64,128,1,float16,fp8,0,0.016544000556071598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,1,64,128,1,fp8,fp8,0,0.03606933355331421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,1,64,0,1,float16,fp8,0,0.016762666404247284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,2,64,0,1,fp8,fp8,0,0.03632533301909765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,2,64,128,1,float16,float16,0,0.016341333587964375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,2,64,0,1,float16,float16,0,0.016352000335852306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,2,64,128,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,2,64,128,1,fp8,fp8,0,0.03677333394686381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,2,64,0,1,float16,fp8,0,0.016629333297411602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,4,64,128,1,float16,float16,0,0.016021333634853363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,4,64,0,1,float16,float16,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,4,64,128,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,4,64,128,1,fp8,fp8,0,0.03686933219432831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,4,64,0,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,4,64,0,1,fp8,fp8,0,0.03738666574160258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,8,64,128,1,float16,float16,0,0.01669866715868314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,8,64,0,1,float16,float16,0,0.016719999412695568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,8,64,128,1,float16,fp8,0,0.016645333419243496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,8,64,128,1,fp8,fp8,0,0.03757333258787791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,8,64,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,8,64,0,1,fp8,fp8,0,0.036677333215872444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,24,64,128,1,float16,float16,0,0.014298666268587112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,24,64,0,1,float16,float16,0,0.014282666146755219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,24,64,128,1,float16,fp8,0,0.014287999520699183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,24,64,128,1,fp8,fp8,0,0.027530667682488758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,24,64,0,1,float16,fp8,0,0.014127999544143677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,1,64,0,1,float16,fp8,0,0.014485333114862442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,24,64,0,1,fp8,fp8,0,0.02775999903678894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,1,64,128,1,float16,float16,0,0.014303999642531076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,2,64,0,1,float16,float16,0,0.014325333138306936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,1,64,0,1,float16,float16,0,0.013855999956528345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,1,64,128,1,float16,fp8,0,0.01470400020480156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,1,64,128,1,fp8,fp8,0,0.026874666412671406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,1,64,0,1,fp8,fp8,0,0.02658133457104365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,2,64,128,1,float16,float16,0,0.013722666849692663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,2,64,128,1,float16,fp8,0,0.014511999984582266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,2,64,128,1,fp8,fp8,0,0.026586666703224182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,2,64,0,1,float16,fp8,0,0.014442666123310724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,2,64,0,1,fp8,fp8,0,0.02661866694688797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,4,64,128,1,float16,float16,0,0.014426667243242264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,4,64,0,1,float16,float16,0,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,4,64,128,1,float16,fp8,0,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,4,64,128,1,fp8,fp8,0,0.02641066660483678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,4,64,0,1,float16,fp8,0,0.01451733335852623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,4,64,0,1,fp8,fp8,0,0.026687999566396076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,8,64,128,1,float16,float16,0,0.014373333503802618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,8,64,0,1,float16,float16,0,0.014266667266686758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,8,64,128,1,float16,fp8,0,0.014629332969586054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,8,64,128,1,fp8,fp8,0,0.026554666459560394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,8,64,0,1,float16,fp8,0,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,24,64,128,1,fp8,fp8,0,0.02229333420594533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,24,64,0,1,float16,fp8,0,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,8,64,0,1,fp8,fp8,0,0.026709333062171936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,24,64,128,1,float16,float16,0,0.012367999802033106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,24,64,0,1,float16,float16,0,0.01232533281048139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,24,64,128,1,float16,fp8,0,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,24,64,0,1,fp8,fp8,0,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,1,64,128,1,float16,float16,0,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,1,64,0,1,float16,float16,0,0.012522666404644648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,1,64,128,1,float16,fp8,0,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,2,64,0,1,float16,float16,0,0.01292266696691513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,1,64,128,1,fp8,fp8,0,0.022618666291236877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,1,64,0,1,float16,fp8,0,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,1,64,0,1,fp8,fp8,0,0.022789334257443745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,2,64,128,1,float16,float16,0,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,2,64,128,1,float16,fp8,0,0.01314666618903478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,2,64,128,1,fp8,fp8,0,0.021562665700912476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,2,64,0,1,float16,fp8,0,0.013503999759753546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,2,64,0,1,fp8,fp8,0,0.022597332795461018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,4,64,128,1,float16,float16,0,0.012485332787036896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,4,64,0,1,float16,float16,0,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,4,64,128,1,float16,fp8,0,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,4,64,128,1,fp8,fp8,0,0.02257599929968516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,4,64,0,1,float16,fp8,0,0.01258133351802826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,4,64,0,1,fp8,fp8,0,0.022821334501107533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,8,64,0,1,float16,fp8,0,0.013514666507641474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,8,64,128,1,float16,float16,0,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,8,64,0,1,float16,float16,0,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,8,64,128,1,float16,fp8,0,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,8,64,128,1,fp8,fp8,0,0.022384000321229298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,8,64,0,1,fp8,fp8,0,0.021664001047611237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,24,64,128,1,float16,float16,0,0.012165332833925882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,24,64,0,1,float16,float16,0,0.012053333222866058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,24,64,128,1,float16,fp8,0,0.012543999900420507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,24,64,128,1,fp8,fp8,0,0.018800000349680584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,24,64,0,1,float16,fp8,0,0.012367999802033106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,24,64,0,1,fp8,fp8,0,0.018735999862353008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,1,64,128,1,float16,float16,0,0.011754666765530905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,1,64,0,1,float16,float16,0,0.012304000556468964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,1,64,128,1,float16,fp8,0,0.012389333297808966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,1,64,128,1,fp8,fp8,0,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,2,64,128,1,float16,fp8,0,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,2,64,128,1,fp8,fp8,0,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,1,64,0,1,float16,fp8,0,0.01240533341964086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,4,64,128,1,float16,float16,0,0.012053333222866058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,1,64,0,1,fp8,fp8,0,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,2,64,128,1,float16,float16,0,0.012181332955757776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,2,64,0,1,float16,float16,0,0.01184533288081487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,2,64,0,1,float16,fp8,0,0.01221866657336553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,2,64,0,1,fp8,fp8,0,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,4,64,0,1,float16,float16,0,0.012272000312805176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,4,64,128,1,float16,fp8,0,0.012245333443085352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,4,64,128,1,fp8,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,4,64,0,1,float16,fp8,0,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,4,64,0,1,fp8,fp8,0,0.018863999595244724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,8,64,128,1,float16,float16,0,0.01146666705608368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,8,64,0,1,float16,float16,0,0.011957333733638128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,8,64,128,1,float16,fp8,0,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,8,64,128,1,fp8,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,8,64,0,1,float16,fp8,0,0.012309333930412928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,8,64,0,1,fp8,fp8,0,0.018730666488409042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,24,64,128,1,float16,float16,0,0.011557333171367645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,24,64,0,1,float16,float16,0,0.011871999750534693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,24,64,128,1,float16,fp8,0,0.01157333329319954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,24,64,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,24,64,0,1,float16,fp8,0,0.012133333832025528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,24,64,0,1,fp8,fp8,0,0.018543999642133713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,1,64,128,1,float16,float16,0,0.011999999483426413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,1,64,0,1,float16,float16,0,0.011829332758982977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,1,64,128,1,float16,fp8,0,0.012096000214417776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,1,64,128,1,fp8,fp8,0,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,1,64,0,1,float16,fp8,0,0.011658667276302973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,1,64,0,1,fp8,fp8,0,0.018602666755517323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,2,64,128,1,float16,float16,0,0.01179733375708262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,2,64,0,1,float16,float16,0,0.011733333269755045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,2,64,128,1,float16,fp8,0,0.012426666915416718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,2,64,128,1,fp8,fp8,0,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,2,64,0,1,float16,fp8,0,0.012693333129088083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,2,64,0,1,fp8,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,4,64,0,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,4,64,128,1,float16,float16,0,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,4,64,0,1,float16,float16,0,0.011792000383138657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,4,64,128,1,float16,fp8,0,0.012517333030700684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,4,64,128,1,fp8,fp8,0,0.018432000031073887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,4,64,0,1,float16,fp8,0,0.012618667135636011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,8,64,128,1,float16,float16,0,0.012229333321253458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,8,64,0,1,float16,float16,0,0.011802667131026586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,8,64,128,1,float16,fp8,0,0.012597333639860153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,8,64,128,1,fp8,fp8,0,0.018511999398469925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,8,64,0,1,float16,fp8,0,0.012096000214417776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,8,64,0,1,fp8,fp8,0,0.01871466636657715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,24,64,128,1,float16,float16,0,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,24,64,0,1,float16,float16,0,0.011754666765530905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,24,64,128,1,float16,fp8,0,0.012138667205969492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,24,64,128,1,fp8,fp8,0,0.01834133391578992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,24,64,0,1,float16,fp8,0,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,24,64,0,1,fp8,fp8,0,0.0183999997874101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,1,64,128,1,float16,float16,0,0.011551999797423681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,1,64,0,1,float16,float16,0,0.011685332904259363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,1,64,128,1,float16,fp8,0,0.011882666498422623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,1,64,128,1,fp8,fp8,0,0.017658667018016178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,1,64,0,1,float16,fp8,0,0.012330666184425354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,1,64,0,1,fp8,fp8,0,0.01836266616980235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,2,64,128,1,float16,float16,0,0.011962667107582092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,2,64,0,1,float16,float16,0,0.01166933278242747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,2,64,128,1,float16,fp8,0,0.011930666863918304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,2,64,128,1,fp8,fp8,0,0.018298666924238205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,2,64,0,1,float16,fp8,0,0.011717333147923151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,2,64,0,1,fp8,fp8,0,0.018250666558742523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,4,64,128,1,float16,float16,0,0.012074666718641916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,4,64,0,1,float16,float16,0,0.012096000214417776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,4,64,128,1,float16,fp8,0,0.01211200033624967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,4,64,128,1,fp8,fp8,0,0.018101333330074947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,8,64,128,1,float16,fp8,0,0.012586666891972223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,4,64,0,1,float16,fp8,0,0.012362666428089142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,4,64,0,1,fp8,fp8,0,0.01773333301146825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,8,64,0,1,fp8,fp8,0,0.01860800012946129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,8,64,128,1,float16,float16,0,0.011685332904259363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,8,64,0,1,float16,float16,0,0.011930666863918304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,8,64,128,1,fp8,fp8,0,0.0184906671444575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,8,64,0,1,float16,fp8,0,0.012005332857370377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,1,64,128,1,float16,float16,0,1.4268693923950195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,1,64,128,1,float16,fp8,0,1.406010627746582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,1,64,128,1,fp8,fp8,0,1.8607999483744304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,2,64,128,1,float16,float16,0,1.4496639569600422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,2,64,128,1,float16,fp8,0,1.4357973734537761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,2,64,128,1,fp8,fp8,0,1.8894507090250652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,4,64,128,1,float16,float16,0,1.4691680272420247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,1,64,0,1,float16,float16,0,9.591477076212565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,1,64,0,1,fp8,fp8,0,8.658224105834961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,1,64,0,1,float16,fp8,0,9.574607849121094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,2,64,0,1,float16,float16,0,9.608218510945639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,4,64,128,1,float16,fp8,0,1.4594613711039226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,4,64,128,1,fp8,fp8,0,1.9000053405761719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,2,64,0,1,float16,fp8,0,9.432026545206705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,2,64,0,1,fp8,fp8,0,8.680314381917318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,8,64,128,1,float16,float16,0,1.5066293080647786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,8,64,128,1,float16,fp8,0,1.4872852961222331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,16,64,128,1,float16,float16,0,0.8075359662373861
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,8,64,128,1,fp8,fp8,0,1.947722593943278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,4,64,0,1,float16,float16,0,9.632240295410156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,4,64,0,1,fp8,fp8,0,8.825733184814453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,16,64,128,1,float16,fp8,0,0.8057706356048584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,4,64,0,1,float16,fp8,0,9.75388272603353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,16,64,128,1,fp8,fp8,0,1.0644746621449788
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,8,64,0,1,float16,float16,0,9.599653244018555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,1,64,128,1,float16,float16,0,0.737178643544515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,16,64,0,1,float16,float16,0,4.980661392211914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,8,64,0,1,fp8,fp8,0,8.747429529825846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,1,64,128,1,float16,fp8,0,0.7262773513793945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,8,64,0,1,float16,fp8,0,9.803749084472656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,1,64,128,1,fp8,fp8,0,0.9665760199228922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,16,64,0,1,float16,fp8,0,4.931957244873047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,16,64,0,1,fp8,fp8,0,4.488383928934733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,2,64,128,1,float16,fp8,0,0.7397119998931885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,2,64,128,1,float16,float16,0,0.7497866948445638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,1,64,0,1,float16,float16,0,4.884586652119954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,2,64,128,1,fp8,fp8,0,0.9890399773915609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,1,64,0,1,float16,fp8,0,4.858400026957194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,1,64,0,1,fp8,fp8,0,4.395306587219238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,4,64,128,1,float16,float16,0,0.7586666742960612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,4,64,128,1,float16,fp8,0,0.7493066787719727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,2,64,0,1,float16,float16,0,4.914266586303711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,4,64,128,1,fp8,fp8,0,0.9962133566538492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,8,64,128,1,float16,float16,0,0.7726293404897054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,2,64,0,1,float16,fp8,0,4.850735982259114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,2,64,0,1,fp8,fp8,0,4.405904134114583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,8,64,128,1,float16,fp8,0,0.7693599859873453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,4,64,0,1,float16,float16,0,4.911194801330566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,8,64,128,1,fp8,fp8,0,1.0214293003082275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,16,64,128,1,float16,float16,0,0.4312479893366496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,4,64,0,1,fp8,fp8,0,4.420805295308431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,4,64,0,1,float16,fp8,0,4.882181485493978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,16,64,128,1,float16,fp8,0,0.43800532817840576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,8,64,0,1,float16,float16,0,4.909712155659993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,16,64,128,1,fp8,fp8,0,0.5740213394165039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,16,64,0,1,float16,float16,0,2.5094827016194663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,1,64,128,1,float16,float16,0,0.40642134348551434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,8,64,0,1,float16,fp8,0,4.846975962320964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,8,64,0,1,fp8,fp8,0,4.44757874806722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,16,64,0,1,float16,fp8,0,2.4969654083251953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,1,64,128,1,float16,fp8,0,0.40196800231933594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,16,64,0,1,fp8,fp8,0,2.350927988688151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,1,64,128,1,fp8,fp8,0,0.528661330540975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,1,64,0,1,float16,float16,0,2.501034736633301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,2,64,128,1,float16,float16,0,0.4069013198216756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,2,64,128,1,float16,fp8,0,0.40189866224924725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,2,64,128,1,fp8,fp8,0,0.5316799879074097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,1,64,0,1,float16,fp8,0,2.482581297556559
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,1,64,0,1,fp8,fp8,0,2.274186611175537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,4,64,128,1,float16,float16,0,0.4092479944229126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,2,64,0,1,float16,float16,0,2.4761120478312173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,4,64,128,1,float16,fp8,0,0.40889068444569904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,4,64,128,1,fp8,fp8,0,0.5373813311258951
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,2,64,0,1,float16,fp8,0,2.5100746154785156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,2,64,0,1,fp8,fp8,0,2.293290615081787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,8,64,128,1,float16,float16,0,0.41767998536427814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,4,64,0,1,float16,float16,0,2.4741439819335938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,8,64,128,1,float16,fp8,0,0.417520006497701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,8,64,128,1,fp8,fp8,0,0.5482240120569865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,4,64,0,1,float16,fp8,0,2.478778680165609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,4,64,0,1,fp8,fp8,0,2.2797813415527344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,8,64,0,1,float16,float16,0,2.4912586212158203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,16,64,128,1,float16,float16,0,0.2908906737963359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,16,64,128,1,float16,fp8,0,0.29045333464940387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,8,64,0,1,float16,fp8,0,2.492186705271403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,16,64,0,1,float16,float16,0,1.37937593460083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,16,64,0,1,float16,fp8,0,1.3604052861531575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,16,64,128,1,fp8,fp8,0,0.3556160132090251
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,8,64,0,1,fp8,fp8,0,2.2944480578104653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,1,64,128,1,float16,float16,0,0.2910613417625427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,16,64,0,1,fp8,fp8,0,1.1905653476715088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,1,64,0,1,float16,float16,0,1.3610773086547852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,1,64,128,1,float16,fp8,0,0.2918933431307475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,1,64,128,1,fp8,fp8,0,0.3541119893391927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,2,64,128,1,float16,float16,0,0.29046932856241864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,1,64,0,1,float16,fp8,0,1.363935947418213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,1,64,0,1,fp8,fp8,0,1.1796159744262695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,2,64,128,1,float16,fp8,0,0.2924373348553975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,2,64,128,1,fp8,fp8,0,0.3547146717707316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,2,64,0,1,float16,float16,0,1.3686240514119465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,4,64,128,1,float16,float16,0,0.28969067335128784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,2,64,0,1,float16,fp8,0,1.3617760340372722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,2,64,0,1,fp8,fp8,0,1.1733760039011638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,4,64,128,1,float16,fp8,0,0.29107733567555744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,4,64,128,1,fp8,fp8,0,0.35443735122680664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,4,64,0,1,float16,float16,0,1.3579306602478027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,8,64,128,1,float16,float16,0,0.2894773284594218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,4,64,0,1,float16,fp8,0,1.3715039889017742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,4,64,0,1,fp8,fp8,0,1.1805493036905925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,8,64,128,1,float16,fp8,0,0.2906186580657959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,8,64,128,1,fp8,fp8,0,0.3540000120798747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,8,64,0,1,float16,float16,0,1.3529225985209148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,8,64,0,1,float16,fp8,0,1.3612000147501628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,8,64,0,1,fp8,fp8,0,1.18123197555542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,1,64,128,1,float16,float16,0,1.0690346558888753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,1,64,128,1,float16,fp8,0,1.048810640970866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,1,64,128,1,fp8,fp8,0,1.3955893516540527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,2,64,128,1,float16,float16,0,1.0876746972401936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,2,64,128,1,float16,fp8,0,1.0786453088124592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,2,64,128,1,fp8,fp8,0,1.43723726272583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,1,64,0,1,float16,float16,0,5.683818817138672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,4,64,128,1,float16,float16,0,1.1027413209279378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,1,64,0,1,float16,fp8,0,5.5278879801432295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,1,64,0,1,fp8,fp8,0,5.071189244588216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,2,64,0,1,float16,float16,0,5.668634414672852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,4,64,128,1,float16,fp8,0,1.1046240329742432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,4,64,128,1,fp8,fp8,0,1.4403626124064128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,2,64,0,1,float16,fp8,0,5.7045440673828125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,2,64,0,1,fp8,fp8,0,5.089983940124512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,8,64,128,1,float16,float16,0,1.1373120148976643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,8,64,128,1,float16,fp8,0,1.1222346623738606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,4,64,0,1,float16,float16,0,5.644890467325847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,8,64,128,1,fp8,fp8,0,1.4850133260091145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,4,64,0,1,float16,fp8,0,5.582485198974609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,4,64,0,1,fp8,fp8,0,5.107280095418294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,16,64,128,1,float16,float16,0,0.6040746768315634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,16,64,128,1,float16,fp8,0,0.6096320152282715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,8,64,0,1,float16,float16,0,5.69926389058431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,16,64,128,1,fp8,fp8,0,0.8022613525390625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,16,64,0,1,float16,float16,0,2.866618792215983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,1,64,128,1,float16,float16,0,0.5598506530125936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,8,64,0,1,float16,fp8,0,5.594858805338542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,16,64,0,1,float16,fp8,0,2.8825867970784507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,16,64,0,1,fp8,fp8,0,2.6651573181152344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,8,64,0,1,fp8,fp8,0,5.21127986907959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,1,64,128,1,float16,fp8,0,0.5456159909566244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,1,64,128,1,fp8,fp8,0,0.7329760392506918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,2,64,128,1,float16,float16,0,0.5616480112075806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,1,64,0,1,float16,float16,0,2.8515307108561196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,2,64,128,1,float16,fp8,0,0.5527520179748535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,1,64,0,1,float16,fp8,0,2.8191092809041343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,1,64,0,1,fp8,fp8,0,2.6011679967244468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,2,64,128,1,fp8,fp8,0,0.7406187057495117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,2,64,0,1,float16,float16,0,2.8058293660481772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,4,64,128,1,float16,float16,0,0.5670986572901408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,4,64,128,1,float16,fp8,0,0.5620319843292236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,4,64,128,1,fp8,fp8,0,0.7517813046773275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,2,64,0,1,float16,fp8,0,2.8016160329182944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,2,64,0,1,fp8,fp8,0,2.6107892990112305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,8,64,128,1,float16,float16,0,0.5806986490885416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,4,64,0,1,float16,float16,0,2.8338826497395835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,8,64,128,1,float16,fp8,0,0.5777279933293661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,8,64,128,1,fp8,fp8,0,0.7712906996409098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,4,64,0,1,float16,fp8,0,2.7834399541219077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,4,64,0,1,fp8,fp8,0,2.6041386922200522
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,16,64,128,1,float16,float16,0,0.3293760021527608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,16,64,128,1,float16,fp8,0,0.3335893154144287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,8,64,0,1,float16,float16,0,2.863727887471517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,16,64,128,1,fp8,fp8,0,0.43556801478068036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,16,64,0,1,float16,float16,0,1.498517354329427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,8,64,0,1,float16,fp8,0,2.8565918604532876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,8,64,0,1,fp8,fp8,0,2.6234985987345376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,1,64,128,1,float16,float16,0,0.31068267424901325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,16,64,0,1,float16,fp8,0,1.497989336649577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,1,64,128,1,float16,fp8,0,0.3046773274739583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,16,64,0,1,fp8,fp8,0,1.3319893678029378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,1,64,128,1,fp8,fp8,0,0.40277334054311115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,1,64,0,1,float16,float16,0,1.4782346089680989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,1,64,0,1,float16,fp8,0,1.4734880129496257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,2,64,128,1,float16,float16,0,0.3121386567751567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,1,64,0,1,fp8,fp8,0,1.291215976079305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,2,64,128,1,float16,fp8,0,0.307151993115743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,2,64,0,1,float16,float16,0,1.4694132804870605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,2,64,128,1,fp8,fp8,0,0.4042559862136841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,4,64,128,1,float16,float16,0,0.3145013252894084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,2,64,0,1,float16,fp8,0,1.4644160270690918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,2,64,0,1,fp8,fp8,0,1.3059306939442952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,4,64,128,1,float16,fp8,0,0.3105600078900655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,4,64,128,1,fp8,fp8,0,0.41017067432403564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,4,64,0,1,float16,float16,0,1.471962610880534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,8,64,128,1,float16,float16,0,0.3181706666946411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,4,64,0,1,float16,fp8,0,1.471295992533366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,4,64,0,1,fp8,fp8,0,1.3012479941050212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,8,64,128,1,float16,fp8,0,0.31779734293619794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,8,64,128,1,fp8,fp8,0,0.4201066493988037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,8,64,0,1,float16,float16,0,1.4879627227783203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,16,64,128,1,float16,float16,0,0.2243679960568746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,8,64,0,1,float16,fp8,0,1.482800006866455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,16,64,128,1,float16,fp8,0,0.22336532672246298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,8,64,0,1,fp8,fp8,0,1.310197353363037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,16,64,0,1,float16,float16,0,0.7711359659830729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,16,64,128,1,fp8,fp8,0,0.27595200141270954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,16,64,0,1,float16,fp8,0,0.7712533473968506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,16,64,0,1,fp8,fp8,0,0.7129706541697184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,1,64,128,1,float16,float16,0,0.22300267219543457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,1,64,0,1,float16,float16,0,0.769221305847168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,1,64,128,1,float16,fp8,0,0.223306675752004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,1,64,128,1,fp8,fp8,0,0.27405865987141925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,1,64,0,1,float16,fp8,0,0.7738080024719238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,2,64,128,1,float16,float16,0,0.22324800491333008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,1,64,0,1,fp8,fp8,0,0.710197369257609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,2,64,128,1,float16,fp8,0,0.22365333636601767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,2,64,0,1,float16,float16,0,0.769599994023641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,2,64,128,1,fp8,fp8,0,0.27402132749557495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,2,64,0,1,float16,fp8,0,0.7733173370361328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,4,64,128,1,float16,float16,0,0.22369066874186197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,2,64,0,1,fp8,fp8,0,0.7133866945902506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,4,64,128,1,float16,fp8,0,0.22429333130518594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,4,64,0,1,float16,float16,0,0.7694239616394043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,4,64,128,1,fp8,fp8,0,0.27501867214838666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,4,64,0,1,float16,fp8,0,0.7723093032836914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,8,64,128,1,float16,float16,0,0.22374399503072104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,4,64,0,1,fp8,fp8,0,0.7127466996510824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,8,64,128,1,float16,fp8,0,0.2248213291168213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,8,64,0,1,float16,float16,0,0.7738933563232422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,8,64,128,1,fp8,fp8,0,0.2746293346087138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,8,64,0,1,float16,fp8,0,0.7707146803538004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,8,64,0,1,fp8,fp8,0,0.7143359978993734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,1,64,128,1,float16,float16,0,0.8826506932576498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,1,64,128,1,float16,fp8,0,0.8718346754709879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,1,64,128,1,fp8,fp8,0,1.1679306825002034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,2,64,128,1,float16,float16,0,0.9038240114847819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,2,64,128,1,float16,fp8,0,0.8951839605967203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,2,64,128,1,fp8,fp8,0,1.1885440349578857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,1,64,0,1,float16,float16,0,3.960399945576986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,1,64,0,1,fp8,fp8,0,3.609269460042318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,4,64,128,1,float16,float16,0,0.9155733585357666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,1,64,0,1,float16,fp8,0,3.969658533732096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,4,64,128,1,float16,fp8,0,0.9129439989725748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,2,64,0,1,float16,float16,0,4.025967915852864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,2,64,0,1,float16,fp8,0,4.06110413869222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,4,64,128,1,fp8,fp8,0,1.1993866761525471
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,2,64,0,1,fp8,fp8,0,3.6426080067952475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,8,64,128,1,float16,float16,0,0.9478773276011149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,4,64,0,1,float16,float16,0,4.0244747797648115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,8,64,128,1,float16,fp8,0,0.9404160181681315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,8,64,128,1,fp8,fp8,0,1.2320000330607097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,4,64,0,1,float16,fp8,0,4.036527951558431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,16,64,128,1,float16,float16,0,0.5041386683781942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,4,64,0,1,fp8,fp8,0,3.7107467651367188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,16,64,128,1,float16,fp8,0,0.5048906803131104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,8,64,0,1,float16,float16,0,4.089733441670735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,16,64,128,1,fp8,fp8,0,0.6736160119374593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,16,64,0,1,float16,float16,0,2.0651466051737466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,1,64,128,1,float16,float16,0,0.46343998114267987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,8,64,0,1,float16,fp8,0,4.0418345133463545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,8,64,0,1,fp8,fp8,0,3.7175572713216147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,16,64,0,1,float16,fp8,0,2.0652052561442056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,1,64,128,1,float16,fp8,0,0.4601493279139201
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,16,64,0,1,fp8,fp8,0,1.9450987180074055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,1,64,128,1,fp8,fp8,0,0.6144479910532633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,1,64,0,1,float16,float16,0,2.0282719930013022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,2,64,128,1,float16,float16,0,0.4697226683298747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,2,64,128,1,float16,fp8,0,0.465178648630778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,1,64,0,1,float16,fp8,0,2.002122720082601
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,2,64,128,1,fp8,fp8,0,0.6213173468907675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,1,64,0,1,fp8,fp8,0,1.8675467173258464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,2,64,0,1,float16,float16,0,2.0332694053649902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,4,64,128,1,float16,float16,0,0.47322134176890057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,4,64,128,1,float16,fp8,0,0.4675840139389038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,2,64,0,1,float16,fp8,0,2.024474620819092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,4,64,128,1,fp8,fp8,0,0.6261226733525594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,2,64,0,1,fp8,fp8,0,1.8742666244506836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,4,64,0,1,float16,float16,0,2.034282684326172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,8,64,128,1,float16,float16,0,0.48104532559712726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,4,64,0,1,float16,fp8,0,2.0470453898111978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,8,64,128,1,float16,fp8,0,0.4861706495285034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,4,64,0,1,fp8,fp8,0,1.8587466875712078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,8,64,128,1,fp8,fp8,0,0.6439520120620728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,16,64,128,1,float16,float16,0,0.2786986629168193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,8,64,0,1,float16,float16,0,2.02181339263916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,16,64,128,1,float16,fp8,0,0.2824373245239258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,8,64,0,1,float16,fp8,0,2.0549333890279136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,16,64,128,1,fp8,fp8,0,0.37165868282318115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,16,64,0,1,float16,float16,0,1.0823413530985515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,8,64,0,1,fp8,fp8,0,1.9012160301208496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,1,64,128,1,float16,float16,0,0.2610986630121867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,16,64,0,1,float16,fp8,0,1.083743969599406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,16,64,0,1,fp8,fp8,0,0.9563466707865397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,1,64,128,1,float16,fp8,0,0.2600640058517456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,1,64,0,1,float16,float16,0,1.0752320289611816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,1,64,128,1,fp8,fp8,0,0.3423786560694377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,2,64,128,1,float16,float16,0,0.264085332552592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,1,64,0,1,float16,fp8,0,1.0563093026479085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,1,64,0,1,fp8,fp8,0,0.9229013125101725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,2,64,128,1,float16,fp8,0,0.262170672416687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,2,64,0,1,float16,float16,0,1.062282641728719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,2,64,128,1,fp8,fp8,0,0.34464534123738605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,4,64,128,1,float16,float16,0,0.26582932472229004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,2,64,0,1,float16,fp8,0,1.0612586339314778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,2,64,0,1,fp8,fp8,0,0.9307200113932291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,4,64,128,1,float16,fp8,0,0.2653759916623433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,4,64,0,1,float16,float16,0,1.060149351755778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,4,64,0,1,float16,fp8,0,1.0669279893239338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,4,64,128,1,fp8,fp8,0,0.3480106592178345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,8,64,128,1,float16,float16,0,0.2701759934425354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,4,64,0,1,fp8,fp8,0,0.9361066818237305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,8,64,128,1,float16,fp8,0,0.2715466618537903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,8,64,128,1,fp8,fp8,0,0.35416531562805176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,8,64,0,1,float16,float16,0,1.0707200368245442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,16,64,128,1,float16,float16,0,0.19119999806086221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,8,64,0,1,float16,fp8,0,1.0700853665669758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,8,64,0,1,fp8,fp8,0,0.9426453113555908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,16,64,128,1,float16,fp8,0,0.1925599972407023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,16,64,0,1,float16,float16,0,0.5630826552708944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,16,64,128,1,fp8,fp8,0,0.23546665906906128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,16,64,0,1,float16,fp8,0,0.5629599889119467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,1,64,128,1,float16,float16,0,0.19023466110229492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,16,64,0,1,fp8,fp8,0,0.5238986810048422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,1,64,0,1,float16,float16,0,0.562064011891683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,1,64,128,1,float16,fp8,0,0.1911733349164327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,1,64,128,1,fp8,fp8,0,0.2342346707979838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,1,64,0,1,float16,fp8,0,0.5599786837895712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,2,64,128,1,float16,float16,0,0.190720001856486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,1,64,0,1,fp8,fp8,0,0.52129065990448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,2,64,0,1,float16,fp8,0,0.5628480116526285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,2,64,0,1,float16,float16,0,0.5646933317184448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,2,64,128,1,float16,fp8,0,0.18994667132695517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,2,64,128,1,fp8,fp8,0,0.23544534047444662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,2,64,0,1,fp8,fp8,0,0.5202560027440389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,4,64,128,1,float16,float16,0,0.19083199898401895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,4,64,128,1,float16,fp8,0,0.19058134158452353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,4,64,0,1,float16,float16,0,0.5641866525014242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,4,64,128,1,fp8,fp8,0,0.23559999465942383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,4,64,0,1,float16,fp8,0,0.5638506809870402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,8,64,128,1,float16,float16,0,0.19022399187088013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,4,64,0,1,fp8,fp8,0,0.5258239905039469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,8,64,0,1,float16,fp8,0,0.5645866791407267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,8,64,128,1,float16,fp8,0,0.19078399737675986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,8,64,0,1,float16,float16,0,0.5643893480300903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,8,64,128,1,fp8,fp8,0,0.2365866700808207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,8,64,0,1,fp8,fp8,0,0.5236639976501465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,1,64,128,1,float16,float16,0,1.3900480270385742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,1,64,128,1,float16,fp8,0,1.3802560170491536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,1,64,128,1,fp8,fp8,0,1.832357406616211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,2,64,128,1,float16,float16,0,1.436570644378662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,2,64,128,1,float16,fp8,0,1.4043307304382324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,1,64,0,1,float16,float16,0,5.216026624043782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,2,64,128,1,fp8,fp8,0,1.8630666732788086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,1,64,0,1,fp8,fp8,0,4.770378748575847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,4,64,128,1,float16,float16,0,1.4386080106099446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,1,64,0,1,float16,fp8,0,5.242682774861653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,2,64,0,1,float16,float16,0,5.319216092427571
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,4,64,128,1,float16,fp8,0,1.4366614023844402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,2,64,0,1,float16,fp8,0,5.358773549397786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,2,64,0,1,fp8,fp8,0,4.83460267384847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,4,64,128,1,fp8,fp8,0,1.8783733050028484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,8,64,128,1,float16,float16,0,1.5073973337809246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,8,64,128,1,float16,fp8,0,1.4792532920837402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,4,64,0,1,float16,float16,0,5.335594813028972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,8,64,128,1,fp8,fp8,0,1.9368586540222168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,16,64,128,1,float16,float16,0,0.7951359748840332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,4,64,0,1,float16,fp8,0,5.219850540161133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,4,64,0,1,fp8,fp8,0,4.862501462300618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,16,64,128,1,float16,fp8,0,0.7858453591664633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,8,64,0,1,float16,float16,0,5.357397079467773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,16,64,128,1,fp8,fp8,0,1.041210651397705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,16,64,0,1,float16,float16,0,2.7139145533243814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,1,64,128,1,float16,float16,0,0.7088800271352133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,8,64,0,1,float16,fp8,0,5.35371208190918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,8,64,0,1,fp8,fp8,0,4.9076534907023115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,16,64,0,1,float16,fp8,0,2.68778133392334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,1,64,128,1,float16,fp8,0,0.6986880302429199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,16,64,0,1,fp8,fp8,0,2.53548796971639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,1,64,128,1,fp8,fp8,0,0.9375200271606445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,1,64,0,1,float16,float16,0,2.6466827392578125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,2,64,128,1,float16,float16,0,0.7247359752655029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,1,64,0,1,fp8,fp8,0,2.438213348388672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,2,64,128,1,float16,fp8,0,0.7152106761932373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,1,64,0,1,float16,fp8,0,2.616005261739095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,2,64,128,1,fp8,fp8,0,0.9500532944997152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,4,64,128,1,float16,float16,0,0.73198930422465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,2,64,0,1,float16,float16,0,2.626021385192871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,4,64,128,1,float16,fp8,0,0.7189760208129883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,2,64,0,1,float16,fp8,0,2.660282611846924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,2,64,0,1,fp8,fp8,0,2.4395039876302085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,4,64,128,1,fp8,fp8,0,0.9654186566670736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,4,64,0,1,float16,float16,0,2.6522347132364907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,8,64,128,1,float16,float16,0,0.7454346815745035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,8,64,128,1,float16,fp8,0,0.7396159966786703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,4,64,0,1,float16,fp8,0,2.645520051320394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,8,64,128,1,fp8,fp8,0,0.990991989771525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,4,64,0,1,fp8,fp8,0,2.450256029764811
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,16,64,128,1,float16,float16,0,0.403872013092041
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,8,64,0,1,float16,float16,0,2.6477972666422525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,16,64,128,1,float16,fp8,0,0.40861332416534424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,16,64,128,1,fp8,fp8,0,0.5421866575876871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,8,64,0,1,float16,fp8,0,2.667642593383789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,16,64,0,1,float16,float16,0,1.3860054016113281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,8,64,0,1,fp8,fp8,0,2.4767093658447266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,1,64,128,1,float16,float16,0,0.37489600976308185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,16,64,0,1,float16,fp8,0,1.3802453676859539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,16,64,0,1,fp8,fp8,0,1.306069294611613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,1,64,128,1,float16,fp8,0,0.37347733974456787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,1,64,128,1,fp8,fp8,0,0.49742400646209717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,1,64,0,1,float16,float16,0,1.3417226473490398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,2,64,128,1,float16,float16,0,0.37802668412526447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,2,64,128,1,float16,fp8,0,0.3778453270594279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,1,64,0,1,fp8,fp8,0,1.2483733495076497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,1,64,0,1,float16,fp8,0,1.3443412780761719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,2,64,128,1,fp8,fp8,0,0.502618670463562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,2,64,0,1,float16,float16,0,1.3643306096394856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,2,64,0,1,float16,fp8,0,1.3380746841430664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,4,64,128,1,float16,float16,0,0.3850133419036865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,2,64,0,1,fp8,fp8,0,1.2520639896392822
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,4,64,128,1,float16,fp8,0,0.3793333371480306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,4,64,128,1,fp8,fp8,0,0.5083146492640177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,4,64,0,1,float16,float16,0,1.355072021484375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,8,64,128,1,float16,float16,0,0.3914986848831177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,4,64,0,1,float16,fp8,0,1.3626453081766765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,8,64,128,1,float16,fp8,0,0.38714667161305744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,4,64,0,1,fp8,fp8,0,1.2556053002675374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,8,64,128,1,fp8,fp8,0,0.522325317064921
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,16,64,128,1,float16,float16,0,0.2272640069325765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,8,64,0,1,float16,float16,0,1.3605920473734539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,8,64,0,1,float16,fp8,0,1.3602986335754395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,16,64,128,1,float16,fp8,0,0.2297066648801168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,8,64,0,1,fp8,fp8,0,1.2845760186513264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,16,64,0,1,float16,float16,0,0.7309280236562093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,16,64,128,1,fp8,fp8,0,0.3047200043996175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,1,64,128,1,float16,float16,0,0.2132586638132731
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,16,64,0,1,float16,fp8,0,0.7368586858113607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,16,64,0,1,fp8,fp8,0,0.6526506741841634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,1,64,0,1,float16,float16,0,0.7183732986450195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,1,64,128,1,float16,fp8,0,0.21009600162506104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,1,64,128,1,fp8,fp8,0,0.2813280026117961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,2,64,128,1,float16,float16,0,0.21444799502690634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,1,64,0,1,float16,fp8,0,0.7182933489481608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,1,64,0,1,fp8,fp8,0,0.6327040195465088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,2,64,0,1,float16,float16,0,0.7204053401947021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,2,64,128,1,float16,fp8,0,0.2137226661046346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,2,64,128,1,fp8,fp8,0,0.28145599365234375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,2,64,0,1,float16,fp8,0,0.7153973579406738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,4,64,128,1,float16,float16,0,0.21735467513402304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,2,64,0,1,fp8,fp8,0,0.6308480103810629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,4,64,128,1,float16,fp8,0,0.21640533208847046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,4,64,0,1,float16,float16,0,0.7221333185831705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,4,64,128,1,fp8,fp8,0,0.2845226724942525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,4,64,0,1,float16,fp8,0,0.7254666487375895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,4,64,0,1,fp8,fp8,0,0.6323999961217245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,8,64,128,1,float16,float16,0,0.21921066443125406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,8,64,128,1,float16,fp8,0,0.22197333971659342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,8,64,0,1,float16,float16,0,0.7261333465576172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,8,64,128,1,fp8,fp8,0,0.29233066240946454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,8,64,0,1,float16,fp8,0,0.7319839795430502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,16,64,128,1,float16,float16,0,0.1580586632092794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,8,64,0,1,fp8,fp8,0,0.6430133183797201
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,16,64,0,1,float16,float16,0,0.3898293177286784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,16,64,128,1,float16,fp8,0,0.15712533394495645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,16,64,128,1,fp8,fp8,0,0.19722133874893188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,16,64,0,1,float16,fp8,0,0.387935996055603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,16,64,0,1,fp8,fp8,0,0.3664693435033162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,1,64,128,1,float16,float16,0,0.15517333149909973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,1,64,0,1,float16,float16,0,0.3896799882253011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,1,64,128,1,float16,fp8,0,0.15688000122706094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,1,64,128,1,fp8,fp8,0,0.18774400154749551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,1,64,0,1,float16,fp8,0,0.3874826828638713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,1,64,0,1,fp8,fp8,0,0.36233067512512207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,2,64,128,1,float16,float16,0,0.15576000014940897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,2,64,0,1,float16,float16,0,0.38982399304707843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,2,64,128,1,float16,fp8,0,0.1565013329188029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,2,64,128,1,fp8,fp8,0,0.19437867403030396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,2,64,0,1,float16,fp8,0,0.38997332255045575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,2,64,0,1,fp8,fp8,0,0.36110401153564453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,4,64,128,1,float16,float16,0,0.15661333004633585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,4,64,128,1,float16,fp8,0,0.15620799859364828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,4,64,0,1,float16,float16,0,0.3894346555074056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,4,64,128,1,fp8,fp8,0,0.19553599754969278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,4,64,0,1,float16,fp8,0,0.3887093464533488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,4,64,0,1,fp8,fp8,0,0.3654239972432454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,8,64,128,1,float16,float16,0,0.16140799721082053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,8,64,0,1,float16,float16,0,0.38708265622456867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,8,64,128,1,float16,fp8,0,0.15743466218312582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,8,64,128,1,fp8,fp8,0,0.19664533933003744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,8,64,0,1,float16,fp8,0,0.3906559944152832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,8,64,0,1,fp8,fp8,0,0.362826665242513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,1,64,128,1,float16,float16,0,1.0446826616923015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,1,64,128,1,float16,fp8,0,1.0241920153299968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,1,64,128,1,fp8,fp8,0,1.3695200284322102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,2,64,128,1,float16,float16,0,1.0779146353403728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,1,64,0,1,float16,float16,0,3.0989440282185874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,2,64,128,1,float16,fp8,0,1.0575679937998455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,1,64,0,1,float16,fp8,0,3.1338027318318686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,1,64,0,1,fp8,fp8,0,2.8815625508626304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,2,64,128,1,fp8,fp8,0,1.3999679883321126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,2,64,0,1,float16,float16,0,3.1236000061035156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,4,64,128,1,float16,float16,0,1.0840319792429607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,2,64,0,1,float16,fp8,0,3.1167786916097007
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,2,64,0,1,fp8,fp8,0,2.8883094787597656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,4,64,128,1,float16,fp8,0,1.0770186583201091
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,4,64,128,1,fp8,fp8,0,1.4135039647420247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,8,64,128,1,float16,float16,0,1.1191946665445964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,4,64,0,1,float16,float16,0,3.2103039423624673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,8,64,128,1,float16,fp8,0,1.1061440308888753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,4,64,0,1,float16,fp8,0,3.156895955403646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,4,64,0,1,fp8,fp8,0,2.903562545776367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,16,64,128,1,float16,float16,0,0.5910079876581827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,8,64,128,1,fp8,fp8,0,1.4480907122294109
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,8,64,0,1,float16,float16,0,3.1784585316975913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,16,64,128,1,float16,fp8,0,0.5909440120061239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,16,64,0,1,float16,float16,0,1.618501345316569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,16,64,128,1,fp8,fp8,0,0.7786666552225748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,8,64,0,1,fp8,fp8,0,2.9655466079711914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,8,64,0,1,float16,fp8,0,3.2162612279256186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,1,64,128,1,float16,float16,0,0.5336426496505737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,16,64,0,1,float16,fp8,0,1.6334400177001953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,16,64,0,1,fp8,fp8,0,1.5339733759562175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,1,64,128,1,float16,fp8,0,0.523360013961792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,1,64,128,1,fp8,fp8,0,0.700154701868693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,1,64,0,1,float16,float16,0,1.5751466751098633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,2,64,128,1,float16,float16,0,0.5391466617584229
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,1,64,0,1,float16,fp8,0,1.5680640538533528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,1,64,0,1,fp8,fp8,0,1.464255968729655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,2,64,128,1,float16,fp8,0,0.5289973417917887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,2,64,128,1,fp8,fp8,0,0.711189349492391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,2,64,0,1,float16,float16,0,1.5810559590657551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,4,64,0,1,float16,fp8,0,1.5697333017985027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,2,64,0,1,float16,fp8,0,1.5634719530741374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,4,64,128,1,float16,float16,0,0.5476106802622477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,2,64,0,1,fp8,fp8,0,1.4743893941243489
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,4,64,128,1,float16,fp8,0,0.5409333308537801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,4,64,0,1,float16,float16,0,1.5752426783243816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,4,64,128,1,fp8,fp8,0,0.7219200134277344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,8,64,128,1,float16,float16,0,0.5540053447087606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,4,64,0,1,fp8,fp8,0,1.4749600092569988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,8,64,128,1,float16,fp8,0,0.5514080127080282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,8,64,0,1,float16,float16,0,1.5829013188680012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,8,64,128,1,fp8,fp8,0,0.747920036315918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,8,64,0,1,float16,fp8,0,1.5801973342895508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,16,64,128,1,float16,float16,0,0.30827732880910236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,8,64,0,1,fp8,fp8,0,1.5094505945841472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,16,64,0,1,float16,float16,0,0.8368746439615885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,16,64,128,1,float16,fp8,0,0.313482662041982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,16,64,128,1,fp8,fp8,0,0.41181333859761554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,16,64,0,1,float16,fp8,0,0.8439040184020996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,16,64,0,1,fp8,fp8,0,0.7657279968261719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,1,64,128,1,float16,float16,0,0.2869866689046224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,1,64,0,1,float16,float16,0,0.820192019144694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,1,64,128,1,float16,fp8,0,0.2839306592941284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,1,64,128,1,fp8,fp8,0,0.3787733316421509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,1,64,0,1,float16,fp8,0,0.8171786467234293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,2,64,128,1,float16,float16,0,0.28948267300923664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,1,64,0,1,fp8,fp8,0,0.7296373049418131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,2,64,128,1,float16,fp8,0,0.28549333413441974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,2,64,0,1,float16,float16,0,0.8265386422475179
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,2,64,128,1,fp8,fp8,0,0.38277868429819745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,2,64,0,1,float16,fp8,0,0.8246613343556722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,2,64,0,1,fp8,fp8,0,0.7335360050201416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,4,64,128,1,float16,float16,0,0.29306666056315106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,4,64,0,1,float16,float16,0,0.8245493570963541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,4,64,128,1,float16,fp8,0,0.28963200251261395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,4,64,128,1,fp8,fp8,0,0.3861226638158162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,4,64,0,1,float16,fp8,0,0.8277866840362549
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,4,64,0,1,fp8,fp8,0,0.7377333641052246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,8,64,128,1,float16,float16,0,0.2991146643956502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,8,64,0,1,float16,float16,0,0.8293653329213461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,8,64,128,1,float16,fp8,0,0.29867200056711835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,8,64,128,1,fp8,fp8,0,0.3954453468322754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,16,64,128,1,float16,float16,0,0.1776319940884908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,16,64,128,1,fp8,fp8,0,0.23727999130884805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,8,64,0,1,float16,fp8,0,0.8319466908772787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,8,64,0,1,fp8,fp8,0,0.7452267011006674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,16,64,0,1,float16,float16,0,0.4272586504618327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,16,64,128,1,float16,fp8,0,0.1800373395284017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,16,64,0,1,float16,fp8,0,0.433135986328125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,16,64,0,1,fp8,fp8,0,0.4030666748682658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,1,64,128,1,float16,float16,0,0.1667626698811849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,1,64,0,1,float16,float16,0,0.4156053463617961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,1,64,128,1,float16,fp8,0,0.16358400384585062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,1,64,128,1,fp8,fp8,0,0.2177600065867106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,1,64,0,1,float16,fp8,0,0.41354668140411377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,1,64,0,1,fp8,fp8,0,0.38929065068562824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,2,64,128,1,float16,float16,0,0.16726400454839072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,2,64,0,1,float16,float16,0,0.4174400170644124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,2,64,128,1,float16,fp8,0,0.16474666198094687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,2,64,128,1,fp8,fp8,0,0.2206666668256124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,2,64,0,1,float16,fp8,0,0.4137920141220093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,2,64,0,1,fp8,fp8,0,0.3878399928410848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,4,64,0,1,float16,fp8,0,0.41590933005015057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,4,64,128,1,float16,float16,0,0.1688693364461263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,4,64,0,1,float16,float16,0,0.42002665996551514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,4,64,128,1,float16,fp8,0,0.16885866721471152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,4,64,128,1,fp8,fp8,0,0.2230400045712789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,4,64,0,1,fp8,fp8,0,0.3916533390680949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,8,64,128,1,float16,float16,0,0.17113065719604492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,8,64,0,1,float16,float16,0,0.4209760030110677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,8,64,128,1,float16,fp8,0,0.17291200160980225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,8,64,128,1,fp8,fp8,0,0.2294506629308065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,8,64,0,1,float16,fp8,0,0.42156267166137695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,8,64,0,1,fp8,fp8,0,0.3966133197148641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,16,64,128,1,float16,float16,0,0.12596266468365988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,16,64,0,1,float16,float16,0,0.24878400564193726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,16,64,128,1,float16,fp8,0,0.12527466813723245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,16,64,128,1,fp8,fp8,0,0.15745066603024802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,16,64,0,1,float16,fp8,0,0.2499786615371704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,16,64,0,1,fp8,fp8,0,0.23473066091537476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,1,64,128,1,float16,float16,0,0.12251200278600057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,1,64,0,1,float16,float16,0,0.24495466550191244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,1,64,128,1,float16,fp8,0,0.1222879985968272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,1,64,128,1,fp8,fp8,0,0.14645866552988687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,1,64,0,1,float16,fp8,0,0.24732800324757895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,1,64,0,1,fp8,fp8,0,0.2323733369509379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,2,64,128,1,float16,float16,0,0.12225600083669026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,2,64,0,1,float16,float16,0,0.24615466594696045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,2,64,128,1,float16,fp8,0,0.12211733063062032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,2,64,128,1,fp8,fp8,0,0.15043200055758157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,2,64,0,1,float16,fp8,0,0.24551467100779215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,2,64,0,1,fp8,fp8,0,0.23160000642140707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,4,64,128,1,float16,float16,0,0.12322133779525757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,4,64,0,1,float16,float16,0,0.2481279969215393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,4,64,128,1,float16,fp8,0,0.12288533647855122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,4,64,128,1,fp8,fp8,0,0.15310399731000265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,4,64,0,1,float16,fp8,0,0.2456159989039103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,4,64,0,1,fp8,fp8,0,0.23248000939687094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,8,64,128,1,float16,float16,0,0.12350400288899739
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,8,64,0,1,float16,float16,0,0.2483253280321757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,8,64,128,1,float16,fp8,0,0.12495999534924825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,8,64,128,1,fp8,fp8,0,0.1571466624736786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,8,64,0,1,float16,fp8,0,0.2476159930229187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,8,64,0,1,fp8,fp8,0,0.23378666241963705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,1,64,128,1,float16,float16,0,1.3784906069437664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,1,64,128,1,float16,fp8,0,1.3577280044555664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,1,64,128,1,fp8,fp8,0,1.8219146728515625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,1,64,0,1,float16,float16,0,3.014122645060221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,2,64,128,1,float16,float16,0,1.4114240010579426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,2,64,128,1,float16,fp8,0,1.3977279663085938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,1,64,0,1,float16,fp8,0,3.0520480473836265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,1,64,0,1,fp8,fp8,0,2.838080088297526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,2,64,128,1,fp8,fp8,0,1.8336000442504883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,2,64,0,1,float16,float16,0,3.0664052963256836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,4,64,128,1,float16,float16,0,1.431861400604248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,2,64,0,1,float16,fp8,0,3.071008046468099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,2,64,0,1,fp8,fp8,0,2.838442802429199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,4,64,128,1,float16,fp8,0,1.4266506830851238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,4,64,128,1,fp8,fp8,0,1.8668319384257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,4,64,0,1,float16,float16,0,3.128368059794108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,8,64,128,1,float16,float16,0,1.4681013425191243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,4,64,0,1,float16,fp8,0,3.064154624938965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,8,64,128,1,float16,fp8,0,1.4481706619262695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,4,64,0,1,fp8,fp8,0,2.9009278615315757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,8,64,0,1,float16,float16,0,3.1474507649739585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,8,64,128,1,fp8,fp8,0,1.9211573600769043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,16,64,128,1,float16,float16,0,0.7814453442891439
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,8,64,0,1,float16,fp8,0,3.1266771952311196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,8,64,0,1,fp8,fp8,0,2.9475199381510415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,16,64,128,1,float16,fp8,0,0.7668960094451904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,16,64,0,1,float16,float16,0,1.613141377766927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,16,64,128,1,fp8,fp8,0,1.0234933694203694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,16,64,0,1,float16,fp8,0,1.6156427065531414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,16,64,0,1,fp8,fp8,0,1.5322240193684895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,1,64,128,1,float16,float16,0,0.6882879734039307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,1,64,0,1,float16,float16,0,1.5326879819234211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,1,64,128,1,float16,fp8,0,0.6784106890360514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,1,64,128,1,fp8,fp8,0,0.9127893447875977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,1,64,0,1,float16,fp8,0,1.5113600095113118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,1,64,0,1,fp8,fp8,0,1.4346292813618977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,2,64,128,1,float16,float16,0,0.7035253047943115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,2,64,128,1,float16,fp8,0,0.6934613386789957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,2,64,128,1,fp8,fp8,0,0.9394559860229492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,2,64,0,1,float16,float16,0,1.538549264272054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,4,64,128,1,float16,float16,0,0.7144266764322916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,2,64,0,1,float16,fp8,0,1.5256479581197102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,2,64,0,1,fp8,fp8,0,1.4472853342692058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,4,64,128,1,float16,fp8,0,0.7057440280914307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,4,64,0,1,float16,float16,0,1.557439963022868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,4,64,128,1,fp8,fp8,0,0.94540802637736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,8,64,128,1,float16,float16,0,0.7327573299407959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,4,64,0,1,float16,fp8,0,1.5454079310099285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,4,64,0,1,fp8,fp8,0,1.4643093744913738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,8,64,128,1,float16,fp8,0,0.7222560246785482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,8,64,0,1,float16,float16,0,1.5600852966308594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,8,64,128,1,fp8,fp8,0,0.9777119954427084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,16,64,128,1,float16,float16,0,0.3957759936650594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,8,64,0,1,float16,fp8,0,1.5682719548543294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,8,64,0,1,fp8,fp8,0,1.4854507446289062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,16,64,128,1,float16,fp8,0,0.3950986862182617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,16,64,0,1,float16,float16,0,0.8228693008422852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,16,64,128,1,fp8,fp8,0,0.5229066610336304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,16,64,0,1,float16,fp8,0,0.8252639770507812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,1,64,128,1,float16,float16,0,0.36177066961924237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,16,64,0,1,fp8,fp8,0,0.7862880229949951
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,1,64,0,1,float16,float16,0,0.7894079685211182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,1,64,128,1,float16,fp8,0,0.35395201047261554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,1,64,128,1,fp8,fp8,0,0.47933868567148846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,1,64,0,1,float16,fp8,0,0.7817707061767578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,1,64,0,1,fp8,fp8,0,0.7384746869405111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,2,64,128,1,float16,float16,0,0.36456000804901123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,2,64,128,1,float16,fp8,0,0.3595679998397827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,2,64,0,1,float16,float16,0,0.7967573006947836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,2,64,128,1,fp8,fp8,0,0.4819466670354207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,2,64,0,1,float16,fp8,0,0.7877333164215088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,4,64,0,1,float16,float16,0,0.7977973620096842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,4,64,128,1,float16,float16,0,0.3683146635691325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,2,64,0,1,fp8,fp8,0,0.7384533087412516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,4,64,128,1,float16,fp8,0,0.36512001355489093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,4,64,128,1,fp8,fp8,0,0.4912586609522502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,4,64,0,1,float16,fp8,0,0.7898666858673096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,8,64,128,1,float16,float16,0,0.37619733810424805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,4,64,0,1,fp8,fp8,0,0.7483039697011312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,8,64,0,1,float16,float16,0,0.8007573286692301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,8,64,128,1,float16,fp8,0,0.3752693335215251
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,8,64,128,1,fp8,fp8,0,0.5007413228352865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,8,64,0,1,float16,fp8,0,0.8022933006286621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,16,64,128,1,float16,float16,0,0.21570134162902832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,8,64,0,1,fp8,fp8,0,0.759173313776652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,16,64,0,1,float16,float16,0,0.4367893139521281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,16,64,128,1,float16,fp8,0,0.217738668123881
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,16,64,128,1,fp8,fp8,0,0.2881760001182556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,16,64,0,1,float16,fp8,0,0.43910932540893555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,16,64,0,1,fp8,fp8,0,0.39962132771809894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,1,64,128,1,float16,float16,0,0.19894933700561523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,1,64,0,1,float16,float16,0,0.42298134167989093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,1,64,128,1,float16,fp8,0,0.1973173419634501
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,1,64,128,1,fp8,fp8,0,0.2653653422991435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,1,64,0,1,float16,fp8,0,0.4178239901860555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,1,64,0,1,fp8,fp8,0,0.37381335099538165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,2,64,128,1,float16,float16,0,0.20011200507481894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,2,64,0,1,float16,float16,0,0.4243626594543457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,2,64,128,1,float16,fp8,0,0.1986293395360311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,2,64,128,1,fp8,fp8,0,0.26737066109975177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,2,64,0,1,float16,fp8,0,0.42130132516225177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,2,64,0,1,fp8,fp8,0,0.3773920138676961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,4,64,128,1,float16,float16,0,0.20163200298945108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,4,64,0,1,float16,float16,0,0.4251946608225505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,4,64,128,1,float16,fp8,0,0.20112532377243042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,4,64,128,1,fp8,fp8,0,0.27089067300160724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,4,64,0,1,float16,fp8,0,0.42582933108011883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,4,64,0,1,fp8,fp8,0,0.3808853228886922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,8,64,128,1,float16,float16,0,0.20763200521469116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,8,64,0,1,float16,float16,0,0.4302133321762085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,8,64,128,1,float16,fp8,0,0.20635199546813965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,8,64,128,1,fp8,fp8,0,0.27561599016189575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,8,64,0,1,float16,fp8,0,0.4275360107421875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,8,64,0,1,fp8,fp8,0,0.3875093460083008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,16,64,128,1,float16,float16,0,0.12734400232632956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,16,64,0,1,float16,float16,0,0.22630933920542398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,1,64,0,1,float16,float16,0,0.2183039983113607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,16,64,128,1,float16,fp8,0,0.1288533310095469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,16,64,128,1,fp8,fp8,0,0.17075733343760172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,16,64,0,1,float16,fp8,0,0.22549333175023398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,16,64,0,1,fp8,fp8,0,0.2164799968401591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,1,64,128,1,float16,float16,0,0.11780800422032674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,1,64,128,1,float16,fp8,0,0.11730133493741353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,1,64,128,1,fp8,fp8,0,0.15406933426856995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,2,64,128,1,fp8,fp8,0,0.15676800409952799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,1,64,0,1,float16,fp8,0,0.21775466203689575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,1,64,0,1,fp8,fp8,0,0.204858660697937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,2,64,128,1,float16,float16,0,0.11768533786137898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,2,64,0,1,float16,float16,0,0.21704532702763876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,2,64,128,1,float16,fp8,0,0.11716799934705098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,2,64,0,1,float16,fp8,0,0.21904534101486206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,2,64,0,1,fp8,fp8,0,0.20547733704249063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,4,64,0,1,float16,fp8,0,0.21785600980122885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,4,64,128,1,float16,float16,0,0.119077334801356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,4,64,0,1,float16,float16,0,0.2198186715443929
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,4,64,128,1,float16,fp8,0,0.12004266182581584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,4,64,128,1,fp8,fp8,0,0.159770667552948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,4,64,0,1,fp8,fp8,0,0.20822399854660034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,8,64,128,1,float16,float16,0,0.12333866953849792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,8,64,0,1,fp8,fp8,0,0.2108853260676066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,8,64,0,1,float16,float16,0,0.22224533557891846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,8,64,128,1,float16,fp8,0,0.12220266461372375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,8,64,128,1,fp8,fp8,0,0.1628106633822123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,8,64,0,1,float16,fp8,0,0.22220265865325928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,16,64,128,1,float16,float16,0,0.09126399954160054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,16,64,0,1,float16,float16,0,0.1381653348604838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,16,64,128,1,float16,fp8,0,0.09213866790135701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,16,64,128,1,fp8,fp8,0,0.11588799953460693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,16,64,0,1,float16,fp8,0,0.13955199718475342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,16,64,0,1,fp8,fp8,0,0.13149333000183105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,1,64,128,1,float16,float16,0,0.0881173312664032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,1,64,0,1,float16,float16,0,0.1379680037498474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,1,64,128,1,float16,fp8,0,0.08827733000119527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,1,64,128,1,fp8,fp8,0,0.10537599523862202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,1,64,0,1,float16,fp8,0,0.13738666971524557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,1,64,0,1,fp8,fp8,0,0.1293653349081675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,2,64,128,1,float16,float16,0,0.08872000376383464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,2,64,0,1,float16,fp8,0,0.13743999600410461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,2,64,0,1,float16,float16,0,0.1365546683470408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,2,64,128,1,float16,fp8,0,0.08877866466840108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,2,64,128,1,fp8,fp8,0,0.10582933823267619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,2,64,0,1,fp8,fp8,0,0.1292746663093567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,4,64,0,1,float16,fp8,0,0.13778133193651834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,4,64,128,1,float16,float16,0,0.08866133292516072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,4,64,0,1,float16,float16,0,0.13682666420936584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,4,64,128,1,float16,fp8,0,0.08902933200200398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,4,64,128,1,fp8,fp8,0,0.1056160032749176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,4,64,0,1,fp8,fp8,0,0.1300373375415802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,8,64,128,1,float16,float16,0,0.08845866719881694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,8,64,0,1,float16,float16,0,0.13766400019327799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,8,64,128,1,float16,fp8,0,0.08867733677228291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,8,64,128,1,fp8,fp8,0,0.10748266180356343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,8,64,0,1,float16,fp8,0,0.14035733540852866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,8,64,0,1,fp8,fp8,0,0.12967466314633688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,1,64,128,1,float16,float16,0,1.0268320242563884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,1,64,128,1,float16,fp8,0,1.013935963312785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,1,64,0,1,float16,float16,0,1.8730559349060059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,1,64,128,1,fp8,fp8,0,1.3507253328959148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,2,64,128,1,float16,float16,0,1.0432960192362468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,1,64,0,1,float16,fp8,0,1.8579626083374023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,1,64,0,1,fp8,fp8,0,1.7647946675618489
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,2,64,128,1,float16,fp8,0,1.0345760186513264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,2,64,0,1,float16,float16,0,1.893210728963216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,2,64,128,1,fp8,fp8,0,1.373301347096761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,2,64,0,1,float16,fp8,0,1.8826452891031902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,4,64,128,1,float16,float16,0,1.0577600002288818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,2,64,0,1,fp8,fp8,0,1.7808960278828938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,4,64,128,1,fp8,fp8,0,1.4016906420389812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,4,64,128,1,float16,fp8,0,1.0524746576944988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,4,64,0,1,float16,float16,0,1.9185387293497722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,8,64,128,1,float16,float16,0,1.0874933401743572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,4,64,0,1,float16,fp8,0,1.8976106643676758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,4,64,0,1,fp8,fp8,0,1.801088015238444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,8,64,128,1,float16,fp8,0,1.0812266667683919
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,8,64,0,1,float16,float16,0,1.9350560506184895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,8,64,128,1,fp8,fp8,0,1.4306027094523113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,8,64,0,1,float16,fp8,0,1.9429866472880046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,16,64,128,1,float16,float16,0,0.5811573266983032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,8,64,0,1,fp8,fp8,0,1.8434027036031086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,16,64,128,1,float16,fp8,0,0.57587198416392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,16,64,0,1,float16,float16,0,1.0132640202840169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,16,64,128,1,fp8,fp8,0,0.7655946413675944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,16,64,0,1,float16,fp8,0,1.0049546559651692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,16,64,0,1,fp8,fp8,0,0.9693813323974609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,1,64,128,1,float16,float16,0,0.5223306814829508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,1,64,0,1,float16,float16,0,0.9502613544464111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,1,64,128,1,float16,fp8,0,0.5113173325856527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,1,64,128,1,fp8,fp8,0,0.6877439816792806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,1,64,0,1,float16,fp8,0,0.9420639673868815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,2,64,128,1,float16,float16,0,0.5243200063705444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,1,64,0,1,fp8,fp8,0,0.889024019241333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,2,64,128,1,float16,fp8,0,0.5204960107803345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,2,64,0,1,float16,float16,0,0.9576693375905355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,2,64,128,1,fp8,fp8,0,0.6963466803232828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,2,64,0,1,float16,fp8,0,0.9539199670155843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,2,64,0,1,fp8,fp8,0,0.9009919961293539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,4,64,128,1,float16,float16,0,0.5348320007324219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,4,64,0,1,float16,float16,0,0.9617813428243002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,4,64,0,1,float16,fp8,0,0.9579892953236898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,4,64,128,1,float16,fp8,0,0.5266186793645223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,4,64,128,1,fp8,fp8,0,0.7112212975819906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,8,64,128,1,float16,float16,0,0.5458453496297201
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,4,64,0,1,fp8,fp8,0,0.9079573154449463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,8,64,128,1,float16,fp8,0,0.541871984799703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,8,64,0,1,float16,float16,0,0.9756480058034261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,16,64,128,1,float16,fp8,0,0.303439994653066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,8,64,128,1,fp8,fp8,0,0.7240800062815348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,8,64,0,1,float16,fp8,0,0.9717546304066976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,16,64,128,1,float16,float16,0,0.30190932750701904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,8,64,0,1,fp8,fp8,0,0.928874651590983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,16,64,0,1,float16,float16,0,0.5244106849034628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,16,64,128,1,fp8,fp8,0,0.4007680018742879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,16,64,0,1,float16,fp8,0,0.5237226486206055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,16,64,0,1,fp8,fp8,0,0.4859520196914673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,1,64,128,1,float16,float16,0,0.27517332633336383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,1,64,0,1,float16,float16,0,0.49874667326609295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,1,64,128,1,float16,fp8,0,0.2735146681467692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,1,64,128,1,fp8,fp8,0,0.36533331871032715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,1,64,0,1,float16,fp8,0,0.4963359832763672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,1,64,0,1,fp8,fp8,0,0.4512480099995931
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,2,64,128,1,float16,float16,0,0.278656005859375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,2,64,128,1,float16,fp8,0,0.2742026646931966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,2,64,0,1,float16,float16,0,0.5019040107727051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,2,64,128,1,fp8,fp8,0,0.370410680770874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,2,64,0,1,float16,fp8,0,0.49952534834543866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,2,64,0,1,fp8,fp8,0,0.4535253445307414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,4,64,128,1,float16,float16,0,0.28309865792592365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,4,64,0,1,float16,float16,0,0.5044746796290079
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,4,64,128,1,float16,fp8,0,0.28170132637023926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,4,64,128,1,fp8,fp8,0,0.3728213310241699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,4,64,0,1,float16,fp8,0,0.49987200895945233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,4,64,0,1,fp8,fp8,0,0.45922664801279706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,8,64,128,1,float16,float16,0,0.28837867577870685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,8,64,0,1,float16,float16,0,0.5099200010299683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,8,64,128,1,float16,fp8,0,0.2873013416926066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,8,64,128,1,fp8,fp8,0,0.38355199495951336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,8,64,0,1,float16,fp8,0,0.5084640185038248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,8,64,0,1,fp8,fp8,0,0.4675626754760742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,16,64,128,1,float16,float16,0,0.1665386656920115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,16,64,0,1,float16,float16,0,0.268613338470459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,16,64,128,1,float16,fp8,0,0.16927466789881387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,16,64,128,1,fp8,fp8,0,0.2244373361269633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,16,64,0,1,float16,fp8,0,0.2707039912541707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,16,64,0,1,fp8,fp8,0,0.25834665695826214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,1,64,128,1,fp8,fp8,0,0.20732265710830688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,1,64,128,1,float16,float16,0,0.15357866883277893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,1,64,0,1,float16,float16,0,0.2531733314196269
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,1,64,128,1,float16,fp8,0,0.15203733245531717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,1,64,0,1,float16,fp8,0,0.2521653374036153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,1,64,0,1,fp8,fp8,0,0.2409706711769104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,2,64,128,1,float16,float16,0,0.15427199999491373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,2,64,0,1,float16,float16,0,0.2558880050977071
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,2,64,128,1,float16,fp8,0,0.15203199783960977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,2,64,128,1,fp8,fp8,0,0.2076373298962911
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,2,64,0,1,float16,fp8,0,0.25271467367808026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,2,64,0,1,fp8,fp8,0,0.2424959937731425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,4,64,128,1,float16,float16,0,0.15717333555221558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,4,64,0,1,float16,float16,0,0.25753066937128705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,4,64,128,1,float16,fp8,0,0.15498133500417074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,4,64,128,1,fp8,fp8,0,0.21167999505996704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,4,64,0,1,float16,fp8,0,0.2547253370285034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,4,64,0,1,fp8,fp8,0,0.24505066871643066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,8,64,128,1,float16,float16,0,0.16008533040682474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,8,64,0,1,float16,float16,0,0.26048533121744794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,16,64,128,1,float16,fp8,0,0.10129599769910176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,8,64,128,1,float16,fp8,0,0.1606613298257192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,8,64,128,1,fp8,fp8,0,0.21678932507832846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,8,64,0,1,float16,fp8,0,0.25960532824198407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,8,64,0,1,fp8,fp8,0,0.250218669573466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,16,64,128,1,float16,float16,0,0.09984532992045085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,16,64,0,1,float16,float16,0,0.14806399742762247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,16,64,128,1,fp8,fp8,0,0.13766400019327799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,16,64,0,1,float16,fp8,0,0.14923200011253357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,16,64,0,1,fp8,fp8,0,0.14566933115323386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,1,64,128,1,float16,float16,0,0.09185066819190979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,1,64,0,1,float16,float16,0,0.1393333375453949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,1,64,128,1,float16,fp8,0,0.09181867043177287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,1,64,128,1,fp8,fp8,0,0.11593066652615865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,1,64,0,1,float16,fp8,0,0.14061333735783896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,1,64,0,1,fp8,fp8,0,0.13542399803797403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,2,64,128,1,float16,float16,0,0.0923520028591156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,2,64,0,1,float16,float16,0,0.1409280002117157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,2,64,128,1,float16,fp8,0,0.092357337474823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,2,64,128,1,fp8,fp8,0,0.11606933673222859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,2,64,0,1,float16,fp8,0,0.14123200376828512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,2,64,0,1,fp8,fp8,0,0.1348960002263387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,4,64,128,1,float16,float16,0,0.09372267127037048
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,4,64,0,1,float16,float16,0,0.14299733440081278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,4,64,128,1,float16,fp8,0,0.09299199779828389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,4,64,128,1,fp8,fp8,0,0.12485866745313008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,4,64,0,1,float16,fp8,0,0.14241600036621094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,4,64,0,1,fp8,fp8,0,0.13623467087745667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,8,64,128,1,float16,float16,0,0.09665066997210185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,8,64,0,1,float16,float16,0,0.14452800154685974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,8,64,128,1,float16,fp8,0,0.09675733248392741
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,8,64,128,1,fp8,fp8,0,0.1318933367729187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,8,64,0,1,float16,fp8,0,0.1442293326059977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,8,64,0,1,fp8,fp8,0,0.14179733395576477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,16,64,128,1,float16,float16,0,0.07194666564464569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,16,64,0,1,float16,float16,0,0.09567466378211975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,16,64,128,1,float16,fp8,0,0.0710506687561671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,16,64,128,1,fp8,fp8,0,0.09250666697820027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,16,64,0,1,float16,fp8,0,0.09660800298055013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,16,64,0,1,fp8,fp8,0,0.09198400378227234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,1,64,128,1,float16,float16,0,0.06930666665236156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,1,64,0,1,float16,float16,0,0.09567999839782715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,1,64,128,1,float16,fp8,0,0.07002666592597961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,1,64,128,1,fp8,fp8,0,0.08506133159001668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,1,64,0,1,float16,fp8,0,0.09636800487836202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,1,64,0,1,fp8,fp8,0,0.09114133318265279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,2,64,128,1,float16,float16,0,0.06937600175539653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,2,64,0,1,float16,float16,0,0.09609066446622212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,2,64,128,1,float16,fp8,0,0.0691840002934138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,4,64,128,1,float16,fp8,0,0.07000533243020375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,2,64,128,1,fp8,fp8,0,0.08596799770991008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,2,64,0,1,float16,fp8,0,0.09582400321960449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,2,64,0,1,fp8,fp8,0,0.09079999725023906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,4,64,128,1,float16,float16,0,0.07054933408896129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,4,64,0,1,float16,float16,0,0.0957973301410675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,4,64,128,1,fp8,fp8,0,0.08816533287366231
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,4,64,0,1,float16,fp8,0,0.0958026647567749
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,4,64,0,1,fp8,fp8,0,0.09123200178146362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,8,64,128,1,float16,float16,0,0.07008533179759979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,8,64,0,1,float16,float16,0,0.09622933467229207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,8,64,128,1,float16,fp8,0,0.06989333530267079
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,8,64,128,1,fp8,fp8,0,0.08867733677228291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,8,64,0,1,float16,fp8,0,0.09538666407267253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,8,64,0,1,fp8,fp8,0,0.09239466985066731
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,1,64,128,1,float16,float16,0,1.3620266914367676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,1,64,0,1,float16,float16,0,1.9664746920267742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,1,64,128,1,float16,fp8,0,1.3341387112935383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,1,64,128,1,fp8,fp8,0,1.756608009338379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,1,64,0,1,float16,fp8,0,1.9503733317057292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,2,64,128,1,float16,float16,0,1.357434590657552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,1,64,0,1,fp8,fp8,0,1.8289333979288738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,2,64,128,1,float16,fp8,0,1.3414239883422852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,2,64,0,1,float16,float16,0,1.9935946464538574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,2,64,128,1,fp8,fp8,0,1.7830185890197754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,2,64,0,1,float16,fp8,0,1.9540905952453613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,2,64,0,1,fp8,fp8,0,1.8572853406270344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,4,64,128,1,float16,float16,0,1.37553071975708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,4,64,128,1,float16,fp8,0,1.3584693272908528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,4,64,0,1,float16,float16,0,1.9919145901997883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,4,64,128,1,fp8,fp8,0,1.8085333506266277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,4,64,0,1,float16,fp8,0,1.9797546068827312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,8,64,128,1,float16,float16,0,1.4157172838846843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,4,64,0,1,fp8,fp8,0,1.8808959325154622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,8,64,128,1,float16,fp8,0,1.3937759399414062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,8,64,0,1,float16,float16,0,2.0312959353129068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,8,64,128,1,fp8,fp8,0,1.8489759763081868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,16,64,128,1,float16,float16,0,0.7587839762369791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,8,64,0,1,float16,fp8,0,2.00764799118042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,8,64,0,1,fp8,fp8,0,1.9198826154073079
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,16,64,0,1,float16,float16,0,1.0690666834513347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,16,64,128,1,float16,fp8,0,0.7564746538798014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,16,64,128,1,fp8,fp8,0,0.9983946482340494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,16,64,0,1,float16,fp8,0,1.0739946365356445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,16,64,0,1,fp8,fp8,0,1.0404746532440186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,1,64,128,1,float16,float16,0,0.6815573374430338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,1,64,0,1,float16,float16,0,0.9902986685434977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,1,64,128,1,float16,fp8,0,0.6681386629740397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,1,64,128,1,fp8,fp8,0,0.8911146322886149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,1,64,0,1,float16,fp8,0,0.9774719874064127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,1,64,0,1,fp8,fp8,0,0.9253706932067871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,2,64,128,1,float16,float16,0,0.6914026737213135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,2,64,0,1,float16,float16,0,1.003754695256551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,2,64,128,1,float16,fp8,0,0.6800159613291422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,2,64,128,1,fp8,fp8,0,0.9013120333353678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,2,64,0,1,float16,fp8,0,0.9970026810963949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,2,64,0,1,fp8,fp8,0,0.935861349105835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,4,64,128,1,float16,float16,0,0.6942666371663412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,4,64,0,1,float16,float16,0,1.0034080346425374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,4,64,128,1,float16,fp8,0,0.6899893283843994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,4,64,128,1,fp8,fp8,0,0.9179253578186035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,4,64,0,1,float16,fp8,0,0.9996586640675863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,4,64,0,1,fp8,fp8,0,0.9491573174794515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,8,64,128,1,float16,float16,0,0.7124426364898682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,8,64,0,1,float16,float16,0,1.0177866617838542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,8,64,128,1,float16,fp8,0,0.7062613169352213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,8,64,128,1,fp8,fp8,0,0.9318880240122477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,8,64,0,1,float16,fp8,0,1.0213119983673096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,8,64,0,1,fp8,fp8,0,0.9752480189005533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,16,64,128,1,float16,float16,0,0.38904531796773273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,16,64,0,1,float16,float16,0,0.5459626515706381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,16,64,128,1,float16,fp8,0,0.3893653154373169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,16,64,128,1,fp8,fp8,0,0.5119786659876505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,16,64,0,1,float16,fp8,0,0.5480266809463501
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,16,64,0,1,fp8,fp8,0,0.525605320930481
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,1,64,128,1,float16,float16,0,0.3539359966913859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,1,64,0,1,float16,float16,0,0.5148906707763672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,1,64,128,1,float16,fp8,0,0.3475573460261027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,1,64,128,1,fp8,fp8,0,0.46463465690612793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,1,64,0,1,float16,fp8,0,0.5083413521448771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,1,64,0,1,fp8,fp8,0,0.47997868061065674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,2,64,128,1,float16,float16,0,0.3561280171076457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,2,64,0,1,float16,float16,0,0.515941341718038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,2,64,128,1,float16,fp8,0,0.35239466031392414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,2,64,128,1,fp8,fp8,0,0.4734026590983073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,2,64,0,1,float16,fp8,0,0.5116373300552368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,2,64,0,1,fp8,fp8,0,0.48422932624816895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,4,64,128,1,float16,float16,0,0.3610453208287557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,4,64,0,1,float16,float16,0,0.5163146654764811
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,4,64,128,1,float16,fp8,0,0.35731732845306396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,4,64,128,1,fp8,fp8,0,0.47738667329152423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,4,64,0,1,float16,fp8,0,0.5171999931335449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,4,64,0,1,fp8,fp8,0,0.4928319851557414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,8,64,128,1,float16,float16,0,0.36664001146952313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,8,64,0,1,float16,float16,0,0.5236959854761759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,8,64,128,1,float16,fp8,0,0.36720001697540283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,8,64,128,1,fp8,fp8,0,0.48609066009521484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,8,64,0,1,float16,fp8,0,0.5242933432261149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,16,64,128,1,float16,float16,0,0.20868800083796182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,8,64,0,1,fp8,fp8,0,0.5016853411992391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,16,64,0,1,float16,float16,0,0.29209067424138385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,1,64,0,1,float16,float16,0,0.27476267019907635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,16,64,128,1,float16,fp8,0,0.20990933974583945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,16,64,128,1,fp8,fp8,0,0.2776693304379781
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,16,64,0,1,float16,fp8,0,0.2934986750284831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,16,64,0,1,fp8,fp8,0,0.26900800069173175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,1,64,128,1,float16,float16,0,0.19104532400767008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,1,64,128,1,float16,fp8,0,0.18672533830006918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,1,64,128,1,fp8,fp8,0,0.25642667214075726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,1,64,0,1,float16,fp8,0,0.27323200305302936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,1,64,0,1,fp8,fp8,0,0.2486613392829895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,2,64,128,1,float16,float16,0,0.19155732790629068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,2,64,0,1,float16,float16,0,0.27697600920995075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,2,64,128,1,float16,fp8,0,0.1886613368988037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,2,64,128,1,fp8,fp8,0,0.25845332940419513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,2,64,0,1,float16,fp8,0,0.27246399720509845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,2,64,0,1,fp8,fp8,0,0.2523786624272664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,4,64,128,1,float16,float16,0,0.19662400086720785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,4,64,0,1,float16,float16,0,0.27872000137964886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,4,64,128,1,float16,fp8,0,0.19487466414769491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,4,64,128,1,fp8,fp8,0,0.2590346733729045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,4,64,0,1,float16,fp8,0,0.277349332968394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,4,64,0,1,fp8,fp8,0,0.25365867217381793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,8,64,128,1,float16,float16,0,0.19909334182739258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,8,64,0,1,float16,float16,0,0.28200000524520874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,8,64,128,1,float16,fp8,0,0.19933867454528809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,8,64,128,1,fp8,fp8,0,0.26547733942667645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,8,64,0,1,float16,fp8,0,0.28019734223683673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,8,64,0,1,fp8,fp8,0,0.2601439952850342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,16,64,128,1,float16,float16,0,0.11787199974060059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,16,64,0,1,float16,float16,0,0.15263467033704123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,16,64,128,1,float16,fp8,0,0.12040533622105916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,16,64,128,1,fp8,fp8,0,0.16065067052841187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,16,64,0,1,float16,fp8,0,0.1532373329003652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,16,64,0,1,fp8,fp8,0,0.14812800288200378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,1,64,128,1,float16,float16,0,0.10770666599273682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,1,64,0,1,float16,float16,0,0.14250133434931436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,1,64,128,1,float16,fp8,0,0.1055413285891215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,1,64,128,1,fp8,fp8,0,0.1453439990679423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,1,64,0,1,float16,fp8,0,0.1404906709988912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,1,64,0,1,fp8,fp8,0,0.13549333810806274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,2,64,128,1,float16,float16,0,0.10846400260925293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,2,64,0,1,float16,float16,0,0.14134400089581808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,2,64,128,1,float16,fp8,0,0.10796266794204712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,2,64,128,1,fp8,fp8,0,0.14723733067512512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,2,64,0,1,float16,fp8,0,0.14043200016021729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,2,64,0,1,fp8,fp8,0,0.1369706690311432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,4,64,128,1,float16,float16,0,0.11038933197657268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,4,64,0,1,float16,float16,0,0.1437226633230845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,4,64,128,1,float16,fp8,0,0.10910399754842122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,4,64,128,1,fp8,fp8,0,0.14839466412862143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,4,64,0,1,float16,fp8,0,0.14324266711870828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,4,64,0,1,fp8,fp8,0,0.14029866456985474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,8,64,128,1,float16,float16,0,0.11275733510653178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,8,64,0,1,float16,float16,0,0.14756799737612405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,16,64,0,1,float16,float16,0,0.08741333087285359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,8,64,128,1,float16,fp8,0,0.114464004834493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,8,64,128,1,fp8,fp8,0,0.15523733695348105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,8,64,0,1,float16,fp8,0,0.14570132891337076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,8,64,0,1,fp8,fp8,0,0.14274133245150247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,16,64,128,1,float16,float16,0,0.07281066477298737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,16,64,128,1,float16,fp8,0,0.07397333284219106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,16,64,128,1,fp8,fp8,0,0.10204799969991048
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,16,64,0,1,float16,fp8,0,0.08826133608818054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,16,64,0,1,fp8,fp8,0,0.08844799796740214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,1,64,128,1,float16,float16,0,0.06860800087451935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,1,64,0,1,float16,float16,0,0.08307733138402303
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,1,64,128,1,float16,fp8,0,0.06797333558400472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,1,64,128,1,fp8,fp8,0,0.08525333801905315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,1,64,0,1,float16,fp8,0,0.08338133494059245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,1,64,0,1,fp8,fp8,0,0.08044266700744629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,2,64,128,1,float16,float16,0,0.0688213308652242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,2,64,0,1,float16,float16,0,0.08386666576067607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,2,64,128,1,float16,fp8,0,0.06871466835339864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,2,64,128,1,fp8,fp8,0,0.08559999863306682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,4,64,128,1,fp8,fp8,0,0.0872373382250468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,2,64,0,1,float16,fp8,0,0.08284799754619598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,2,64,0,1,fp8,fp8,0,0.08051200211048126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,4,64,128,1,float16,float16,0,0.06849599877993266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,4,64,0,1,float16,float16,0,0.08456533153851827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,4,64,128,1,float16,fp8,0,0.06949866811434428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,4,64,0,1,float16,fp8,0,0.08424533406893413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,8,64,128,1,fp8,fp8,0,0.08959999680519104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,4,64,0,1,fp8,fp8,0,0.08177066842714946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,8,64,128,1,float16,float16,0,0.0713866651058197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,8,64,0,1,float16,float16,0,0.08434133728345235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,8,64,128,1,float16,fp8,0,0.07080533107121785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,8,64,0,1,float16,fp8,0,0.08608000477155049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,8,64,0,1,fp8,fp8,0,0.08387733499209087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,16,64,128,1,float16,float16,0,0.05412266651789347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,16,64,0,1,float16,float16,0,0.059978668888409935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,16,64,128,1,float16,fp8,0,0.05468266705671946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,16,64,128,1,fp8,fp8,0,0.06579199930032094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,16,64,0,1,float16,fp8,0,0.060421332716941833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,16,64,0,1,fp8,fp8,0,0.0582826683918635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,1,64,128,1,float16,float16,0,0.05426133175690969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,1,64,0,1,float16,float16,0,0.06035199761390686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,1,64,128,1,float16,fp8,0,0.053861334919929504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,1,64,128,1,fp8,fp8,0,0.06446933249632518
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,1,64,0,1,float16,fp8,0,0.060965334375699363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,1,64,0,1,fp8,fp8,0,0.058789332707722984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,2,64,128,1,float16,float16,0,0.053930665055910744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,2,64,0,1,float16,float16,0,0.06025599936644236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,2,64,128,1,float16,fp8,0,0.05444799860318502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,2,64,128,1,fp8,fp8,0,0.06517866750558217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,2,64,0,1,float16,fp8,0,0.06038933495680491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,2,64,0,1,fp8,fp8,0,0.0576853354771932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,4,64,128,1,float16,float16,0,0.05442133545875549
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,4,64,0,1,float16,float16,0,0.06099733213583628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,4,64,128,1,float16,fp8,0,0.05369600156943003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,4,64,128,1,fp8,fp8,0,0.06551999847094218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,4,64,0,1,float16,fp8,0,0.06132799883683523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,4,64,0,1,fp8,fp8,0,0.05774933099746704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,8,64,128,1,float16,float16,0,0.054229333996772766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,8,64,0,1,float16,float16,0,0.06066666543483734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,8,64,128,1,float16,fp8,0,0.05463466544946035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,8,64,128,1,fp8,fp8,0,0.06551999847094218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,8,64,0,1,float16,fp8,0,0.059658666451772056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,8,64,0,1,fp8,fp8,0,0.058362667759259544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,1,64,128,1,float16,float16,0,1.0159306526184082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,1,64,0,1,float16,float16,0,1.2919572989145915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,1,64,128,1,float16,fp8,0,0.994704008102417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,1,64,128,1,fp8,fp8,0,1.313482681910197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,1,64,0,1,float16,fp8,0,1.281984011332194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,1,64,0,1,fp8,fp8,0,1.1933973630269368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,2,64,128,1,float16,float16,0,1.0153226852416992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,2,64,0,1,float16,float16,0,1.3066133658091228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,2,64,128,1,float16,fp8,0,1.0034613609313965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,2,64,128,1,fp8,fp8,0,1.3318506876627605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,2,64,0,1,fp8,fp8,0,1.2220746676127117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,2,64,0,1,float16,fp8,0,1.2810986836751301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,4,64,128,1,float16,float16,0,1.0324853261311848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,4,64,0,1,float16,float16,0,1.3121333122253418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,4,64,128,1,float16,fp8,0,1.0178399880727131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,4,64,128,1,fp8,fp8,0,1.364181359608968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,4,64,0,1,float16,fp8,0,1.3053599993387859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,4,64,0,1,fp8,fp8,0,1.2412746747334797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,8,64,128,1,float16,float16,0,1.059338649113973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,8,64,0,1,float16,float16,0,1.339962641398112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,8,64,128,1,float16,fp8,0,1.0516160329182942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,8,64,128,1,fp8,fp8,0,1.3873546918233235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,16,64,128,1,float16,float16,0,0.5728586514790853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,8,64,0,1,float16,fp8,0,1.3351306915283203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,8,64,0,1,fp8,fp8,0,1.2876053651173909
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,16,64,0,1,float16,float16,0,0.7155840396881104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,16,64,128,1,float16,fp8,0,0.5707733233769735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,16,64,128,1,fp8,fp8,0,0.7483200232187907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,16,64,0,1,float16,fp8,0,0.714458703994751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,16,64,0,1,fp8,fp8,0,0.6861706574757894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,1,64,128,1,float16,float16,0,0.5148053169250488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,1,64,0,1,float16,float16,0,0.6574133237202963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,1,64,128,1,float16,fp8,0,0.5056853294372559
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,1,64,128,1,fp8,fp8,0,0.6729493141174316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,1,64,0,1,float16,fp8,0,0.6485493183135986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,1,64,0,1,fp8,fp8,0,0.6107946634292603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,2,64,128,1,float16,float16,0,0.5200373331705729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,2,64,0,1,float16,float16,0,0.6594773530960083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,2,64,128,1,float16,fp8,0,0.5137973229090372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,2,64,128,1,fp8,fp8,0,0.68012801806132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,2,64,0,1,float16,fp8,0,0.6594506502151489
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,2,64,0,1,fp8,fp8,0,0.6218080123265585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,4,64,128,1,float16,float16,0,0.5261439879735311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,4,64,0,1,float16,float16,0,0.66921599706014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,4,64,128,1,float16,fp8,0,0.5218559900919596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,4,64,128,1,fp8,fp8,0,0.6888746420542399
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,4,64,0,1,float16,fp8,0,0.6668053468068441
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,4,64,0,1,fp8,fp8,0,0.6299839814503988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,8,64,128,1,float16,float16,0,0.540229320526123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,8,64,0,1,float16,float16,0,0.6809226671854655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,8,64,128,1,float16,fp8,0,0.5359786748886108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,8,64,128,1,fp8,fp8,0,0.7106719811757406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,8,64,0,1,float16,fp8,0,0.6794400215148926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,8,64,0,1,fp8,fp8,0,0.647706667582194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,16,64,128,1,float16,float16,0,0.2972319920857747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,16,64,0,1,float16,float16,0,0.3686506748199463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,16,64,128,1,float16,fp8,0,0.2974986632664998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,16,64,128,1,fp8,fp8,0,0.3916906515757243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,16,64,0,1,float16,fp8,0,0.3710933526357015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,16,64,0,1,fp8,fp8,0,0.34825066725413006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,1,64,128,1,float16,float16,0,0.2688373327255249
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,1,64,0,1,float16,float16,0,0.3426719903945923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,1,64,128,1,float16,fp8,0,0.26545600096384686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,1,64,128,1,fp8,fp8,0,0.35633599758148193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,1,64,0,1,float16,fp8,0,0.3407520055770874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,1,64,0,1,fp8,fp8,0,0.3127466638882955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,2,64,128,1,float16,float16,0,0.27163199583689374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,2,64,0,1,float16,float16,0,0.3466453154881795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,2,64,128,1,float16,fp8,0,0.26796799898147583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,2,64,128,1,fp8,fp8,0,0.3595199982325236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,2,64,0,1,float16,fp8,0,0.344218651453654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,2,64,0,1,fp8,fp8,0,0.3184586763381958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,4,64,128,1,float16,float16,0,0.27612266937891644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,4,64,0,1,float16,float16,0,0.34940266609191895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,4,64,128,1,float16,fp8,0,0.27405865987141925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,4,64,128,1,fp8,fp8,0,0.364464004834493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,4,64,0,1,float16,fp8,0,0.3468426863352458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,4,64,0,1,fp8,fp8,0,0.32130134105682373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,8,64,128,1,float16,float16,0,0.281333327293396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,8,64,0,1,float16,float16,0,0.3537280162175496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,8,64,128,1,float16,fp8,0,0.281605343023936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,8,64,128,1,fp8,fp8,0,0.37460267543792725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,8,64,0,1,float16,fp8,0,0.3566186825434367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,8,64,0,1,fp8,fp8,0,0.33034666379292804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,16,64,128,1,float16,float16,0,0.16245333353678384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,16,64,0,1,float16,float16,0,0.1939093271891276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,16,64,128,1,float16,fp8,0,0.16375466187795004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,16,64,128,1,fp8,fp8,0,0.2184106707572937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,16,64,0,1,float16,fp8,0,0.19881600141525269
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,16,64,0,1,fp8,fp8,0,0.18721065918604532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,1,64,128,1,float16,float16,0,0.14726932843526205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,1,64,0,1,float16,float16,0,0.175546665986379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,1,64,128,1,float16,fp8,0,0.14639467000961304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,1,64,128,1,fp8,fp8,0,0.19754666090011597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,1,64,0,1,float16,fp8,0,0.1743946671485901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,1,64,0,1,fp8,fp8,0,0.16863999764124551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,2,64,128,1,float16,float16,0,0.14850667119026184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,2,64,0,1,float16,float16,0,0.17673067251841226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,2,64,128,1,float16,fp8,0,0.146096001068751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,2,64,128,1,fp8,fp8,0,0.19828800360361734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,2,64,0,1,float16,fp8,0,0.17522666851679483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,2,64,0,1,fp8,fp8,0,0.1707786719004313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,4,64,128,1,float16,float16,0,0.15107733011245728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,4,64,0,1,float16,float16,0,0.18046400944391885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,4,64,128,1,float16,fp8,0,0.14989333351453146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,4,64,128,1,fp8,fp8,0,0.20466132958730063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,4,64,0,1,float16,fp8,0,0.17761067549387613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,4,64,0,1,fp8,fp8,0,0.17547200123469034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,8,64,128,1,float16,float16,0,0.15397333105405173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,8,64,0,1,float16,float16,0,0.18402133385340372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,8,64,128,1,float16,fp8,0,0.15500266353289285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,8,64,128,1,fp8,fp8,0,0.20805867513020834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,8,64,0,1,float16,fp8,0,0.18338133891423544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,8,64,0,1,fp8,fp8,0,0.1780746579170227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,16,64,128,1,float16,float16,0,0.09346133470535278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,16,64,0,1,float16,float16,0,0.10656533638636272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,16,64,128,1,float16,fp8,0,0.09485866626103719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,16,64,128,1,fp8,fp8,0,0.12895466883977255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,16,64,0,1,float16,fp8,0,0.1072266697883606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,16,64,0,1,fp8,fp8,0,0.10601066549619038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,1,64,128,1,float16,float16,0,0.08389332890510559
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,1,64,0,1,float16,float16,0,0.09724799791971843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,1,64,128,1,float16,fp8,0,0.083146666487058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,1,64,128,1,fp8,fp8,0,0.11019200086593628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,1,64,0,1,float16,fp8,0,0.0949173370997111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,1,64,0,1,fp8,fp8,0,0.09330667058626811
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,2,64,128,1,float16,float16,0,0.08460799853006999
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,2,64,0,1,float16,float16,0,0.09822932879130046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,2,64,128,1,float16,fp8,0,0.08412800232569377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,2,64,128,1,fp8,fp8,0,0.11116799712181091
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,2,64,0,1,float16,fp8,0,0.09676800171534221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,2,64,0,1,fp8,fp8,0,0.09425066908200581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,4,64,128,1,float16,float16,0,0.08648533622423808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,4,64,0,1,float16,float16,0,0.09970133503278096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,4,64,128,1,float16,fp8,0,0.08653866251309712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,4,64,128,1,fp8,fp8,0,0.11540800333023071
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,4,64,0,1,float16,fp8,0,0.09903466701507568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,4,64,0,1,fp8,fp8,0,0.09665066997210185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,8,64,128,1,float16,float16,0,0.08890666564305623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,8,64,0,1,float16,float16,0,0.10170132915178935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,8,64,128,1,float16,fp8,0,0.0886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,8,64,128,1,fp8,fp8,0,0.12388267119725545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,8,64,0,1,float16,fp8,0,0.1023573378721873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,8,64,0,1,fp8,fp8,0,0.10156266887982686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,16,64,128,1,float16,float16,0,0.05710933109124502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,16,64,0,1,float16,float16,0,0.0634986658891042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,16,64,128,1,float16,fp8,0,0.057114665706952415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,16,64,128,1,fp8,fp8,0,0.08095466593901317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,16,64,0,1,float16,fp8,0,0.06306666632493337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,16,64,0,1,fp8,fp8,0,0.06398400167624156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,1,64,128,1,float16,float16,0,0.05473599831263224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,1,64,0,1,float16,float16,0,0.061253334085146584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,1,64,128,1,float16,fp8,0,0.05530133346716563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,1,64,128,1,fp8,fp8,0,0.07102400064468384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,1,64,0,1,float16,fp8,0,0.06136000156402588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,1,64,0,1,fp8,fp8,0,0.05930666625499725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,2,64,128,1,float16,float16,0,0.054287999868392944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,2,64,0,1,float16,float16,0,0.062165334820747375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,2,64,128,1,float16,fp8,0,0.05479466418425242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,2,64,128,1,fp8,fp8,0,0.0728000005086263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,2,64,0,1,float16,fp8,0,0.062165334820747375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,2,64,0,1,fp8,fp8,0,0.05969599882761637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,4,64,128,1,float16,float16,0,0.05542399982611338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,4,64,0,1,float16,float16,0,0.061679999033610024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,4,64,128,1,float16,fp8,0,0.054586668809254967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,4,64,128,1,fp8,fp8,0,0.07371733089288075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,4,64,0,1,float16,fp8,0,0.06177600224812826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,4,64,0,1,fp8,fp8,0,0.06046933432420095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,8,64,128,1,float16,float16,0,0.056314667065938316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,8,64,0,1,float16,float16,0,0.06289066871007283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,8,64,128,1,float16,fp8,0,0.05612266560395559
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,8,64,128,1,fp8,fp8,0,0.07481599847475688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,8,64,0,1,float16,fp8,0,0.06208533545335134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,8,64,0,1,fp8,fp8,0,0.06140799820423126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,16,64,128,1,float16,float16,0,0.044821331898371376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,16,64,0,1,float16,float16,0,0.04747733473777771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,16,64,128,1,float16,fp8,0,0.04515199859937032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,16,64,128,1,fp8,fp8,0,0.05129600067933401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,16,64,0,1,float16,fp8,0,0.047594666481018066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,16,64,0,1,fp8,fp8,0,0.04534400006135305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,1,64,128,1,float16,float16,0,0.04823466638724009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,1,64,0,1,float16,float16,0,0.04734933376312256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,1,64,128,1,float16,fp8,0,0.04499199986457825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,1,64,128,1,fp8,fp8,0,0.05106666684150696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,1,64,0,1,float16,fp8,0,0.047226667404174805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,1,64,0,1,fp8,fp8,0,0.04510400195916494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,2,64,128,1,float16,float16,0,0.04445866743723551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,2,64,0,1,float16,float16,0,0.04731200138727824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,4,64,128,1,float16,fp8,0,0.04486933350563049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,2,64,128,1,float16,fp8,0,0.047695999344189964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,2,64,128,1,fp8,fp8,0,0.051370665431022644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,2,64,0,1,float16,fp8,0,0.047557334105173744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,2,64,0,1,fp8,fp8,0,0.044810667634010315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,4,64,128,1,float16,float16,0,0.04515733321507772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,4,64,0,1,float16,float16,0,0.04721599817276001
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,4,64,128,1,fp8,fp8,0,0.05117333432038625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,4,64,0,1,float16,fp8,0,0.04693333307902018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,4,64,0,1,fp8,fp8,0,0.044394666949907936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,8,64,128,1,float16,float16,0,0.04935466746489207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,8,64,0,1,float16,float16,0,0.04775999983151754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,8,64,128,1,float16,fp8,0,0.04517333209514618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,8,64,128,1,fp8,fp8,0,0.05062933266162872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,8,64,0,1,float16,fp8,0,0.04704533517360687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,8,64,0,1,fp8,fp8,0,0.04497066636880239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,1,64,128,1,float16,float16,0,1.21505602200826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,1,64,0,1,float16,float16,0,1.3854400316874187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,1,64,128,1,float16,fp8,0,1.223306655883789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,1,64,128,1,fp8,fp8,0,1.6483306884765625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,1,64,0,1,float16,fp8,0,1.3856693903605144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,1,64,0,1,fp8,fp8,0,1.284874677658081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,2,64,128,1,float16,float16,0,1.226159969965617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,2,64,0,1,float16,float16,0,1.3843199412027996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,2,64,128,1,float16,fp8,0,1.2287999788920085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,2,64,0,1,float16,fp8,0,1.4035199483235676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,2,64,128,1,fp8,fp8,0,1.6553120613098145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,2,64,0,1,fp8,fp8,0,1.310149351755778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,4,64,128,1,float16,float16,0,1.2635680039723713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,4,64,0,1,float16,float16,0,1.4271893501281738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,4,64,128,1,float16,fp8,0,1.2653333346048992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,4,64,128,1,fp8,fp8,0,1.7158026695251465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,4,64,0,1,float16,fp8,0,1.4299999872843425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,4,64,0,1,fp8,fp8,0,1.3787733713785808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,8,64,128,1,float16,float16,0,1.2581546306610107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,8,64,0,1,float16,float16,0,1.4264373779296875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,8,64,128,1,float16,fp8,0,1.2725706895192463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,8,64,128,1,fp8,fp8,0,1.7286453247070312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,8,64,0,1,float16,fp8,0,1.4400320053100586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,8,64,0,1,fp8,fp8,0,1.38155730565389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,16,64,128,1,float16,float16,0,0.678389310836792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,16,64,0,1,float16,float16,0,0.7621653079986572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,16,64,128,1,float16,fp8,0,0.6675093173980713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,16,64,128,1,fp8,fp8,0,0.8940693537394205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,16,64,0,1,float16,fp8,0,0.7517866293589274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,16,64,0,1,fp8,fp8,0,0.7053226629892985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,1,64,128,1,float16,float16,0,0.6233599980672201
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,1,64,0,1,float16,float16,0,0.6985973517100016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,1,64,128,1,float16,fp8,0,0.6218400001525879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,1,64,128,1,fp8,fp8,0,0.8369279702504476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,1,64,0,1,float16,fp8,0,0.6991893450419108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,1,64,0,1,fp8,fp8,0,0.6516693433125814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,2,64,128,1,float16,float16,0,0.6237013339996338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,2,64,0,1,float16,float16,0,0.7039840221405029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,2,64,128,1,float16,fp8,0,0.627402663230896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,2,64,128,1,fp8,fp8,0,0.836837371190389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,2,64,0,1,float16,fp8,0,0.7084373633066813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,2,64,0,1,fp8,fp8,0,0.666159987449646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,4,64,128,1,float16,float16,0,0.6369280020395914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,4,64,0,1,float16,float16,0,0.7219626903533936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,4,64,128,1,float16,fp8,0,0.6391253471374512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,4,64,128,1,fp8,fp8,0,0.8638400236765543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,4,64,0,1,float16,fp8,0,0.7275413672129313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,4,64,0,1,fp8,fp8,0,0.6832266648610433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,8,64,128,1,float16,float16,0,0.6419733365376791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,8,64,0,1,float16,float16,0,0.7271680037180582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,8,64,128,1,float16,fp8,0,0.641541322072347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,8,64,128,1,fp8,fp8,0,0.8821280002593994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,8,64,0,1,float16,fp8,0,0.7221013704935709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,8,64,0,1,fp8,fp8,0,0.6991093158721924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,16,64,128,1,float16,float16,0,0.3524266481399536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,16,64,0,1,float16,float16,0,0.3919680118560791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,16,64,128,1,float16,fp8,0,0.34935466448465985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,16,64,128,1,fp8,fp8,0,0.4633920192718506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,16,64,0,1,float16,fp8,0,0.3882720073064168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,16,64,0,1,fp8,fp8,0,0.36434133847554523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,1,64,128,1,float16,float16,0,0.32336533069610596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,1,64,0,1,float16,float16,0,0.3636106650034587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,1,64,128,1,float16,fp8,0,0.3233226736386617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,1,64,128,1,fp8,fp8,0,0.43062400817871094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,1,64,0,1,float16,fp8,0,0.365013321240743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,1,64,0,1,fp8,fp8,0,0.3388959964116414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,2,64,128,1,float16,float16,0,0.32823999722798664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,2,64,0,1,float16,float16,0,0.36739734808603924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,2,64,128,1,float16,fp8,0,0.32520532608032227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,2,64,128,1,fp8,fp8,0,0.43590935071309406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,2,64,0,1,float16,fp8,0,0.36934932072957355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,2,64,0,1,fp8,fp8,0,0.33957334359486896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,4,64,128,1,float16,float16,0,0.333077331384023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,4,64,0,1,float16,float16,0,0.37751468022664386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,4,64,128,1,float16,fp8,0,0.3342026472091675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,4,64,128,1,fp8,fp8,0,0.44649600982666016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,4,64,0,1,float16,fp8,0,0.3739306529362996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,4,64,0,1,fp8,fp8,0,0.3508426745732625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,8,64,128,1,float16,float16,0,0.3366560141245524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,8,64,0,1,float16,float16,0,0.37486398220062256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,8,64,128,1,float16,fp8,0,0.33528534571329754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,8,64,128,1,fp8,fp8,0,0.45254401365915936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,8,64,0,1,float16,fp8,0,0.3739200035730998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,8,64,0,1,fp8,fp8,0,0.3595946629842122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,16,64,128,1,float16,float16,0,0.1908586621284485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,16,64,0,1,float16,float16,0,0.21275200446446738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,16,64,128,1,float16,fp8,0,0.18986133734385172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,16,64,128,1,fp8,fp8,0,0.24702399969100952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,16,64,0,1,float16,fp8,0,0.21204266945521036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,16,64,0,1,fp8,fp8,0,0.18955200910568237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,1,64,128,1,float16,float16,0,0.1747466723124186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,1,64,0,1,float16,float16,0,0.19885333379109701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,1,64,128,1,float16,fp8,0,0.17537599802017212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,1,64,128,1,fp8,fp8,0,0.23432532946268717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,1,64,0,1,float16,fp8,0,0.20004800955454508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,1,64,0,1,fp8,fp8,0,0.176639993985494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,2,64,128,1,float16,float16,0,0.17760000626246134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,2,64,0,1,float16,float16,0,0.19989866018295288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,2,64,128,1,float16,fp8,0,0.17702933152516684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,2,64,128,1,fp8,fp8,0,0.23918400208155313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,2,64,0,1,float16,fp8,0,0.19754666090011597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,2,64,0,1,fp8,fp8,0,0.18042133251825967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,4,64,128,1,float16,float16,0,0.1792693336804708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,4,64,0,1,float16,float16,0,0.2034026583035787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,4,64,128,1,float16,fp8,0,0.18108266592025757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,4,64,128,1,fp8,fp8,0,0.24126400550206503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,4,64,0,1,float16,fp8,0,0.20336000124613443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,4,64,0,1,fp8,fp8,0,0.18267732858657837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,8,64,128,1,float16,float16,0,0.181386669476827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,8,64,0,1,float16,float16,0,0.2056480050086975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,8,64,128,1,float16,fp8,0,0.1805866758028666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,8,64,128,1,fp8,fp8,0,0.24247999986012778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,8,64,0,1,float16,fp8,0,0.20493332544962564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,8,64,0,1,fp8,fp8,0,0.18198400735855103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,16,64,0,1,float16,fp8,0,0.11191999912261963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,16,64,128,1,float16,float16,0,0.10930132865905762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,16,64,0,1,float16,float16,0,0.11449066797892253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,16,64,128,1,float16,fp8,0,0.10875733693440755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,16,64,128,1,fp8,fp8,0,0.14380266269048056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,16,64,0,1,fp8,fp8,0,0.10522133111953735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,1,64,128,1,float16,float16,0,0.09799999992052714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,1,64,0,1,fp8,fp8,0,0.09671466549237569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,1,64,0,1,float16,float16,0,0.10390399893124898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,1,64,128,1,float16,fp8,0,0.09642133116722107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,1,64,128,1,fp8,fp8,0,0.13186132907867432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,1,64,0,1,float16,fp8,0,0.10498666763305664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,2,64,128,1,float16,float16,0,0.09975999593734741
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,2,64,0,1,float16,float16,0,0.10680533448855083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,2,64,128,1,float16,fp8,0,0.09914666414260864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,4,64,128,1,fp8,fp8,0,0.13718400398890176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,2,64,128,1,fp8,fp8,0,0.13290666540463766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,2,64,0,1,float16,fp8,0,0.10549333691596985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,2,64,0,1,fp8,fp8,0,0.09744000434875488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,4,64,128,1,float16,float16,0,0.10181333621342976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,4,64,0,1,float16,float16,0,0.10787733395894368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,4,64,128,1,float16,fp8,0,0.10224533081054688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,4,64,0,1,float16,fp8,0,0.1086346705754598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,4,64,0,1,fp8,fp8,0,0.10020800431569417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,8,64,128,1,float16,float16,0,0.1043839951356252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,8,64,0,1,float16,float16,0,0.10910933216412862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,8,64,128,1,float16,fp8,0,0.10373866558074951
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,8,64,128,1,fp8,fp8,0,0.1376426617304484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,8,64,0,1,float16,fp8,0,0.10939733187357585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,8,64,0,1,fp8,fp8,0,0.10122133294741313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,16,64,128,1,float16,float16,0,0.06417599817117055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,16,64,0,1,float16,float16,0,0.06622399886449178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,16,64,128,1,float16,fp8,0,0.06328533093134563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,16,64,128,1,fp8,fp8,0,0.0846666693687439
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,16,64,0,1,float16,fp8,0,0.0641653339068095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,16,64,0,1,fp8,fp8,0,0.061280002196629844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,1,64,128,1,float16,float16,0,0.05871466795603434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,1,64,0,1,float16,float16,0,0.06061333417892456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,1,64,128,1,float16,fp8,0,0.05851200222969055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,1,64,128,1,fp8,fp8,0,0.07693333427111308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,1,64,0,1,float16,fp8,0,0.060346667965253196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,1,64,0,1,fp8,fp8,0,0.05625600119431814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,2,64,128,1,float16,float16,0,0.059119999408721924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,2,64,0,1,float16,float16,0,0.060746664802233376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,2,64,128,1,float16,fp8,0,0.060085331400235496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,2,64,128,1,fp8,fp8,0,0.07760000228881836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,2,64,0,1,float16,fp8,0,0.06128533184528351
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,2,64,0,1,fp8,fp8,0,0.0574239989121755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,4,64,128,1,float16,float16,0,0.060453335444132485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,4,64,0,1,float16,float16,0,0.06200533111890157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,4,64,128,1,float16,fp8,0,0.06017066538333893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,4,64,128,1,fp8,fp8,0,0.07832533121109009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,4,64,0,1,float16,fp8,0,0.06232533355553945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,4,64,0,1,fp8,fp8,0,0.05836800237496694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,8,64,128,1,float16,float16,0,0.06218666831652323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,8,64,0,1,float16,float16,0,0.06330666442712148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,8,64,128,1,float16,fp8,0,0.06234666705131531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,8,64,128,1,fp8,fp8,0,0.08286933104197185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,8,64,0,1,float16,fp8,0,0.06281066437562306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,16,64,0,1,float16,fp8,0,0.03997333347797394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,8,64,0,1,fp8,fp8,0,0.05917333563168844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,16,64,128,1,float16,float16,0,0.04180799921353658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,16,64,0,1,float16,float16,0,0.040789333482583366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,16,64,128,1,float16,fp8,0,0.042303999265034996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,1,64,128,1,fp8,fp8,0,0.05102399984995524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,16,64,128,1,fp8,fp8,0,0.05279466509819031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,16,64,0,1,fp8,fp8,0,0.037658666570981346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,1,64,128,1,float16,float16,0,0.038975998759269714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,1,64,0,1,float16,float16,0,0.03782933453718821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,1,64,128,1,float16,fp8,0,0.04018666595220566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,1,64,0,1,float16,fp8,0,0.03824000060558319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,1,64,0,1,fp8,fp8,0,0.03566399961709976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,2,64,128,1,float16,float16,0,0.03986666599909464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,2,64,0,1,float16,float16,0,0.038575999438762665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,2,64,128,1,float16,fp8,0,0.04004266609748205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,2,64,128,1,fp8,fp8,0,0.04987733562787374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,2,64,0,1,float16,fp8,0,0.03813866774241129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,2,64,0,1,fp8,fp8,0,0.03640533238649368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,4,64,128,1,float16,float16,0,0.04081599911053976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,4,64,0,1,fp8,fp8,0,0.037077332536379494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,4,64,0,1,float16,float16,0,0.039605334401130676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,8,64,0,1,float16,float16,0,0.040549332896868386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,4,64,128,1,float16,fp8,0,0.04127999891837438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,4,64,128,1,fp8,fp8,0,0.05172266562779745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,4,64,0,1,float16,fp8,0,0.03979733337958654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,8,64,128,1,float16,float16,0,0.04137066751718521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,8,64,128,1,float16,fp8,0,0.04192000130812327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,8,64,128,1,fp8,fp8,0,0.0525439977645874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,8,64,0,1,float16,fp8,0,0.03994133323431015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,8,64,0,1,fp8,fp8,0,0.03716800113519033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,16,64,128,1,float16,float16,0,0.027669332921504974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,16,64,0,1,float16,float16,0,0.029909332593282063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,16,64,128,1,float16,fp8,0,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,16,64,128,1,fp8,fp8,0,0.03430933256944021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,16,64,0,1,float16,fp8,0,0.029738667110602062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,16,64,0,1,fp8,fp8,0,0.028933333853880566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,1,64,128,1,float16,float16,0,0.026261332134405773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,1,64,0,1,float16,float16,0,0.029029332101345062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,1,64,128,1,float16,fp8,0,0.02658133457104365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,1,64,128,1,fp8,fp8,0,0.032261334359645844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,1,64,0,1,float16,fp8,0,0.028586665789286297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,1,64,0,1,fp8,fp8,0,0.027957332630952198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,2,64,128,1,float16,float16,0,0.026389333109060924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,2,64,0,1,float16,float16,0,0.028858666618665058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,2,64,128,1,float16,fp8,0,0.026543999711672466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,2,64,128,1,fp8,fp8,0,0.0340639998515447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,2,64,0,1,float16,fp8,0,0.028890666862328846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,2,64,0,1,fp8,fp8,0,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,4,64,128,1,float16,float16,0,0.026373334228992462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,4,64,0,1,float16,float16,0,0.029088000456492107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,4,64,128,1,float16,fp8,0,0.027215999861558277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,4,64,128,1,fp8,fp8,0,0.03482666611671448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,4,64,0,1,float16,fp8,0,0.02871999889612198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,4,64,0,1,fp8,fp8,0,0.02900800108909607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,8,64,128,1,float16,float16,0,0.026752000053723652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,8,64,0,1,float16,float16,0,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,8,64,128,1,float16,fp8,0,0.027450665831565857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,8,64,128,1,fp8,fp8,0,0.034143999218940735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,8,64,0,1,float16,fp8,0,0.029669334491093952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,8,64,0,1,fp8,fp8,0,0.028880000114440918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,1,64,128,1,float16,float16,0,1.1596266428629558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,1,64,0,1,float16,float16,0,1.1391039689381917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,1,64,128,1,float16,fp8,0,1.1551466782887776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,1,64,128,1,fp8,fp8,0,1.5302400588989258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,1,64,0,1,float16,fp8,0,1.1356639862060547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,2,64,128,1,float16,fp8,0,1.1697440147399902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,1,64,0,1,fp8,fp8,0,1.0313706398010254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,2,64,128,1,float16,float16,0,1.1642613410949707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,2,64,0,1,float16,float16,0,1.1518399715423584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,2,64,0,1,fp8,fp8,0,1.0583306948343914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,2,64,0,1,float16,fp8,0,1.146405299504598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,2,64,128,1,fp8,fp8,0,1.5669066111246746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,4,64,0,1,float16,float16,0,1.195466677347819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,4,64,128,1,float16,float16,0,1.2072319984436035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,4,64,128,1,float16,fp8,0,1.1947786808013916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,4,64,128,1,fp8,fp8,0,1.62500794728597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,4,64,0,1,float16,fp8,0,1.1843146483103435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,4,64,0,1,fp8,fp8,0,1.1155786514282227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,8,64,128,1,float16,float16,0,1.1977120240529378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,8,64,0,1,float16,float16,0,1.187461296717326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,8,64,128,1,float16,fp8,0,1.2160639762878418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,8,64,0,1,float16,fp8,0,1.1871519883473713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,8,64,128,1,fp8,fp8,0,1.644997278849284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,8,64,0,1,fp8,fp8,0,1.1346506277720134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,16,64,128,1,float16,float16,0,0.6469546556472778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,16,64,0,1,float16,float16,0,0.6317546765009562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,16,64,128,1,float16,fp8,0,0.6363573471705118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,16,64,0,1,float16,fp8,0,0.629034678141276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,16,64,128,1,fp8,fp8,0,0.843839963277181
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,16,64,0,1,fp8,fp8,0,0.5782293478647867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,1,64,128,1,float16,float16,0,0.5899893442789713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,1,64,0,1,float16,float16,0,0.5768053531646729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,1,64,128,1,float16,fp8,0,0.5917813380559286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,2,64,0,1,float16,float16,0,0.5787893136342367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,1,64,128,1,fp8,fp8,0,0.7813279628753662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,1,64,0,1,float16,fp8,0,0.5781013170878092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,1,64,0,1,fp8,fp8,0,0.5292426745096842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,2,64,128,1,float16,float16,0,0.5948479970296224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,2,64,128,1,float16,fp8,0,0.5948319832483927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,2,64,0,1,float16,fp8,0,0.5851253271102905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,2,64,128,1,fp8,fp8,0,0.7936480045318604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,2,64,0,1,fp8,fp8,0,0.5356693267822266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,4,64,128,1,float16,float16,0,0.607258677482605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,4,64,0,1,float16,float16,0,0.5972319841384888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,4,64,128,1,float16,fp8,0,0.6111946503321329
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,4,64,0,1,float16,fp8,0,0.5968639850616455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,4,64,128,1,fp8,fp8,0,0.8247093359629313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,4,64,0,1,fp8,fp8,0,0.5682080189387003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,8,64,128,1,float16,float16,0,0.6091786623001099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,8,64,0,1,float16,float16,0,0.5967733462651571
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,8,64,128,1,float16,fp8,0,0.6062080065409342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,8,64,0,1,float16,fp8,0,0.5984853506088257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,8,64,128,1,fp8,fp8,0,0.8225546677907308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,16,64,128,1,float16,float16,0,0.3364266554514567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,8,64,0,1,fp8,fp8,0,0.5783146619796753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,16,64,0,1,float16,float16,0,0.3293493390083313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,16,64,128,1,float16,fp8,0,0.3338453372319539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,16,64,128,1,fp8,fp8,0,0.43449600537618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,16,64,0,1,float16,fp8,0,0.32359999418258667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,16,64,0,1,fp8,fp8,0,0.30296534299850464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,1,64,128,1,float16,float16,0,0.3086186647415161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,1,64,0,1,float16,float16,0,0.30050132671991986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,1,64,128,1,float16,fp8,0,0.30796800057093304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,1,64,128,1,fp8,fp8,0,0.40814932187398273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,1,64,0,1,float16,fp8,0,0.29850133260091144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,1,64,0,1,fp8,fp8,0,0.27453333139419556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,2,64,128,1,float16,float16,0,0.310805340607961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,2,64,0,1,float16,float16,0,0.3022773265838623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,2,64,128,1,float16,fp8,0,0.31164799133936566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,2,64,128,1,fp8,fp8,0,0.4162026643753052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,4,64,0,1,float16,float16,0,0.3091040054957072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,2,64,0,1,float16,fp8,0,0.30248000224431354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,2,64,0,1,fp8,fp8,0,0.2777493397394816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,4,64,128,1,float16,float16,0,0.31566933790842694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,4,64,128,1,float16,fp8,0,0.3172159989674886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,4,64,128,1,fp8,fp8,0,0.4221973419189453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,8,64,0,1,float16,float16,0,0.31017067035039264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,4,64,0,1,float16,fp8,0,0.30949866771698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,4,64,0,1,fp8,fp8,0,0.28329066435496014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,8,64,128,1,float16,float16,0,0.3192853331565857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,8,64,128,1,float16,fp8,0,0.3171146710713704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,8,64,128,1,fp8,fp8,0,0.42846401532491046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,8,64,0,1,float16,fp8,0,0.31095999479293823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,8,64,0,1,fp8,fp8,0,0.29503466685612995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,16,64,128,1,float16,float16,0,0.18421334028244019
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,16,64,0,1,float16,float16,0,0.17853333552678427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,16,64,128,1,float16,fp8,0,0.1824373404184977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,16,64,128,1,fp8,fp8,0,0.23616000016530356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,16,64,0,1,float16,fp8,0,0.1743839979171753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,16,64,0,1,fp8,fp8,0,0.15959999958674112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,1,64,128,1,float16,float16,0,0.16866666078567505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,1,64,0,1,float16,float16,0,0.1621226668357849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,1,64,128,1,float16,fp8,0,0.168613334496816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,2,64,128,1,float16,fp8,0,0.17009600003560385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,1,64,128,1,fp8,fp8,0,0.22452267011006674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,1,64,0,1,float16,fp8,0,0.16394133369127908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,1,64,0,1,fp8,fp8,0,0.14739200472831726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,4,64,128,1,float16,float16,0,0.17480534315109253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,2,64,128,1,float16,float16,0,0.16896533966064453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,2,64,0,1,float16,float16,0,0.1653333306312561
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,2,64,128,1,fp8,fp8,0,0.22533865769704184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,2,64,0,1,float16,fp8,0,0.16478400429089865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,2,64,0,1,fp8,fp8,0,0.15243200461069742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,4,64,0,1,float16,float16,0,0.16809600591659546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,4,64,128,1,float16,fp8,0,0.1745599905649821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,4,64,128,1,fp8,fp8,0,0.23079466819763184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,4,64,0,1,float16,fp8,0,0.16790932416915894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,4,64,0,1,fp8,fp8,0,0.15311466654141745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,8,64,128,1,float16,float16,0,0.17502933740615845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,8,64,0,1,float16,float16,0,0.1699840029080709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,8,64,128,1,float16,fp8,0,0.17564799388249716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,8,64,128,1,fp8,fp8,0,0.23053866624832153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,8,64,0,1,float16,fp8,0,0.17060800393422446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,8,64,0,1,fp8,fp8,0,0.15407466888427734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,16,64,128,1,float16,float16,0,0.10497066378593445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,16,64,0,1,float16,float16,0,0.0992693305015564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,16,64,128,1,float16,fp8,0,0.10506133238474528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,16,64,128,1,fp8,fp8,0,0.13639466961224875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,16,64,0,1,float16,fp8,0,0.09945066769917806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,16,64,0,1,fp8,fp8,0,0.09174399574597676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,1,64,128,1,float16,float16,0,0.09474133451779683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,1,64,0,1,float16,float16,0,0.08974400162696838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,1,64,128,1,float16,fp8,0,0.09381866455078125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,1,64,128,1,fp8,fp8,0,0.12582932909329733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,1,64,0,1,float16,fp8,0,0.08966400225957234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,1,64,0,1,fp8,fp8,0,0.08317866424719493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,2,64,128,1,float16,float16,0,0.0960053304831187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,2,64,0,1,float16,float16,0,0.09112000465393066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,2,64,128,1,float16,fp8,0,0.09554133812586467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,2,64,128,1,fp8,fp8,0,0.12779733538627625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,2,64,0,1,float16,fp8,0,0.09118933478991191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,2,64,0,1,fp8,fp8,0,0.08462400237719218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,4,64,128,1,float16,float16,0,0.0976106623808543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,4,64,0,1,float16,float16,0,0.09443733096122742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,4,64,128,1,float16,fp8,0,0.09866666793823242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,4,64,128,1,fp8,fp8,0,0.13089600205421448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,4,64,0,1,float16,fp8,0,0.09308800101280212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,4,64,0,1,fp8,fp8,0,0.08721066514650981
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,8,64,128,1,float16,float16,0,0.10083733002344768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,8,64,0,1,float16,float16,0,0.09527466694513957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,8,64,128,1,float16,fp8,0,0.09973333279291789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,8,64,128,1,fp8,fp8,0,0.13082133730252585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,8,64,0,1,float16,fp8,0,0.09572800000508626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,8,64,0,1,fp8,fp8,0,0.08630399902661641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,16,64,128,1,float16,float16,0,0.062074666221936546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,16,64,0,1,float16,float16,0,0.05680533250172933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,16,64,128,1,float16,fp8,0,0.0618399977684021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,16,64,128,1,fp8,fp8,0,0.08502399921417236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,16,64,0,1,float16,fp8,0,0.05684266487757365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,16,64,0,1,fp8,fp8,0,0.053904001911481224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,1,64,128,1,float16,float16,0,0.056885331869125366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,1,64,0,1,float16,float16,0,0.05267733335494995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,1,64,128,1,float16,fp8,0,0.056943997740745544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,1,64,128,1,fp8,fp8,0,0.07425066828727722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,1,64,0,1,float16,fp8,0,0.05225066840648651
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,1,64,0,1,fp8,fp8,0,0.04823466638724009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,2,64,128,1,float16,float16,0,0.057477335135142006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,2,64,0,1,float16,float16,0,0.05339199801286062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,2,64,128,1,float16,fp8,0,0.05733866492907206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,2,64,128,1,fp8,fp8,0,0.07554666697978973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,2,64,0,1,float16,fp8,0,0.05269333223501841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,2,64,0,1,fp8,fp8,0,0.04924266537030538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,4,64,128,1,float16,float16,0,0.05844266712665558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,4,64,0,1,float16,float16,0,0.05393599967161814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,4,64,128,1,float16,fp8,0,0.0583840012550354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,4,64,128,1,fp8,fp8,0,0.0753119985262553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,4,64,0,1,float16,fp8,0,0.054485330979029335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,4,64,0,1,fp8,fp8,0,0.05029866596062978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,8,64,128,1,float16,float16,0,0.059674665331840515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,8,64,0,1,float16,float16,0,0.054976001381874084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,8,64,128,1,float16,fp8,0,0.06001066664854685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,8,64,128,1,fp8,fp8,0,0.07788266738255818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,8,64,0,1,float16,fp8,0,0.05542933444182078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,8,64,0,1,fp8,fp8,0,0.051967998345692955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,16,64,128,1,float16,float16,0,0.0422986646493276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,16,64,0,1,float16,float16,0,0.036144000788529716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,16,64,128,1,float16,fp8,0,0.04203199843565623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,16,64,128,1,fp8,fp8,0,0.05162666738033295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,16,64,0,1,float16,fp8,0,0.03581333408753077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,16,64,0,1,fp8,fp8,0,0.033200000723203026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,1,64,128,1,float16,float16,0,0.039503999054431915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,1,64,0,1,float16,float16,0,0.03378133227427801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,1,64,128,1,float16,fp8,0,0.03978666663169861
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,1,64,128,1,fp8,fp8,0,0.05005866785844167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,1,64,0,1,float16,fp8,0,0.03375466664632162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,1,64,0,1,fp8,fp8,0,0.03162133445342382
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,2,64,128,1,float16,float16,0,0.0408746674656868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,2,64,0,1,float16,float16,0,0.034074666599432625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,2,64,128,1,float16,fp8,0,0.040778666734695435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,2,64,128,1,fp8,fp8,0,0.05107733110586802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,2,64,0,1,float16,fp8,0,0.03363200028737386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,2,64,0,1,fp8,fp8,0,0.031925333042939506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,4,64,128,1,float16,float16,0,0.04074666649103165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,4,64,0,1,float16,float16,0,0.035018667578697205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,4,64,128,1,float16,fp8,0,0.04094400008519491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,4,64,128,1,fp8,fp8,0,0.052298665046691895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,4,64,0,1,float16,fp8,0,0.035375999907652535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,4,64,0,1,fp8,fp8,0,0.032618666688601174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,8,64,128,1,float16,float16,0,0.04165866722663244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,8,64,0,1,float16,float16,0,0.035445332527160645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,8,64,128,1,float16,fp8,0,0.04218666752179464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,8,64,128,1,fp8,fp8,0,0.052245333790779114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,8,64,0,1,float16,fp8,0,0.035775999228159584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,8,64,0,1,fp8,fp8,0,0.033071999748547874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,16,64,128,1,float16,float16,0,0.02867199977238973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,16,64,0,1,float16,float16,0,0.02590399980545044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,16,64,128,1,float16,fp8,0,0.02805333336194356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,16,64,128,1,fp8,fp8,0,0.0345920001467069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,16,64,0,1,float16,fp8,0,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,16,64,0,1,fp8,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,1,64,128,1,float16,float16,0,0.02700799951950709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,1,64,0,1,float16,float16,0,0.025045332809289295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,1,64,128,1,float16,fp8,0,0.02758399893840154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,1,64,128,1,fp8,fp8,0,0.03364266703526179
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,1,64,0,1,float16,fp8,0,0.025061334172884624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,1,64,0,1,fp8,fp8,0,0.023562667270501454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,2,64,128,1,float16,float16,0,0.027066667874654133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,2,64,0,1,float16,float16,0,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,2,64,128,1,float16,fp8,0,0.027258666853109997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,2,64,128,1,fp8,fp8,0,0.034287999073664345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,2,64,0,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,2,64,0,1,fp8,fp8,0,0.024170666933059692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,4,64,128,1,float16,float16,0,0.027242665489514668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,4,64,0,1,float16,float16,0,0.025216000775496166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,4,64,128,1,float16,fp8,0,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,4,64,128,1,fp8,fp8,0,0.03496533383925756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,4,64,0,1,float16,fp8,0,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,4,64,0,1,fp8,fp8,0,0.02492266645034154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,8,64,128,1,float16,float16,0,0.02812800059715907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,8,64,0,1,float16,float16,0,0.025616000096003216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,8,64,128,1,float16,fp8,0,0.027690666417280834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,8,64,128,1,fp8,fp8,0,0.0345920001467069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,8,64,0,1,float16,fp8,0,0.026330667237440746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,8,64,0,1,fp8,fp8,0,0.02500266581773758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,16,64,128,1,float16,float16,0,0.025242666403452556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,16,64,0,1,float16,float16,0,0.023503998915354412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,16,64,128,1,float16,fp8,0,0.02588266630967458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,16,64,128,1,fp8,fp8,0,0.032101333141326904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,16,64,0,1,float16,fp8,0,0.02346133440732956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,16,64,0,1,fp8,fp8,0,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,1,64,128,1,float16,float16,0,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,1,64,0,1,float16,float16,0,0.026767998933792114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,1,64,128,1,float16,fp8,0,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,1,64,128,1,fp8,fp8,0,0.031744000812371574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,1,64,0,1,float16,fp8,0,0.023503998915354412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,1,64,0,1,fp8,fp8,0,0.02223466585079829
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,2,64,128,1,float16,float16,0,0.024527999262015026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,2,64,0,1,float16,float16,0,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,2,64,128,1,float16,fp8,0,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,2,64,128,1,fp8,fp8,0,0.03219199925661087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,2,64,0,1,float16,fp8,0,0.023743999501069386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,2,64,0,1,fp8,fp8,0,0.022602667411168415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,4,64,128,1,float16,float16,0,0.02554133286078771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,4,64,0,1,float16,float16,0,0.023269332945346832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,4,64,128,1,float16,fp8,0,0.025648000339667004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,4,64,128,1,fp8,fp8,0,0.03178666780392329
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,4,64,0,1,float16,fp8,0,0.02363733450571696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,4,64,0,1,fp8,fp8,0,0.022533332308133442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,8,64,128,1,float16,float16,0,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,8,64,0,1,fp8,fp8,0,0.022698665658632915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,8,64,0,1,float16,float16,0,0.023397333920001984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,8,64,128,1,float16,fp8,0,0.026005332668622334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,8,64,128,1,fp8,fp8,0,0.0320266659061114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,8,64,0,1,float16,fp8,0,0.023919999599456787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,1,64,128,1,float16,float16,0,0.5074986616770426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,1,64,0,1,float16,float16,0,0.4960000117619832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,1,64,128,1,float16,fp8,0,0.5059680143992106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,1,64,128,1,fp8,fp8,0,0.6801599661509196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,1,64,0,1,float16,fp8,0,0.49221332867940265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,1,64,0,1,fp8,fp8,0,0.4579840103785197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,2,64,128,1,float16,float16,0,0.5101173321406046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,2,64,0,1,float16,float16,0,0.5006239811579386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,2,64,128,1,float16,fp8,0,0.5123946666717529
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,4,64,128,1,float16,float16,0,0.5270346800486246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,2,64,128,1,fp8,fp8,0,0.6913812955220541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,2,64,0,1,float16,fp8,0,0.49766401449839276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,2,64,0,1,fp8,fp8,0,0.46613868077596027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,4,64,0,1,float16,float16,0,0.5123733282089233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,4,64,128,1,float16,fp8,0,0.5262506802876791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,4,64,0,1,float16,fp8,0,0.5147200028101603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,4,64,128,1,fp8,fp8,0,0.7074560324350992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,4,64,0,1,fp8,fp8,0,0.4875413179397583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,8,64,128,1,float16,float16,0,0.532042662302653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,8,64,0,1,float16,float16,0,0.5207093159357706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,8,64,128,1,float16,fp8,0,0.5276960134506226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,8,64,0,1,float16,fp8,0,0.5175893306732178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,16,64,0,1,float16,float16,0,0.2935946583747864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,8,64,128,1,fp8,fp8,0,0.7273333072662354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,16,64,128,1,float16,float16,0,0.2964906692504883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,8,64,0,1,fp8,fp8,0,0.5014506578445435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,16,64,128,1,float16,fp8,0,0.2914399902025859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,16,64,128,1,fp8,fp8,0,0.371237317721049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,16,64,0,1,float16,fp8,0,0.2890773415565491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,1,64,128,1,float16,fp8,0,0.26335465908050537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,1,64,0,1,float16,fp8,0,0.2571893334388733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,16,64,0,1,fp8,fp8,0,0.25870933135350543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,1,64,128,1,float16,float16,0,0.26412800947825116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,1,64,0,1,float16,float16,0,0.25781333446502686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,1,64,128,1,fp8,fp8,0,0.3492693503697713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,1,64,0,1,fp8,fp8,0,0.23779199520746866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,2,64,128,1,float16,float16,0,0.2669546604156494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,2,64,0,1,float16,float16,0,0.2603253324826558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,2,64,128,1,float16,fp8,0,0.2663466731707255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,2,64,128,1,fp8,fp8,0,0.3545440038045247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,2,64,0,1,float16,fp8,0,0.26106133063634235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,2,64,0,1,fp8,fp8,0,0.24420799811681113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,4,64,128,1,float16,float16,0,0.2738666733105977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,4,64,0,1,float16,float16,0,0.2675466736157735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,4,64,128,1,float16,fp8,0,0.27399466435114544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,4,64,128,1,fp8,fp8,0,0.3601866563161214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,4,64,0,1,float16,fp8,0,0.26892799139022827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,4,64,0,1,fp8,fp8,0,0.24457067251205444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,8,64,128,1,float16,float16,0,0.2790186603864034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,8,64,0,1,float16,float16,0,0.2727893392244975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,8,64,128,1,float16,fp8,0,0.2771413326263428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,8,64,128,1,fp8,fp8,0,0.3692266543706258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,8,64,0,1,float16,fp8,0,0.2716853419939677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,16,64,128,1,float16,float16,0,0.16245866815249124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,8,64,0,1,fp8,fp8,0,0.2571199933687846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,16,64,0,1,float16,float16,0,0.16126933693885803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,16,64,128,1,float16,fp8,0,0.16207999984423319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,16,64,128,1,fp8,fp8,0,0.1944213310877482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,16,64,0,1,float16,fp8,0,0.16011200348536173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,16,64,0,1,fp8,fp8,0,0.13925333817799887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,1,64,128,1,float16,float16,0,0.1411626636981964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,1,64,0,1,float16,float16,0,0.13795733451843262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,1,64,128,1,float16,fp8,0,0.1415786643822988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,1,64,128,1,fp8,fp8,0,0.18645334243774414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,1,64,0,1,float16,fp8,0,0.13821333646774292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,1,64,0,1,fp8,fp8,0,0.13166399796803793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,2,64,128,1,float16,float16,0,0.1434453328450521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,2,64,0,1,float16,float16,0,0.14056000113487244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,2,64,128,1,float16,fp8,0,0.14317333698272705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,2,64,128,1,fp8,fp8,0,0.18868800004323324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,2,64,0,1,float16,fp8,0,0.14058132966359457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,2,64,0,1,fp8,fp8,0,0.1344106694062551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,4,64,128,1,float16,float16,0,0.14946666359901428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,4,64,0,1,float16,float16,0,0.14639467000961304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,4,64,128,1,float16,fp8,0,0.14882133404413858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,4,64,128,1,fp8,fp8,0,0.1904053290685018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,4,64,0,1,float16,fp8,0,0.14615999658902487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,4,64,0,1,fp8,fp8,0,0.13709333539009094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,8,64,0,1,float16,fp8,0,0.14869333306948343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,8,64,128,1,float16,float16,0,0.15331733226776123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,8,64,0,1,float16,float16,0,0.14946132898330688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,8,64,128,1,float16,fp8,0,0.1516800026098887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,8,64,128,1,fp8,fp8,0,0.19056000312169394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,8,64,0,1,fp8,fp8,0,0.13698133826255798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,16,64,128,1,float16,float16,0,0.09199466307957967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,16,64,0,1,float16,float16,0,0.09147199988365173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,16,64,128,1,float16,fp8,0,0.09018133083979289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,16,64,128,1,fp8,fp8,0,0.10811733206113179
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,16,64,0,1,float16,fp8,0,0.08945600191752116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,16,64,0,1,fp8,fp8,0,0.08190933366616567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,1,64,128,1,float16,float16,0,0.07588266829649608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,1,64,0,1,float16,float16,0,0.07419200241565704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,1,64,128,1,float16,fp8,0,0.07495466868082683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,1,64,128,1,fp8,fp8,0,0.10033599535624187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,1,64,0,1,float16,fp8,0,0.07346666852633159
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,1,64,0,1,fp8,fp8,0,0.0739573339621226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,2,64,128,1,float16,float16,0,0.07691733539104462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,2,64,0,1,float16,float16,0,0.07572799921035767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,2,64,128,1,float16,fp8,0,0.07684800028800964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,2,64,128,1,fp8,fp8,0,0.10231467088063557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,2,64,0,1,float16,fp8,0,0.07517866790294647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,2,64,0,1,fp8,fp8,0,0.07480533421039581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,4,64,128,1,float16,float16,0,0.07989866534868877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,4,64,0,1,float16,float16,0,0.07745600243409474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,4,64,128,1,float16,fp8,0,0.0791786660750707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,4,64,128,1,fp8,fp8,0,0.10471466183662415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,4,64,0,1,float16,fp8,0,0.0772266685962677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,4,64,0,1,fp8,fp8,0,0.07796266674995422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,8,64,128,1,float16,float16,0,0.08157866696516673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,8,64,0,1,float16,float16,0,0.08081066608428955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,8,64,128,1,float16,fp8,0,0.08124800026416779
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,8,64,128,1,fp8,fp8,0,0.10505599776903789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,16,64,128,1,float16,fp8,0,0.04721599817276001
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,8,64,0,1,float16,fp8,0,0.08020799855391185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,8,64,0,1,fp8,fp8,0,0.07866666714350383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,16,64,128,1,float16,float16,0,0.04804266492525736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,16,64,0,1,float16,float16,0,0.04678933322429657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,16,64,128,1,fp8,fp8,0,0.0639626681804657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,16,64,0,1,float16,fp8,0,0.04696000119050344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,16,64,0,1,fp8,fp8,0,0.04980800052483877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,1,64,128,1,float16,float16,0,0.042837331692377724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,1,64,0,1,float16,float16,0,0.04176533222198486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,1,64,128,1,float16,fp8,0,0.04264000058174133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,1,64,128,1,fp8,fp8,0,0.056885331869125366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,1,64,0,1,float16,fp8,0,0.0421973317861557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,1,64,0,1,fp8,fp8,0,0.04381333291530609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,2,64,128,1,float16,float16,0,0.04370133578777313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,2,64,0,1,float16,float16,0,0.04268266757329305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,2,64,128,1,float16,fp8,0,0.04322666426499685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,2,64,128,1,fp8,fp8,0,0.057392001152038574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,2,64,0,1,float16,fp8,0,0.042591998974482216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,2,64,0,1,fp8,fp8,0,0.044549331068992615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,4,64,128,1,float16,float16,0,0.04450133442878723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,4,64,0,1,float16,float16,0,0.04364266494909922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,4,64,128,1,float16,fp8,0,0.04462933540344238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,8,64,128,1,float16,fp8,0,0.04510400195916494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,4,64,128,1,fp8,fp8,0,0.058490668733914696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,4,64,0,1,float16,fp8,0,0.044031997521718345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,4,64,0,1,fp8,fp8,0,0.04481600224971771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,8,64,128,1,float16,float16,0,0.04549333453178406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,8,64,0,1,float16,float16,0,0.0447626660267512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,8,64,128,1,fp8,fp8,0,0.05958933134873708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,8,64,0,1,float16,fp8,0,0.04483733574549357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,8,64,0,1,fp8,fp8,0,0.04574933151404063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,16,64,128,1,float16,float16,0,0.03389333436886469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,1,64,128,1,float16,float16,0,0.03081600119670232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,16,64,0,1,float16,float16,0,0.033717334270477295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,16,64,128,1,float16,fp8,0,0.034101332227389015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,16,64,128,1,fp8,fp8,0,0.037658666570981346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,16,64,0,1,float16,fp8,0,0.03315199911594391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,16,64,0,1,fp8,fp8,0,0.03052799900372823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,1,64,0,1,float16,float16,0,0.030410667260487873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,1,64,128,1,float16,fp8,0,0.031328000128269196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,1,64,128,1,fp8,fp8,0,0.03622400015592575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,1,64,0,1,float16,fp8,0,0.031146667897701263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,1,64,0,1,fp8,fp8,0,0.029274667302767437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,2,64,128,1,float16,float16,0,0.03163733333349228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,2,64,0,1,float16,float16,0,0.03136000037193298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,2,64,128,1,float16,fp8,0,0.03170666595300039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,2,64,128,1,fp8,fp8,0,0.03621866554021835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,2,64,0,1,float16,fp8,0,0.031717332700888314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,2,64,0,1,fp8,fp8,0,0.029674666623274486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,4,64,128,1,float16,float16,0,0.03266666581233343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,4,64,0,1,float16,float16,0,0.03209600100914637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,4,64,128,1,float16,fp8,0,0.0324799989660581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,4,64,128,1,fp8,fp8,0,0.03718933214743932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,4,64,0,1,float16,fp8,0,0.032085334261258446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,4,64,0,1,fp8,fp8,0,0.030282666285832722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,8,64,128,1,float16,float16,0,0.033200000723203026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,8,64,0,1,float16,float16,0,0.03288000077009201
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,8,64,128,1,float16,fp8,0,0.03352533280849457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,8,64,128,1,fp8,fp8,0,0.03690666705369949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,8,64,0,1,float16,fp8,0,0.03278933217128118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,8,64,0,1,fp8,fp8,0,0.030586667358875275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,16,64,0,1,fp8,fp8,0,0.022197333474953968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,16,64,128,1,float16,float16,0,0.021925332645575207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,16,64,0,1,float16,float16,0,0.021770666042963665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,16,64,128,1,float16,fp8,0,0.021856000026067097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,16,64,128,1,fp8,fp8,0,0.02651199946800868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,16,64,0,1,float16,fp8,0,0.021829334398110706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,1,64,128,1,float16,float16,0,0.020762667059898376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,1,64,0,1,float16,float16,0,0.020138667275508244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,1,64,128,1,float16,fp8,0,0.020736000190178554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,1,64,128,1,fp8,fp8,0,0.02521066615978877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,2,64,128,1,fp8,fp8,0,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,1,64,0,1,float16,fp8,0,0.02077866718173027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,1,64,0,1,fp8,fp8,0,0.021482666333516438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,2,64,128,1,float16,float16,0,0.02060266708334287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,2,64,0,1,float16,float16,0,0.020309332758188248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,2,64,128,1,float16,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,2,64,0,1,float16,fp8,0,0.02072000006834666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,2,64,0,1,fp8,fp8,0,0.021546666820844013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,4,64,128,1,float16,float16,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,4,64,0,1,float16,float16,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,4,64,128,1,float16,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,4,64,128,1,fp8,fp8,0,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,4,64,0,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,4,64,0,1,fp8,fp8,0,0.02204799900452296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,8,64,128,1,float16,float16,0,0.021530665457248688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,8,64,0,1,float16,float16,0,0.021146667500336964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,8,64,128,1,float16,fp8,0,0.02165333429972331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,8,64,128,1,fp8,fp8,0,0.026533332963784535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,8,64,0,1,float16,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,8,64,0,1,fp8,fp8,0,0.02250133454799652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,16,64,128,1,float16,float16,0,0.017871999492247898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,16,64,0,1,float16,float16,0,0.01836266616980235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,16,64,128,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,1,64,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,16,64,128,1,fp8,fp8,0,0.024346667031447094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,16,64,0,1,float16,fp8,0,0.018719999740521114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,16,64,0,1,fp8,fp8,0,0.02024000013868014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,1,64,128,1,float16,float16,0,0.017743999759356182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,1,64,128,1,float16,fp8,0,0.01798933371901512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,1,64,128,1,fp8,fp8,0,0.023760000864664715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,1,64,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,1,64,0,1,fp8,fp8,0,0.019685332973798115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,2,64,128,1,float16,float16,0,0.01781333362062772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,2,64,0,1,float16,float16,0,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,2,64,128,1,float16,fp8,0,0.017765333255132038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,2,64,128,1,fp8,fp8,0,0.023557332654794056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,2,64,0,1,float16,fp8,0,0.01762666677435239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,2,64,0,1,fp8,fp8,0,0.020234666764736176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,4,64,128,1,float16,float16,0,0.017445333302021027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,4,64,0,1,float16,float16,0,0.017360000560681026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,4,64,128,1,float16,fp8,0,0.01846933364868164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,4,64,128,1,fp8,fp8,0,0.024400000770886738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,4,64,0,1,float16,fp8,0,0.01786133274435997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,4,64,0,1,fp8,fp8,0,0.02021866664290428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,8,64,128,1,float16,float16,0,0.0176959993938605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,8,64,0,1,float16,float16,0,0.017749333133300144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,8,64,128,1,float16,fp8,0,0.017957333475351334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,8,64,128,1,fp8,fp8,0,0.024122667809327442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,8,64,0,1,float16,fp8,0,0.01820266619324684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,8,64,0,1,fp8,fp8,0,0.022570667167504627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,16,64,128,1,float16,float16,0,0.016629333297411602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,1,64,128,1,float16,float16,0,0.01655999943614006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,16,64,0,1,float16,float16,0,0.016271999726692837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,16,64,128,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,16,64,128,1,fp8,fp8,0,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,16,64,0,1,float16,fp8,0,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,16,64,0,1,fp8,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,1,64,0,1,float16,float16,0,0.016544000556071598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,1,64,128,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,1,64,128,1,fp8,fp8,0,0.023034666975339253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,1,64,0,1,float16,fp8,0,0.016517333686351776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,1,64,0,1,fp8,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,2,64,128,1,float16,float16,0,0.016650666793187458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,2,64,0,1,float16,float16,0,0.016202667107184727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,2,64,128,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,2,64,128,1,fp8,fp8,0,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,2,64,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,2,64,0,1,fp8,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,4,64,128,1,float16,float16,0,0.016271999726692837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,4,64,0,1,float16,float16,0,0.016549333930015564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,4,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,4,64,128,1,fp8,fp8,0,0.022677332162857056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,4,64,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,4,64,0,1,fp8,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,8,64,128,1,float16,float16,0,0.016783999900023144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,8,64,0,1,float16,float16,0,0.016421332955360413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,8,64,128,1,float16,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,8,64,128,1,fp8,fp8,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,1,64,0,1,float16,float16,0,0.19207467635472616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,8,64,0,1,float16,fp8,0,0.01670933390657107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,8,64,0,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,1,64,128,1,float16,float16,0,0.1914506753285726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,1,64,128,1,float16,fp8,0,0.19121599197387695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,1,64,128,1,fp8,fp8,0,0.23813867568969727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,1,64,0,1,float16,fp8,0,0.1909280021985372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,1,64,0,1,fp8,fp8,0,0.2376533349355062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,2,64,128,1,float16,float16,0,0.1946559945742289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,2,64,0,1,float16,float16,0,0.19477866093317667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,2,64,128,1,float16,fp8,0,0.19285333156585693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,2,64,128,1,fp8,fp8,0,0.23639466365178427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,2,64,0,1,float16,fp8,0,0.19435199101765951
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,2,64,0,1,fp8,fp8,0,0.23914666970570883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,4,64,128,1,float16,float16,0,0.20334400733311972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,4,64,0,1,float16,float16,0,0.2044480045636495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,4,64,128,1,float16,fp8,0,0.20228266716003418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,4,64,128,1,fp8,fp8,0,0.2501759926478068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,4,64,0,1,float16,fp8,0,0.20080000162124634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,4,64,0,1,fp8,fp8,0,0.24912534157435098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,8,64,128,1,float16,float16,0,0.2060693303743998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,8,64,0,1,float16,float16,0,0.20699199040730795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,8,64,128,1,float16,fp8,0,0.2044586737950643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,8,64,128,1,fp8,fp8,0,0.2569013237953186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,8,64,0,1,float16,fp8,0,0.20492267608642578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,16,64,128,1,float16,float16,0,0.12541866302490234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,8,64,0,1,fp8,fp8,0,0.25496532519658405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,16,64,0,1,float16,float16,0,0.12558933099110922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,16,64,128,1,float16,fp8,0,0.12210133671760559
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,16,64,128,1,fp8,fp8,0,0.13974400361378989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,16,64,0,1,float16,fp8,0,0.12315199772516887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,16,64,0,1,fp8,fp8,0,0.1399893363316854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,1,64,128,1,float16,float16,0,0.10503466924031575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,1,64,0,1,float16,float16,0,0.10427199800809224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,1,64,128,1,float16,fp8,0,0.10501333077748616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,1,64,128,1,fp8,fp8,0,0.13114666938781738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,1,64,0,1,float16,fp8,0,0.10408533612887065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,1,64,0,1,fp8,fp8,0,0.13130666812260947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,2,64,128,1,float16,float16,0,0.10713600118954976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,2,64,0,1,float16,float16,0,0.10712533195813496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,2,64,128,1,float16,fp8,0,0.10646933317184448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,2,64,128,1,fp8,fp8,0,0.1307093302408854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,2,64,0,1,float16,fp8,0,0.1066986620426178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,2,64,0,1,fp8,fp8,0,0.13116799791653952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,4,64,128,1,float16,float16,0,0.11267200112342834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,4,64,0,1,float16,float16,0,0.11230400204658508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,4,64,128,1,float16,fp8,0,0.11066133777300517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,4,64,128,1,fp8,fp8,0,0.13618666927019754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,4,64,0,1,float16,fp8,0,0.11199466387430827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,4,64,0,1,fp8,fp8,0,0.1359946628411611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,8,64,128,1,float16,float16,0,0.11516799529393514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,8,64,0,1,float16,float16,0,0.11476266384124756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,8,64,128,1,float16,fp8,0,0.11514666676521301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,8,64,128,1,fp8,fp8,0,0.13897599776585898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,8,64,0,1,float16,fp8,0,0.11377066373825073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,8,64,0,1,fp8,fp8,0,0.13797866304715475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,16,64,128,1,float16,float16,0,0.07246399919191997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,16,64,0,1,float16,float16,0,0.0736053337653478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,16,64,128,1,float16,fp8,0,0.0718399981657664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,16,64,128,1,fp8,fp8,0,0.08084799846013387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,16,64,0,1,fp8,fp8,0,0.07992533346017201
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,16,64,0,1,float16,fp8,0,0.07186133166154225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,1,64,128,1,float16,float16,0,0.0561653325955073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,1,64,0,1,float16,float16,0,0.056501333912213646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,1,64,128,1,float16,fp8,0,0.05706666906674703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,1,64,128,1,fp8,fp8,0,0.07314133147398631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,1,64,0,1,float16,fp8,0,0.056890666484832764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,1,64,0,1,fp8,fp8,0,0.07378666599591573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,2,64,128,1,float16,float16,0,0.057392001152038574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,2,64,0,1,float16,float16,0,0.058378666639328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,2,64,128,1,float16,fp8,0,0.05783999959627787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,2,64,128,1,fp8,fp8,0,0.07439466814200084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,2,64,0,1,float16,fp8,0,0.05820266902446747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,2,64,0,1,fp8,fp8,0,0.07420266668001811
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,4,64,128,1,float16,float16,0,0.06090133388837179
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,4,64,0,1,float16,float16,0,0.06109866499900818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,4,64,128,1,float16,fp8,0,0.06047466893990835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,4,64,128,1,fp8,fp8,0,0.07726400097211202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,4,64,0,1,float16,fp8,0,0.060677334666252136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,4,64,0,1,fp8,fp8,0,0.07665066421031952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,8,64,128,1,float16,float16,0,0.06473599870999654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,8,64,0,1,float16,float16,0,0.06462400158246358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,8,64,128,1,float16,fp8,0,0.06386666496594746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,8,64,128,1,fp8,fp8,0,0.0783733328183492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,8,64,0,1,float16,fp8,0,0.06305600206057231
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,8,64,0,1,fp8,fp8,0,0.07841599980990092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,16,64,128,1,float16,float16,0,0.03798400113979975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,16,64,0,1,float16,float16,0,0.03806933263937632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,16,64,128,1,float16,fp8,0,0.03724266588687897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,16,64,128,1,fp8,fp8,0,0.04887466629346212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,16,64,0,1,float16,fp8,0,0.03847466657559077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,16,64,0,1,fp8,fp8,0,0.048938666780789696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,1,64,128,1,float16,float16,0,0.03373866776625315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,1,64,0,1,float16,float16,0,0.03368533402681351
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,1,64,128,1,float16,fp8,0,0.03390933324893316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,1,64,128,1,fp8,fp8,0,0.04349866509437561
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,1,64,0,1,float16,fp8,0,0.03403199960788091
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,1,64,0,1,fp8,fp8,0,0.04427200059096018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,2,64,128,1,float16,float16,0,0.03482666611671448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,2,64,0,1,float16,float16,0,0.034261333445707955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,2,64,128,1,float16,fp8,0,0.0341333324710528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,2,64,128,1,fp8,fp8,0,0.04534933467706045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,2,64,0,1,float16,fp8,0,0.03452266752719879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,2,64,0,1,fp8,fp8,0,0.044591998060544334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,4,64,128,1,float16,float16,0,0.03561066587766012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,4,64,0,1,float16,float16,0,0.035818666219711304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,4,64,128,1,float16,fp8,0,0.03603733330965042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,4,64,128,1,fp8,fp8,0,0.04636266827583313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,4,64,0,1,float16,fp8,0,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,4,64,0,1,fp8,fp8,0,0.045754666129748024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,8,64,128,1,float16,float16,0,0.036533333361148834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,8,64,0,1,float16,float16,0,0.036490666369597115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,8,64,128,1,float16,fp8,0,0.0365226666132609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,8,64,128,1,fp8,fp8,0,0.04730133215586344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,8,64,0,1,float16,fp8,0,0.03690666705369949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,8,64,0,1,fp8,fp8,0,0.0469813346862793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,16,64,128,1,float16,float16,0,0.025045332809289295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,16,64,0,1,float16,float16,0,0.025674665967623394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,16,64,128,1,float16,fp8,0,0.0249493345618248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,16,64,128,1,fp8,fp8,0,0.030565333863099415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,16,64,0,1,float16,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,1,64,0,1,float16,fp8,0,0.02327999969323476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,16,64,0,1,fp8,fp8,0,0.03014400104681651
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,1,64,128,1,float16,float16,0,0.0236160010099411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,1,64,0,1,float16,float16,0,0.024090667565663654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,1,64,128,1,float16,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,1,64,128,1,fp8,fp8,0,0.02974933385848999
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,1,64,0,1,fp8,fp8,0,0.02923733244339625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,2,64,128,1,float16,float16,0,0.023621333142121632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,2,64,0,1,float16,float16,0,0.023402666052182514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,2,64,128,1,float16,fp8,0,0.023978665471076965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,2,64,128,1,fp8,fp8,0,0.02932800104220708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,2,64,0,1,float16,fp8,0,0.024293333292007446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,2,64,0,1,fp8,fp8,0,0.029120000700155895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,4,64,128,1,float16,float16,0,0.025040000677108765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,4,64,0,1,float16,float16,0,0.02497066557407379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,4,64,128,1,float16,fp8,0,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,4,64,128,1,fp8,fp8,0,0.0301706666747729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,4,64,0,1,float16,fp8,0,0.024069334069887798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,4,64,0,1,fp8,fp8,0,0.030282666285832722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,8,64,0,1,float16,fp8,0,0.025061334172884624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,8,64,128,1,float16,float16,0,0.024469333390394848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,8,64,0,1,float16,float16,0,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,8,64,128,1,float16,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,8,64,128,1,fp8,fp8,0,0.0303413321574529
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,8,64,0,1,fp8,fp8,0,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,16,64,128,1,float16,float16,0,0.018373332917690277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,16,64,0,1,float16,float16,0,0.01860800012946129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,16,64,128,1,float16,fp8,0,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,16,64,128,1,fp8,fp8,0,0.021733333667119343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,16,64,0,1,float16,fp8,0,0.01844266677896182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,16,64,0,1,fp8,fp8,0,0.02207999924818675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,1,64,128,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,1,64,0,1,float16,float16,0,0.017349333812793095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,1,64,128,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,1,64,128,1,fp8,fp8,0,0.020901332298914593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,1,64,0,1,float16,fp8,0,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,1,64,0,1,fp8,fp8,0,0.02067733307679494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,2,64,128,1,float16,float16,0,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,2,64,0,1,float16,float16,0,0.017594666530688603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,2,64,128,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,2,64,128,1,fp8,fp8,0,0.02088533341884613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,2,64,0,1,float16,fp8,0,0.01762666677435239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,2,64,0,1,fp8,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,4,64,128,1,float16,float16,0,0.01757866640885671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,4,64,0,1,float16,float16,0,0.01754666616519292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,4,64,128,1,float16,fp8,0,0.018218666315078735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,4,64,128,1,fp8,fp8,0,0.0215786670645078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,4,64,0,1,float16,fp8,0,0.017674667139848072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,4,64,0,1,fp8,fp8,0,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,8,64,128,1,float16,float16,0,0.018170667191346485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,8,64,0,1,float16,float16,0,0.018272000054518383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,8,64,128,1,float16,fp8,0,0.017642666896184284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,8,64,128,1,fp8,fp8,0,0.021781332790851593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,8,64,0,1,float16,fp8,0,0.018543999642133713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,8,64,0,1,fp8,fp8,0,0.021573332448800404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,16,64,128,1,float16,float16,0,0.014298666268587112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,16,64,0,1,float16,float16,0,0.01545599972208341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,16,64,128,1,float16,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,16,64,128,1,fp8,fp8,0,0.01982933282852173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,16,64,0,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,16,64,0,1,fp8,fp8,0,0.019839999576409657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,1,64,128,1,float16,float16,0,0.01471466695268949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,1,64,0,1,float16,float16,0,0.014416000495354334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,1,64,128,1,float16,fp8,0,0.014405333747466406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,1,64,128,1,fp8,fp8,0,0.019440000255902607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,1,64,0,1,float16,fp8,0,0.014618666221698126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,1,64,0,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,2,64,128,1,float16,float16,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,2,64,0,1,float16,float16,0,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,2,64,128,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,2,64,128,1,fp8,fp8,0,0.019402666638294857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,2,64,0,1,float16,fp8,0,0.01471466695268949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,2,64,0,1,fp8,fp8,0,0.01993600030740102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,4,64,128,1,float16,float16,0,0.014463999619086584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,4,64,0,1,float16,float16,0,0.014576000471909841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,4,64,128,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,4,64,128,1,fp8,fp8,0,0.01956266661485036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,4,64,0,1,float16,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,4,64,0,1,fp8,fp8,0,0.019760000209013622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,8,64,128,1,float16,float16,0,0.014565333724021912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,8,64,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,8,64,128,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,8,64,128,1,fp8,fp8,0,0.019706666469573975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,8,64,0,1,float16,fp8,0,0.015599999576807022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,8,64,0,1,fp8,fp8,0,0.01988799994190534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,16,64,128,1,float16,float16,0,0.01423466702302297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,16,64,0,1,float16,float16,0,0.01413333291808764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,16,64,128,1,float16,fp8,0,0.014080000420411428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,16,64,128,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,16,64,0,1,float16,fp8,0,0.014069333672523499
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,16,64,0,1,fp8,fp8,0,0.018698666244745255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,1,64,128,1,float16,float16,0,0.014021333307027817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,1,64,0,1,float16,float16,0,0.013983999689420065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,1,64,128,1,float16,fp8,0,0.014730667074521383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,1,64,128,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,1,64,0,1,float16,fp8,0,0.014560000350077948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,1,64,0,1,fp8,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,2,64,128,1,float16,float16,0,0.013866666704416275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,2,64,0,1,float16,float16,0,0.014127999544143677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,2,64,128,1,float16,fp8,0,0.014495999862750372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,2,64,128,1,fp8,fp8,0,0.01940800001223882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,2,64,0,1,float16,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,2,64,0,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,4,64,128,1,float16,float16,0,0.013983999689420065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,4,64,0,1,float16,float16,0,0.014373333503802618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,4,64,128,1,float16,fp8,0,0.014111999422311783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,4,64,128,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,4,64,0,1,float16,fp8,0,0.0144213338692983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,4,64,0,1,fp8,fp8,0,0.019482667247454327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,8,64,128,1,float16,float16,0,0.01434133326013883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,8,64,0,1,float16,float16,0,0.014064000298579534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,8,64,128,1,float16,fp8,0,0.014485333114862442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,8,64,128,1,fp8,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,8,64,0,1,float16,fp8,0,0.014463999619086584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,8,64,0,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,16,64,128,1,float16,float16,0,0.013424000392357508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,16,64,0,1,float16,float16,0,0.013306666165590286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,16,64,128,1,float16,fp8,0,0.013733333597580591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,16,64,128,1,fp8,fp8,0,0.018266666680574417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,16,64,0,1,float16,fp8,0,0.013882666826248169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,16,64,0,1,fp8,fp8,0,0.01775466650724411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,1,64,128,1,float16,float16,0,0.01350933313369751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,1,64,0,1,float16,float16,0,0.013605333864688873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,1,64,128,1,float16,fp8,0,0.013584000368913015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,1,64,128,1,fp8,fp8,0,0.0183999997874101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,1,64,0,1,float16,fp8,0,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,1,64,0,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,2,64,128,1,float16,float16,0,0.01258133351802826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,2,64,0,1,float16,float16,0,0.013872000078360239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,2,64,128,1,float16,fp8,0,0.014058666924635569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,2,64,128,1,fp8,fp8,0,0.01823466643691063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,2,64,0,1,float16,fp8,0,0.013765333841244379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,2,64,0,1,fp8,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,4,64,128,1,float16,float16,0,0.013786666095256805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,4,64,0,1,float16,float16,0,0.01392000044385592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,4,64,128,1,float16,fp8,0,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,4,64,128,1,fp8,fp8,0,0.01841066653529803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,4,64,0,1,float16,fp8,0,0.014064000298579534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,4,64,0,1,fp8,fp8,0,0.0184906671444575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,8,64,128,1,float16,float16,0,0.013440000514189402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,8,64,0,1,float16,float16,0,0.013904000322024027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,8,64,128,1,float16,fp8,0,0.014197333405415217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,8,64,128,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,8,64,0,1,float16,fp8,0,0.013637332866589228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,8,64,0,1,fp8,fp8,0,0.018543999642133713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,1,64,128,1,float16,float16,0,0.09676800171534221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,1,64,0,1,float16,float16,0,0.09612266222635905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,1,64,128,1,float16,fp8,0,0.09575999776522319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,1,64,128,1,fp8,fp8,0,0.172106663386027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,1,64,0,1,float16,fp8,0,0.09672000010808308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,1,64,0,1,fp8,fp8,0,0.17287466923395792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,2,64,128,1,float16,float16,0,0.09854400157928467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,2,64,0,1,float16,float16,0,0.0990826686223348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,2,64,128,1,float16,fp8,0,0.09805867075920105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,2,64,128,1,fp8,fp8,0,0.16949333747227988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,2,64,0,1,float16,fp8,0,0.0974826713403066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,2,64,0,1,fp8,fp8,0,0.17123732964197794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,4,64,128,1,float16,float16,0,0.10327999790509541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,4,64,0,1,float16,float16,0,0.10229866703351338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,4,64,128,1,float16,fp8,0,0.10247466961542766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,4,64,128,1,fp8,fp8,0,0.17710934082667032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,4,64,0,1,float16,fp8,0,0.10289067029953003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,4,64,0,1,fp8,fp8,0,0.17628800868988037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,8,64,128,1,float16,float16,0,0.1055626670519511
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,8,64,0,1,float16,float16,0,0.10500799616177876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,8,64,128,1,float16,fp8,0,0.10198400417963664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,8,64,128,1,fp8,fp8,0,0.18012267351150513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,8,64,0,1,float16,fp8,0,0.10231467088063557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,8,64,0,1,fp8,fp8,0,0.17946134010950723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,16,64,128,1,float16,float16,0,0.06797333558400472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,16,64,0,1,float16,float16,0,0.06795200208822887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,16,64,128,1,float16,fp8,0,0.0653599997361501
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,16,64,128,1,fp8,fp8,0,0.10248000423113506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,16,64,0,1,float16,fp8,0,0.06551466882228851
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,16,64,0,1,fp8,fp8,0,0.10345600048700969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,1,64,128,1,float16,float16,0,0.05267733335494995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,1,64,0,1,float16,float16,0,0.05317866802215576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,1,64,128,1,float16,fp8,0,0.053632001082102455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,1,64,128,1,fp8,fp8,0,0.09336533149083455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,1,64,0,1,float16,fp8,0,0.05295466880003611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,1,64,0,1,fp8,fp8,0,0.09475732843081157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,2,64,128,1,float16,float16,0,0.05429866909980774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,2,64,0,1,float16,float16,0,0.05435733497142792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,2,64,128,1,float16,fp8,0,0.05366399884223938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,2,64,128,1,fp8,fp8,0,0.09586133559544881
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,2,64,0,1,float16,fp8,0,0.054085334142049156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,2,64,0,1,fp8,fp8,0,0.09437333544095357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,4,64,128,1,float16,float16,0,0.057461331288019814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,4,64,0,1,float16,float16,0,0.05737066765626272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,4,64,128,1,float16,fp8,0,0.056703999638557434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,4,64,128,1,fp8,fp8,0,0.09910933176676433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,4,64,0,1,float16,fp8,0,0.05716800192991892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,4,64,0,1,fp8,fp8,0,0.09827199578285217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,8,64,128,1,float16,float16,0,0.059978668888409935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,8,64,0,1,float16,float16,0,0.06050133208433787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,8,64,128,1,float16,fp8,0,0.059215997656186424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,8,64,128,1,fp8,fp8,0,0.10097600022951762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,8,64,0,1,float16,fp8,0,0.05904000004132589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,8,64,0,1,fp8,fp8,0,0.10062932968139648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,16,64,0,1,float16,fp8,0,0.0359199990828832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,16,64,128,1,float16,float16,0,0.037104000647862755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,16,64,0,1,float16,float16,0,0.037018666664759316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,16,64,128,1,float16,fp8,0,0.03510399907827377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,16,64,128,1,fp8,fp8,0,0.059674665331840515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,16,64,0,1,fp8,fp8,0,0.06137600044409434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,1,64,128,1,float16,float16,0,0.032485333581765495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,1,64,0,1,float16,float16,0,0.032586666444937386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,1,64,128,1,float16,fp8,0,0.032170665760835014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,1,64,128,1,fp8,fp8,0,0.05529599885145823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,1,64,0,1,float16,fp8,0,0.032730666299661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,1,64,0,1,fp8,fp8,0,0.055013333757718406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,2,64,128,1,float16,float16,0,0.03287466615438461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,2,64,0,1,float16,float16,0,0.03299200038115183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,2,64,128,1,float16,fp8,0,0.03270933280388514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,2,64,128,1,fp8,fp8,0,0.05538133283456167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,2,64,0,1,float16,fp8,0,0.03251733382542928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,2,64,0,1,fp8,fp8,0,0.055120001236597695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,4,64,128,1,float16,float16,0,0.03419733295838038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,4,64,0,1,float16,float16,0,0.0340639998515447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,4,64,128,1,float16,fp8,0,0.03418133407831192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,4,64,128,1,fp8,fp8,0,0.057477335135142006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,4,64,0,1,float16,fp8,0,0.03389333436886469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,4,64,0,1,fp8,fp8,0,0.0572320024172465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,8,64,128,1,float16,float16,0,0.03439466655254364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,8,64,0,1,float16,float16,0,0.03474666674931844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,8,64,128,1,float16,fp8,0,0.03442133218050003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,8,64,128,1,fp8,fp8,0,0.0572266678015391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,8,64,0,1,float16,fp8,0,0.03457599878311157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,8,64,0,1,fp8,fp8,0,0.0574239989121755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,16,64,128,1,float16,float16,0,0.022821334501107533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,16,64,0,1,float16,float16,0,0.022890667120615642
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,16,64,128,1,float16,fp8,0,0.022991999983787537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,16,64,128,1,fp8,fp8,0,0.03612266729275385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,16,64,0,1,float16,fp8,0,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,16,64,0,1,fp8,fp8,0,0.035930665830771126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,1,64,128,1,float16,float16,0,0.02145066608985265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,1,64,0,1,float16,float16,0,0.021365332106749218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,1,64,128,1,float16,fp8,0,0.022197333474953968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,1,64,128,1,fp8,fp8,0,0.03486400097608566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,1,64,0,1,float16,fp8,0,0.022053333620230358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,1,64,0,1,fp8,fp8,0,0.035317334036032356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,2,64,128,1,float16,float16,0,0.021674667795499165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,2,64,0,1,float16,float16,0,0.022175999979178112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,2,64,128,1,float16,fp8,0,0.02195200075705846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,2,64,128,1,fp8,fp8,0,0.034847999612490334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,2,64,0,1,float16,fp8,0,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,2,64,0,1,fp8,fp8,0,0.03565866748491923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,4,64,128,1,float16,float16,0,0.022650666534900665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,4,64,0,1,float16,float16,0,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,4,64,128,1,float16,fp8,0,0.022815999885400135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,4,64,128,1,fp8,fp8,0,0.03606933355331421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,4,64,0,1,float16,fp8,0,0.022800001005331676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,4,64,0,1,fp8,fp8,0,0.0359253336985906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,8,64,128,1,float16,float16,0,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,8,64,0,1,float16,float16,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,8,64,128,1,float16,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,8,64,128,1,fp8,fp8,0,0.03591466695070267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,8,64,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,8,64,0,1,fp8,fp8,0,0.036090667049090065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,16,64,128,1,float16,float16,0,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,16,64,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,16,64,128,1,float16,fp8,0,0.017866666118303936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,16,64,128,1,fp8,fp8,0,0.02611200014750163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,16,64,0,1,float16,fp8,0,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,16,64,0,1,fp8,fp8,0,0.02569066733121872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,1,64,128,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,1,64,0,1,float16,float16,0,0.01657066618402799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,1,64,128,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,1,64,128,1,fp8,fp8,0,0.025029333929220837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,1,64,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,1,64,0,1,fp8,fp8,0,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,2,64,128,1,float16,float16,0,0.01674666628241539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,2,64,0,1,float16,float16,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,2,64,128,1,float16,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,2,64,128,1,fp8,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,2,64,0,1,float16,fp8,0,0.017466666797796886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,2,64,0,1,fp8,fp8,0,0.025274666647116344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,4,64,128,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,4,64,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,4,64,128,1,float16,fp8,0,0.017765333255132038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,4,64,128,1,fp8,fp8,0,0.025557334224383037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,4,64,0,1,float16,fp8,0,0.017583999782800674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,4,64,0,1,fp8,fp8,0,0.02532800038655599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,8,64,128,1,float16,float16,0,0.017429333180189133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,8,64,0,1,float16,float16,0,0.017450666675964992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,8,64,128,1,float16,fp8,0,0.017530667285124462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,8,64,128,1,fp8,fp8,0,0.025642665723959606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,8,64,0,1,float16,fp8,0,0.01803733284274737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,8,64,0,1,fp8,fp8,0,0.02566933383544286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,16,64,128,1,float16,float16,0,0.013951999445756277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,16,64,0,1,float16,float16,0,0.014138666292031607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,16,64,128,1,float16,fp8,0,0.01423466702302297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,16,64,128,1,fp8,fp8,0,0.020341333001852036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,16,64,0,1,float16,fp8,0,0.014335999886194864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,16,64,0,1,fp8,fp8,0,0.02088533341884613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,1,64,128,1,float16,float16,0,0.01434133326013883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,1,64,0,1,float16,float16,0,0.014149333039919535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,1,64,128,1,float16,fp8,0,0.01421333352724711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,1,64,128,1,fp8,fp8,0,0.019968000551064808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,1,64,0,1,float16,fp8,0,0.014287999520699183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,1,64,0,1,fp8,fp8,0,0.019834666202465694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,2,64,128,1,float16,float16,0,0.014277332772811254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,2,64,0,1,float16,float16,0,0.013983999689420065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,2,64,128,1,float16,fp8,0,0.014581333845853806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,2,64,128,1,fp8,fp8,0,0.02063999945918719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,2,64,0,1,float16,fp8,0,0.0143306665122509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,2,64,0,1,fp8,fp8,0,0.02062400057911873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,4,64,128,1,float16,float16,0,0.013957332819700241
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,4,64,0,1,float16,float16,0,0.014314666390419006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,4,64,128,1,float16,fp8,0,0.014736000448465347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,4,64,128,1,fp8,fp8,0,0.020053333292404812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,4,64,0,1,float16,fp8,0,0.014368000129858652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,4,64,0,1,fp8,fp8,0,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,8,64,128,1,float16,float16,0,0.014352000008026758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,8,64,0,1,float16,float16,0,0.014592000593741735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,8,64,128,1,float16,fp8,0,0.014682666709025701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,8,64,128,1,fp8,fp8,0,0.02089066555102666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,8,64,0,1,float16,fp8,0,0.014549333602190018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,8,64,0,1,fp8,fp8,0,0.020213333268960316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,16,64,128,1,float16,float16,0,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,16,64,0,1,float16,float16,0,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,16,64,128,1,float16,fp8,0,0.013343999783198038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,16,64,128,1,fp8,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,16,64,0,1,float16,fp8,0,0.013450667262077332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,16,64,0,1,fp8,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,1,64,128,1,float16,float16,0,0.012682666381200155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,1,64,0,1,float16,float16,0,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,1,64,128,1,float16,fp8,0,0.013365333278973898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,1,64,128,1,fp8,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,1,64,0,1,float16,fp8,0,0.013541333377361298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,1,64,0,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,2,64,128,1,float16,float16,0,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,2,64,0,1,float16,float16,0,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,2,64,128,1,float16,fp8,0,0.01328533391157786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,2,64,128,1,fp8,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,2,64,0,1,float16,fp8,0,0.013616000612576803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,2,64,0,1,fp8,fp8,0,0.01874133323629697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,4,64,128,1,float16,float16,0,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,4,64,0,1,float16,float16,0,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,4,64,128,1,float16,fp8,0,0.013434667140245438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,4,64,128,1,fp8,fp8,0,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,4,64,0,1,float16,fp8,0,0.01350933313369751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,4,64,0,1,fp8,fp8,0,0.021749332547187805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,8,64,128,1,float16,float16,0,0.013237333546082178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,8,64,0,1,float16,float16,0,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,8,64,128,1,float16,fp8,0,0.01341333364446958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,8,64,128,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,8,64,0,1,float16,fp8,0,0.013541333377361298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,8,64,0,1,fp8,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,16,64,0,1,fp8,fp8,0,0.018725333114465077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,16,64,128,1,float16,float16,0,0.012624000509579977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,16,64,0,1,float16,float16,0,0.012618667135636011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,16,64,128,1,float16,fp8,0,0.01267733300725619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,16,64,128,1,fp8,fp8,0,0.01882133384545644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,16,64,0,1,float16,fp8,0,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,1,64,128,1,float16,float16,0,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,1,64,0,1,float16,float16,0,0.01246400053302447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,1,64,128,1,float16,fp8,0,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,1,64,128,1,fp8,fp8,0,0.01841066653529803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,1,64,0,1,float16,fp8,0,0.013343999783198038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,1,64,0,1,fp8,fp8,0,0.01855466639002164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,2,64,128,1,float16,float16,0,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,2,64,0,1,float16,float16,0,0.012389333297808966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,2,64,128,1,float16,fp8,0,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,2,64,128,1,fp8,fp8,0,0.01869333287080129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,2,64,0,1,float16,fp8,0,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,2,64,0,1,fp8,fp8,0,0.019962667177120846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,4,64,128,1,float16,float16,0,0.012624000509579977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,4,64,0,1,float16,float16,0,0.01251199965675672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,4,64,128,1,float16,fp8,0,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,4,64,128,1,fp8,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,4,64,0,1,float16,fp8,0,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,4,64,0,1,fp8,fp8,0,0.01858666663368543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,8,64,128,1,float16,float16,0,0.012639999389648438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,8,64,0,1,float16,float16,0,0.012362666428089142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,8,64,128,1,float16,fp8,0,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,8,64,128,1,fp8,fp8,0,0.018464000274737675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,8,64,0,1,float16,fp8,0,0.013354666531085968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,8,64,0,1,fp8,fp8,0,0.018837332725524902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,16,64,128,1,float16,float16,0,0.012319999436537424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,16,64,0,1,float16,float16,0,0.012149333953857422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,16,64,128,1,float16,fp8,0,0.012410666793584824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,16,64,128,1,fp8,fp8,0,0.0184906671444575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,16,64,0,1,float16,fp8,0,0.012602667013804117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,16,64,0,1,fp8,fp8,0,0.01836266616980235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,1,64,128,1,float16,float16,0,0.012250666817029318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,1,64,0,1,float16,float16,0,0.012613333761692047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,1,64,128,1,float16,fp8,0,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,1,64,128,1,fp8,fp8,0,0.018458666900793713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,1,64,0,1,float16,fp8,0,0.012367999802033106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,1,64,0,1,fp8,fp8,0,0.018538666268189747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,2,64,128,1,float16,float16,0,0.012618667135636011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,2,64,0,1,float16,float16,0,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,2,64,128,1,float16,fp8,0,0.012693333129088083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,2,64,128,1,fp8,fp8,0,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,2,64,0,1,float16,fp8,0,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,2,64,0,1,fp8,fp8,0,0.018858666221300762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,4,64,128,1,float16,float16,0,0.012165332833925882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,4,64,0,1,float16,float16,0,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,4,64,128,1,float16,fp8,0,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,4,64,128,1,fp8,fp8,0,0.01863466699918111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,4,64,0,1,float16,fp8,0,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,4,64,0,1,fp8,fp8,0,0.01851733277241389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,8,64,128,1,float16,float16,0,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,8,64,0,1,float16,float16,0,0.012629333883523941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,8,64,128,1,float16,fp8,0,0.012565333396196365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,8,64,128,1,fp8,fp8,0,0.018357332795858383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,8,64,0,1,float16,fp8,0,0.013232000172138214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,8,64,0,1,fp8,fp8,0,0.018522666146357853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,1,64,128,1,float16,float16,0,0.058287998040517174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,1,64,0,1,float16,float16,0,0.05868266522884369
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,1,64,128,1,float16,fp8,0,0.05896000067392985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,1,64,128,1,fp8,fp8,0,0.1415733297665914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,1,64,0,1,float16,fp8,0,0.058917333682378135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,1,64,0,1,fp8,fp8,0,0.13991467157999674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,2,64,128,1,float16,float16,0,0.060165335734685264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,2,64,0,1,float16,float16,0,0.05991999804973602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,2,64,128,1,float16,fp8,0,0.059450666109720864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,2,64,128,1,fp8,fp8,0,0.14217600226402283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,2,64,0,1,float16,fp8,0,0.059690664211908974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,2,64,0,1,fp8,fp8,0,0.14401599764823914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,4,64,128,1,float16,float16,0,0.06160533428192139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,4,64,0,1,float16,float16,0,0.06266666452089946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,4,64,128,1,float16,fp8,0,0.061903998255729675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,4,64,128,1,fp8,fp8,0,0.14498133460680643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,4,64,0,1,float16,fp8,0,0.062074666221936546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,4,64,0,1,fp8,fp8,0,0.14570666352907816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,8,64,128,1,float16,float16,0,0.06586133440335591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,8,64,0,1,float16,float16,0,0.06543999910354614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,8,64,128,1,float16,fp8,0,0.06394133468468984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,8,64,128,1,fp8,fp8,0,0.145989328622818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,8,64,0,1,float16,fp8,0,0.06411199768384297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,8,64,0,1,fp8,fp8,0,0.14748799800872803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,16,64,128,1,float16,float16,0,0.03709333389997482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,16,64,0,1,float16,float16,0,0.03714666763941447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,16,64,128,1,float16,fp8,0,0.036544000109036766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,16,64,128,1,fp8,fp8,0,0.0832533339659373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,16,64,0,1,float16,fp8,0,0.035690667728583016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,16,64,0,1,fp8,fp8,0,0.08319999774297078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,1,64,128,1,float16,float16,0,0.0341386670867602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,1,64,0,1,float16,float16,0,0.03425599883000056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,1,64,128,1,float16,fp8,0,0.03443199892838796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,1,64,128,1,fp8,fp8,0,0.07834666470686595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,1,64,0,1,float16,fp8,0,0.034261333445707955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,1,64,0,1,fp8,fp8,0,0.07798400024573009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,2,64,128,1,float16,float16,0,0.03451200077931086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,2,64,0,1,float16,float16,0,0.03470933437347412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,2,64,128,1,float16,fp8,0,0.03459733227888743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,2,64,128,1,fp8,fp8,0,0.07924266656239827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,2,64,0,1,float16,fp8,0,0.03472533325354258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,2,64,0,1,fp8,fp8,0,0.07930666704972585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,4,64,128,1,float16,float16,0,0.03517866631348928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,4,64,0,1,float16,float16,0,0.035605333745479584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,4,64,128,1,float16,fp8,0,0.035402665535608925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,4,64,128,1,fp8,fp8,0,0.07975466549396515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,4,64,0,1,float16,fp8,0,0.03525333354870478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,4,64,0,1,fp8,fp8,0,0.08090133468310039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,8,64,128,1,float16,float16,0,0.035904000202814736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,8,64,0,1,float16,float16,0,0.035877334574858345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,8,64,128,1,float16,fp8,0,0.03615466753641764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,8,64,128,1,fp8,fp8,0,0.0809386670589447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,8,64,0,1,float16,fp8,0,0.03589866558710734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,8,64,0,1,fp8,fp8,0,0.08106666803359985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,16,64,128,1,float16,float16,0,0.02342933416366577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,16,64,0,1,float16,float16,0,0.023823998868465424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,16,64,128,1,float16,fp8,0,0.023183998962243397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,16,64,128,1,fp8,fp8,0,0.04828799764315287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,16,64,0,1,float16,fp8,0,0.023711999257405598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,16,64,0,1,fp8,fp8,0,0.04894933104515076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,1,64,128,1,float16,float16,0,0.02279466638962428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,1,64,0,1,float16,float16,0,0.022202665607134502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,1,64,128,1,float16,fp8,0,0.022821334501107533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,1,64,128,1,fp8,fp8,0,0.04825599988301595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,1,64,0,1,float16,fp8,0,0.02250133454799652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,1,64,0,1,fp8,fp8,0,0.04794666667779287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,2,64,128,1,float16,float16,0,0.022677332162857056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,2,64,0,1,float16,float16,0,0.022842665513356526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,2,64,128,1,float16,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,2,64,128,1,fp8,fp8,0,0.04771733283996582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,2,64,0,1,float16,fp8,0,0.023103999594847362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,2,64,0,1,fp8,fp8,0,0.04833066463470459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,4,64,128,1,float16,float16,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,4,64,0,1,float16,float16,0,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,4,64,128,1,float16,fp8,0,0.023386667172114056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,4,64,128,1,fp8,fp8,0,0.048810665806134544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,4,64,0,1,float16,fp8,0,0.023898666103680927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,4,64,0,1,fp8,fp8,0,0.048485333720842995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,8,64,128,1,float16,float16,0,0.023823998868465424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,8,64,0,1,float16,float16,0,0.023711999257405598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,8,64,128,1,float16,fp8,0,0.02386133372783661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,8,64,128,1,fp8,fp8,0,0.04868799944718679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,8,64,0,1,float16,fp8,0,0.02369066576162974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,8,64,0,1,fp8,fp8,0,0.04827199876308441
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,16,64,128,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,16,64,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,16,64,128,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,16,64,128,1,fp8,fp8,0,0.03212266663710276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,16,64,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,16,64,0,1,fp8,fp8,0,0.03215999901294708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,1,64,128,1,float16,float16,0,0.01594666639963786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,1,64,0,1,float16,float16,0,0.016176000237464905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,1,64,128,1,float16,fp8,0,0.016549333930015564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,1,64,128,1,fp8,fp8,0,0.030928000807762146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,1,64,0,1,float16,fp8,0,0.016415999581416447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,1,64,0,1,fp8,fp8,0,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,2,64,128,1,float16,float16,0,0.016496000190575916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,2,64,0,1,float16,float16,0,0.016389333953460056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,2,64,128,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,2,64,128,1,fp8,fp8,0,0.030858665704727173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,2,64,0,1,float16,fp8,0,0.016549333930015564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,2,64,0,1,fp8,fp8,0,0.031343999008337654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,4,64,128,1,float16,float16,0,0.016469333320856094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,4,64,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,4,64,128,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,4,64,128,1,fp8,fp8,0,0.03197866678237915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,4,64,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,4,64,0,1,fp8,fp8,0,0.03164266546567281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,8,64,128,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,8,64,0,1,float16,float16,0,0.016688000410795212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,8,64,128,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,8,64,128,1,fp8,fp8,0,0.03190399954716364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,8,64,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,8,64,0,1,fp8,fp8,0,0.03176533430814743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,16,64,128,1,float16,float16,0,0.013999999811251959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,16,64,0,1,float16,float16,0,0.013967999567588171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,16,64,128,1,float16,fp8,0,0.01440000037352244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,16,64,128,1,fp8,fp8,0,0.023738667368888855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,16,64,0,1,float16,fp8,0,0.01463466634353002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,1,64,0,1,fp8,fp8,0,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,16,64,0,1,fp8,fp8,0,0.024031999210516613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,1,64,128,1,float16,float16,0,0.013818666338920593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,1,64,0,1,float16,float16,0,0.014111999422311783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,1,64,128,1,float16,fp8,0,0.014463999619086584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,1,64,128,1,fp8,fp8,0,0.023242667317390442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,1,64,0,1,float16,fp8,0,0.014314666390419006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,2,64,128,1,float16,float16,0,0.014170666535695394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,2,64,0,1,float16,float16,0,0.014277332772811254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,2,64,128,1,float16,fp8,0,0.014245333770910898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,2,64,128,1,fp8,fp8,0,0.02349333216746648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,2,64,0,1,float16,fp8,0,0.01444799949725469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,2,64,0,1,fp8,fp8,0,0.023914667467276256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,4,64,128,1,float16,float16,0,0.013983999689420065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,4,64,0,1,float16,float16,0,0.013967999567588171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,4,64,128,1,float16,fp8,0,0.014592000593741735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,4,64,128,1,fp8,fp8,0,0.023818666736284893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,4,64,0,1,float16,fp8,0,0.014256000518798828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,4,64,0,1,fp8,fp8,0,0.02382933348417282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,8,64,128,1,float16,float16,0,0.01394133393963178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,8,64,0,1,float16,float16,0,0.014533333480358124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,8,64,128,1,float16,fp8,0,0.014576000471909841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,8,64,128,1,fp8,fp8,0,0.023792001108328503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,8,64,0,1,float16,fp8,0,0.014432000617186228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,8,64,0,1,fp8,fp8,0,0.024357333779335022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,16,64,128,1,float16,float16,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,16,64,0,1,float16,float16,0,0.012421333541472753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,16,64,128,1,float16,fp8,0,0.012560000022252401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,16,64,128,1,fp8,fp8,0,0.019776000330845516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,16,64,0,1,float16,fp8,0,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,16,64,0,1,fp8,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,1,64,128,1,float16,float16,0,0.01259200026591619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,1,64,0,1,float16,float16,0,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,1,64,128,1,float16,fp8,0,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,1,64,128,1,fp8,fp8,0,0.019530666371186573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,1,64,0,1,float16,fp8,0,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,1,64,0,1,fp8,fp8,0,0.01952533299724261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,2,64,128,1,float16,float16,0,0.012682666381200155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,2,64,0,1,float16,float16,0,0.01258133351802826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,2,64,128,1,float16,fp8,0,0.013386666774749756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,2,64,128,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,2,64,0,1,float16,fp8,0,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,2,64,0,1,fp8,fp8,0,0.019786667078733444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,4,64,128,1,float16,float16,0,0.012655999511480331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,4,64,0,1,float16,float16,0,0.012522666404644648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,4,64,128,1,float16,fp8,0,0.013253333667914072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,4,64,128,1,fp8,fp8,0,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,4,64,0,1,float16,fp8,0,0.013477332890033722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,4,64,0,1,fp8,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,8,64,128,1,float16,float16,0,0.01246400053302447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,8,64,0,1,float16,float16,0,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,8,64,128,1,float16,fp8,0,0.01320533330241839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,8,64,128,1,fp8,fp8,0,0.020175999651352566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,8,64,0,1,float16,fp8,0,0.013253333667914072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,8,64,0,1,fp8,fp8,0,0.022053333620230358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,16,64,128,1,float16,float16,0,0.012159999459981918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,16,64,0,1,float16,float16,0,0.011722666521867117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,16,64,128,1,float16,fp8,0,0.01231466606259346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,16,64,128,1,fp8,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,16,64,0,1,float16,fp8,0,0.012309333930412928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,16,64,0,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,1,64,128,1,float16,float16,0,0.012074666718641916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,1,64,0,1,float16,float16,0,0.012410666793584824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,1,64,128,1,float16,fp8,0,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,1,64,128,1,fp8,fp8,0,0.01876266673207283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,1,64,0,1,float16,fp8,0,0.012560000022252401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,1,64,0,1,fp8,fp8,0,0.018719999740521114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,2,64,128,1,float16,float16,0,0.01192533348997434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,2,64,0,1,float16,float16,0,0.012346666306257248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,2,64,128,1,float16,fp8,0,0.012506666282812754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,2,64,128,1,fp8,fp8,0,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,2,64,0,1,float16,fp8,0,0.012693333129088083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,2,64,0,1,fp8,fp8,0,0.018511999398469925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,4,64,128,1,float16,float16,0,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,4,64,0,1,float16,float16,0,0.011834666132926941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,4,64,128,1,float16,fp8,0,0.01258133351802826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,4,64,128,1,fp8,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,8,64,128,1,fp8,fp8,0,0.01859733338157336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,4,64,0,1,float16,fp8,0,0.012624000509579977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,4,64,0,1,fp8,fp8,0,0.018778666853904724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,8,64,128,1,float16,float16,0,0.012042666474978128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,8,64,0,1,float16,float16,0,0.012202666451533636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,8,64,128,1,float16,fp8,0,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,8,64,0,1,float16,fp8,0,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,8,64,0,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,16,64,128,1,float16,float16,0,0.011936000237862269
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,16,64,0,1,float16,float16,0,0.011989332735538483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,16,64,128,1,float16,fp8,0,0.012133333832025528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,16,64,128,1,fp8,fp8,0,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,16,64,0,1,float16,fp8,0,0.012426666915416718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,16,64,0,1,fp8,fp8,0,0.01868266612291336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,1,64,128,1,float16,float16,0,0.011861333002646765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,1,64,0,1,float16,float16,0,0.012005332857370377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,1,64,128,1,float16,fp8,0,0.01239466667175293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,1,64,128,1,fp8,fp8,0,0.018031999468803406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,1,64,0,1,float16,fp8,0,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,1,64,0,1,fp8,fp8,0,0.018373332917690277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,2,64,128,1,float16,float16,0,0.011941333611806234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,2,64,0,1,float16,float16,0,0.012213333199421564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,2,64,128,1,float16,fp8,0,0.012309333930412928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,2,64,128,1,fp8,fp8,0,0.0179626668492953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,2,64,0,1,float16,fp8,0,0.012240000069141388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,2,64,0,1,fp8,fp8,0,0.018298666924238205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,4,64,128,1,float16,float16,0,0.012261333564917246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,4,64,0,1,float16,float16,0,0.01202133297920227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,4,64,128,1,float16,fp8,0,0.012554666648308435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,4,64,128,1,fp8,fp8,0,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,4,64,0,1,float16,fp8,0,0.01257066677014033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,4,64,0,1,fp8,fp8,0,0.01821333294113477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,8,64,128,1,float16,float16,0,0.011887999872366587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,8,64,0,1,float16,float16,0,0.011952000359694162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,8,64,128,1,float16,fp8,0,0.012639999389648438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,8,64,128,1,fp8,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,8,64,0,1,float16,fp8,0,0.012608000387748083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,8,64,0,1,fp8,fp8,0,0.01815466706951459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,16,64,128,1,float16,float16,0,0.011898666620254517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,16,64,0,1,float16,float16,0,0.011312000453472137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,16,64,128,1,float16,fp8,0,0.011802667131026586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,16,64,128,1,fp8,fp8,0,0.01802666609485944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,16,64,0,1,float16,fp8,0,0.012015999605258306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,16,64,0,1,fp8,fp8,0,0.018330667167901993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,1,64,128,1,float16,float16,0,0.012213333199421564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,1,64,0,1,float16,float16,0,0.011727999895811081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,1,64,128,1,float16,fp8,0,0.01246400053302447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,1,64,128,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,1,64,0,1,float16,fp8,0,0.012213333199421564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,1,64,0,1,fp8,fp8,0,0.018394666413466137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,2,64,128,1,float16,float16,0,0.01192533348997434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,2,64,0,1,float16,float16,0,0.011957333733638128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,2,64,128,1,float16,fp8,0,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,2,64,128,1,fp8,fp8,0,0.018160000443458557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,2,64,0,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,2,64,0,1,fp8,fp8,0,0.018133333573738735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,4,64,128,1,float16,float16,0,0.011855999628702799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,4,64,0,1,float16,float16,0,0.012069333344697952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,4,64,128,1,float16,fp8,0,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,4,64,128,1,fp8,fp8,0,0.018538666268189747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,4,64,0,1,float16,fp8,0,0.012549333274364471
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,4,64,0,1,fp8,fp8,0,0.01803733284274737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,8,64,128,1,float16,float16,0,0.011973333855470022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,8,64,0,1,float16,float16,0,0.0120319997270902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,8,64,128,1,float16,fp8,0,0.012560000022252401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,8,64,128,1,fp8,fp8,0,0.01811733345190684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,8,64,0,1,float16,fp8,0,0.01251199965675672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,8,64,0,1,fp8,fp8,0,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,1,64,128,1,float16,float16,0,0.04450133442878723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,1,64,0,1,float16,float16,0,0.04450133442878723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,1,64,128,1,float16,fp8,0,0.0444160004456838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,1,64,128,1,fp8,fp8,0,0.12617066502571106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,1,64,0,1,float16,fp8,0,0.04465599854787191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,1,64,0,1,fp8,fp8,0,0.12516799569129944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,2,64,128,1,float16,float16,0,0.04470933477083842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,2,64,0,1,float16,float16,0,0.04520000020662943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,2,64,128,1,float16,fp8,0,0.044778664906819664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,2,64,128,1,fp8,fp8,0,0.12705066800117493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,2,64,0,1,float16,fp8,0,0.04477333525816599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,2,64,0,1,fp8,fp8,0,0.1267359952131907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,4,64,128,1,float16,float16,0,0.04574400186538696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,4,64,0,1,float16,float16,0,0.045696000258127846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,4,64,128,1,float16,fp8,0,0.04560533165931702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,4,64,128,1,fp8,fp8,0,0.12924266854921976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,4,64,0,1,float16,fp8,0,0.04568000137805939
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,4,64,0,1,fp8,fp8,0,0.12772267063458762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,8,64,128,1,float16,float16,0,0.04633066554864248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,8,64,0,1,float16,float16,0,0.046629334489504494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,8,64,128,1,float16,fp8,0,0.04584533472855886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,8,64,128,1,fp8,fp8,0,0.12798399726549783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,8,64,0,1,float16,fp8,0,0.045834665497144066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,8,64,0,1,fp8,fp8,0,0.12930132945378622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,16,64,128,1,float16,float16,0,0.02773866554101308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,16,64,0,1,float16,float16,0,0.027301333844661713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,16,64,128,1,float16,fp8,0,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,16,64,128,1,fp8,fp8,0,0.07308800021807353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,16,64,0,1,float16,fp8,0,0.027482666075229645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,16,64,0,1,fp8,fp8,0,0.07303466896216075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,1,64,128,1,float16,float16,0,0.027402666707833607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,1,64,0,1,float16,float16,0,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,1,64,128,1,float16,fp8,0,0.027866666515668232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,1,64,128,1,fp8,fp8,0,0.07154666880766551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,1,64,0,1,float16,fp8,0,0.027866666515668232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,1,64,0,1,fp8,fp8,0,0.0713866651058197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,2,64,128,1,float16,float16,0,0.02758399893840154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,2,64,0,1,float16,float16,0,0.028010666370391846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,2,64,128,1,float16,fp8,0,0.028138667345046997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,2,64,128,1,fp8,fp8,0,0.0726453314224879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,2,64,0,1,float16,fp8,0,0.028101332485675812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,2,64,0,1,fp8,fp8,0,0.07193066676457723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,4,64,128,1,float16,float16,0,0.028378665447235107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,4,64,0,1,float16,float16,0,0.028304000695546467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,4,64,128,1,float16,fp8,0,0.02869333326816559
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,4,64,128,1,fp8,fp8,0,0.07316266496976216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,4,64,0,1,float16,fp8,0,0.028570666909217834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,4,64,0,1,fp8,fp8,0,0.07315200070540111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,8,64,128,1,float16,float16,0,0.028346667687098186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,8,64,0,1,float16,float16,0,0.028522667785485584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,8,64,128,1,float16,fp8,0,0.028762665887673695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,8,64,128,1,fp8,fp8,0,0.07255466779073079
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,8,64,0,1,float16,fp8,0,0.028751999139785767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,8,64,0,1,fp8,fp8,0,0.07368533313274384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,16,64,128,1,float16,float16,0,0.019578666736682255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,16,64,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,16,64,128,1,float16,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,16,64,128,1,fp8,fp8,0,0.04524800181388855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,16,64,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,16,64,0,1,fp8,fp8,0,0.04548799991607666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,1,64,128,1,float16,float16,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,1,64,0,1,float16,float16,0,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,2,64,0,1,float16,float16,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,1,64,128,1,float16,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,1,64,128,1,fp8,fp8,0,0.044122666120529175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,1,64,0,1,float16,fp8,0,0.019434666881958645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,1,64,0,1,fp8,fp8,0,0.04389866689840952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,2,64,128,1,float16,float16,0,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,2,64,128,1,float16,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,2,64,128,1,fp8,fp8,0,0.0444213350613912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,2,64,0,1,float16,fp8,0,0.019402666638294857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,2,64,0,1,fp8,fp8,0,0.04457066456476847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,4,64,128,1,float16,float16,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,4,64,0,1,float16,float16,0,0.019600000232458115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,4,64,128,1,float16,fp8,0,0.019637333850065868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,4,64,128,1,fp8,fp8,0,0.04422399898370107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,4,64,0,1,float16,fp8,0,0.01950399950146675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,4,64,0,1,fp8,fp8,0,0.04446933170159658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,8,64,128,1,float16,float16,0,0.019621333728233974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,8,64,0,1,float16,float16,0,0.019765333582957584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,8,64,128,1,float16,fp8,0,0.02014933278163274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,8,64,128,1,fp8,fp8,0,0.045706664522488914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,8,64,0,1,float16,fp8,0,0.020026666422684986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,8,64,0,1,fp8,fp8,0,0.04453866680463155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,16,64,128,1,float16,float16,0,0.014618666221698126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,16,64,0,1,float16,float16,0,0.014629332969586054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,16,64,128,1,float16,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,16,64,128,1,fp8,fp8,0,0.03108799954255422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,16,64,0,1,float16,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,16,64,0,1,fp8,fp8,0,0.030581332743167877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,1,64,128,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,1,64,0,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,1,64,128,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,1,64,128,1,fp8,fp8,0,0.029898665845394135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,1,64,0,1,float16,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,1,64,0,1,fp8,fp8,0,0.030053332448005676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,2,64,128,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,2,64,0,1,float16,float16,0,0.01461333284775416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,2,64,128,1,float16,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,2,64,128,1,fp8,fp8,0,0.030368000268936157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,2,64,0,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,2,64,0,1,fp8,fp8,0,0.030250666042168934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,4,64,128,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,4,64,0,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,4,64,128,1,float16,fp8,0,0.01545599972208341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,4,64,128,1,fp8,fp8,0,0.03010133405526479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,4,64,0,1,float16,fp8,0,0.015583999454975128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,4,64,0,1,fp8,fp8,0,0.030053332448005676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,8,64,128,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,8,64,0,1,float16,float16,0,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,8,64,128,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,8,64,128,1,fp8,fp8,0,0.030063999195893604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,8,64,0,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,8,64,0,1,fp8,fp8,0,0.030031998952229817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,16,64,128,1,float16,float16,0,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,16,64,0,1,float16,float16,0,0.013242666920026144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,16,64,128,1,float16,fp8,0,0.013445333888133367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,16,64,128,1,fp8,fp8,0,0.022783999641736347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,16,64,0,1,float16,fp8,0,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,16,64,0,1,fp8,fp8,0,0.022677332162857056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,1,64,128,1,float16,float16,0,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,1,64,0,1,float16,float16,0,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,1,64,128,1,float16,fp8,0,0.013242666920026144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,1,64,128,1,fp8,fp8,0,0.02253866692384084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,1,64,0,1,float16,fp8,0,0.013269333789745966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,1,64,0,1,fp8,fp8,0,0.022917332748572033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,2,64,128,1,float16,float16,0,0.013295999417702356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,2,64,0,1,float16,float16,0,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,2,64,128,1,float16,fp8,0,0.013242666920026144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,2,64,128,1,fp8,fp8,0,0.023408000667889912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,2,64,0,1,float16,fp8,0,0.013237333546082178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,2,64,0,1,fp8,fp8,0,0.02235200007756551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,4,64,128,1,float16,float16,0,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,4,64,0,1,float16,float16,0,0.013936000565687815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,4,64,128,1,float16,fp8,0,0.013914667069911957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,4,64,128,1,fp8,fp8,0,0.022869333624839783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,4,64,0,1,float16,fp8,0,0.013269333789745966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,4,64,0,1,fp8,fp8,0,0.023034666975339253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,8,64,128,1,float16,float16,0,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,8,64,0,1,float16,float16,0,0.01292266696691513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,8,64,128,1,float16,fp8,0,0.01339200014869372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,8,64,128,1,fp8,fp8,0,0.02276800076166789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,8,64,0,1,float16,fp8,0,0.013733333597580591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,8,64,0,1,fp8,fp8,0,0.022474666436513264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,16,64,128,1,float16,float16,0,0.012181332955757776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,16,64,0,1,float16,float16,0,0.012106666962305704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,16,64,128,1,float16,fp8,0,0.012522666404644648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,16,64,128,1,fp8,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,16,64,0,1,float16,fp8,0,0.012624000509579977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,16,64,0,1,fp8,fp8,0,0.019466667125622433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,1,64,128,1,float16,float16,0,0.012240000069141388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,1,64,0,1,float16,float16,0,0.0122079998254776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,1,64,128,1,float16,fp8,0,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,1,64,128,1,fp8,fp8,0,0.019434666881958645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,1,64,0,1,float16,fp8,0,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,1,64,0,1,fp8,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,2,64,128,1,float16,float16,0,0.012234666695197424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,2,64,0,1,float16,float16,0,0.01232533281048139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,2,64,128,1,float16,fp8,0,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,2,64,128,1,fp8,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,2,64,0,1,float16,fp8,0,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,2,64,0,1,fp8,fp8,0,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,4,64,128,1,float16,float16,0,0.01250133290886879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,4,64,0,1,float16,float16,0,0.012357333054145178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,4,64,128,1,float16,fp8,0,0.012506666282812754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,4,64,128,1,fp8,fp8,0,0.019472000499566395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,4,64,0,1,float16,fp8,0,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,4,64,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,8,64,128,1,float16,float16,0,0.01239466667175293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,8,64,0,1,float16,float16,0,0.012063999970753988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,8,64,128,1,float16,fp8,0,0.013962666193644205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,8,64,128,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,8,64,0,1,float16,fp8,0,0.012495999534924826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,8,64,0,1,fp8,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,16,64,128,1,float16,float16,0,0.011605333536863327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,16,64,0,1,float16,float16,0,0.011834666132926941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,16,64,128,1,float16,fp8,0,0.01210133358836174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,16,64,128,1,fp8,fp8,0,0.018629333625237148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,16,64,0,1,float16,fp8,0,0.01250133290886879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,16,64,0,1,fp8,fp8,0,0.018650667121013004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,1,64,128,1,float16,float16,0,0.011839999506870905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,1,64,0,1,float16,float16,0,0.011834666132926941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,1,64,128,1,float16,fp8,0,0.012138667205969492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,1,64,128,1,fp8,fp8,0,0.018640000373125076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,1,64,0,1,float16,fp8,0,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,1,64,0,1,fp8,fp8,0,0.01846933364868164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,2,64,128,1,float16,float16,0,0.01190399999419848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,2,64,0,1,float16,float16,0,0.011887999872366587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,2,64,128,1,float16,fp8,0,0.012421333541472753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,2,64,128,1,fp8,fp8,0,0.018698666244745255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,2,64,0,1,float16,fp8,0,0.012517333030700684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,2,64,0,1,fp8,fp8,0,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,4,64,128,1,float16,float16,0,0.012266666938861212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,4,64,0,1,float16,float16,0,0.011839999506870905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,4,64,128,1,float16,fp8,0,0.012362666428089142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,4,64,128,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,4,64,0,1,float16,fp8,0,0.012639999389648438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,4,64,0,1,fp8,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,8,64,128,1,float16,float16,0,0.012229333321253458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,8,64,0,1,float16,float16,0,0.012234666695197424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,8,64,128,1,float16,fp8,0,0.01257066677014033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,8,64,128,1,fp8,fp8,0,0.018645333747069042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,16,64,0,1,float16,fp8,0,0.0120319997270902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,8,64,0,1,float16,fp8,0,0.012373333175977072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,8,64,0,1,fp8,fp8,0,0.018800000349680584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,16,64,128,1,float16,float16,0,0.011781333635250727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,16,64,0,1,float16,float16,0,0.011999999483426413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,16,64,128,1,float16,fp8,0,0.0120319997270902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,16,64,128,1,fp8,fp8,0,0.0183999997874101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,16,64,0,1,fp8,fp8,0,0.01838933303952217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,1,64,128,1,float16,float16,0,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,1,64,0,1,float16,float16,0,0.011877333124478659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,1,64,128,1,float16,fp8,0,0.012378666549921036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,1,64,128,1,fp8,fp8,0,0.0183146670460701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,1,64,0,1,float16,fp8,0,0.012533333152532578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,1,64,0,1,fp8,fp8,0,0.01809599995613098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,2,64,128,1,float16,float16,0,0.011781333635250727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,2,64,0,1,float16,float16,0,0.012144000579913458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,2,64,128,1,float16,fp8,0,0.012261333564917246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,2,64,128,1,fp8,fp8,0,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,2,64,0,1,float16,fp8,0,0.012453333785136541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,2,64,0,1,fp8,fp8,0,0.018394666413466137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,4,64,128,1,float16,float16,0,0.012165332833925882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,8,64,0,1,float16,float16,0,0.011855999628702799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,4,64,0,1,float16,float16,0,0.01184533288081487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,4,64,128,1,float16,fp8,0,0.01221866657336553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,4,64,128,1,fp8,fp8,0,0.01860800012946129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,4,64,0,1,float16,fp8,0,0.012138667205969492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,4,64,0,1,fp8,fp8,0,0.019973333925008774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,8,64,128,1,float16,float16,0,0.0120319997270902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,8,64,128,1,float16,fp8,0,0.012341332932313284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,8,64,128,1,fp8,fp8,0,0.01821333294113477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,8,64,0,1,float16,fp8,0,0.012341332932313284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,8,64,0,1,fp8,fp8,0,0.018325333793958027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,16,64,128,1,float16,float16,0,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,16,64,0,1,float16,float16,0,0.011584000041087469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,16,64,128,1,float16,fp8,0,0.011973333855470022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,16,64,128,1,fp8,fp8,0,0.020703999946514767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,16,64,0,1,float16,fp8,0,0.01181866725285848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,16,64,0,1,fp8,fp8,0,0.018181333939234417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,1,64,128,1,float16,float16,0,0.01181866725285848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,1,64,0,1,float16,float16,0,0.011909333368142446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,1,64,128,1,float16,fp8,0,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,2,64,128,1,float16,fp8,0,0.012362666428089142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,1,64,128,1,fp8,fp8,0,0.0183999997874101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,1,64,0,1,float16,fp8,0,0.011813333878914515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,1,64,0,1,fp8,fp8,0,0.018373332917690277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,2,64,128,1,float16,float16,0,0.01192533348997434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,2,64,0,1,float16,float16,0,0.012042666474978128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,2,64,128,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,2,64,0,1,float16,fp8,0,0.012304000556468964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,2,64,0,1,fp8,fp8,0,0.0183999997874101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,4,64,128,1,float16,float16,0,0.011648000528415045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,4,64,0,1,float16,float16,0,0.012122667084137598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,4,64,128,1,float16,fp8,0,0.012138667205969492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,4,64,128,1,fp8,fp8,0,0.018320000420014065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,4,64,0,1,float16,fp8,0,0.011893333246310553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,4,64,0,1,fp8,fp8,0,0.018464000274737675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,8,64,128,1,float16,float16,0,0.01184533288081487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,8,64,0,1,float16,float16,0,0.012144000579913458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,8,64,128,1,float16,fp8,0,0.01210133358836174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,8,64,128,1,fp8,fp8,0,0.018474667022625606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,8,64,0,1,float16,fp8,0,0.012005332857370377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,8,64,0,1,fp8,fp8,0,0.01821333294113477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,12,1,64,128,1,float16,float16,0,1.0952213605244954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,12,1,64,128,1,float16,fp8,0,1.0738720099131267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,12,1,64,128,1,fp8,fp8,0,1.4097226460774739
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,12,2,64,128,1,float16,float16,0,1.114794651667277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,12,2,64,128,1,float16,fp8,0,1.1038613319396973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,12,2,64,128,1,fp8,fp8,0,1.4286826451619465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,12,1,64,0,1,float16,float16,0,7.376442591349284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,12,4,64,128,1,float16,float16,0,1.133845329284668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,12,1,64,0,1,fp8,fp8,0,6.531685511271159
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,12,1,64,0,1,float16,fp8,0,7.309973398844401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,12,2,64,0,1,float16,float16,0,7.255029042561849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,12,4,64,128,1,float16,fp8,0,1.13154133160909
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,12,2,64,0,1,float16,fp8,0,7.189002354939778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,12,4,64,128,1,fp8,fp8,0,1.460927963256836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,12,64,128,1,float16,float16,0,0.6229706605275472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,12,2,64,0,1,fp8,fp8,0,6.538986841837565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,12,64,128,1,float16,fp8,0,0.6215893427530924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,12,64,128,1,fp8,fp8,0,0.8081866900126139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,12,4,64,0,1,float16,float16,0,7.27842648824056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,1,64,128,1,float16,float16,0,0.5706773201624552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,12,64,0,1,float16,float16,0,3.7675838470458984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,1,64,128,1,float16,fp8,0,0.5598719914754232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,12,4,64,0,1,fp8,fp8,0,6.6159413655598955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,12,64,0,1,float16,fp8,0,3.7223679224650064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,1,64,128,1,fp8,fp8,0,0.740565299987793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,12,4,64,0,1,float16,fp8,0,7.331957499186198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,12,64,0,1,fp8,fp8,0,3.4335625966389975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,1,64,0,1,float16,float16,0,3.7405707041422525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,2,64,128,1,float16,float16,0,0.5784159898757935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,2,64,128,1,float16,fp8,0,0.5689599911371866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,1,64,0,1,float16,fp8,0,3.707551956176758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,1,64,0,1,fp8,fp8,0,3.3452320098876953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,2,64,128,1,fp8,fp8,0,0.7550613085428873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,2,64,0,1,float16,float16,0,3.731840133666992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,2,64,0,1,float16,fp8,0,3.6578187942504883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,4,64,128,1,float16,float16,0,0.5867306788762411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,2,64,0,1,fp8,fp8,0,3.4018987019856772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,4,64,128,1,float16,fp8,0,0.5848373174667358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,4,64,128,1,fp8,fp8,0,0.7623626391092936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,12,64,128,1,float16,float16,0,0.3330986698468526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,4,64,0,1,float16,float16,0,3.6476160685221353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,4,64,0,1,float16,fp8,0,3.7313334147135415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,12,64,128,1,float16,fp8,0,0.33931732177734375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,4,64,0,1,fp8,fp8,0,3.396874745686849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,12,64,128,1,fp8,fp8,0,0.44021864732106525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,12,64,0,1,float16,float16,0,1.9161814053853352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,12,64,0,1,float16,fp8,0,1.9060319264729817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,12,64,0,1,fp8,fp8,0,1.6860960324605305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,1,64,128,1,float16,float16,0,0.315775990486145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,1,64,128,1,float16,fp8,0,0.31249600648880005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,1,64,0,1,float16,float16,0,1.900986671447754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,1,64,128,1,fp8,fp8,0,0.4055733283360799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,1,64,0,1,float16,fp8,0,1.8975253105163574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,1,64,0,1,fp8,fp8,0,1.6633599599202473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,2,64,128,1,float16,float16,0,0.3163520097732544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,2,64,0,1,float16,float16,0,1.90829865137736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,2,64,128,1,float16,fp8,0,0.3137066761652629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,2,64,128,1,fp8,fp8,0,0.4117813507715861
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,2,64,0,1,float16,fp8,0,1.8973867098490398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,2,64,0,1,fp8,fp8,0,1.653205394744873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,4,64,128,1,float16,float16,0,0.3211573362350464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,4,64,0,1,float16,float16,0,1.906325340270996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,4,64,128,1,float16,fp8,0,0.3188426693280538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,4,64,128,1,fp8,fp8,0,0.41952534516652423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,4,64,0,1,float16,fp8,0,1.893898646036784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,4,64,0,1,fp8,fp8,0,1.6751467386881511
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,12,64,128,1,float16,float16,0,0.22697067260742188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,12,64,128,1,float16,fp8,0,0.2264853318532308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,12,64,0,1,float16,float16,0,0.970736026763916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,12,64,128,1,fp8,fp8,0,0.27823466062545776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,12,64,0,1,float16,fp8,0,0.9719893137613932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,1,64,128,1,fp8,fp8,0,0.2755146622657776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,12,64,0,1,fp8,fp8,0,0.9028373559316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,1,64,128,1,float16,float16,0,0.22542399168014526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,1,64,0,1,float16,float16,0,0.9814826647440592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,1,64,128,1,float16,fp8,0,0.22665599981943765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,1,64,0,1,float16,fp8,0,0.976703961690267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,1,64,0,1,fp8,fp8,0,0.8990826606750488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,2,64,128,1,float16,float16,0,0.22587732474009195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,2,64,128,1,float16,fp8,0,0.22620266675949097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,2,64,0,1,float16,float16,0,0.9826719760894775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,2,64,128,1,fp8,fp8,0,0.2760000030199687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,4,64,128,1,float16,float16,0,0.22702399889628092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,2,64,0,1,float16,fp8,0,0.9796213308970133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,2,64,0,1,fp8,fp8,0,0.8929759661356608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,4,64,0,1,float16,float16,0,0.9738240242004395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,4,64,128,1,float16,fp8,0,0.2265226642290751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,4,64,128,1,fp8,fp8,0,0.27801599105199176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,4,64,0,1,float16,fp8,0,0.979973316192627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,4,64,0,1,fp8,fp8,0,0.9001333713531494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,12,1,64,128,1,float16,float16,0,0.8235146999359131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,12,1,64,128,1,float16,fp8,0,0.8054560025533041
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,12,1,64,0,1,float16,float16,0,4.25820255279541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,12,1,64,128,1,fp8,fp8,0,1.0618133544921875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,12,2,64,128,1,float16,float16,0,0.8414613405863444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,12,2,64,128,1,float16,fp8,0,0.8336160182952881
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,12,2,64,128,1,fp8,fp8,0,1.0795040130615234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,12,1,64,0,1,float16,fp8,0,4.21452267964681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,12,1,64,0,1,fp8,fp8,0,3.8400001525878906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,12,2,64,0,1,float16,float16,0,4.27945613861084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,12,4,64,128,1,float16,float16,0,0.8575786749521891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,12,4,64,128,1,float16,fp8,0,0.8454879919687907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,12,2,64,0,1,float16,fp8,0,4.2646026611328125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,12,2,64,0,1,fp8,fp8,0,3.8422826131184897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,12,4,64,128,1,fp8,fp8,0,1.1078773339589436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,12,64,128,1,float16,float16,0,0.4638559818267822
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,12,64,0,1,float16,float16,0,2.166869322458903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,12,4,64,0,1,float16,float16,0,4.192826588948567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,12,64,128,1,float16,fp8,0,0.46961601575215656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,12,4,64,0,1,float16,fp8,0,4.295407931009929
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,12,4,64,0,1,fp8,fp8,0,3.888218561808268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,12,64,128,1,fp8,fp8,0,0.6117013295491537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,12,64,0,1,float16,fp8,0,2.18233060836792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,12,64,0,1,fp8,fp8,0,2.0308586756388345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,1,64,128,1,float16,float16,0,0.4278186559677124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,1,64,128,1,float16,fp8,0,0.42421332995096844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,1,64,0,1,float16,float16,0,2.169871966044108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,1,64,128,1,fp8,fp8,0,0.5653066635131836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,2,64,128,1,float16,float16,0,0.43875734011332196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,1,64,0,1,float16,fp8,0,2.1440159479777017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,1,64,0,1,fp8,fp8,0,1.943109353383382
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,2,64,128,1,float16,fp8,0,0.42850132783253986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,2,64,128,1,fp8,fp8,0,0.565221349398295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,2,64,0,1,float16,float16,0,2.1549812952677407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,2,64,0,1,float16,fp8,0,2.1342026392618814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,4,64,128,1,float16,float16,0,0.43851733207702637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,2,64,0,1,fp8,fp8,0,1.9872852961222331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,4,64,128,1,float16,fp8,0,0.43330132961273193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,4,64,0,1,float16,float16,0,2.1621813774108887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,4,64,128,1,fp8,fp8,0,0.575055996576945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,12,64,128,1,float16,float16,0,0.25697600841522217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,4,64,0,1,float16,fp8,0,2.143061319986979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,12,64,0,1,float16,float16,0,1.1415786743164062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,12,64,128,1,float16,fp8,0,0.26100800434748334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,4,64,0,1,fp8,fp8,0,1.9927679697672527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,12,64,128,1,fp8,fp8,0,0.338485320409139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,1,64,128,1,float16,float16,0,0.2432639996210734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,12,64,0,1,float16,fp8,0,1.1493226687113445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,12,64,0,1,fp8,fp8,0,1.0006826718648274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,1,64,128,1,float16,fp8,0,0.24017600218454996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,1,64,0,1,float16,float16,0,1.1332746346791585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,1,64,128,1,fp8,fp8,0,0.31570667028427124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,1,64,0,1,float16,fp8,0,1.1311840216318767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,1,64,0,1,fp8,fp8,0,0.969866673151652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,2,64,128,1,float16,float16,0,0.24499734242757162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,2,64,128,1,float16,fp8,0,0.24100265900293985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,2,64,0,1,float16,float16,0,1.138261318206787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,2,64,128,1,fp8,fp8,0,0.3173333406448364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,2,64,0,1,float16,fp8,0,1.1315253575642903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,4,64,0,1,fp8,fp8,0,0.9897173245747884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,2,64,0,1,fp8,fp8,0,0.9866613547007242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,4,64,128,1,float16,float16,0,0.24623999993006387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,4,64,0,1,float16,float16,0,1.1231253147125244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,4,64,128,1,float16,fp8,0,0.2481279969215393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,4,64,128,1,fp8,fp8,0,0.32071467240651447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,4,64,0,1,float16,fp8,0,1.1381653149922688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,12,64,128,1,float16,float16,0,0.17682133118311563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,12,64,0,1,float16,float16,0,0.5887466669082642
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,12,64,128,1,float16,fp8,0,0.17685866355895996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,12,64,128,1,fp8,fp8,0,0.2179786761601766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,12,64,0,1,float16,fp8,0,0.5819733142852783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,12,64,0,1,fp8,fp8,0,0.5453493197758993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,1,64,128,1,float16,float16,0,0.17492800951004028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,1,64,0,1,float16,float16,0,0.5889546473821005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,1,64,128,1,float16,fp8,0,0.17533334096272787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,1,64,128,1,fp8,fp8,0,0.2148373325665792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,1,64,0,1,float16,fp8,0,0.5899466673533121
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,1,64,0,1,fp8,fp8,0,0.5442879994710287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,2,64,128,1,float16,float16,0,0.17458132902781168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,2,64,0,1,float16,float16,0,0.5897973378499349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,2,64,128,1,float16,fp8,0,0.17502933740615845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,2,64,128,1,fp8,fp8,0,0.21565866470336914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,2,64,0,1,float16,fp8,0,0.5901333491007487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,2,64,0,1,fp8,fp8,0,0.5421173175175985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,4,64,128,1,float16,float16,0,0.17573332786560059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,4,64,0,1,float16,float16,0,0.5888053178787231
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,4,64,128,1,float16,fp8,0,0.17546133200327554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,4,64,128,1,fp8,fp8,0,0.2172213395436605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,4,64,0,1,float16,fp8,0,0.5863253275553385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,4,64,0,1,fp8,fp8,0,0.5443679889043173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,12,1,64,128,1,float16,float16,0,0.6823893388112386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,12,1,64,128,1,float16,fp8,0,0.6724692980448405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,12,1,64,128,1,fp8,fp8,0,0.8835360209147135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,12,1,64,0,1,float16,float16,0,3.0032641092936196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,12,2,64,128,1,float16,float16,0,0.7014453411102295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,12,1,64,0,1,float16,fp8,0,2.9888534545898438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,12,1,64,0,1,fp8,fp8,0,2.7349866231282554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,12,2,64,0,1,float16,float16,0,3.00276788075765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,12,2,64,128,1,float16,fp8,0,0.6923466523488363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,12,2,64,128,1,fp8,fp8,0,0.9014986356099447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,12,4,64,128,1,float16,float16,0,0.7123573621114095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,12,2,64,0,1,float16,fp8,0,3.0027360916137695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,12,2,64,0,1,fp8,fp8,0,2.7722078959147134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,12,4,64,0,1,float16,float16,0,3.051823933919271
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,12,4,64,128,1,float16,fp8,0,0.7035679817199707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,12,4,64,128,1,fp8,fp8,0,0.9242880344390869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,12,4,64,0,1,float16,fp8,0,3.019370714823405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,12,4,64,0,1,fp8,fp8,0,2.750944137573242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,12,64,128,1,float16,float16,0,0.3883306582768758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,12,64,128,1,float16,fp8,0,0.3928639888763428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,12,64,128,1,fp8,fp8,0,0.5161386728286743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,12,64,0,1,float16,float16,0,1.5710879961649578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,12,64,0,1,float16,fp8,0,1.5601812998453777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,12,64,0,1,fp8,fp8,0,1.3979093233744304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,1,64,128,1,float16,float16,0,0.360698660214742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,1,64,128,1,float16,fp8,0,0.35628799597422284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,1,64,128,1,fp8,fp8,0,0.4714346726735433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,1,64,0,1,float16,float16,0,1.537775993347168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,1,64,0,1,float16,fp8,0,1.5366400082906086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,1,64,0,1,fp8,fp8,0,1.3683573404947917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,2,64,128,1,float16,float16,0,0.3628213405609131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,2,64,0,1,float16,float16,0,1.539749304453532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,2,64,128,1,float16,fp8,0,0.3604480028152466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,2,64,128,1,fp8,fp8,0,0.47749332586924237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,2,64,0,1,float16,fp8,0,1.5440692901611328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,4,64,0,1,float16,fp8,0,1.5526026089986165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,2,64,0,1,fp8,fp8,0,1.3641600608825684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,4,64,128,1,float16,float16,0,0.3697226842244466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,4,64,0,1,float16,float16,0,1.5556853612263997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,4,64,128,1,float16,fp8,0,0.36552000045776367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,4,64,128,1,fp8,fp8,0,0.4833600123723348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,12,64,128,1,float16,float16,0,0.2181333303451538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,4,64,0,1,fp8,fp8,0,1.383797327677409
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,12,64,0,1,float16,float16,0,0.7771519819895426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,12,64,128,1,float16,fp8,0,0.21961599588394165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,12,64,128,1,fp8,fp8,0,0.2882773280143738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,12,64,0,1,float16,fp8,0,0.7853493690490723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,12,64,0,1,fp8,fp8,0,0.724128007888794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,1,64,128,1,float16,float16,0,0.2051253318786621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,1,64,0,1,float16,float16,0,0.770634651184082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,1,64,128,1,float16,fp8,0,0.20269866784413657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,1,64,128,1,fp8,fp8,0,0.26632533470789593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,1,64,0,1,float16,fp8,0,0.765893300374349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,1,64,0,1,fp8,fp8,0,0.7102986971537272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,2,64,128,1,float16,float16,0,0.20683199167251587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,2,64,128,1,float16,fp8,0,0.20518932739893594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,2,64,0,1,float16,float16,0,0.7676373322804769
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,2,64,128,1,fp8,fp8,0,0.270197331905365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,2,64,0,1,float16,fp8,0,0.7721439997355143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,2,64,0,1,fp8,fp8,0,0.7110133171081543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,4,64,128,1,float16,float16,0,0.20887466271718344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,4,64,0,1,float16,float16,0,0.7742239634195963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,4,64,128,1,float16,fp8,0,0.208570659160614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,4,64,128,1,fp8,fp8,0,0.27498666445414227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,4,64,0,1,float16,fp8,0,0.768992026646932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,4,64,0,1,fp8,fp8,0,0.7153440316518148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,12,64,128,1,float16,float16,0,0.15154666701952615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,12,64,0,1,float16,float16,0,0.4331253369649251
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,12,64,128,1,float16,fp8,0,0.15189866224924722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,12,64,128,1,fp8,fp8,0,0.18745599190394083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,12,64,0,1,float16,fp8,0,0.43263999621073407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,12,64,0,1,fp8,fp8,0,0.4001760085423787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,1,64,0,1,float16,fp8,0,0.4306453466415405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,1,64,128,1,float16,float16,0,0.1504693329334259
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,1,64,0,1,float16,float16,0,0.43091734250386554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,1,64,128,1,float16,fp8,0,0.1498186687628428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,1,64,128,1,fp8,fp8,0,0.18427733580271402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,1,64,0,1,fp8,fp8,0,0.4007413387298584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,2,64,128,1,float16,float16,0,0.1499626636505127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,2,64,0,1,float16,float16,0,0.4314933220545451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,2,64,128,1,float16,fp8,0,0.15016000469525656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,2,64,128,1,fp8,fp8,0,0.18503467241923013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,2,64,0,1,float16,fp8,0,0.4318079948425293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,2,64,0,1,fp8,fp8,0,0.4001813332239787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,4,64,128,1,float16,float16,0,0.15079466501871744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,4,64,0,1,float16,float16,0,0.43423465887705487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,4,64,128,1,float16,fp8,0,0.15093866984049478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,4,64,128,1,fp8,fp8,0,0.18722132841746011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,4,64,0,1,float16,fp8,0,0.4310506582260132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,4,64,0,1,fp8,fp8,0,0.3997066815694173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,12,1,64,128,1,float16,float16,0,1.0758826732635498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,12,1,64,0,1,float16,float16,0,4.037568092346191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,12,1,64,128,1,float16,fp8,0,1.0590773423512776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,12,1,64,128,1,fp8,fp8,0,1.3904959360758464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,12,2,64,128,1,float16,float16,0,1.0960266590118408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,12,2,64,0,1,float16,float16,0,3.9667787551879883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,12,1,64,0,1,float16,fp8,0,3.9345385233561196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,12,2,64,128,1,float16,fp8,0,1.0872960090637207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,12,2,64,128,1,fp8,fp8,0,1.4124639828999836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,12,1,64,0,1,fp8,fp8,0,3.6011679967244468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,12,4,64,128,1,float16,float16,0,1.1262400150299072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,12,2,64,0,1,float16,fp8,0,4.030714670817058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,12,2,64,0,1,fp8,fp8,0,3.6213865280151367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,12,4,64,128,1,float16,fp8,0,1.1113333702087402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,12,4,64,128,1,fp8,fp8,0,1.439743995666504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,12,4,64,0,1,float16,float16,0,4.014032046000163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,12,64,128,1,float16,float16,0,0.6103946765263876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,12,4,64,0,1,float16,fp8,0,3.9544798533121743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,12,4,64,0,1,fp8,fp8,0,3.6989173889160156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,12,64,0,1,float16,float16,0,2.0433707237243652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,12,64,128,1,float16,fp8,0,0.6037866671880087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,12,64,128,1,fp8,fp8,0,0.7878986994425455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,1,64,128,1,float16,float16,0,0.5524426698684692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,12,64,0,1,float16,fp8,0,2.047877311706543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,12,64,0,1,fp8,fp8,0,1.9364159901936848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,1,64,128,1,float16,fp8,0,0.534495989481608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,1,64,0,1,float16,float16,0,1.992970625559489
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,1,64,128,1,fp8,fp8,0,0.7113706270853678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,2,64,128,1,float16,float16,0,0.555621345837911
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,2,64,128,1,float16,fp8,0,0.5511093139648438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,1,64,0,1,float16,fp8,0,1.9939680099487305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,2,64,128,1,fp8,fp8,0,0.7271786530812582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,1,64,0,1,fp8,fp8,0,1.8407840728759766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,2,64,0,1,float16,float16,0,2.0181652704874673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,4,64,128,1,float16,float16,0,0.5649333397547404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,4,64,128,1,float16,fp8,0,0.5634613434473673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,2,64,0,1,float16,fp8,0,1.9930613835652669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,4,64,128,1,fp8,fp8,0,0.7409119606018066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,2,64,0,1,fp8,fp8,0,1.850719928741455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,4,64,0,1,float16,float16,0,2.024890740712484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,4,64,0,1,float16,fp8,0,2.012949307759603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,4,64,0,1,fp8,fp8,0,1.8688373565673828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,12,64,128,1,float16,float16,0,0.31361599763234455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,12,64,128,1,float16,fp8,0,0.31747732559839886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,12,64,0,1,float16,float16,0,1.0606186389923096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,12,64,128,1,fp8,fp8,0,0.4163946708043416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,12,64,0,1,float16,fp8,0,1.0589599609375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,1,64,128,1,float16,float16,0,0.2921706636746724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,12,64,0,1,fp8,fp8,0,0.9498240152994791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,1,64,128,1,float16,fp8,0,0.28774933020273846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,1,64,0,1,float16,float16,0,1.0414026578267415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,1,64,128,1,fp8,fp8,0,0.3829600016276042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,1,64,0,1,float16,fp8,0,1.0371893246968586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,1,64,0,1,fp8,fp8,0,0.9091200033823649
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,2,64,128,1,float16,float16,0,0.29604800542195636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,2,64,128,1,float16,fp8,0,0.292794664700826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,2,64,0,1,float16,float16,0,1.0436053276062012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,2,64,128,1,fp8,fp8,0,0.3855733474095662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,2,64,0,1,float16,fp8,0,1.0409386952718098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,4,64,128,1,float16,float16,0,0.2997066577275594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,2,64,0,1,fp8,fp8,0,0.9168426990509033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,4,64,128,1,float16,fp8,0,0.29799999793370563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,4,64,0,1,float16,float16,0,1.0432319641113281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,4,64,128,1,fp8,fp8,0,0.3956906795501709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,4,64,0,1,float16,fp8,0,1.0466933250427246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,4,64,0,1,fp8,fp8,0,0.9208746751149496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,12,64,128,1,float16,float16,0,0.17907732725143433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,12,64,0,1,float16,float16,0,0.5298293431599935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,12,64,128,1,float16,fp8,0,0.1820853352546692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,12,64,128,1,fp8,fp8,0,0.23786665995915732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,12,64,0,1,float16,fp8,0,0.5296693245569865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,12,64,0,1,fp8,fp8,0,0.498032013575236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,1,64,128,1,float16,float16,0,0.16875733931859335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,1,64,0,1,float16,float16,0,0.5179253419240316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,1,64,128,1,float16,fp8,0,0.16702934106191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,1,64,128,1,fp8,fp8,0,0.2208426594734192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,1,64,0,1,float16,fp8,0,0.5177493492762247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,1,64,0,1,fp8,fp8,0,0.4789546728134155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,2,64,128,1,float16,float16,0,0.17029333114624023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,2,64,0,1,float16,float16,0,0.5223519802093506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,2,64,128,1,float16,fp8,0,0.16874667008717856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,2,64,128,1,fp8,fp8,0,0.2217173377672831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,2,64,0,1,float16,fp8,0,0.5210293531417847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,2,64,0,1,fp8,fp8,0,0.48387734095255536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,4,64,128,1,float16,float16,0,0.17276267210642496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,4,64,0,1,float16,float16,0,0.5199520190556844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,4,64,128,1,float16,fp8,0,0.172106663386027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,4,64,128,1,fp8,fp8,0,0.22847465674082437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,4,64,0,1,float16,fp8,0,0.5237333377202352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,4,64,0,1,fp8,fp8,0,0.484991987546285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,12,64,128,1,float16,float16,0,0.12596266468365988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,12,64,0,1,float16,float16,0,0.3006666700045268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,1,64,128,1,float16,fp8,0,0.12301333745320638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,12,64,128,1,float16,fp8,0,0.12733866771062216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,12,64,128,1,fp8,fp8,0,0.1575040022532145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,12,64,0,1,float16,fp8,0,0.29898132880528766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,12,64,0,1,fp8,fp8,0,0.28087466955184937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,1,64,128,1,float16,float16,0,0.12268799543380737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,1,64,0,1,float16,float16,0,0.29691733916600543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,1,64,128,1,fp8,fp8,0,0.14806933204332987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,1,64,0,1,float16,fp8,0,0.2985440095265706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,1,64,0,1,fp8,fp8,0,0.27777065833409625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,2,64,128,1,float16,float16,0,0.1236799955368042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,2,64,0,1,float16,float16,0,0.2972053289413452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,2,64,128,1,float16,fp8,0,0.12334400415420532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,2,64,128,1,fp8,fp8,0,0.15246933698654175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,2,64,0,1,float16,fp8,0,0.2972533305486043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,2,64,0,1,fp8,fp8,0,0.27824532985687256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,4,64,128,1,float16,float16,0,0.12500799695650736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,4,64,0,1,float16,float16,0,0.2977546652158101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,4,64,128,1,float16,fp8,0,0.12531733512878418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,4,64,128,1,fp8,fp8,0,0.15475733081499735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,4,64,0,1,float16,fp8,0,0.29868799448013306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,4,64,0,1,fp8,fp8,0,0.2779093384742737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,12,1,64,128,1,float16,float16,0,0.8045173486073812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,12,1,64,128,1,float16,fp8,0,0.7888800303141276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,12,1,64,0,1,float16,float16,0,2.3642239570617676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,12,1,64,128,1,fp8,fp8,0,1.0436639785766602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,12,2,64,128,1,float16,float16,0,0.8244266510009766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,12,1,64,0,1,float16,fp8,0,2.352901299794515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,12,1,64,0,1,fp8,fp8,0,2.1762293179829917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,12,2,64,128,1,float16,fp8,0,0.8177279631296793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,12,2,64,0,1,float16,float16,0,2.3850720723470054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,12,2,64,128,1,fp8,fp8,0,1.0699146588643391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,12,2,64,0,1,float16,fp8,0,2.353343963623047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,12,4,64,128,1,float16,float16,0,0.8442719777425131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,12,2,64,0,1,fp8,fp8,0,2.1850345929463706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,12,4,64,0,1,float16,float16,0,2.3860106468200684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,12,4,64,128,1,float16,fp8,0,0.8359893163045248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,12,4,64,128,1,fp8,fp8,0,1.0826293627421062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,12,4,64,0,1,float16,fp8,0,2.3891785939534507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,12,64,128,1,float16,float16,0,0.45311466852823895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,12,4,64,0,1,fp8,fp8,0,2.2153919537862143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,12,64,0,1,float16,float16,0,1.2487680117289226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,12,64,128,1,float16,fp8,0,0.45186134179433185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,12,64,128,1,fp8,fp8,0,0.599397341410319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,12,64,0,1,float16,fp8,0,1.242469310760498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,12,64,0,1,fp8,fp8,0,1.1650826930999756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,1,64,128,1,float16,float16,0,0.4137333234151204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,1,64,128,1,float16,fp8,0,0.4063733418782552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,1,64,0,1,float16,float16,0,1.198357343673706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,1,64,128,1,fp8,fp8,0,0.5440586805343628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,2,64,128,1,float16,float16,0,0.41847999890645343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,1,64,0,1,float16,fp8,0,1.192069371541341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,1,64,0,1,fp8,fp8,0,1.1036319732666016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,2,64,128,1,float16,fp8,0,0.4145653247833252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,2,64,0,1,float16,float16,0,1.2074720064798992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,2,64,128,1,fp8,fp8,0,0.550384004910787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,2,64,0,1,float16,fp8,0,1.199295997619629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,2,64,0,1,fp8,fp8,0,1.1224693457285564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,4,64,128,1,float16,float16,0,0.4254879951477051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,4,64,0,1,float16,float16,0,1.2102186679840088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,4,64,128,1,float16,fp8,0,0.4204426606496175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,4,64,128,1,fp8,fp8,0,0.563040018081665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,4,64,0,1,float16,fp8,0,1.2064053217569988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,12,64,128,1,float16,float16,0,0.24174400170644125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,4,64,0,1,fp8,fp8,0,1.1319146951039631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,12,64,0,1,float16,float16,0,0.6527466773986816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,12,64,128,1,float16,fp8,0,0.2459519902865092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,12,64,128,1,fp8,fp8,0,0.3211733301480611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,12,64,0,1,float16,fp8,0,0.6553440093994141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,12,64,0,1,fp8,fp8,0,0.580730676651001
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,1,64,128,1,float16,float16,0,0.22537599007288614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,1,64,128,1,float16,fp8,0,0.22290132443110147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,1,64,0,1,float16,float16,0,0.6337813138961792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,1,64,128,1,fp8,fp8,0,0.2983520030975342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,1,64,0,1,float16,fp8,0,0.6324213345845541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,2,64,128,1,float16,float16,0,0.2275893290837606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,1,64,0,1,fp8,fp8,0,0.5536106824874878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,2,64,0,1,float16,float16,0,0.6391093333562216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,2,64,128,1,float16,fp8,0,0.22509332497914633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,2,64,128,1,fp8,fp8,0,0.30086400111516315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,2,64,0,1,float16,fp8,0,0.6324479977289835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,2,64,0,1,fp8,fp8,0,0.5609226624170939
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,4,64,128,1,float16,float16,0,0.23113600413004556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,4,64,0,1,float16,float16,0,0.6404533386230469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,4,64,128,1,float16,fp8,0,0.2290239930152893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,4,64,128,1,fp8,fp8,0,0.30523733297983807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,4,64,0,1,float16,fp8,0,0.6377760171890259
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,4,64,0,1,fp8,fp8,0,0.5655573209126791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,12,64,128,1,float16,float16,0,0.14101333419481912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,12,64,0,1,float16,float16,0,0.32941333452860516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,12,64,128,1,float16,fp8,0,0.14361066619555155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,12,64,128,1,fp8,fp8,0,0.18886399269104004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,12,64,0,1,float16,fp8,0,0.32948799928029376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,12,64,0,1,fp8,fp8,0,0.31119465827941895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,1,64,128,1,float16,float16,0,0.1321386694908142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,1,64,0,1,float16,float16,0,0.32046399513880414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,1,64,128,1,float16,fp8,0,0.13167466719945273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,1,64,128,1,fp8,fp8,0,0.17247466246287027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,1,64,0,1,float16,fp8,0,0.31868799527486164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,1,64,0,1,fp8,fp8,0,0.29629333813985187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,2,64,128,1,float16,float16,0,0.13275733590126038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,2,64,0,1,float16,float16,0,0.3211733301480611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,2,64,128,1,float16,fp8,0,0.1327786644299825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,2,64,128,1,fp8,fp8,0,0.17679999272028604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,2,64,0,1,float16,fp8,0,0.3189706603686015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,2,64,0,1,fp8,fp8,0,0.2998453378677368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,4,64,128,1,float16,float16,0,0.1343786617120107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,4,64,0,1,fp8,fp8,0,0.30193066596984863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,4,64,0,1,float16,float16,0,0.3211200038592021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,4,64,128,1,float16,fp8,0,0.13487999637921652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,4,64,128,1,fp8,fp8,0,0.179584006468455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,4,64,0,1,float16,fp8,0,0.3209173281987508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,12,64,128,1,float16,float16,0,0.10073600212732951
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,12,64,0,1,float16,float16,0,0.19453867276509604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,12,64,128,1,float16,fp8,0,0.10081600149472554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,12,64,128,1,fp8,fp8,0,0.12807466586430868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,12,64,0,1,float16,fp8,0,0.1945120096206665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,12,64,0,1,fp8,fp8,0,0.1816320021947225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,1,64,128,1,float16,float16,0,0.0974666674931844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,1,64,0,1,float16,float16,0,0.19139200448989868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,1,64,128,1,float16,fp8,0,0.09821866949399312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,1,64,128,1,fp8,fp8,0,0.11613866686820984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,1,64,0,1,float16,fp8,0,0.19212265809377035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,1,64,0,1,fp8,fp8,0,0.17961066961288452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,2,64,128,1,float16,float16,0,0.09815466403961182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,2,64,0,1,float16,float16,0,0.192303995291392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,2,64,128,1,float16,fp8,0,0.09758933385213216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,2,64,128,1,fp8,fp8,0,0.11657599608103435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,2,64,0,1,float16,fp8,0,0.192303995291392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,2,64,0,1,fp8,fp8,0,0.17974400520324707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,4,64,128,1,float16,float16,0,0.09867733716964722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,4,64,0,1,float16,float16,0,0.19243200620015463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,4,64,128,1,float16,fp8,0,0.09832533200581868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,4,64,128,1,fp8,fp8,0,0.11674132943153381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,4,64,0,1,float16,fp8,0,0.19265600045522055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,4,64,0,1,fp8,fp8,0,0.18045334021250406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,12,1,64,128,1,float16,float16,0,1.0679306983947754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,12,1,64,128,1,float16,fp8,0,1.0508800347646077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,12,1,64,0,1,float16,float16,0,2.309605280558268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,12,1,64,128,1,fp8,fp8,0,1.3737653096516926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,12,1,64,0,1,float16,fp8,0,2.2895359992980957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,12,1,64,0,1,fp8,fp8,0,2.1471947034200034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,12,2,64,128,1,float16,float16,0,1.087503989537557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,12,2,64,0,1,float16,float16,0,2.333850701649984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,12,2,64,128,1,float16,fp8,0,1.08188796043396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,12,2,64,128,1,fp8,fp8,0,1.4005279541015625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,12,4,64,128,1,float16,float16,0,1.1080373128255208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,12,2,64,0,1,float16,fp8,0,2.32314666112264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,12,2,64,0,1,fp8,fp8,0,2.172272046407064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,12,4,64,0,1,float16,float16,0,2.3688000043233237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,12,4,64,128,1,float16,fp8,0,1.0991413593292236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,12,4,64,128,1,fp8,fp8,0,1.4232533772786458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,12,4,64,0,1,float16,fp8,0,2.3624480565389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,12,64,128,1,float16,float16,0,0.601968010266622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,12,4,64,0,1,fp8,fp8,0,2.1969119707743325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,12,64,0,1,float16,float16,0,1.2395573457082112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,12,64,128,1,float16,fp8,0,0.595578670501709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,12,64,128,1,fp8,fp8,0,0.7796266873677572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,12,64,0,1,float16,fp8,0,1.226906696955363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,12,64,0,1,fp8,fp8,0,1.1585013071695964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,1,64,128,1,float16,float16,0,0.5367306470870972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,1,64,0,1,float16,float16,0,1.171120007832845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,1,64,128,1,float16,fp8,0,0.5273546775182089
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,1,64,128,1,fp8,fp8,0,0.7036426862080892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,1,64,0,1,float16,fp8,0,1.1656213601430256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,2,64,128,1,float16,float16,0,0.5472266674041748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,1,64,0,1,fp8,fp8,0,1.0901013215382893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,2,64,128,1,float16,fp8,0,0.5421919822692871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,2,64,0,1,float16,float16,0,1.1789066791534424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,2,64,128,1,fp8,fp8,0,0.7194186846415201
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,2,64,0,1,float16,fp8,0,1.1800906658172607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,2,64,0,1,fp8,fp8,0,1.1067519982655842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,4,64,128,1,float16,float16,0,0.5577919880549113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,4,64,128,1,float16,fp8,0,0.5520106554031372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,4,64,0,1,fp8,fp8,0,1.1148053010304768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,4,64,0,1,float16,float16,0,1.1885120073954265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,4,64,128,1,fp8,fp8,0,0.7383786837259928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,12,64,128,1,float16,float16,0,0.3078346649805705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,4,64,0,1,float16,fp8,0,1.1833386421203613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,12,64,0,1,float16,float16,0,0.6368586619695028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,12,64,128,1,float16,fp8,0,0.3081066608428955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,12,64,128,1,fp8,fp8,0,0.4060426553090413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,12,64,0,1,float16,fp8,0,0.6335253318150839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,12,64,0,1,fp8,fp8,0,0.5759946505228678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,1,64,128,1,float16,float16,0,0.2839253346125285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,1,64,0,1,float16,float16,0,0.6134399970372518
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,1,64,128,1,float16,fp8,0,0.27878399689992267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,1,64,128,1,fp8,fp8,0,0.3721280097961426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,1,64,0,1,float16,fp8,0,0.6073493162790934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,1,64,0,1,fp8,fp8,0,0.5457760095596313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,2,64,128,1,float16,float16,0,0.28514132897059125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,2,64,0,1,float16,float16,0,0.6108533143997192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,2,64,128,1,float16,fp8,0,0.2829973300298055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,2,64,128,1,fp8,fp8,0,0.3762506643931071
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,2,64,0,1,float16,fp8,0,0.6095626751581827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,2,64,0,1,fp8,fp8,0,0.5499360164006551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,4,64,128,1,float16,float16,0,0.2890933354695638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,4,64,0,1,float16,float16,0,0.6191093524297079
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,4,64,128,1,float16,fp8,0,0.28753600517908734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,12,64,0,1,float16,float16,0,0.32079466183980304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,4,64,128,1,fp8,fp8,0,0.3810986677805583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,4,64,0,1,float16,fp8,0,0.6173760096232096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,4,64,0,1,fp8,fp8,0,0.5545973380406698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,12,64,128,1,float16,float16,0,0.16913066307703653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,12,64,128,1,float16,fp8,0,0.17164800564448038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,12,64,128,1,fp8,fp8,0,0.22589333852132162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,12,64,0,1,float16,fp8,0,0.3235359986623128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,12,64,0,1,fp8,fp8,0,0.30615466833114624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,1,64,128,1,float16,float16,0,0.15731199582417807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,1,64,0,1,float16,float16,0,0.3086986740430196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,1,64,128,1,float16,fp8,0,0.15428800384203592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,1,64,128,1,fp8,fp8,0,0.2079360087712606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,1,64,0,1,float16,fp8,0,0.304965337117513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,1,64,0,1,fp8,fp8,0,0.2881706754366557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,2,64,128,1,float16,float16,0,0.15949333707491556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,2,64,0,1,float16,float16,0,0.309717337290446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,2,64,128,1,float16,fp8,0,0.15637866655985513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,2,64,128,1,fp8,fp8,0,0.21061867475509644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,2,64,0,1,float16,fp8,0,0.3073386748631795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,2,64,0,1,fp8,fp8,0,0.28941333293914795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,4,64,128,1,float16,float16,0,0.16210132837295532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,4,64,0,1,float16,float16,0,0.3115359942118327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,4,64,128,1,float16,fp8,0,0.16115199526151022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,4,64,128,1,fp8,fp8,0,0.21439466873804727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,4,64,0,1,float16,fp8,0,0.31170666217803955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,4,64,0,1,fp8,fp8,0,0.2926560044288635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,12,64,128,1,float16,float16,0,0.10254933436711629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,12,64,0,1,float16,float16,0,0.1761173407236735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,12,64,128,1,float16,fp8,0,0.10429333647092183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,12,64,128,1,fp8,fp8,0,0.1386613349119822
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,12,64,0,1,float16,fp8,0,0.17692800362904867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,12,64,0,1,fp8,fp8,0,0.1695573329925537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,1,64,128,1,float16,float16,0,0.09476799766222636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,1,64,128,1,float16,fp8,0,0.09359467029571533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,1,64,0,1,float16,float16,0,0.16971200704574585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,1,64,128,1,fp8,fp8,0,0.11875733733177185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,1,64,0,1,float16,fp8,0,0.16863999764124551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,1,64,0,1,fp8,fp8,0,0.15937599539756775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,2,64,128,1,float16,float16,0,0.09505599737167358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,2,64,0,1,float16,float16,0,0.1698346734046936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,2,64,128,1,float16,fp8,0,0.09460799892743428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,2,64,128,1,fp8,fp8,0,0.1218293309211731
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,2,64,0,1,float16,fp8,0,0.16937067111333212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,2,64,0,1,fp8,fp8,0,0.15921066204706827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,4,64,128,1,float16,float16,0,0.09678933024406433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,4,64,0,1,float16,float16,0,0.17126933733622232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,4,64,128,1,float16,fp8,0,0.09627733627955119
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,4,64,128,1,fp8,fp8,0,0.12948800126711527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,4,64,0,1,float16,fp8,0,0.17033600807189941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,4,64,0,1,fp8,fp8,0,0.16316800316174826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,12,64,128,1,float16,float16,0,0.07097066442171733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,12,64,0,1,float16,float16,0,0.11052266756693523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,12,64,128,1,float16,fp8,0,0.07147733370463054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,12,64,128,1,fp8,fp8,0,0.09141866366068523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,12,64,0,1,float16,fp8,0,0.11095466216405232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,12,64,0,1,fp8,fp8,0,0.1050933301448822
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,1,64,128,1,float16,float16,0,0.07092266778151195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,1,64,0,1,float16,float16,0,0.11009066303571065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,1,64,128,1,float16,fp8,0,0.07110933462778728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,1,64,128,1,fp8,fp8,0,0.08776533603668213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,1,64,0,1,float16,fp8,0,0.11050666371981303
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,1,64,0,1,fp8,fp8,0,0.10475732882817586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,2,64,128,1,float16,float16,0,0.07073066631952922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,2,64,0,1,float16,float16,0,0.11038933197657268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,2,64,128,1,float16,fp8,0,0.07098666826883952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,2,64,128,1,fp8,fp8,0,0.08638399839401245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,2,64,0,1,float16,fp8,0,0.1104693313439687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,2,64,0,1,fp8,fp8,0,0.10431466499964397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,4,64,128,1,float16,float16,0,0.07110400001207988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,4,64,0,1,float16,float16,0,0.1111199955145518
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,4,64,128,1,float16,fp8,0,0.07105599840482076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,4,64,128,1,fp8,fp8,0,0.08861866593360901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,4,64,0,1,float16,fp8,0,0.10991467038790385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,4,64,0,1,fp8,fp8,0,0.10424000024795532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,12,1,64,128,1,float16,float16,0,0.7960373560587565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,12,1,64,0,1,float16,float16,0,1.4454986254374187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,12,1,64,128,1,float16,fp8,0,0.7811573346455892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,12,1,64,128,1,fp8,fp8,0,1.028538703918457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,12,1,64,0,1,float16,fp8,0,1.4244799613952637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,12,1,64,0,1,fp8,fp8,0,1.3342347145080566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,12,2,64,128,1,float16,float16,0,0.8051733175913492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,12,2,64,128,1,float16,fp8,0,0.8001866340637207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,12,2,64,0,1,float16,float16,0,1.4600106875101726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,12,2,64,128,1,fp8,fp8,0,1.055834690729777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,12,2,64,0,1,float16,fp8,0,1.4468213717142742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,12,2,64,0,1,fp8,fp8,0,1.3579146067301433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,12,4,64,128,1,float16,float16,0,0.8234026432037354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,12,4,64,0,1,float16,float16,0,1.475162665049235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,12,4,64,128,1,float16,fp8,0,0.8171520233154297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,12,4,64,128,1,fp8,fp8,0,1.0779999891916912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,12,4,64,0,1,float16,fp8,0,1.4670186042785645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,12,64,128,1,float16,float16,0,0.45044267177581787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,12,4,64,0,1,fp8,fp8,0,1.37773863474528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,12,64,128,1,float16,fp8,0,0.44626665115356445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,12,64,0,1,float16,float16,0,0.7806879679361979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,12,64,128,1,fp8,fp8,0,0.5872799952824911
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,12,64,0,1,float16,fp8,0,0.7734399636586508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,12,64,0,1,fp8,fp8,0,0.7440319856007894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,1,64,128,1,float16,float16,0,0.4059946537017822
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,1,64,0,1,float16,float16,0,0.7343626817067465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,1,64,128,1,float16,fp8,0,0.3997279802958171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,1,64,0,1,fp8,fp8,0,0.6793386936187744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,2,64,128,1,float16,float16,0,0.40803198019663495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,1,64,128,1,fp8,fp8,0,0.528218666712443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,1,64,0,1,float16,fp8,0,0.7323253154754639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,2,64,128,1,float16,fp8,0,0.4058186610539754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,2,64,0,1,float16,float16,0,0.7454400062561035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,2,64,128,1,fp8,fp8,0,0.5376160144805908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,2,64,0,1,float16,fp8,0,0.7388160228729248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,2,64,0,1,fp8,fp8,0,0.6872479915618896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,4,64,128,1,float16,float16,0,0.41570134957631427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,4,64,128,1,float16,fp8,0,0.41555198033650714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,4,64,0,1,float16,float16,0,0.7441706657409668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,4,64,128,1,fp8,fp8,0,0.5510720014572144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,4,64,0,1,float16,fp8,0,0.7467093467712402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,4,64,0,1,fp8,fp8,0,0.700597365697225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,12,64,128,1,float16,float16,0,0.2363146742184957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,12,64,0,1,float16,float16,0,0.4056053161621094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,12,64,128,1,float16,fp8,0,0.23855467637379965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,12,64,128,1,fp8,fp8,0,0.3114933371543884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,12,64,0,1,float16,fp8,0,0.4089653491973877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,12,64,0,1,fp8,fp8,0,0.372378667195638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,1,64,128,1,float16,float16,0,0.21649599075317383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,1,64,0,1,float16,float16,0,0.39212266604105633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,1,64,128,1,float16,fp8,0,0.21284266312917074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,1,64,128,1,fp8,fp8,0,0.28778133789698285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,1,64,0,1,float16,fp8,0,0.38591468334198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,2,64,128,1,float16,float16,0,0.2189120054244995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,1,64,0,1,fp8,fp8,0,0.3444746732711792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,2,64,0,1,float16,float16,0,0.3917706807454427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,2,64,128,1,float16,fp8,0,0.21834667523701987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,2,64,128,1,fp8,fp8,0,0.2917226751645406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,2,64,0,1,float16,fp8,0,0.3887519836425781
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,2,64,0,1,fp8,fp8,0,0.35044264793395996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,4,64,128,1,float16,float16,0,0.22399999698003134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,4,64,0,1,float16,float16,0,0.3952000141143799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,4,64,128,1,float16,fp8,0,0.22188266118367514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,4,64,128,1,fp8,fp8,0,0.29665066798528034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,4,64,0,1,float16,fp8,0,0.3927573362986247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,4,64,0,1,fp8,fp8,0,0.35419201850891113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,12,64,128,1,float16,float16,0,0.13354667027791342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,12,64,0,1,float16,float16,0,0.20849066972732544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,12,64,128,1,float16,fp8,0,0.13569066921869913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,12,64,128,1,fp8,fp8,0,0.17947733402252197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,12,64,0,1,float16,fp8,0,0.21088000138600668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,12,64,0,1,fp8,fp8,0,0.20044267177581787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,1,64,128,1,float16,float16,0,0.12329600254694621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,1,64,0,1,float16,float16,0,0.19881065686543783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,1,64,128,1,float16,fp8,0,0.12154666582743327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,1,64,128,1,fp8,fp8,0,0.16427733500798544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,1,64,0,1,float16,fp8,0,0.19614932934443155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,1,64,0,1,fp8,fp8,0,0.1851573387781779
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,2,64,128,1,float16,float16,0,0.12385066350301106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,2,64,0,1,float16,float16,0,0.20298133293787637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,2,64,128,1,float16,fp8,0,0.12319466471672058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,2,64,128,1,fp8,fp8,0,0.16677866379419962
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,2,64,0,1,float16,fp8,0,0.1978506644566854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,2,64,0,1,fp8,fp8,0,0.1885439952214559
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,4,64,128,1,float16,float16,0,0.126202662785848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,4,64,0,1,float16,float16,0,0.2012853423754374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,4,64,128,1,float16,fp8,0,0.12549866239229837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,4,64,128,1,fp8,fp8,0,0.16921067237854004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,4,64,0,1,float16,fp8,0,0.2007466753323873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,4,64,0,1,fp8,fp8,0,0.19152534008026123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,12,64,128,1,float16,float16,0,0.08142933249473572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,12,64,0,1,float16,float16,0,0.11924800276756287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,12,64,128,1,float16,fp8,0,0.08231999973456065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,12,64,128,1,fp8,fp8,0,0.11052800218264262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,12,64,0,1,float16,fp8,0,0.11875200271606445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,12,64,0,1,fp8,fp8,0,0.11563199758529663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,1,64,128,1,float16,float16,0,0.07621333499749501
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,1,64,0,1,float16,float16,0,0.1132319966952006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,1,64,128,1,float16,fp8,0,0.07578133543332417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,1,64,128,1,fp8,fp8,0,0.09551466504732768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,1,64,0,1,float16,fp8,0,0.11251200238863628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,1,64,0,1,fp8,fp8,0,0.10687999924023946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,2,64,128,1,float16,float16,0,0.0758133331934611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,2,64,0,1,float16,float16,0,0.11437867085138957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,2,64,128,1,float16,fp8,0,0.07610133290290833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,2,64,128,1,fp8,fp8,0,0.09724266330401103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,2,64,0,1,float16,fp8,0,0.11309867103894551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,2,64,0,1,fp8,fp8,0,0.10679999987284343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,4,64,128,1,float16,float16,0,0.07790933549404144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,4,64,0,1,float16,float16,0,0.11412266890207927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,4,64,128,1,float16,fp8,0,0.07704533139864604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,4,64,128,1,fp8,fp8,0,0.09937600294748943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,4,64,0,1,float16,fp8,0,0.1144480009873708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,4,64,0,1,fp8,fp8,0,0.10755733648935954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,12,64,128,1,float16,float16,0,0.058117335041364036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,12,64,0,1,float16,float16,0,0.07865066826343536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,12,64,128,1,float16,fp8,0,0.05880533158779144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,12,64,128,1,fp8,fp8,0,0.06868266562620799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,12,64,0,1,float16,fp8,0,0.07846400141716003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,12,64,0,1,fp8,fp8,0,0.07393600046634674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,1,64,128,1,float16,float16,0,0.05816533168156942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,1,64,0,1,float16,float16,0,0.07829333345095317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,1,64,128,1,float16,fp8,0,0.05813866853713989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,1,64,128,1,fp8,fp8,0,0.06845866640408833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,1,64,0,1,float16,fp8,0,0.07821333408355713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,1,64,0,1,fp8,fp8,0,0.07347199817498525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,2,64,128,1,float16,float16,0,0.05795733133951823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,2,64,0,1,float16,float16,0,0.07874666651089986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,2,64,128,1,float16,fp8,0,0.058517331878344216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,2,64,128,1,fp8,fp8,0,0.06832533578077953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,2,64,0,1,float16,fp8,0,0.07824533184369405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,2,64,0,1,fp8,fp8,0,0.07422400017579396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,4,64,128,1,float16,float16,0,0.058320000767707825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,4,64,0,1,float16,float16,0,0.07841066519419353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,4,64,128,1,float16,fp8,0,0.05830933153629303
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,4,64,128,1,fp8,fp8,0,0.06829333305358887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,4,64,0,1,float16,fp8,0,0.07866133252779643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,4,64,0,1,fp8,fp8,0,0.07332799832026164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,12,1,64,128,1,float16,float16,0,1.037706693013509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,12,1,64,128,1,float16,fp8,0,1.0269973278045654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,12,1,64,0,1,float16,float16,0,1.518405278523763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,12,1,64,128,1,fp8,fp8,0,1.3368053436279297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,12,1,64,0,1,float16,fp8,0,1.4934986432393391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,12,1,64,0,1,fp8,fp8,0,1.3847626050313313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,12,2,64,128,1,float16,float16,0,1.0531679789225261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,12,2,64,0,1,float16,float16,0,1.5167039235432942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,12,2,64,128,1,float16,fp8,0,1.032639980316162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,12,2,64,128,1,fp8,fp8,0,1.3589866956075032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,12,2,64,0,1,float16,fp8,0,1.5118133227030437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,12,4,64,128,1,float16,float16,0,1.0708106358846028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,12,2,64,0,1,fp8,fp8,0,1.4044639269510906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,12,4,64,128,1,float16,fp8,0,1.058672030766805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,12,4,64,0,1,float16,float16,0,1.5523999532063801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,12,4,64,128,1,fp8,fp8,0,1.3949440320332844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,12,64,128,1,float16,float16,0,0.5849386850992838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,12,4,64,0,1,float16,fp8,0,1.5393493970235188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,12,64,0,1,float16,float16,0,0.8219306468963623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,12,4,64,0,1,fp8,fp8,0,1.4556800524393718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,12,64,128,1,float16,fp8,0,0.582912007967631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,12,64,128,1,fp8,fp8,0,0.7672266960144043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,12,64,0,1,float16,fp8,0,0.8214773337046305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,12,64,0,1,fp8,fp8,0,0.7934239705403646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,1,64,128,1,float16,float16,0,0.5284586747487386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,1,64,128,1,float16,fp8,0,0.5181493361790975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,1,64,0,1,float16,float16,0,0.7716853618621826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,1,64,128,1,fp8,fp8,0,0.6856799920399984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,1,64,0,1,float16,fp8,0,0.7575893402099609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,1,64,0,1,fp8,fp8,0,0.7100799878438314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,2,64,128,1,float16,float16,0,0.5346399943033854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,2,64,128,1,float16,fp8,0,0.5295733213424683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,2,64,0,1,float16,float16,0,0.7789759635925293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,2,64,128,1,fp8,fp8,0,0.6948160330454508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,2,64,0,1,float16,fp8,0,0.7714560031890869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,2,64,0,1,fp8,fp8,0,0.7186773618062338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,4,64,128,1,float16,float16,0,0.5451680024464926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,4,64,0,1,float16,float16,0,0.788096030553182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,4,64,128,1,float16,fp8,0,0.540117343266805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,4,64,128,1,fp8,fp8,0,0.7097226778666178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,4,64,0,1,float16,fp8,0,0.7841546535491943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,12,64,128,1,float16,float16,0,0.303765336672465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,4,64,0,1,fp8,fp8,0,0.7315893173217773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,12,64,0,1,float16,float16,0,0.426581343015035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,12,64,128,1,float16,fp8,0,0.30457067489624023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,12,64,128,1,fp8,fp8,0,0.40002667903900146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,12,64,0,1,float16,fp8,0,0.4265226523081462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,12,64,0,1,fp8,fp8,0,0.3979146480560303
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,1,64,128,1,float16,float16,0,0.27721067269643146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,1,64,0,1,float16,float16,0,0.40112535158793133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,1,64,128,1,float16,fp8,0,0.27172799905141193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,1,64,128,1,fp8,fp8,0,0.3638720115025838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,1,64,0,1,float16,fp8,0,0.3964213530222575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,1,64,0,1,fp8,fp8,0,0.3617599805196126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,2,64,128,1,float16,float16,0,0.2795413335164388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,2,64,0,1,float16,float16,0,0.4028053283691406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,2,64,128,1,float16,fp8,0,0.277402659257253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,2,64,128,1,fp8,fp8,0,0.36855467160542804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,2,64,0,1,float16,fp8,0,0.40205331643422443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,2,64,0,1,fp8,fp8,0,0.3659840027491252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,4,64,128,1,float16,float16,0,0.2832640012105306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,4,64,0,1,float16,float16,0,0.4095360040664673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,4,64,128,1,float16,fp8,0,0.2819146712621053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,4,64,128,1,fp8,fp8,0,0.3755253156026204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,4,64,0,1,float16,fp8,0,0.40889068444569904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,4,64,0,1,fp8,fp8,0,0.37396268049875897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,12,64,128,1,float16,float16,0,0.16524266203244528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,12,64,0,1,float16,float16,0,0.22107199827829996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,12,64,128,1,float16,fp8,0,0.16731733083724976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,12,64,128,1,fp8,fp8,0,0.22057600816090903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,12,64,0,1,float16,fp8,0,0.22245866060256958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,12,64,0,1,fp8,fp8,0,0.21092265844345093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,1,64,128,1,float16,float16,0,0.15199466546376547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,1,64,0,1,float16,float16,0,0.205402672290802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,1,64,128,1,float16,fp8,0,0.15005333224932352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,1,64,128,1,fp8,fp8,0,0.2029013236363729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,1,64,0,1,float16,fp8,0,0.20286933581034342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,1,64,0,1,fp8,fp8,0,0.19324799378712973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,2,64,128,1,float16,float16,0,0.15262400110562643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,2,64,0,1,float16,float16,0,0.20558400948842367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,2,64,128,1,float16,fp8,0,0.15155733625094095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,2,64,128,1,fp8,fp8,0,0.20557334025700888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,2,64,0,1,float16,fp8,0,0.2042133410771688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,2,64,0,1,fp8,fp8,0,0.19553599754969278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,4,64,128,1,float16,float16,0,0.15575466553370157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,4,64,0,1,float16,float16,0,0.2084533373514811
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,4,64,128,1,float16,fp8,0,0.15501333276430765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,4,64,128,1,fp8,fp8,0,0.20814400911331177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,4,64,0,1,float16,fp8,0,0.20812267065048218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,4,64,0,1,fp8,fp8,0,0.2015519936879476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,12,64,128,1,float16,float16,0,0.09585600097974141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,12,64,0,1,float16,float16,0,0.12091733018557231
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,12,64,128,1,float16,fp8,0,0.0979200005531311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,12,64,128,1,fp8,fp8,0,0.13040000200271606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,12,64,0,1,float16,fp8,0,0.12205333511034648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,12,64,0,1,fp8,fp8,0,0.11864533027013142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,1,64,128,1,float16,float16,0,0.08763733506202698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,1,64,0,1,float16,float16,0,0.11333866914113362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,1,64,128,1,float16,fp8,0,0.0863200028737386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,1,64,128,1,fp8,fp8,0,0.11108266313870747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,1,64,0,1,float16,fp8,0,0.11049600442250569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,1,64,0,1,fp8,fp8,0,0.10733333230018616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,2,64,128,1,float16,float16,0,0.08772266904513042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,2,64,0,1,float16,float16,0,0.1144533356030782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,2,64,128,1,float16,fp8,0,0.08743466933568318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,2,64,128,1,fp8,fp8,0,0.11527466773986816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,2,64,0,1,float16,fp8,0,0.11147733529408772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,2,64,0,1,fp8,fp8,0,0.10812800129254659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,4,64,128,1,float16,float16,0,0.09018133083979289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,4,64,0,1,float16,float16,0,0.11506666739781697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,4,64,128,1,float16,fp8,0,0.09071466326713562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,4,64,128,1,fp8,fp8,0,0.12118400136629741
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,4,64,0,1,float16,fp8,0,0.11361599961916606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,4,64,0,1,fp8,fp8,0,0.11158933242162068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,12,64,128,1,float16,float16,0,0.05894933144251505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,12,64,0,1,float16,float16,0,0.07099733253320058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,1,64,128,1,float16,fp8,0,0.05602666735649109
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,12,64,128,1,float16,fp8,0,0.0582239975531896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,12,64,128,1,fp8,fp8,0,0.07745600243409474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,12,64,0,1,float16,fp8,0,0.0717439999183019
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,12,64,0,1,fp8,fp8,0,0.07003200054168701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,1,64,128,1,float16,float16,0,0.05553600192070007
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,1,64,0,1,float16,float16,0,0.06940266489982605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,1,64,128,1,fp8,fp8,0,0.07283199826876323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,1,64,0,1,float16,fp8,0,0.0690880020459493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,1,64,0,1,fp8,fp8,0,0.0662720004717509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,2,64,128,1,float16,float16,0,0.057130664587020874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,2,64,0,1,float16,float16,0,0.06960533559322357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,2,64,128,1,float16,fp8,0,0.056458666920661926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,2,64,128,1,fp8,fp8,0,0.07407466570536296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,2,64,0,1,float16,fp8,0,0.06930133203665416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,2,64,0,1,fp8,fp8,0,0.06586666901906331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,4,64,128,1,float16,float16,0,0.057189335425694786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,4,64,0,1,float16,float16,0,0.06992533306280772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,4,64,128,1,float16,fp8,0,0.057114665706952415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,4,64,128,1,fp8,fp8,0,0.07540266712506612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,4,64,0,1,float16,fp8,0,0.07035733262697856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,4,64,0,1,fp8,fp8,0,0.06660800178845723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,12,64,128,1,float16,float16,0,0.04554666578769684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,12,64,0,1,float16,float16,0,0.05031999945640564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,12,64,128,1,float16,fp8,0,0.04574933151404063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,12,64,128,1,fp8,fp8,0,0.05157333115736643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,12,64,0,1,float16,fp8,0,0.05078400174776713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,12,64,0,1,fp8,fp8,0,0.04909333089987437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,1,64,128,1,float16,float16,0,0.04515199859937032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,1,64,0,1,float16,float16,0,0.050010666251182556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,1,64,128,1,float16,fp8,0,0.045221333702405296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,1,64,128,1,fp8,fp8,0,0.051216001311937966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,1,64,0,1,float16,fp8,0,0.05050133168697357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,1,64,0,1,fp8,fp8,0,0.04897599915663401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,2,64,128,1,float16,float16,0,0.04534933467706045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,2,64,0,1,float16,float16,0,0.04946133494377136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,2,64,128,1,float16,fp8,0,0.046351999044418335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,4,64,128,1,float16,fp8,0,0.04562666515509287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,2,64,128,1,fp8,fp8,0,0.05156266689300537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,2,64,0,1,float16,fp8,0,0.05042133231957754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,2,64,0,1,fp8,fp8,0,0.049029335379600525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,4,64,128,1,float16,float16,0,0.04526400069395701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,4,64,0,1,float16,float16,0,0.049829334020614624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,4,64,128,1,fp8,fp8,0,0.05120533208052317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,4,64,0,1,float16,fp8,0,0.05041066805521647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,4,64,0,1,fp8,fp8,0,0.04907733201980591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,12,1,64,128,1,float16,float16,0,0.8032906850179037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,12,1,64,128,1,float16,fp8,0,0.7951412995656332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,12,1,64,0,1,float16,float16,0,1.0257226626078289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,12,1,64,128,1,fp8,fp8,0,1.0098346869150798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,12,1,64,0,1,float16,fp8,0,1.010858694712321
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,12,1,64,0,1,fp8,fp8,0,0.9265493551890055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,12,2,64,128,1,float16,float16,0,0.8102453549702963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,12,2,64,0,1,float16,float16,0,1.0255413055419922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,12,2,64,128,1,float16,fp8,0,0.7948906421661377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,12,2,64,128,1,fp8,fp8,0,1.0286986827850342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,12,2,64,0,1,float16,fp8,0,1.016218662261963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,12,2,64,0,1,fp8,fp8,0,0.9331519603729248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,12,4,64,128,1,float16,float16,0,0.8215359846750895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,12,4,64,0,1,float16,float16,0,1.0465493202209473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,12,4,64,128,1,float16,fp8,0,0.8141012986501058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,12,4,64,128,1,fp8,fp8,0,1.0494879881540935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,12,4,64,0,1,float16,fp8,0,1.0252479712168376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,12,4,64,0,1,fp8,fp8,0,0.9608746369679769
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,12,64,128,1,float16,float16,0,0.4519999821980794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,1,64,128,1,float16,float16,0,0.40068264802296955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,12,64,0,1,float16,float16,0,0.5603253444035848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,12,64,128,1,float16,fp8,0,0.44512001673380536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,12,64,128,1,fp8,fp8,0,0.5797599951426188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,12,64,0,1,float16,fp8,0,0.5571786562601725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,12,64,0,1,fp8,fp8,0,0.5262933174769083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,1,64,0,1,float16,float16,0,0.5152159929275513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,1,64,128,1,float16,fp8,0,0.395087997118632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,1,64,128,1,fp8,fp8,0,0.5201760133107504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,1,64,0,1,float16,fp8,0,0.5062933365503947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,1,64,0,1,fp8,fp8,0,0.4720319906870524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,2,64,128,1,float16,float16,0,0.41117334365844727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,2,64,0,1,float16,float16,0,0.5230720043182373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,2,64,128,1,float16,fp8,0,0.40460801124572754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,2,64,128,1,fp8,fp8,0,0.5333173274993896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,2,64,0,1,float16,fp8,0,0.5174239873886108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,2,64,0,1,fp8,fp8,0,0.47971200942993164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,4,64,128,1,float16,float16,0,0.4183093309402466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,4,64,0,1,float16,float16,0,0.5272639989852905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,4,64,128,1,float16,fp8,0,0.4162293275197347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,4,64,128,1,fp8,fp8,0,0.5440959930419922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,4,64,0,1,float16,fp8,0,0.5263573328653971
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,4,64,0,1,fp8,fp8,0,0.49406933784484863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,12,64,128,1,float16,float16,0,0.2352586587270101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,12,64,0,1,float16,float16,0,0.28987733523050946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,12,64,128,1,float16,fp8,0,0.23457066218058267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,12,64,128,1,fp8,fp8,0,0.307861328125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,12,64,0,1,float16,fp8,0,0.2929919958114624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,12,64,0,1,fp8,fp8,0,0.2711946765581767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,1,64,128,1,float16,float16,0,0.21262399355570474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,1,64,0,1,float16,float16,0,0.27135999997456867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,1,64,128,1,float16,fp8,0,0.2090346614519755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,1,64,128,1,fp8,fp8,0,0.2808053294817607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,1,64,0,1,float16,fp8,0,0.26785600185394287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,1,64,0,1,fp8,fp8,0,0.2430986762046814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,2,64,128,1,float16,float16,0,0.2165706753730774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,2,64,0,1,float16,float16,0,0.2743413249651591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,2,64,128,1,float16,fp8,0,0.21264533201853433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,2,64,128,1,fp8,fp8,0,0.28486400842666626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,2,64,0,1,float16,fp8,0,0.2707039912541707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,2,64,0,1,fp8,fp8,0,0.24707732597986856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,4,64,128,1,float16,float16,0,0.22027732928593954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,4,64,0,1,float16,float16,0,0.2767360011736552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,4,64,128,1,float16,fp8,0,0.22030933698018393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,4,64,128,1,fp8,fp8,0,0.29043734073638916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,4,64,0,1,float16,fp8,0,0.2765439947446187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,4,64,0,1,fp8,fp8,0,0.25283199548721313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,12,64,128,1,float16,float16,0,0.1302079955736796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,12,64,0,1,float16,float16,0,0.15377066532770792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,12,64,128,1,float16,fp8,0,0.13105066617329916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,12,64,128,1,fp8,fp8,0,0.17317867279052734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,12,64,0,1,float16,fp8,0,0.15465600291887918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,12,64,0,1,fp8,fp8,0,0.1474560002485911
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,1,64,128,1,float16,float16,0,0.11839999755223592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,1,64,0,1,float16,float16,0,0.13940266768137613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,1,64,128,1,float16,fp8,0,0.11680000027020772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,1,64,128,1,fp8,fp8,0,0.1574666698773702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,1,64,0,1,float16,fp8,0,0.13809067010879517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,1,64,0,1,fp8,fp8,0,0.1325546701749166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,2,64,128,1,float16,float16,0,0.11997866630554199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,2,64,0,1,float16,float16,0,0.14017066359519958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,2,64,128,1,float16,fp8,0,0.1183093289534251
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,2,64,128,1,fp8,fp8,0,0.16059199968973795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,2,64,0,1,float16,fp8,0,0.1393226683139801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,2,64,0,1,fp8,fp8,0,0.13410666584968567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,4,64,128,1,float16,float16,0,0.12150933345158894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,4,64,0,1,float16,float16,0,0.14220266540845236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,4,64,128,1,float16,fp8,0,0.12231466174125671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,4,64,128,1,fp8,fp8,0,0.16384533047676086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,4,64,0,1,float16,fp8,0,0.14268267154693604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,12,64,0,1,float16,fp8,0,0.08880000313123067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,4,64,0,1,fp8,fp8,0,0.13859200477600098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,12,64,128,1,float16,float16,0,0.0765226682027181
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,12,64,0,1,float16,float16,0,0.08665066957473755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,12,64,128,1,float16,fp8,0,0.07764799892902374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,12,64,128,1,fp8,fp8,0,0.1058079997698466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,12,64,0,1,fp8,fp8,0,0.0862559974193573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,1,64,128,1,float16,float16,0,0.06971199810504913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,1,64,0,1,float16,float16,0,0.08106133341789246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,1,64,128,1,float16,fp8,0,0.06833066542943318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,1,64,128,1,fp8,fp8,0,0.08967999617258708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,1,64,0,1,float16,fp8,0,0.08018666505813599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,1,64,0,1,fp8,fp8,0,0.07670933504899342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,2,64,128,1,float16,float16,0,0.06880533198515575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,2,64,0,1,float16,float16,0,0.08130666613578796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,2,64,128,1,float16,fp8,0,0.06866133213043213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,2,64,128,1,fp8,fp8,0,0.09099200367927551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,2,64,0,1,float16,fp8,0,0.08081066608428955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,2,64,0,1,fp8,fp8,0,0.07669333120187123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,4,64,128,1,float16,float16,0,0.07111999889214833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,4,64,0,1,float16,float16,0,0.08195200065771739
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,4,64,128,1,float16,fp8,0,0.06987200180689494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,4,64,128,1,fp8,fp8,0,0.09268266956011455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,4,64,0,1,float16,fp8,0,0.08134933312733968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,4,64,0,1,fp8,fp8,0,0.07878933350245158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,12,64,128,1,float16,float16,0,0.04854399959246317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,12,64,0,1,float16,float16,0,0.05351999898751577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,12,64,128,1,float16,fp8,0,0.04889066517353058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,12,64,128,1,fp8,fp8,0,0.060405333836873375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,12,64,0,1,float16,fp8,0,0.05454400181770325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,12,64,0,1,fp8,fp8,0,0.05221866567929586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,1,64,128,1,float16,float16,0,0.04708800216515859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,1,64,0,1,float16,float16,0,0.05279466509819031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,1,64,128,1,float16,fp8,0,0.047338664531707764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,1,64,128,1,fp8,fp8,0,0.05834666887919108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,1,64,0,1,float16,fp8,0,0.05328000088532766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,1,64,0,1,fp8,fp8,0,0.049653331438700356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,2,64,128,1,float16,float16,0,0.04716266691684723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,2,64,0,1,float16,float16,0,0.05346133311589559
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,2,64,128,1,float16,fp8,0,0.04761599997679392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,2,64,128,1,fp8,fp8,0,0.05839466551939646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,4,64,128,1,fp8,fp8,0,0.05899199843406677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,2,64,0,1,float16,fp8,0,0.05282133320967356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,2,64,0,1,fp8,fp8,0,0.05026133358478546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,4,64,128,1,float16,float16,0,0.0480320006608963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,4,64,0,1,float16,float16,0,0.05302399893601736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,4,64,128,1,float16,fp8,0,0.04766400158405304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,4,64,0,1,float16,fp8,0,0.05356800059477488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,4,64,0,1,fp8,fp8,0,0.05073066552480062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,12,64,128,1,float16,float16,0,0.04146133363246918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,12,64,0,1,float16,float16,0,0.04223999877770742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,12,64,128,1,float16,fp8,0,0.04274666806062063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,12,64,128,1,fp8,fp8,0,0.04725333551565806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,12,64,0,1,float16,fp8,0,0.042223999897638954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,12,64,0,1,fp8,fp8,0,0.04135466615358988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,1,64,128,1,float16,float16,0,0.04126933217048645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,1,64,0,1,float16,float16,0,0.04195733368396759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,1,64,128,1,float16,fp8,0,0.04127466678619385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,1,64,128,1,fp8,fp8,0,0.047968000173568726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,1,64,0,1,float16,fp8,0,0.041690667470296226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,1,64,0,1,fp8,fp8,0,0.04140799989302953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,2,64,128,1,float16,float16,0,0.04138666639725367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,2,64,0,1,float16,float16,0,0.04214933514595032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,2,64,128,1,float16,fp8,0,0.041296000281969704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,2,64,128,1,fp8,fp8,0,0.047354668378829956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,2,64,0,1,float16,fp8,0,0.04204266766707102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,2,64,0,1,fp8,fp8,0,0.040965333580970764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,4,64,128,1,float16,float16,0,0.041093334555625916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,4,64,0,1,float16,float16,0,0.0417546679576238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,4,64,128,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,4,64,128,1,fp8,fp8,0,0.047040000557899475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,4,64,0,1,float16,fp8,0,0.042064001162846885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,4,64,0,1,fp8,fp8,0,0.041221333046754204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,12,1,64,128,1,float16,float16,0,0.932528018951416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,12,1,64,0,1,float16,float16,0,1.057909329732259
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,12,1,64,128,1,float16,fp8,0,0.9309386412302653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,12,1,64,128,1,fp8,fp8,0,1.242085297902425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,12,1,64,0,1,float16,fp8,0,1.0616587003072102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,12,1,64,0,1,fp8,fp8,0,0.9663253625233968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,12,2,64,128,1,float16,float16,0,0.9403786659240723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,12,2,64,0,1,float16,float16,0,1.0726293722788494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,12,2,64,128,1,float16,fp8,0,0.9377973079681396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,12,2,64,128,1,fp8,fp8,0,1.2451999982198079
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,12,2,64,0,1,float16,fp8,0,1.0624159971872966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,12,2,64,0,1,fp8,fp8,0,0.9850827058156332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,12,4,64,128,1,float16,float16,0,0.9743946393330892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,12,4,64,0,1,float16,float16,0,1.1001386642456055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,12,4,64,128,1,float16,fp8,0,0.9683466752370199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,12,64,128,1,float16,float16,0,0.5240533351898193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,12,4,64,0,1,float16,fp8,0,1.0922826925913494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,12,4,64,128,1,fp8,fp8,0,1.2950613498687744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,12,4,64,0,1,fp8,fp8,0,1.014906644821167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,12,64,0,1,float16,float16,0,0.5934400161107382
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,12,64,128,1,float16,fp8,0,0.5206453402837118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,12,64,128,1,fp8,fp8,0,0.6756532986958822
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,12,64,0,1,float16,fp8,0,0.581770658493042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,12,64,0,1,fp8,fp8,0,0.529423991839091
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,1,64,128,1,float16,float16,0,0.48121599356333417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,1,64,0,1,float16,float16,0,0.5461440086364746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,1,64,128,1,float16,fp8,0,0.47974932193756104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,1,64,128,1,fp8,fp8,0,0.6391146580378214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,1,64,0,1,float16,fp8,0,0.5461493333180746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,1,64,0,1,fp8,fp8,0,0.49692801634470624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,2,64,128,1,float16,float16,0,0.485429326693217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,2,64,0,1,float16,float16,0,0.5481546719868978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,2,64,128,1,float16,fp8,0,0.4833759864171346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,2,64,128,1,fp8,fp8,0,0.6442079941431681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,2,64,0,1,float16,fp8,0,0.5464479923248291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,2,64,0,1,fp8,fp8,0,0.5056853294372559
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,4,64,128,1,float16,float16,0,0.4978933334350586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,4,64,0,1,float16,float16,0,0.5627466837565104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,4,64,128,1,float16,fp8,0,0.49531201521555585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,4,64,128,1,fp8,fp8,0,0.6569493214289347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,4,64,0,1,float16,fp8,0,0.5576106707255045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,4,64,0,1,fp8,fp8,0,0.5221866766611735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,12,64,128,1,float16,float16,0,0.2764479915301005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,12,64,0,1,float16,float16,0,0.31029866139094037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,12,64,128,1,float16,fp8,0,0.2735146681467692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,12,64,128,1,fp8,fp8,0,0.3525386651357015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,12,64,0,1,float16,fp8,0,0.30552534262339276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,12,64,0,1,fp8,fp8,0,0.26901866992314655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,1,64,128,1,float16,float16,0,0.2540480097134908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,1,64,0,1,float16,float16,0,0.2874559958775838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,1,64,128,1,float16,fp8,0,0.25570134321848553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,1,64,128,1,fp8,fp8,0,0.33896533648173016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,1,64,0,1,float16,fp8,0,0.2888000011444092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,1,64,0,1,fp8,fp8,0,0.25785066684087116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,2,64,128,1,float16,float16,0,0.25918932755788165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,2,64,0,1,float16,float16,0,0.2900480031967163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,2,64,128,1,float16,fp8,0,0.25731199979782104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,2,64,128,1,fp8,fp8,0,0.3417760133743286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,2,64,0,1,float16,fp8,0,0.28913599252700806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,2,64,0,1,fp8,fp8,0,0.26146133740743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,4,64,128,1,float16,float16,0,0.2635200023651123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,4,64,0,1,float16,float16,0,0.2965173323949178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,4,64,128,1,float16,fp8,0,0.2634773254394531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,4,64,128,1,fp8,fp8,0,0.34142935276031494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,4,64,0,1,float16,fp8,0,0.2950613300005595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,4,64,0,1,fp8,fp8,0,0.263264000415802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,12,64,128,1,float16,float16,0,0.15331199765205383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,12,64,0,1,float16,float16,0,0.16596266627311707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,12,64,128,1,float16,fp8,0,0.15100799997647604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,12,64,128,1,fp8,fp8,0,0.19452265898386636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,12,64,0,1,float16,fp8,0,0.1649333337942759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,12,64,0,1,fp8,fp8,0,0.14664533734321594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,1,64,128,1,float16,float16,0,0.1388746698697408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,1,64,0,1,float16,float16,0,0.15127999583880106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,1,64,128,1,float16,fp8,0,0.13914133111635843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,1,64,128,1,fp8,fp8,0,0.18698134024937949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,1,64,0,1,float16,fp8,0,0.15196800231933594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,1,64,0,1,fp8,fp8,0,0.1381760040918986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,2,64,128,1,float16,float16,0,0.14226133624712625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,2,64,0,1,float16,float16,0,0.15331199765205383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,2,64,128,1,float16,fp8,0,0.14148799578348795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,2,64,128,1,fp8,fp8,0,0.18769067525863647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,2,64,0,1,float16,fp8,0,0.15366933743158975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,2,64,0,1,fp8,fp8,0,0.14097600181897482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,4,64,128,1,float16,float16,0,0.14621866742769876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,4,64,0,1,float16,float16,0,0.15557866295178732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,4,64,128,1,float16,fp8,0,0.14531733592351279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,4,64,128,1,fp8,fp8,0,0.19058666626612344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,4,64,0,1,float16,fp8,0,0.15495466192563376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,4,64,0,1,fp8,fp8,0,0.14569600423177084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,12,64,128,1,float16,float16,0,0.08990933497746785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,12,64,0,1,float16,float16,0,0.09308800101280212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,12,64,128,1,float16,fp8,0,0.0888853371143341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,12,64,128,1,fp8,fp8,0,0.11476266384124756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,12,64,0,1,float16,fp8,0,0.09113599856694539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,12,64,0,1,fp8,fp8,0,0.08462400237719218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,1,64,128,1,float16,float16,0,0.0783679982026418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,1,64,0,1,float16,float16,0,0.08553600311279297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,1,64,128,1,float16,fp8,0,0.07897066573301952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,1,64,128,1,fp8,fp8,0,0.1032426655292511
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,1,64,0,1,float16,fp8,0,0.08506666620572408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,1,64,0,1,fp8,fp8,0,0.07725866635640462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,2,64,128,1,float16,float16,0,0.07995200157165527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,2,64,0,1,float16,float16,0,0.08470400174458821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,2,64,128,1,float16,fp8,0,0.07933866480986278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,2,64,128,1,fp8,fp8,0,0.10776533683141072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,2,64,0,1,float16,fp8,0,0.08561066786448161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,2,64,0,1,fp8,fp8,0,0.0786293347676595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,4,64,128,1,float16,float16,0,0.08273600041866302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,4,64,0,1,float16,float16,0,0.0881760021050771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,4,64,128,1,float16,fp8,0,0.08261866867542267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,4,64,128,1,fp8,fp8,0,0.10980799794197083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,4,64,0,1,float16,fp8,0,0.08709866801897685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,4,64,0,1,fp8,fp8,0,0.080485333998998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,12,64,128,1,float16,float16,0,0.05303466816743215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,12,64,0,1,float16,float16,0,0.053685332338015236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,1,64,128,1,float16,fp8,0,0.05000533163547516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,12,64,128,1,float16,fp8,0,0.05266666909058889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,12,64,128,1,fp8,fp8,0,0.07204799850781758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,12,64,0,1,float16,fp8,0,0.05454933146635691
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,12,64,0,1,fp8,fp8,0,0.05017599960168203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,1,64,128,1,float16,float16,0,0.04957333207130432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,1,64,0,1,float16,float16,0,0.05119466781616211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,1,64,128,1,fp8,fp8,0,0.06423466900984447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,1,64,0,1,float16,fp8,0,0.05147733290990194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,1,64,0,1,fp8,fp8,0,0.047354668378829956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,2,64,128,1,float16,float16,0,0.0498879998922348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,2,64,0,1,float16,float16,0,0.05179733534653982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,2,64,128,1,float16,fp8,0,0.049471999208132424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,2,64,128,1,fp8,fp8,0,0.06659199794133504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,2,64,0,1,float16,fp8,0,0.05182399849096934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,2,64,0,1,fp8,fp8,0,0.048309331138928734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,4,64,128,1,float16,float16,0,0.050986667474110924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,4,64,0,1,float16,float16,0,0.052341332038243614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,4,64,128,1,float16,fp8,0,0.050613333781560264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,4,64,128,1,fp8,fp8,0,0.06502399841944377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,4,64,0,1,float16,fp8,0,0.052383999029795326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,4,64,0,1,fp8,fp8,0,0.049135997891426086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,12,64,128,1,float16,float16,0,0.03941333293914795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,12,64,0,1,float16,float16,0,0.038047999143600464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,12,64,128,1,float16,fp8,0,0.03763733307520548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,12,64,128,1,fp8,fp8,0,0.043381333351135254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,12,64,0,1,float16,fp8,0,0.03772266705830892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,12,64,0,1,fp8,fp8,0,0.03549866626660029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,1,64,128,1,float16,float16,0,0.03961600114901861
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,1,64,0,1,float16,float16,0,0.03572266548871994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,1,64,128,1,float16,fp8,0,0.036320000886917114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,1,64,128,1,fp8,fp8,0,0.04271999994913737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,1,64,0,1,float16,fp8,0,0.035818666219711304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,1,64,0,1,fp8,fp8,0,0.0341333324710528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,2,64,128,1,float16,float16,0,0.03771200031042099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,2,64,0,1,float16,float16,0,0.03640533238649368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,2,64,128,1,float16,fp8,0,0.0374293327331543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,2,64,128,1,fp8,fp8,0,0.04270400106906891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,2,64,0,1,float16,fp8,0,0.03636800001064936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,2,64,0,1,fp8,fp8,0,0.03480000048875809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,4,64,128,1,float16,float16,0,0.0365226666132609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,4,64,0,1,float16,float16,0,0.037061333656311035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,4,64,128,1,float16,fp8,0,0.03658666710058848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,4,64,128,1,fp8,fp8,0,0.04349866509437561
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,4,64,0,1,float16,fp8,0,0.03726933399836222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,4,64,0,1,fp8,fp8,0,0.03484266748030981
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,12,64,128,1,float16,float16,0,0.026821332673231762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,12,64,0,1,float16,float16,0,0.029898665845394135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,12,64,128,1,float16,fp8,0,0.02693866689999898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,12,64,128,1,fp8,fp8,0,0.03366400053103765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,12,64,0,1,float16,fp8,0,0.02972800036271413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,12,64,0,1,fp8,fp8,0,0.028725333511829376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,1,64,128,1,float16,float16,0,0.02589866767326991
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,1,64,0,1,float16,float16,0,0.028255999088287354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,1,64,128,1,float16,fp8,0,0.026330667237440746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,1,64,128,1,fp8,fp8,0,0.033733333150545754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,1,64,0,1,float16,fp8,0,0.02882666637500127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,1,64,0,1,fp8,fp8,0,0.027727998793125153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,2,64,128,1,float16,float16,0,0.02629866699377696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,2,64,0,1,float16,float16,0,0.029258665939172108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,2,64,128,1,float16,fp8,0,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,2,64,128,1,fp8,fp8,0,0.03286399940649668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,2,64,0,1,float16,fp8,0,0.029258665939172108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,2,64,0,1,fp8,fp8,0,0.02811199923356374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,4,64,128,1,float16,float16,0,0.02665599932273229
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,4,64,0,1,float16,float16,0,0.028789333999156952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,4,64,128,1,float16,fp8,0,0.02733866622050603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,4,64,128,1,fp8,fp8,0,0.03310399999221166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,4,64,0,1,float16,fp8,0,0.029077333708604176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,4,64,0,1,fp8,fp8,0,0.028058665494124096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,12,1,64,128,1,float16,float16,0,0.8928906917572021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,12,1,64,0,1,float16,float16,0,0.8744106292724609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,12,1,64,128,1,float16,fp8,0,0.8849066893259684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,12,1,64,128,1,fp8,fp8,0,1.1595306396484375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,12,1,64,0,1,float16,fp8,0,0.8683573404947916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,12,1,64,0,1,fp8,fp8,0,0.7842400074005127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,12,2,64,128,1,float16,float16,0,0.8991733392079672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,12,2,64,0,1,float16,float16,0,0.8860053221384684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,12,2,64,128,1,float16,fp8,0,0.8982240358988444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,12,2,64,128,1,fp8,fp8,0,1.1882826487223308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,12,2,64,0,1,float16,fp8,0,0.8755626678466797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,12,2,64,0,1,fp8,fp8,0,0.8133066495259603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,12,4,64,128,1,float16,float16,0,0.9242133299509684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,12,4,64,0,1,float16,float16,0,0.9123466809590658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,12,4,64,128,1,float16,fp8,0,0.926245371500651
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,12,4,64,128,1,fp8,fp8,0,1.2253226439158122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,12,4,64,0,1,float16,fp8,0,0.9096373716990153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,12,4,64,0,1,fp8,fp8,0,0.8394560019175211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,12,64,128,1,float16,float16,0,0.5025493303934733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,12,64,0,1,float16,float16,0,0.4911946853001912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,12,64,128,1,float16,fp8,0,0.49776001771291095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,12,64,128,1,fp8,fp8,0,0.6488106648127238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,12,64,0,1,float16,fp8,0,0.486191987991333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,12,64,0,1,fp8,fp8,0,0.4437333345413208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,1,64,128,1,float16,float16,0,0.4610186815261841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,1,64,0,1,float16,float16,0,0.45070401827494305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,1,64,128,1,float16,fp8,0,0.4573813279469808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,1,64,128,1,fp8,fp8,0,0.5993760029474894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,1,64,0,1,float16,fp8,0,0.4462133248647054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,1,64,0,1,fp8,fp8,0,0.40299733479817706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,2,64,128,1,float16,float16,0,0.4652266502380371
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,2,64,0,1,float16,float16,0,0.4529600143432617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,2,64,128,1,float16,fp8,0,0.46404266357421875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,2,64,128,1,fp8,fp8,0,0.6062186559041342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,2,64,0,1,float16,fp8,0,0.4506773153940837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,2,64,0,1,fp8,fp8,0,0.4079626798629761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,4,64,128,1,float16,float16,0,0.4732160170873006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,4,64,0,1,float16,float16,0,0.4635733366012573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,4,64,128,1,float16,fp8,0,0.47523732980092365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,4,64,128,1,fp8,fp8,0,0.6225493351618449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,4,64,0,1,float16,fp8,0,0.464469313621521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,4,64,0,1,fp8,fp8,0,0.4262400070826213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,12,64,128,1,float16,float16,0,0.26481600602467853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,12,64,0,1,float16,float16,0,0.2574346661567688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,12,64,128,1,float16,fp8,0,0.26123199860254925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,12,64,128,1,fp8,fp8,0,0.3357866605122884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,12,64,0,1,float16,fp8,0,0.25618666410446167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,12,64,0,1,fp8,fp8,0,0.22604266802469888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,1,64,128,1,float16,float16,0,0.24453334013621011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,1,64,0,1,float16,float16,0,0.23767467339833578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,1,64,128,1,float16,fp8,0,0.24304000536600748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,1,64,128,1,fp8,fp8,0,0.3178773323694865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,1,64,0,1,float16,fp8,0,0.23581333955128989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,1,64,0,1,fp8,fp8,0,0.21191465854644775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,2,64,128,1,float16,float16,0,0.24631466468175253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,4,64,128,1,float16,float16,0,0.2505439917246501
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,2,64,0,1,float16,float16,0,0.24021865924199423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,2,64,128,1,float16,fp8,0,0.24785067637761435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,2,64,128,1,fp8,fp8,0,0.32470933596293133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,2,64,0,1,float16,fp8,0,0.2376319964726766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,2,64,0,1,fp8,fp8,0,0.21496532360712686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,4,64,0,1,float16,float16,0,0.24441067377726236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,4,64,128,1,float16,fp8,0,0.2520266572634379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,4,64,128,1,fp8,fp8,0,0.32472000519434613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,4,64,0,1,float16,fp8,0,0.24528000752131143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,4,64,0,1,fp8,fp8,0,0.21744000911712646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,12,64,128,1,float16,float16,0,0.14627733826637268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,12,64,0,1,float16,float16,0,0.14473066727320352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,12,64,128,1,float16,fp8,0,0.14548800388971964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,12,64,128,1,fp8,fp8,0,0.18532800674438477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,12,64,0,1,float16,fp8,0,0.14284800489743552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,12,64,0,1,fp8,fp8,0,0.12459199627240498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,1,64,128,1,float16,float16,0,0.13339199622472128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,1,64,0,1,float16,float16,0,0.1283146639664968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,1,64,128,1,float16,fp8,0,0.13352533181508383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,1,64,128,1,fp8,fp8,0,0.17505067586898804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,1,64,0,1,float16,fp8,0,0.126991997162501
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,1,64,0,1,fp8,fp8,0,0.11615467071533203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,2,64,128,1,float16,float16,0,0.1362666686375936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,2,64,0,1,float16,float16,0,0.12942399581273398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,2,64,128,1,float16,fp8,0,0.1365120013554891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,2,64,128,1,fp8,fp8,0,0.17867199579874674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,2,64,0,1,float16,fp8,0,0.12941333651542664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,2,64,0,1,fp8,fp8,0,0.11755733688672383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,4,64,128,1,float16,float16,0,0.13934399684270224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,4,64,0,1,float16,float16,0,0.13170133034388223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,4,64,128,1,float16,fp8,0,0.13978667060534158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,4,64,128,1,fp8,fp8,0,0.18026665846506754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,4,64,0,1,float16,fp8,0,0.13330666224161783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,4,64,0,1,fp8,fp8,0,0.11954133709271748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,12,64,128,1,float16,float16,0,0.08714666962623596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,12,64,0,1,float16,float16,0,0.07962133487065633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,12,64,128,1,float16,fp8,0,0.08611733714739482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,12,64,128,1,fp8,fp8,0,0.10866666833559673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,12,64,0,1,float16,fp8,0,0.07937600215276082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,12,64,0,1,fp8,fp8,0,0.07338666419188182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,1,64,128,1,float16,float16,0,0.07725333174069722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,1,64,0,1,float16,float16,0,0.07153599957625072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,1,64,128,1,float16,fp8,0,0.07641066610813141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,1,64,128,1,fp8,fp8,0,0.09776533643404643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,1,64,0,1,float16,fp8,0,0.0722453345855077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,1,64,0,1,fp8,fp8,0,0.06528000036875407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,2,64,128,1,float16,float16,0,0.07840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,2,64,0,1,float16,float16,0,0.07327466706434886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,2,64,128,1,float16,fp8,0,0.07784533500671387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,2,64,128,1,fp8,fp8,0,0.10161067048708598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,2,64,0,1,float16,fp8,0,0.07303466896216075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,2,64,0,1,fp8,fp8,0,0.06671999891599019
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,4,64,128,1,float16,float16,0,0.08099733293056488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,4,64,0,1,float16,float16,0,0.07469866673151652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,4,64,128,1,float16,fp8,0,0.07934399942557017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,4,64,128,1,fp8,fp8,0,0.10419733325640361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,4,64,0,1,float16,fp8,0,0.07509866853555043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,4,64,0,1,fp8,fp8,0,0.06923733154932658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,12,64,128,1,float16,float16,0,0.051594664653142296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,12,64,0,1,float16,float16,0,0.047824000318845115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,12,64,128,1,float16,fp8,0,0.052442664901415505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,12,64,128,1,fp8,fp8,0,0.06932266553243001
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,12,64,0,1,float16,fp8,0,0.046944002310434975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,12,64,0,1,fp8,fp8,0,0.04554133117198944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,1,64,128,1,float16,float16,0,0.04900800188382467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,1,64,0,1,float16,float16,0,0.04464533428351084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,1,64,128,1,float16,fp8,0,0.04930666585763296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,1,64,128,1,fp8,fp8,0,0.0627040018637975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,1,64,0,1,float16,fp8,0,0.044719999035199486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,1,64,0,1,fp8,fp8,0,0.04438399771849314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,2,64,128,1,float16,float16,0,0.049584001302719116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,2,64,0,1,float16,float16,0,0.045194665590922035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,2,64,128,1,float16,fp8,0,0.04976533353328705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,2,64,128,1,fp8,fp8,0,0.06338133414586385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,2,64,0,1,float16,fp8,0,0.045594667394955955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,2,64,0,1,fp8,fp8,0,0.04237333436806997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,4,64,128,1,float16,float16,0,0.050016000866889954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,4,64,0,1,float16,float16,0,0.046053335070610046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,4,64,128,1,float16,fp8,0,0.05082666873931885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,4,64,128,1,fp8,fp8,0,0.06372266511122386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,4,64,0,1,float16,fp8,0,0.04597333570321401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,4,64,0,1,fp8,fp8,0,0.04249600072701772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,12,64,128,1,float16,float16,0,0.03809066613515218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,12,64,0,1,float16,float16,0,0.03508266558249792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,12,64,128,1,float16,fp8,0,0.03803733239571253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,12,64,128,1,fp8,fp8,0,0.043285335103670754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,1,64,0,1,float16,fp8,0,0.033941333492596946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,12,64,0,1,float16,fp8,0,0.03523733218510946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,12,64,0,1,fp8,fp8,0,0.034128000338872276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,1,64,128,1,float16,float16,0,0.036933332681655884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,1,64,0,1,float16,float16,0,0.033146666983763375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,1,64,128,1,float16,fp8,0,0.0382080003619194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,1,64,128,1,fp8,fp8,0,0.04250133534272512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,1,64,0,1,fp8,fp8,0,0.03151999910672506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,2,64,128,1,float16,float16,0,0.03711466739575068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,2,64,0,1,float16,float16,0,0.03352533280849457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,2,64,128,1,float16,fp8,0,0.03618133316437403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,2,64,128,1,fp8,fp8,0,0.04240000247955322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,2,64,0,1,float16,fp8,0,0.03358400116364161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,2,64,0,1,fp8,fp8,0,0.03181866556406021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,4,64,128,1,float16,float16,0,0.03851199895143509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,4,64,0,1,float16,float16,0,0.034527999659379326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,4,64,128,1,float16,fp8,0,0.03745600084463755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,4,64,128,1,fp8,fp8,0,0.042805333932240806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,4,64,0,1,float16,fp8,0,0.03436800092458725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,4,64,0,1,fp8,fp8,0,0.032405334214369454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,12,64,128,1,float16,float16,0,0.028181334336598713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,12,64,0,1,float16,float16,0,0.02496533344189326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,12,64,128,1,float16,fp8,0,0.02700799951950709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,12,64,128,1,fp8,fp8,0,0.03427733232577642
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,12,64,0,1,float16,fp8,0,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,12,64,0,1,fp8,fp8,0,0.023589332898457844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,1,64,128,1,float16,float16,0,0.026362667481104534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,1,64,0,1,float16,float16,0,0.02409599969784419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,1,64,128,1,float16,fp8,0,0.026021334032217663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,1,64,128,1,fp8,fp8,0,0.03298133363326391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,1,64,0,1,float16,fp8,0,0.024160000185171764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,1,64,0,1,fp8,fp8,0,0.02362666775782903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,2,64,128,1,float16,float16,0,0.026250667870044708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,2,64,0,1,float16,float16,0,0.023951999843120575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,2,64,128,1,float16,fp8,0,0.028325334191322327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,2,64,128,1,fp8,fp8,0,0.033013333876927696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,2,64,0,1,float16,fp8,0,0.02404266595840454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,2,64,0,1,fp8,fp8,0,0.02327999969323476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,4,64,128,1,float16,float16,0,0.026234666506449383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,4,64,0,1,float16,float16,0,0.025231999655564625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,12,64,0,1,float16,float16,0,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,4,64,128,1,float16,fp8,0,0.026752000053723652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,4,64,128,1,fp8,fp8,0,0.03278400003910065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,4,64,0,1,float16,fp8,0,0.02470933397610982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,4,64,0,1,fp8,fp8,0,0.02380266785621643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,12,64,128,1,float16,float16,0,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,12,64,128,1,float16,fp8,0,0.025360000630219776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,12,64,128,1,fp8,fp8,0,0.03253866732120514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,12,64,0,1,float16,fp8,0,0.02384000023206075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,12,64,0,1,fp8,fp8,0,0.022805333137512207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,1,64,128,1,float16,float16,0,0.02491733431816101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,1,64,0,1,float16,float16,0,0.023669332265853882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,1,64,128,1,float16,fp8,0,0.025562666356563568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,1,64,128,1,fp8,fp8,0,0.03169599920511246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,1,64,0,1,float16,fp8,0,0.023402666052182514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,1,64,0,1,fp8,fp8,0,0.02254933367172877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,2,64,128,1,float16,float16,0,0.025226667523384094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,4,64,128,1,float16,float16,0,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,2,64,0,1,float16,float16,0,0.023237332701683044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,2,64,128,1,float16,fp8,0,0.025797332326571148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,2,64,128,1,fp8,fp8,0,0.03183466692765554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,2,64,0,1,float16,fp8,0,0.023503998915354412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,2,64,0,1,fp8,fp8,0,0.02271999915440877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,4,64,0,1,float16,float16,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,4,64,128,1,float16,fp8,0,0.02593066543340683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,4,64,128,1,fp8,fp8,0,0.031888000667095184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,4,64,0,1,float16,fp8,0,0.024090667565663654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,4,64,0,1,fp8,fp8,0,0.022511998812357586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,12,1,64,128,1,float16,float16,0,0.38714667161305744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,12,1,64,0,1,float16,float16,0,0.37600000699361164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,12,1,64,128,1,float16,fp8,0,0.3870933453241984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,12,1,64,0,1,float16,fp8,0,0.3731520175933838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,12,1,64,128,1,fp8,fp8,0,0.5114293495814005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,12,1,64,0,1,fp8,fp8,0,0.34408001104990643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,12,2,64,128,1,float16,float16,0,0.38943998018900555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,12,2,64,0,1,float16,float16,0,0.3779093424479167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,12,2,64,128,1,float16,fp8,0,0.3853866656621297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,12,2,64,128,1,fp8,fp8,0,0.5210346778233846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,12,2,64,0,1,float16,fp8,0,0.37573333581288654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,12,2,64,0,1,fp8,fp8,0,0.34669331709543866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,12,4,64,128,1,float16,float16,0,0.39929068088531494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,12,4,64,0,1,float16,float16,0,0.3922293186187744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,12,4,64,128,1,float16,fp8,0,0.39933868249257404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,12,4,64,128,1,fp8,fp8,0,0.5404693285624186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,12,4,64,0,1,float16,fp8,0,0.39083198706309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,12,4,64,0,1,fp8,fp8,0,0.37328000863393146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,12,64,128,1,float16,float16,0,0.23135999838511148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,12,64,0,1,float16,float16,0,0.2269973357518514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,12,64,128,1,float16,fp8,0,0.22707732518513998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,12,64,128,1,fp8,fp8,0,0.28307199478149414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,12,64,0,1,float16,fp8,0,0.22472532590230307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,12,64,0,1,fp8,fp8,0,0.1972106695175171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,1,64,128,1,float16,float16,0,0.20468799273173013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,1,64,0,1,float16,float16,0,0.19881600141525269
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,1,64,128,1,float16,fp8,0,0.2047520081202189
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,1,64,128,1,fp8,fp8,0,0.26658666133880615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,1,64,0,1,float16,fp8,0,0.19778666893641153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,1,64,0,1,fp8,fp8,0,0.18414400021235147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,2,64,128,1,float16,float16,0,0.20690133174260458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,2,64,0,1,float16,float16,0,0.2002826730410258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,2,64,128,1,float16,fp8,0,0.2047626574834188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,2,64,128,1,fp8,fp8,0,0.2681279977162679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,2,64,0,1,float16,fp8,0,0.19918400049209595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,2,64,0,1,fp8,fp8,0,0.18476267655690512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,4,64,128,1,float16,float16,0,0.21237866083780924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,4,64,0,1,float16,float16,0,0.20900267362594604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,4,64,128,1,float16,fp8,0,0.21336533625920615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,4,64,128,1,fp8,fp8,0,0.2781813343365987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,4,64,0,1,float16,fp8,0,0.20837332804997763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,4,64,0,1,fp8,fp8,0,0.19531200329462686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,12,64,128,1,float16,float16,0,0.12894399960835776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,12,64,0,1,float16,float16,0,0.12708800037701926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,12,64,128,1,float16,fp8,0,0.12667199969291687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,12,64,128,1,fp8,fp8,0,0.1530080040295919
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,12,64,0,1,float16,fp8,0,0.12569066882133484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,12,64,0,1,fp8,fp8,0,0.11030933260917664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,1,64,128,1,float16,float16,0,0.10868266224861145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,1,64,0,1,float16,float16,0,0.10570133725802104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,1,64,128,1,float16,fp8,0,0.10866666833559673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,1,64,128,1,fp8,fp8,0,0.1421013375123342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,1,64,0,1,float16,fp8,0,0.10573333501815796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,1,64,0,1,fp8,fp8,0,0.10212266445159912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,2,64,128,1,float16,float16,0,0.11051733295122783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,2,64,0,1,float16,float16,0,0.10798399647076924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,2,64,128,1,float16,fp8,0,0.10998933513959248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,2,64,128,1,fp8,fp8,0,0.1431893308957418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,2,64,0,1,float16,fp8,0,0.10718400279680888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,2,64,0,1,fp8,fp8,0,0.10090667009353638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,4,64,128,1,float16,float16,0,0.11848533153533936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,4,64,0,1,float16,float16,0,0.11406933267911275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,4,64,128,1,float16,fp8,0,0.11556266744931538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,4,64,128,1,fp8,fp8,0,0.1490133305390676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,4,64,0,1,float16,fp8,0,0.11412266890207927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,4,64,0,1,fp8,fp8,0,0.10811733206113179
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,12,64,128,1,float16,float16,0,0.07575466732184093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,12,64,0,1,float16,float16,0,0.0765173335870107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,12,64,128,1,float16,fp8,0,0.07379200061162312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,12,64,128,1,fp8,fp8,0,0.08665600419044495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,12,64,0,1,float16,fp8,0,0.07478933533032735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,12,64,0,1,fp8,fp8,0,0.06512000163396199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,1,64,128,1,float16,float16,0,0.06664533416430156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,1,64,0,1,float16,float16,0,0.06435200075308482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,1,64,128,1,float16,fp8,0,0.0635040005048116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,2,64,128,1,float16,fp8,0,0.066021333138148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,1,64,128,1,fp8,fp8,0,0.07691733539104462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,1,64,0,1,float16,fp8,0,0.06276266773541768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,1,64,0,1,fp8,fp8,0,0.05817066629727682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,2,64,128,1,float16,float16,0,0.06613333523273468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,2,64,0,1,float16,float16,0,0.06373866895834605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,2,64,128,1,fp8,fp8,0,0.07964799801508586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,2,64,0,1,float16,fp8,0,0.06533333162466685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,2,64,0,1,fp8,fp8,0,0.06019733349482218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,4,64,128,1,float16,float16,0,0.06614399949709575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,4,64,0,1,float16,float16,0,0.06781333188215892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,4,64,128,1,float16,fp8,0,0.06678933401902516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,4,64,128,1,fp8,fp8,0,0.08357333143552144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,4,64,0,1,float16,fp8,0,0.06777599950631459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,4,64,0,1,fp8,fp8,0,0.06205866734186808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,12,64,128,1,float16,float16,0,0.04318400224049886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,12,64,0,1,float16,float16,0,0.04060266663630804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,12,64,128,1,float16,fp8,0,0.041434665520985924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,12,64,128,1,fp8,fp8,0,0.050885334610939026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,12,64,0,1,float16,fp8,0,0.040565334260463715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,12,64,0,1,fp8,fp8,0,0.04113066693147024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,1,64,128,1,float16,float16,0,0.03955200066169103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,1,64,0,1,float16,float16,0,0.03804266701141993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,1,64,128,1,float16,fp8,0,0.03884266565243403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,1,64,128,1,fp8,fp8,0,0.04667200148105621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,1,64,0,1,float16,fp8,0,0.03864533454179764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,1,64,0,1,fp8,fp8,0,0.03673599908749262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,2,64,128,1,float16,float16,0,0.03991466760635376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,2,64,0,1,float16,float16,0,0.03896533449490865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,2,64,128,1,float16,fp8,0,0.04010133445262909
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,4,64,128,1,fp8,fp8,0,0.04821333289146423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,2,64,128,1,fp8,fp8,0,0.047269334395726524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,2,64,0,1,float16,fp8,0,0.03839466720819473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,2,64,0,1,fp8,fp8,0,0.0373333344856898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,4,64,128,1,float16,float16,0,0.040991999208927155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,4,64,0,1,float16,float16,0,0.039706667264302574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,4,64,128,1,float16,fp8,0,0.04065066576004028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,4,64,0,1,float16,fp8,0,0.039834665755430855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,4,64,0,1,fp8,fp8,0,0.03847466657559077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,1,64,128,1,float16,float16,0,0.02993600070476532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,12,64,128,1,float16,float16,0,0.03215466688076655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,12,64,0,1,float16,float16,0,0.030767999589443207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,12,64,128,1,float16,fp8,0,0.032101333141326904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,12,64,128,1,fp8,fp8,0,0.03702933341264725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,12,64,0,1,float16,fp8,0,0.03177600105603536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,12,64,0,1,fp8,fp8,0,0.030293333033720653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,1,64,0,1,float16,float16,0,0.029301332930723827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,1,64,128,1,float16,fp8,0,0.029743999242782593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,1,64,128,1,fp8,fp8,0,0.036090667049090065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,1,64,0,1,float16,fp8,0,0.02917333443959554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,1,64,0,1,fp8,fp8,0,0.029045333464940388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,2,64,128,1,float16,float16,0,0.030026666820049286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,2,64,0,1,float16,float16,0,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,2,64,128,1,float16,fp8,0,0.02997333308060964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,2,64,128,1,fp8,fp8,0,0.03613866617282232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,2,64,0,1,float16,fp8,0,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,2,64,0,1,fp8,fp8,0,0.02985599885384242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,4,64,128,1,float16,float16,0,0.030634666482607525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,4,64,0,1,float16,float16,0,0.030389333764712017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,4,64,128,1,float16,fp8,0,0.030986666679382324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,4,64,128,1,fp8,fp8,0,0.03698666642109553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,4,64,0,1,float16,fp8,0,0.030400000512599945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,4,64,0,1,fp8,fp8,0,0.030133334298928578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,12,64,128,1,float16,float16,0,0.020389333367347717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,12,64,0,1,float16,float16,0,0.020762667059898376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,12,64,128,1,float16,fp8,0,0.02083733429511388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,12,64,128,1,fp8,fp8,0,0.024906667570273083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,12,64,0,1,float16,fp8,0,0.020202666521072388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,12,64,0,1,fp8,fp8,0,0.021488000949223835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,1,64,128,1,float16,float16,0,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,2,64,128,1,float16,float16,0,0.01953599974513054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,1,64,0,1,float16,float16,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,2,64,128,1,float16,fp8,0,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,1,64,128,1,float16,fp8,0,0.01959466685851415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,1,64,128,1,fp8,fp8,0,0.02477866659561793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,1,64,0,1,float16,fp8,0,0.01940800001223882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,1,64,0,1,fp8,fp8,0,0.02046400060256322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,2,64,0,1,float16,float16,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,2,64,128,1,fp8,fp8,0,0.025013332565625507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,2,64,0,1,float16,fp8,0,0.01953599974513054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,2,64,0,1,fp8,fp8,0,0.020831999679406483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,4,64,128,1,float16,float16,0,0.0198186660806338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,4,64,0,1,float16,float16,0,0.01950399950146675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,4,64,128,1,float16,fp8,0,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,12,64,128,1,float16,fp8,0,0.018229333062966663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,4,64,128,1,fp8,fp8,0,0.024645333488782246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,4,64,0,1,float16,fp8,0,0.019989332805077236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,4,64,0,1,fp8,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,12,64,128,1,float16,float16,0,0.017898666361967724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,12,64,0,1,float16,float16,0,0.01791999985774358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,12,64,128,1,fp8,fp8,0,0.024149333437283833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,12,64,0,1,float16,fp8,0,0.01803733284274737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,12,64,0,1,fp8,fp8,0,0.020597333709398907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,1,64,128,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,1,64,0,1,float16,float16,0,0.01747200017174085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,1,64,128,1,float16,fp8,0,0.01785600061217944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,1,64,128,1,fp8,fp8,0,0.02404800057411194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,1,64,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,2,64,0,1,float16,fp8,0,0.01752000053723653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,1,64,0,1,fp8,fp8,0,0.020074666788180668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,2,64,128,1,float16,float16,0,0.017338667064905167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,2,64,0,1,float16,float16,0,0.01741333305835724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,2,64,128,1,float16,fp8,0,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,2,64,128,1,fp8,fp8,0,0.024160000185171764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,2,64,0,1,fp8,fp8,0,0.02015999952952067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,4,64,128,1,float16,float16,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,4,64,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,4,64,128,1,float16,fp8,0,0.018122666825850803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,4,64,128,1,fp8,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,4,64,0,1,float16,fp8,0,0.017701332767804463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,4,64,0,1,fp8,fp8,0,0.02041600023706754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,12,64,128,1,float16,float16,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,12,64,0,1,float16,float16,0,0.016143999993801117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,12,64,128,1,float16,fp8,0,0.016522667060295742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,12,64,128,1,fp8,fp8,0,0.022970666488011677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,12,64,0,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,12,64,0,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,1,64,128,1,float16,float16,0,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,1,64,0,1,float16,float16,0,0.016271999726692837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,1,64,128,1,float16,fp8,0,0.016682667036851246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,1,64,128,1,fp8,fp8,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,1,64,0,1,float16,fp8,0,0.016042667130629223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,1,64,0,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,2,64,128,1,float16,float16,0,0.016197333733240765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,2,64,0,1,float16,float16,0,0.01609066625436147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,2,64,128,1,float16,fp8,0,0.01669866715868314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,2,64,128,1,fp8,fp8,0,0.022821334501107533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,2,64,0,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,2,64,0,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,4,64,128,1,float16,float16,0,0.01602666700879733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,4,64,0,1,float16,float16,0,0.01666133354107539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,4,64,128,1,float16,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,4,64,128,1,fp8,fp8,0,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,4,64,0,1,float16,fp8,0,0.01648533344268799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,4,64,0,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,12,1,64,128,1,float16,float16,0,0.1490666667620341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,12,1,64,0,1,float16,float16,0,0.14934933185577393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,12,1,64,128,1,float16,fp8,0,0.14844800035158792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,12,1,64,128,1,fp8,fp8,0,0.1791093349456787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,12,1,64,0,1,float16,fp8,0,0.14918933312098184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,12,1,64,0,1,fp8,fp8,0,0.18163732687632242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,12,2,64,128,1,float16,float16,0,0.1509173313776652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,12,2,64,0,1,float16,float16,0,0.15051733454068503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,12,2,64,128,1,float16,fp8,0,0.1487626632054647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,12,2,64,128,1,fp8,fp8,0,0.18793600797653198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,12,4,64,0,1,float16,fp8,0,0.15608533223470053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,12,2,64,0,1,float16,fp8,0,0.14866133530934653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,12,2,64,0,1,fp8,fp8,0,0.18612800041834512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,12,4,64,128,1,float16,float16,0,0.15651733676592508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,12,4,64,0,1,float16,float16,0,0.1556533376375834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,12,4,64,128,1,float16,fp8,0,0.15607466300328574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,12,4,64,128,1,fp8,fp8,0,0.19450666507085165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,12,64,128,1,float16,float16,0,0.09993066390355428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,12,4,64,0,1,fp8,fp8,0,0.19507733980814615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,12,64,0,1,float16,float16,0,0.09985599915186565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,12,64,128,1,float16,fp8,0,0.09796800216039021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,12,64,128,1,fp8,fp8,0,0.11000532905260722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,12,64,0,1,float16,fp8,0,0.09732799728711446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,12,64,0,1,fp8,fp8,0,0.10962133606274922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,1,64,0,1,fp8,fp8,0,0.10178132851918538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,1,64,128,1,float16,float16,0,0.08141333361466725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,1,64,0,1,float16,float16,0,0.08085866769154866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,1,64,128,1,float16,fp8,0,0.08099733293056488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,1,64,128,1,fp8,fp8,0,0.10212266445159912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,1,64,0,1,float16,fp8,0,0.08097599943478902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,2,64,128,1,float16,float16,0,0.08291199803352356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,2,64,0,1,float16,float16,0,0.08278400202592213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,2,64,128,1,float16,fp8,0,0.08275733391443889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,2,64,128,1,fp8,fp8,0,0.10364266236623128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,2,64,0,1,float16,fp8,0,0.08206933240095775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,2,64,0,1,fp8,fp8,0,0.1037013332049052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,4,64,128,1,float16,float16,0,0.0869599978129069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,4,64,0,1,float16,float16,0,0.08794666330019633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,4,64,128,1,float16,fp8,0,0.0874826709429423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,4,64,128,1,fp8,fp8,0,0.10620799660682678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,4,64,0,1,float16,fp8,0,0.08660800258318584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,4,64,0,1,fp8,fp8,0,0.10661866267522176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,12,64,128,1,float16,float16,0,0.05871999760468801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,12,64,0,1,float16,float16,0,0.0580213318268458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,12,64,128,1,float16,fp8,0,0.05598400036493937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,12,64,128,1,fp8,fp8,0,0.06517333288987477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,12,64,0,1,float16,fp8,0,0.055205335219701133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,12,64,0,1,fp8,fp8,0,0.06461866696675618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,1,64,0,1,fp8,fp8,0,0.05745066702365875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,1,64,128,1,float16,float16,0,0.04688533147176107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,1,64,0,1,float16,float16,0,0.0469760000705719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,1,64,128,1,float16,fp8,0,0.04706133405367533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,1,64,128,1,fp8,fp8,0,0.05770133435726166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,1,64,0,1,float16,fp8,0,0.047050664822260536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,2,64,128,1,float16,float16,0,0.047797332207361855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,2,64,0,1,float16,float16,0,0.04729066789150238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,2,64,128,1,float16,fp8,0,0.04806933303674062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,2,64,128,1,fp8,fp8,0,0.060378665725390114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,2,64,0,1,float16,fp8,0,0.04799466828505198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,2,64,0,1,fp8,fp8,0,0.05970133344332377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,4,64,128,1,float16,float16,0,0.049925332268079124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,4,64,0,1,float16,float16,0,0.04949333270390829
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,4,64,128,1,float16,fp8,0,0.050101334849993386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,4,64,128,1,fp8,fp8,0,0.06169599791367849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,4,64,0,1,float16,fp8,0,0.050160000721613564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,4,64,0,1,fp8,fp8,0,0.06189866860707601
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,12,64,128,1,float16,float16,0,0.03178666780392329
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,12,64,0,1,float16,float16,0,0.030832000076770782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,12,64,128,1,float16,fp8,0,0.03121600051720937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,12,64,128,1,fp8,fp8,0,0.03940266619126002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,12,64,0,1,float16,fp8,0,0.031632001201311745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,12,64,0,1,fp8,fp8,0,0.03860799968242645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,1,64,128,1,float16,float16,0,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,1,64,0,1,float16,float16,0,0.028565332293510437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,1,64,128,1,float16,fp8,0,0.028549333413441975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,1,64,128,1,fp8,fp8,0,0.03659199923276901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,1,64,0,1,float16,fp8,0,0.028757333755493164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,1,64,0,1,fp8,fp8,0,0.0364533339937528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,2,64,128,1,float16,float16,0,0.028565332293510437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,2,64,0,1,float16,float16,0,0.02861333390076955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,2,64,128,1,float16,fp8,0,0.029098667204380035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,2,64,128,1,fp8,fp8,0,0.036757332583268486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,2,64,0,1,float16,fp8,0,0.02871999889612198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,2,64,0,1,fp8,fp8,0,0.037045332292715706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,4,64,128,1,float16,float16,0,0.02976000060637792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,4,64,0,1,float16,float16,0,0.02935466667016347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,4,64,128,1,float16,fp8,0,0.02985599885384242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,4,64,128,1,fp8,fp8,0,0.03841066608826319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,4,64,0,1,float16,fp8,0,0.030016000072161358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,12,64,128,1,fp8,fp8,0,0.029898665845394135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,4,64,0,1,fp8,fp8,0,0.03782933453718821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,12,64,128,1,float16,float16,0,0.02459733436505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,12,64,0,1,float16,float16,0,0.02465066562096278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,12,64,128,1,float16,fp8,0,0.02473066747188568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,12,64,0,1,float16,fp8,0,0.024874667326609295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,12,64,0,1,fp8,fp8,0,0.03029866764942805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,1,64,128,1,float16,float16,0,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,1,64,0,1,float16,float16,0,0.02294933299223582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,1,64,128,1,float16,fp8,0,0.02342933416366577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,1,64,128,1,fp8,fp8,0,0.02935466667016347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,1,64,0,1,float16,fp8,0,0.023269332945346832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,1,64,0,1,fp8,fp8,0,0.028927999238173168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,2,64,128,1,float16,float16,0,0.0234400009115537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,2,64,0,1,float16,float16,0,0.023562667270501454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,2,64,128,1,float16,fp8,0,0.023770667612552643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,2,64,128,1,fp8,fp8,0,0.028880000114440918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,2,64,0,1,float16,fp8,0,0.02476266771554947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,2,64,0,1,fp8,fp8,0,0.029274667302767437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,4,64,128,1,float16,float16,0,0.02367466688156128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,4,64,0,1,float16,float16,0,0.024085332949956257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,4,64,128,1,float16,fp8,0,0.023946667710940044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,4,64,128,1,fp8,fp8,0,0.030154667794704437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,4,64,0,1,float16,fp8,0,0.024080000817775726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,4,64,0,1,fp8,fp8,0,0.030085332691669464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,12,64,0,1,fp8,fp8,0,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,12,64,128,1,float16,float16,0,0.01754133279124896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,12,64,0,1,float16,float16,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,12,64,128,1,float16,fp8,0,0.017866666118303936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,12,64,128,1,fp8,fp8,0,0.02146133283774058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,12,64,0,1,float16,fp8,0,0.017717332889636356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,1,64,128,1,float16,float16,0,0.016549333930015564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,1,64,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,2,64,0,1,float16,float16,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,1,64,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,1,64,128,1,fp8,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,1,64,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,1,64,0,1,fp8,fp8,0,0.020714666694402695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,2,64,128,1,float16,float16,0,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,2,64,128,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,2,64,128,1,fp8,fp8,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,2,64,0,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,2,64,0,1,fp8,fp8,0,0.022810667753219604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,4,64,128,1,float16,float16,0,0.017344000438849132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,4,64,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,4,64,128,1,float16,fp8,0,0.01754133279124896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,4,64,128,1,fp8,fp8,0,0.020474666108687718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,4,64,0,1,float16,fp8,0,0.017573333034912746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,4,64,0,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,12,64,128,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,12,64,0,1,float16,float16,0,0.01553600033124288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,12,64,128,1,float16,fp8,0,0.01575999955336253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,12,64,128,1,fp8,fp8,0,0.020400000115235645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,12,64,0,1,float16,fp8,0,0.015978666643301647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,12,64,0,1,fp8,fp8,0,0.020138667275508244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,1,64,128,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,1,64,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,1,64,128,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,1,64,128,1,fp8,fp8,0,0.020143999407688778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,1,64,0,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,1,64,0,1,fp8,fp8,0,0.020410666863123577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,2,64,0,1,fp8,fp8,0,0.020256000260512035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,2,64,128,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,2,64,0,1,float16,float16,0,0.014618666221698126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,4,64,128,1,fp8,fp8,0,0.0199946661790212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,2,64,128,1,float16,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,2,64,128,1,fp8,fp8,0,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,2,64,0,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,4,64,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,4,64,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,4,64,128,1,float16,fp8,0,0.015594666202863058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,4,64,0,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,4,64,0,1,fp8,fp8,0,0.019727999965349834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,12,64,128,1,float16,float16,0,0.014032000054915747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,12,64,0,1,float16,float16,0,0.014080000420411428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,12,64,128,1,float16,fp8,0,0.014389333625634512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,12,64,128,1,fp8,fp8,0,0.019600000232458115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,12,64,0,1,float16,fp8,0,0.014042666802803675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,12,64,0,1,fp8,fp8,0,0.018789333601792652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,1,64,128,1,float16,float16,0,0.014090667168299357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,1,64,0,1,float16,float16,0,0.014186666657527288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,1,64,128,1,float16,fp8,0,0.014181333283583323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,1,64,128,1,fp8,fp8,0,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,1,64,0,1,float16,fp8,0,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,1,64,0,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,2,64,128,1,float16,float16,0,0.013605333864688873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,2,64,0,1,float16,float16,0,0.014197333405415217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,2,64,128,1,float16,fp8,0,0.014618666221698126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,4,64,128,1,float16,fp8,0,0.014570667097965876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,2,64,128,1,fp8,fp8,0,0.018496000518401463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,2,64,0,1,float16,fp8,0,0.014432000617186228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,2,64,0,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,4,64,128,1,float16,float16,0,0.014127999544143677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,4,64,0,1,float16,float16,0,0.013658666362365087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,4,64,128,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,4,64,0,1,float16,fp8,0,0.014736000448465347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,4,64,0,1,fp8,fp8,0,0.018565333137909572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,12,64,128,1,float16,float16,0,0.013717333475748697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,12,64,0,1,float16,float16,0,0.013722666849692663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,12,64,128,1,float16,fp8,0,0.013744000345468521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,12,64,128,1,fp8,fp8,0,0.018538666268189747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,12,64,0,1,float16,fp8,0,0.013855999956528345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,12,64,0,1,fp8,fp8,0,0.01841066653529803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,1,64,128,1,float16,float16,0,0.013471999516089758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,1,64,0,1,float16,float16,0,0.013754667093356451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,1,64,128,1,float16,fp8,0,0.014197333405415217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,1,64,128,1,fp8,fp8,0,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,1,64,0,1,float16,fp8,0,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,1,64,0,1,fp8,fp8,0,0.018645333747069042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,2,64,128,1,float16,float16,0,0.013354666531085968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,2,64,0,1,float16,float16,0,0.013493333011865616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,2,64,128,1,float16,fp8,0,0.014256000518798828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,4,64,128,1,float16,fp8,0,0.013807999591032663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,4,64,0,1,float16,fp8,0,0.014303999642531076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,2,64,128,1,fp8,fp8,0,0.018570666511853535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,2,64,0,1,float16,fp8,0,0.013999999811251959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,12,1,64,128,1,float16,float16,0,0.076773335536321
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,12,1,64,0,1,float16,float16,0,0.07593066493670146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,2,64,0,1,fp8,fp8,0,0.018645333747069042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,4,64,128,1,float16,float16,0,0.013386666774749756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,12,1,64,128,1,fp8,fp8,0,0.1329973340034485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,4,64,0,1,float16,float16,0,0.013584000368913015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,4,64,128,1,fp8,fp8,0,0.018485333770513535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,4,64,0,1,fp8,fp8,0,0.018757333358128864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,12,1,64,128,1,float16,fp8,0,0.07632533212502797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,12,1,64,0,1,float16,fp8,0,0.07670933504899342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,12,1,64,0,1,fp8,fp8,0,0.1341919998327891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,12,2,64,128,1,float16,float16,0,0.07877333462238312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,12,2,64,0,1,float16,float16,0,0.07809600234031677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,12,2,64,128,1,float16,fp8,0,0.07698133091131847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,12,2,64,128,1,fp8,fp8,0,0.13482133547465006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,12,2,64,0,1,float16,fp8,0,0.077674667040507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,12,2,64,0,1,fp8,fp8,0,0.13527466853459677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,12,4,64,128,1,float16,float16,0,0.08260799944400787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,12,4,64,0,1,float16,float16,0,0.08201066652933757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,12,4,64,128,1,float16,fp8,0,0.08203733464082082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,12,4,64,128,1,fp8,fp8,0,0.13843733072280884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,12,4,64,0,1,float16,fp8,0,0.08191466828187306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,12,4,64,0,1,fp8,fp8,0,0.1388106644153595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,12,64,128,1,float16,float16,0,0.05342400074005127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,12,64,0,1,float16,float16,0,0.05287466446558634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,12,64,128,1,float16,fp8,0,0.05212266743183136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,12,64,128,1,fp8,fp8,0,0.08331733445326488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,12,64,0,1,float16,fp8,0,0.0517493337392807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,12,64,0,1,fp8,fp8,0,0.08191466828187306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,1,64,128,1,float16,float16,0,0.044079999128977455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,1,64,0,1,float16,float16,0,0.04417600234349569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,1,64,128,1,float16,fp8,0,0.04716266691684723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,1,64,128,1,fp8,fp8,0,0.07436800003051758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,1,64,0,1,float16,fp8,0,0.04380266865094503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,1,64,0,1,fp8,fp8,0,0.07508266468842824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,2,64,128,1,float16,float16,0,0.04418133199214935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,2,64,0,1,float16,float16,0,0.044666667779286705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,2,64,128,1,float16,fp8,0,0.04452799757321676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,2,64,128,1,fp8,fp8,0,0.07748266557852428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,2,64,0,1,float16,fp8,0,0.04433600107828776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,2,64,0,1,fp8,fp8,0,0.07692266503969829
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,12,64,128,1,float16,float16,0,0.02916266769170761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,4,64,128,1,float16,float16,0,0.04622933268547058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,4,64,0,1,float16,float16,0,0.04588800172011057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,4,64,128,1,float16,fp8,0,0.04656533400217692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,4,64,128,1,fp8,fp8,0,0.07735999921957652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,4,64,0,1,float16,fp8,0,0.04653333127498627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,4,64,0,1,fp8,fp8,0,0.079434668024381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,12,64,0,1,float16,float16,0,0.030181333422660828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,12,64,128,1,float16,fp8,0,0.028618666032950085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,1,64,128,1,float16,fp8,0,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,12,64,128,1,fp8,fp8,0,0.047456001242001854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,12,64,0,1,float16,fp8,0,0.02889599899450938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,12,64,0,1,fp8,fp8,0,0.04797333478927612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,1,64,128,1,float16,float16,0,0.027093333502610523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,1,64,0,1,float16,float16,0,0.026885333160559338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,1,64,128,1,fp8,fp8,0,0.044581333796183266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,1,64,0,1,float16,fp8,0,0.027647999425729115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,1,64,0,1,fp8,fp8,0,0.04494399825731913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,2,64,128,1,float16,float16,0,0.027290667096773785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,2,64,0,1,float16,float16,0,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,2,64,128,1,float16,fp8,0,0.027477333943049114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,2,64,128,1,fp8,fp8,0,0.04473066826661428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,2,64,0,1,float16,fp8,0,0.02735466758410136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,2,64,0,1,fp8,fp8,0,0.044768000642458596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,4,64,128,1,float16,float16,0,0.028543998797734577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,4,64,0,1,fp8,fp8,0,0.04568000137805939
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,4,64,0,1,float16,float16,0,0.02812266598145167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,4,64,128,1,float16,fp8,0,0.028058665494124096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,4,64,128,1,fp8,fp8,0,0.046154667933781944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,4,64,0,1,float16,fp8,0,0.028170667588710785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,12,64,128,1,float16,float16,0,0.022442666192849476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,12,64,0,1,float16,float16,0,0.02250666668017705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,12,64,128,1,float16,fp8,0,0.022384000321229298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,12,64,128,1,fp8,fp8,0,0.03216533362865448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,12,64,0,1,float16,fp8,0,0.022389332453409832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,12,64,0,1,fp8,fp8,0,0.0324799989660581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,1,64,128,1,float16,float16,0,0.020714666694402695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,1,64,0,1,float16,float16,0,0.021365332106749218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,1,64,128,1,float16,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,1,64,128,1,fp8,fp8,0,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,1,64,0,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,1,64,0,1,fp8,fp8,0,0.03143999973932902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,2,64,128,1,float16,float16,0,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,2,64,0,1,float16,float16,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,2,64,128,1,float16,fp8,0,0.021594665944576263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,2,64,128,1,fp8,fp8,0,0.03136000037193298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,2,64,0,1,float16,fp8,0,0.02199466774861018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,2,64,0,1,fp8,fp8,0,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,4,64,128,1,float16,float16,0,0.021984001000722248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,4,64,0,1,float16,float16,0,0.021589333812395733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,4,64,128,1,float16,fp8,0,0.022197333474953968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,4,64,128,1,fp8,fp8,0,0.032586666444937386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,4,64,0,1,float16,fp8,0,0.02205866575241089
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,4,64,0,1,fp8,fp8,0,0.03262399882078171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,12,64,128,1,float16,float16,0,0.016106666376193363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,12,64,0,1,float16,float16,0,0.01646399994691213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,12,64,128,1,float16,fp8,0,0.01632533346613248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,12,64,128,1,fp8,fp8,0,0.02476799984773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,12,64,0,1,float16,fp8,0,0.01640533283352852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,12,64,0,1,fp8,fp8,0,0.02497600018978119
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,1,64,128,1,float16,float16,0,0.015685333559910457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,1,64,0,1,float16,float16,0,0.015967999895413715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,1,64,128,1,float16,fp8,0,0.016186666985352833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,1,64,128,1,fp8,fp8,0,0.0239680012067159
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,1,64,0,1,float16,fp8,0,0.016122666498025257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,1,64,0,1,fp8,fp8,0,0.023733332753181458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,2,64,128,1,float16,float16,0,0.015962666521469753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,2,64,0,1,float16,float16,0,0.01570133368174235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,2,64,128,1,float16,fp8,0,0.016389333953460056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,2,64,128,1,fp8,fp8,0,0.024442667762438457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,2,64,0,1,float16,fp8,0,0.016314666718244553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,2,64,0,1,fp8,fp8,0,0.023823998868465424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,4,64,0,1,fp8,fp8,0,0.024442667762438457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,4,64,128,1,float16,float16,0,0.015765332927306492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,4,64,0,1,float16,float16,0,0.01590399940808614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,4,64,128,1,float16,fp8,0,0.01621333385507266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,4,64,128,1,fp8,fp8,0,0.024058667321999867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,4,64,0,1,float16,fp8,0,0.0163680004576842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,12,64,128,1,float16,float16,0,0.013647999614477158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,12,64,0,1,float16,float16,0,0.013914667069911957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,12,64,128,1,float16,fp8,0,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,12,64,128,1,fp8,fp8,0,0.020389333367347717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,12,64,0,1,float16,fp8,0,0.014501333236694336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,12,64,0,1,fp8,fp8,0,0.02046400060256322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,1,64,128,1,float16,float16,0,0.013669333110253016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,1,64,0,1,float16,float16,0,0.014032000054915747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,1,64,128,1,float16,fp8,0,0.014069333672523499
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,1,64,128,1,fp8,fp8,0,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,1,64,0,1,float16,fp8,0,0.014277332772811254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,1,64,0,1,fp8,fp8,0,0.020021333048741024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,2,64,128,1,float16,float16,0,0.014138666292031607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,2,64,0,1,float16,float16,0,0.013647999614477158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,2,64,128,1,float16,fp8,0,0.014015999933083853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,2,64,128,1,fp8,fp8,0,0.02024000013868014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,2,64,0,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,2,64,0,1,fp8,fp8,0,0.02032533288002014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,4,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,4,64,0,1,float16,float16,0,0.013765333841244379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,4,64,128,1,float16,fp8,0,0.014533333480358124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,4,64,128,1,fp8,fp8,0,0.02022933339079221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,4,64,0,1,float16,fp8,0,0.014474666366974512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,4,64,0,1,fp8,fp8,0,0.020245333512624104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,12,64,128,1,float16,float16,0,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,12,64,0,1,float16,float16,0,0.012645332763592402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,12,64,128,1,float16,fp8,0,0.013376000026861826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,12,64,128,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,12,64,0,1,float16,fp8,0,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,12,64,0,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,1,64,128,1,float16,float16,0,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,1,64,0,1,float16,float16,0,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,1,64,128,1,float16,fp8,0,0.013381333400805792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,1,64,128,1,fp8,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,1,64,0,1,float16,fp8,0,0.013487999637921652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,1,64,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,2,64,128,1,float16,float16,0,0.012666666259368261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,2,64,0,1,float16,float16,0,0.01268799975514412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,2,64,128,1,float16,fp8,0,0.013418667018413544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,2,64,128,1,fp8,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,2,64,0,1,float16,fp8,0,0.013232000172138214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,2,64,0,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,4,64,128,1,float16,float16,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,4,64,0,1,float16,float16,0,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,4,64,128,1,float16,fp8,0,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,4,64,128,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,4,64,0,1,float16,fp8,0,0.013514666507641474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,4,64,0,1,fp8,fp8,0,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,12,64,128,1,float16,float16,0,0.012186666329701742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,12,64,0,1,float16,float16,0,0.0124746672809124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,12,64,128,1,float16,fp8,0,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,12,64,128,1,fp8,fp8,0,0.018415999909241993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,12,64,0,1,float16,fp8,0,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,12,64,0,1,fp8,fp8,0,0.018629333625237148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,1,64,128,1,float16,float16,0,0.01250133290886879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,1,64,0,1,float16,float16,0,0.012383999923865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,1,64,128,1,float16,fp8,0,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,1,64,128,1,fp8,fp8,0,0.018629333625237148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,1,64,0,1,float16,fp8,0,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,1,64,0,1,fp8,fp8,0,0.01858666663368543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,2,64,128,1,float16,float16,0,0.0124746672809124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,2,64,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,2,64,128,1,float16,fp8,0,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,2,64,128,1,fp8,fp8,0,0.018757333358128864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,2,64,0,1,float16,fp8,0,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,2,64,0,1,fp8,fp8,0,0.018613333503405254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,4,64,128,1,float16,float16,0,0.012576000144084295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,4,64,0,1,float16,float16,0,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,4,64,128,1,float16,fp8,0,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,4,64,128,1,fp8,fp8,0,0.01823466643691063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,4,64,0,1,float16,fp8,0,0.013258667041858038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,4,64,0,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,12,64,128,1,float16,float16,0,0.012144000579913458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,12,64,0,1,float16,float16,0,0.012533333152532578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,12,64,128,1,float16,fp8,0,0.01240533341964086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,12,64,128,1,fp8,fp8,0,0.018661333868900936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,12,64,0,1,float16,fp8,0,0.012538666526476542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,12,64,0,1,fp8,fp8,0,0.018415999909241993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,1,64,128,1,float16,float16,0,0.012645332763592402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,1,64,0,1,float16,float16,0,0.012485332787036896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,1,64,128,1,float16,fp8,0,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,1,64,128,1,fp8,fp8,0,0.018330667167901993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,1,64,0,1,float16,fp8,0,0.012613333761692047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,1,64,0,1,fp8,fp8,0,0.018794666975736618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,2,64,128,1,float16,float16,0,0.012442667037248611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,2,64,0,1,float16,float16,0,0.012624000509579977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,2,64,128,1,float16,fp8,0,0.013237333546082178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,2,64,128,1,fp8,fp8,0,0.018672000616788864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,2,64,0,1,float16,fp8,0,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,2,64,0,1,fp8,fp8,0,0.01850133389234543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,4,64,128,1,float16,float16,0,0.012186666329701742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,4,64,0,1,float16,float16,0,0.012373333175977072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,4,64,128,1,float16,fp8,0,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,4,64,128,1,fp8,fp8,0,0.018485333770513535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,4,64,0,1,float16,fp8,0,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,4,64,0,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,12,1,64,128,1,float16,float16,0,0.0470773329337438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,12,1,64,0,1,float16,float16,0,0.0469706654548645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,12,1,64,128,1,float16,fp8,0,0.04655466477076212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,12,1,64,128,1,fp8,fp8,0,0.10889066259066264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,12,1,64,0,1,float16,fp8,0,0.04693866769472758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,12,1,64,0,1,fp8,fp8,0,0.11167466640472412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,12,2,64,128,1,float16,float16,0,0.04766400158405304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,12,2,64,0,1,float16,float16,0,0.04750399788220724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,12,2,64,128,1,float16,fp8,0,0.04782933493455251
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,12,2,64,128,1,fp8,fp8,0,0.11238933602968852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,12,2,64,0,1,float16,fp8,0,0.04738133152325948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,12,2,64,0,1,fp8,fp8,0,0.11033067107200623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,12,4,64,128,1,float16,float16,0,0.04957333207130432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,12,4,64,0,1,float16,float16,0,0.04996266464392344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,12,4,64,128,1,float16,fp8,0,0.049178664882977806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,12,4,64,128,1,fp8,fp8,0,0.1139306624730428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,12,4,64,0,1,float16,fp8,0,0.04858666658401489
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,12,4,64,0,1,fp8,fp8,0,0.11379733681678772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,12,64,128,1,float16,float16,0,0.030559999247392017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,12,64,0,1,float16,float16,0,0.030661332110563915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,12,64,128,1,float16,fp8,0,0.030000001192092896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,12,64,128,1,fp8,fp8,0,0.06774933139483134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,12,64,0,1,float16,fp8,0,0.029743999242782593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,12,64,0,1,fp8,fp8,0,0.06558933357397716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,1,64,128,1,float16,float16,0,0.02864533414443334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,1,64,0,1,float16,float16,0,0.02865600089232127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,1,64,128,1,float16,fp8,0,0.029088000456492107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,1,64,128,1,fp8,fp8,0,0.06311466793219249
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,1,64,0,1,float16,fp8,0,0.028543998797734577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,1,64,0,1,fp8,fp8,0,0.06340266764163971
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,2,64,128,1,float16,float16,0,0.029194665451844532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,2,64,0,1,float16,float16,0,0.02890666574239731
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,2,64,128,1,float16,fp8,0,0.029189333319664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,2,64,128,1,fp8,fp8,0,0.06403733293215434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,2,64,0,1,float16,fp8,0,0.02939733366171519
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,2,64,0,1,fp8,fp8,0,0.06461866696675618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,4,64,128,1,float16,float16,0,0.029504001140594482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,4,64,0,1,float16,float16,0,0.029994666576385498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,4,64,128,1,float16,fp8,0,0.029946667452653248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,4,64,128,1,fp8,fp8,0,0.06497600177923839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,4,64,0,1,float16,fp8,0,0.029904000461101532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,4,64,0,1,fp8,fp8,0,0.06497066716353099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,12,64,128,1,float16,float16,0,0.02199466774861018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,12,64,0,1,float16,float16,0,0.02197333425283432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,12,64,128,1,float16,fp8,0,0.021701333423455555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,12,64,128,1,fp8,fp8,0,0.04268800218900045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,12,64,0,1,float16,fp8,0,0.02186666677395503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,12,64,0,1,fp8,fp8,0,0.04240000247955322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,1,64,128,1,float16,float16,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,1,64,0,1,float16,float16,0,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,1,64,128,1,float16,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,1,64,128,1,fp8,fp8,0,0.041663999358812966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,1,64,0,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,1,64,0,1,fp8,fp8,0,0.04119999955097834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,2,64,128,1,float16,float16,0,0.020666666328907013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,2,64,0,1,float16,float16,0,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,2,64,128,1,float16,fp8,0,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,2,64,128,1,fp8,fp8,0,0.04176533222198486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,2,64,0,1,float16,fp8,0,0.021802666286627453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,2,64,0,1,fp8,fp8,0,0.04153066625197729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,4,64,128,1,float16,float16,0,0.021429332594076794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,4,64,0,1,float16,float16,0,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,4,64,128,1,float16,fp8,0,0.02162666618824005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,4,64,128,1,fp8,fp8,0,0.04188799858093262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,4,64,0,1,float16,fp8,0,0.02181866765022278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,4,64,0,1,fp8,fp8,0,0.041749333341916404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,12,64,0,1,float16,fp8,0,0.015967999895413715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,12,64,128,1,float16,float16,0,0.015770666301250458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,12,64,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,12,64,128,1,float16,fp8,0,0.015493333339691162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,12,64,128,1,fp8,fp8,0,0.02790933350721995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,12,64,0,1,fp8,fp8,0,0.02796799937884013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,1,64,128,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,1,64,0,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,1,64,128,1,float16,fp8,0,0.015626666446526844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,1,64,128,1,fp8,fp8,0,0.02700799951950709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,1,64,0,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,1,64,0,1,fp8,fp8,0,0.026917333404223125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,2,64,128,1,float16,float16,0,0.015386667102575302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,2,64,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,2,64,128,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,2,64,128,1,fp8,fp8,0,0.0273333340883255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,2,64,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,2,64,0,1,fp8,fp8,0,0.026586666703224182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,4,64,128,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,4,64,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,4,64,128,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,4,64,0,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,4,64,128,1,fp8,fp8,0,0.026821332673231762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,4,64,0,1,fp8,fp8,0,0.027061333258946735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,12,64,128,1,float16,float16,0,0.013893333574136099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,12,64,0,1,float16,float16,0,0.013280000537633896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,12,64,128,1,float16,fp8,0,0.014245333770910898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,12,64,128,1,fp8,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,12,64,0,1,float16,fp8,0,0.014117332796255747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,12,64,0,1,fp8,fp8,0,0.02276266614596049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,1,64,128,1,float16,float16,0,0.013440000514189402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,1,64,0,1,float16,float16,0,0.013776000589132309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,2,64,128,1,float16,fp8,0,0.014378666877746582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,1,64,128,1,float16,fp8,0,0.014287999520699183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,1,64,128,1,fp8,fp8,0,0.022240000466505688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,1,64,0,1,float16,fp8,0,0.014501333236694336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,1,64,0,1,fp8,fp8,0,0.02293333411216736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,2,64,128,1,float16,float16,0,0.01394133393963178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,2,64,0,1,float16,float16,0,0.01379199946920077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,4,64,128,1,float16,fp8,0,0.014282666146755219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,2,64,128,1,fp8,fp8,0,0.022800001005331676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,2,64,0,1,float16,fp8,0,0.014479999740918478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,2,64,0,1,fp8,fp8,0,0.022944000860055287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,4,64,128,1,float16,float16,0,0.01328533391157786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,4,64,0,1,float16,float16,0,0.014015999933083853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,4,64,128,1,fp8,fp8,0,0.023215999205907185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,4,64,0,1,float16,fp8,0,0.013744000345468521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,4,64,0,1,fp8,fp8,0,0.02362666775782903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,12,64,128,1,float16,float16,0,0.012272000312805176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,12,64,0,1,float16,float16,0,0.01190399999419848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,12,64,128,1,float16,fp8,0,0.012714666624863943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,12,64,128,1,fp8,fp8,0,0.018383999665578205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,12,64,0,1,float16,fp8,0,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,12,64,0,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,1,64,128,1,float16,float16,0,0.011823999385039011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,1,64,0,1,float16,float16,0,0.01257066677014033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,1,64,128,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,1,64,128,1,fp8,fp8,0,0.018613333503405254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,1,64,0,1,float16,fp8,0,0.012543999900420507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,1,64,0,1,fp8,fp8,0,0.01833600054184596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,2,64,128,1,float16,float16,0,0.012522666404644648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,2,64,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,2,64,128,1,float16,fp8,0,0.012624000509579977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,2,64,128,1,fp8,fp8,0,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,2,64,0,1,float16,fp8,0,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,2,64,0,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,4,64,128,1,float16,float16,0,0.012202666451533636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,4,64,0,1,float16,float16,0,0.012186666329701742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,4,64,128,1,float16,fp8,0,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,4,64,128,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,4,64,0,1,float16,fp8,0,0.012213333199421564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,4,64,0,1,fp8,fp8,0,0.019381333142518997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,12,64,128,1,float16,float16,0,0.0116799995303154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,12,64,0,1,float16,float16,0,0.012005332857370377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,12,64,128,1,float16,fp8,0,0.012128000458081564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,12,64,128,1,fp8,fp8,0,0.018405333161354065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,12,64,0,1,float16,fp8,0,0.01249066616098086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,12,64,0,1,fp8,fp8,0,0.01836799954374631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,1,64,128,1,float16,float16,0,0.011722666521867117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,1,64,0,1,float16,float16,0,0.012047999848922094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,1,64,128,1,float16,fp8,0,0.012240000069141388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,2,64,128,1,fp8,fp8,0,0.018735999862353008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,1,64,128,1,fp8,fp8,0,0.018394666413466137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,1,64,0,1,float16,fp8,0,0.012240000069141388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,1,64,0,1,fp8,fp8,0,0.018373332917690277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,2,64,128,1,float16,float16,0,0.012448000411192576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,2,64,0,1,float16,float16,0,0.01211200033624967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,2,64,128,1,float16,fp8,0,0.012133333832025528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,4,64,128,1,float16,fp8,0,0.012410666793584824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,2,64,0,1,float16,fp8,0,0.012613333761692047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,2,64,0,1,fp8,fp8,0,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,4,64,128,1,float16,float16,0,0.012383999923865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,4,64,0,1,float16,float16,0,0.011770666887362799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,4,64,128,1,fp8,fp8,0,0.01876266673207283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,12,64,128,1,float16,fp8,0,0.01219733307758967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,4,64,0,1,float16,fp8,0,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,4,64,0,1,fp8,fp8,0,0.0185759998857975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,12,64,128,1,float16,float16,0,0.011946666985750198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,12,64,0,1,float16,float16,0,0.012005332857370377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,12,64,128,1,fp8,fp8,0,0.018160000443458557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,1,64,128,1,fp8,fp8,0,0.018181333939234417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,12,64,0,1,float16,fp8,0,0.012304000556468964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,12,64,0,1,fp8,fp8,0,0.018058666338523228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,1,64,128,1,float16,float16,0,0.011722666521867117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,1,64,0,1,float16,float16,0,0.011792000383138657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,1,64,128,1,float16,fp8,0,0.012634667257467905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,1,64,0,1,float16,fp8,0,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,1,64,0,1,fp8,fp8,0,0.018085333208243053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,2,64,128,1,float16,float16,0,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,2,64,0,1,float16,float16,0,0.013610667238632837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,2,64,128,1,float16,fp8,0,0.01191466674208641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,2,64,128,1,fp8,fp8,0,0.018309333672126133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,2,64,0,1,float16,fp8,0,0.012517333030700684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,2,64,0,1,fp8,fp8,0,0.017840000490347546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,4,64,128,1,float16,float16,0,0.011834666132926941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,4,64,0,1,float16,float16,0,0.012240000069141388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,4,64,128,1,float16,fp8,0,0.012400000045696894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,4,64,128,1,fp8,fp8,0,0.017802666872739792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,4,64,0,1,float16,fp8,0,0.01250133290886879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,4,64,0,1,fp8,fp8,0,0.0182239996890227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,12,64,128,1,float16,float16,0,0.011274666835864386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,12,64,0,1,float16,float16,0,0.011301333705584208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,12,64,128,1,float16,fp8,0,0.012042666474978128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,12,64,128,1,fp8,fp8,0,0.0185759998857975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,12,64,0,1,float16,fp8,0,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,12,64,0,1,fp8,fp8,0,0.017674667139848072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,1,64,128,1,float16,float16,0,0.012085333466529846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,1,64,0,1,float16,float16,0,0.014293332894643148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,1,64,128,1,float16,fp8,0,0.011973333855470022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,1,64,128,1,fp8,fp8,0,0.018053332964579265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,1,64,0,1,float16,fp8,0,0.012154666086037954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,1,64,0,1,fp8,fp8,0,0.018629333625237148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,2,64,128,1,float16,float16,0,0.012037333101034164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,2,64,0,1,float16,float16,0,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,2,64,128,1,float16,fp8,0,0.012586666891972223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,2,64,128,1,fp8,fp8,0,0.017871999492247898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,2,64,0,1,float16,fp8,0,0.011786667009194693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,2,64,0,1,fp8,fp8,0,0.018170667191346485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,4,64,128,1,float16,float16,0,0.01210133358836174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,4,64,0,1,float16,float16,0,0.011855999628702799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,4,64,128,1,float16,fp8,0,0.012389333297808966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,4,64,128,1,fp8,fp8,0,0.020090666910012562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,4,64,0,1,float16,fp8,0,0.012586666891972223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,4,64,0,1,fp8,fp8,0,0.018053332964579265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,12,1,64,128,1,float16,float16,0,0.03555200000603994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,12,1,64,0,1,float16,float16,0,0.036570665736993156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,12,1,64,128,1,float16,fp8,0,0.03665599972009659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,12,1,64,128,1,fp8,fp8,0,0.09882666667302449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,12,1,64,0,1,float16,fp8,0,0.0363520011305809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,12,1,64,0,1,fp8,fp8,0,0.09905067086219788
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,12,2,64,128,1,float16,float16,0,0.03677333394686381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,12,2,64,0,1,float16,float16,0,0.03659199923276901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,12,2,64,128,1,float16,fp8,0,0.03626133253177007
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,12,2,64,128,1,fp8,fp8,0,0.09936533371607463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,12,2,64,0,1,float16,fp8,0,0.03686933219432831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,12,2,64,0,1,fp8,fp8,0,0.09884267052014668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,12,4,64,128,1,float16,float16,0,0.037477334340413414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,12,4,64,0,1,float16,float16,0,0.0372533326347669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,12,4,64,128,1,float16,fp8,0,0.03742400060097376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,12,4,64,0,1,float16,fp8,0,0.03691199918588003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,12,4,64,128,1,fp8,fp8,0,0.10052266716957092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,12,4,64,0,1,fp8,fp8,0,0.10043199857076009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,12,64,128,1,float16,float16,0,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,12,64,0,1,float16,float16,0,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,12,64,128,1,float16,fp8,0,0.025055999557177227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,12,64,128,1,fp8,fp8,0,0.06069866816202799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,12,64,0,1,float16,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,12,64,0,1,fp8,fp8,0,0.06140799820423126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,1,64,128,1,float16,float16,0,0.02492800106604894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,1,64,0,1,float16,float16,0,0.024688000480333965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,1,64,128,1,float16,fp8,0,0.025018667181332905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,1,64,128,1,fp8,fp8,0,0.0598826656738917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,1,64,0,1,float16,fp8,0,0.024666666984558105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,1,64,0,1,fp8,fp8,0,0.05900266766548157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,2,64,0,1,fp8,fp8,0,0.06182399888833364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,2,64,128,1,float16,float16,0,0.025050667424996693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,2,64,0,1,float16,float16,0,0.024853333830833435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,2,64,128,1,float16,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,2,64,128,1,fp8,fp8,0,0.059818665186564125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,2,64,0,1,float16,fp8,0,0.025279998779296875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,4,64,128,1,float16,float16,0,0.025685332715511322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,4,64,0,1,float16,float16,0,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,4,64,128,1,float16,fp8,0,0.026154667139053345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,4,64,128,1,fp8,fp8,0,0.060959999759991966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,4,64,0,1,float16,fp8,0,0.025605333348115284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,4,64,0,1,fp8,fp8,0,0.06047466893990835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,12,64,128,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,12,64,0,1,float16,float16,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,12,64,128,1,float16,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,1,64,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,1,64,128,1,fp8,fp8,0,0.03681066632270813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,12,64,128,1,fp8,fp8,0,0.037962667644023895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,12,64,0,1,float16,fp8,0,0.017370666066805523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,12,64,0,1,fp8,fp8,0,0.03737066686153412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,1,64,128,1,float16,float16,0,0.016656000167131424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,1,64,0,1,float16,float16,0,0.016682667036851246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,1,64,0,1,float16,fp8,0,0.016586666305859882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,1,64,0,1,fp8,fp8,0,0.036720000207424164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,2,64,128,1,float16,float16,0,0.016074666132529575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,2,64,0,1,float16,float16,0,0.016074666132529575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,2,64,128,1,float16,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,2,64,128,1,fp8,fp8,0,0.036981334288915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,2,64,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,2,64,0,1,fp8,fp8,0,0.03643733263015747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,4,64,128,1,float16,float16,0,0.016415999581416447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,4,64,0,1,float16,float16,0,0.01637866720557213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,4,64,128,1,float16,fp8,0,0.016597333053747814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,4,64,128,1,fp8,fp8,0,0.03664000084002813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,4,64,0,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,4,64,0,1,fp8,fp8,0,0.036858665446440377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,12,64,128,1,float16,float16,0,0.013877333452304205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,12,64,0,1,float16,float16,0,0.01358933374285698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,12,64,128,1,float16,fp8,0,0.01392000044385592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,12,64,128,1,fp8,fp8,0,0.02601066728432973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,12,64,0,1,float16,fp8,0,0.013909333695967993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,12,64,0,1,fp8,fp8,0,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,1,64,128,1,float16,float16,0,0.013754667093356451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,1,64,0,1,float16,float16,0,0.01360000049074491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,1,64,128,1,float16,fp8,0,0.0144213338692983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,1,64,128,1,fp8,fp8,0,0.025333332518736523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,1,64,0,1,float16,fp8,0,0.014127999544143677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,1,64,0,1,fp8,fp8,0,0.025557334224383037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,2,64,0,1,fp8,fp8,0,0.025589334468046825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,2,64,128,1,float16,float16,0,0.013536000003417334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,2,64,0,1,float16,float16,0,0.014080000420411428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,2,64,128,1,float16,fp8,0,0.01463466634353002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,2,64,128,1,fp8,fp8,0,0.025839999318122864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,2,64,0,1,float16,fp8,0,0.014394666999578476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,4,64,128,1,float16,float16,0,0.013877333452304205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,4,64,0,1,float16,float16,0,0.01404800017674764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,4,64,128,1,float16,fp8,0,0.014080000420411428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,4,64,128,1,fp8,fp8,0,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,4,64,0,1,float16,fp8,0,0.014560000350077948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,4,64,0,1,fp8,fp8,0,0.026352000733216602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,12,64,128,1,float16,float16,0,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,12,64,0,1,float16,float16,0,0.012181332955757776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,1,64,0,1,float16,float16,0,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,12,64,128,1,float16,fp8,0,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,12,64,128,1,fp8,fp8,0,0.021877333521842957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,12,64,0,1,float16,fp8,0,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,12,64,0,1,fp8,fp8,0,0.02149333308140437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,1,64,128,1,float16,float16,0,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,1,64,128,1,float16,fp8,0,0.013327999661366144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,1,64,128,1,fp8,fp8,0,0.02204799900452296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,1,64,0,1,float16,fp8,0,0.013487999637921652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,1,64,0,1,fp8,fp8,0,0.02271999915440877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,2,64,128,1,float16,float16,0,0.01267733300725619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,2,64,0,1,float16,float16,0,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,2,64,128,1,float16,fp8,0,0.012538666526476542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,2,64,128,1,fp8,fp8,0,0.02195200075705846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,2,64,0,1,float16,fp8,0,0.013536000003417334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,2,64,0,1,fp8,fp8,0,0.021989333132902782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,4,64,128,1,float16,float16,0,0.01292266696691513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,4,64,0,1,float16,float16,0,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,4,64,128,1,float16,fp8,0,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,4,64,128,1,fp8,fp8,0,0.022511998812357586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,4,64,0,1,float16,fp8,0,0.012586666891972223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,4,64,0,1,fp8,fp8,0,0.022341333329677582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,12,64,128,1,float16,float16,0,0.011994666109482447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,12,64,0,1,float16,float16,0,0.011765333513418833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,12,64,128,1,float16,fp8,0,0.012245333443085352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,12,64,128,1,fp8,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,12,64,0,1,float16,fp8,0,0.01210133358836174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,12,64,0,1,fp8,fp8,0,0.01869333287080129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,1,64,128,1,float16,float16,0,0.011637333780527115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,1,64,0,1,float16,float16,0,0.011936000237862269
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,1,64,128,1,float16,fp8,0,0.012335999558369318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,1,64,128,1,fp8,fp8,0,0.018709332992633183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,1,64,0,1,float16,fp8,0,0.01258133351802826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,1,64,0,1,fp8,fp8,0,0.018650667121013004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,2,64,128,1,float16,float16,0,0.011829332758982977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,2,64,0,1,float16,float16,0,0.012181332955757776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,2,64,128,1,float16,fp8,0,0.012309333930412928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,2,64,128,1,fp8,fp8,0,0.01868266612291336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,2,64,0,1,float16,fp8,0,0.012639999389648438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,2,64,0,1,fp8,fp8,0,0.01869333287080129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,4,64,128,1,float16,float16,0,0.01211200033624967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,4,64,0,1,float16,float16,0,0.012128000458081564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,4,64,128,1,float16,fp8,0,0.012357333054145178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,4,64,128,1,fp8,fp8,0,0.018357332795858383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,4,64,0,1,float16,fp8,0,0.012309333930412928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,4,64,0,1,fp8,fp8,0,0.018709332992633183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,12,64,128,1,float16,float16,0,0.011893333246310553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,12,64,0,1,float16,float16,0,0.012005332857370377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,12,64,128,1,float16,fp8,0,0.012517333030700684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,12,64,128,1,fp8,fp8,0,0.018522666146357853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,12,64,0,1,float16,fp8,0,0.012293333808581034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,12,64,0,1,fp8,fp8,0,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,1,64,128,1,float16,float16,0,0.013253333667914072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,1,64,0,1,float16,float16,0,0.012117333710193634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,1,64,128,1,float16,fp8,0,0.012506666282812754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,1,64,128,1,fp8,fp8,0,0.01850133389234543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,1,64,0,1,float16,fp8,0,0.012133333832025528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,1,64,0,1,fp8,fp8,0,0.01811733345190684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,2,64,128,1,float16,float16,0,0.01166933278242747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,2,64,0,1,float16,float16,0,0.011727999895811081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,2,64,128,1,float16,fp8,0,0.012421333541472753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,2,64,128,1,fp8,fp8,0,0.01844266677896182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,2,64,0,1,float16,fp8,0,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,2,64,0,1,fp8,fp8,0,0.0183999997874101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,4,64,128,1,float16,float16,0,0.012149333953857422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,4,64,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,4,64,128,1,float16,fp8,0,0.01184533288081487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,4,64,128,1,fp8,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,4,64,0,1,float16,fp8,0,0.01219733307758967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,4,64,0,1,fp8,fp8,0,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,12,64,128,1,float16,float16,0,0.011861333002646765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,12,64,0,1,float16,float16,0,0.011472000430027643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,12,64,128,1,float16,fp8,0,0.012037333101034164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,12,64,128,1,fp8,fp8,0,0.017701332767804463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,12,64,0,1,float16,fp8,0,0.01190399999419848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,12,64,0,1,fp8,fp8,0,0.01791999985774358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,1,64,128,1,float16,float16,0,0.012133333832025528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,1,64,0,1,float16,float16,0,0.011893333246310553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,1,64,128,1,float16,fp8,0,0.0120319997270902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,1,64,128,1,fp8,fp8,0,0.018426666657129925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,1,64,0,1,float16,fp8,0,0.011946666985750198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,1,64,0,1,fp8,fp8,0,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,2,64,128,1,float16,float16,0,0.011941333611806234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,2,64,0,1,float16,float16,0,0.011999999483426413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,2,64,128,1,float16,fp8,0,0.012367999802033106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,2,64,128,1,fp8,fp8,0,0.01823466643691063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,2,64,0,1,float16,fp8,0,0.011989332735538483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,2,64,0,1,fp8,fp8,0,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,4,64,128,1,float16,float16,0,0.011519999553759893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,4,64,0,1,float16,float16,0,0.012181332955757776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,4,64,128,1,float16,fp8,0,0.012175999581813812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,4,64,128,1,fp8,fp8,0,0.018522666146357853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,4,64,0,1,float16,fp8,0,0.012517333030700684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,4,64,0,1,fp8,fp8,0,0.01834133391578992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,12,64,128,1,float16,float16,0,0.011274666835864386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,12,64,0,1,float16,float16,0,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,12,64,128,1,float16,fp8,0,0.01126933346192042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,12,64,128,1,fp8,fp8,0,0.018709332992633183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,12,64,0,1,float16,fp8,0,0.011813333878914515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,12,64,0,1,fp8,fp8,0,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,1,64,128,1,float16,float16,0,0.012608000387748083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,1,64,0,1,float16,float16,0,0.011989332735538483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,1,64,128,1,float16,fp8,0,0.012058666596810022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,1,64,128,1,fp8,fp8,0,0.01785600061217944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,1,64,0,1,float16,fp8,0,0.01137599969903628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,1,64,0,1,fp8,fp8,0,0.01836266616980235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,2,64,128,1,float16,float16,0,0.01198400060335795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,2,64,0,1,float16,float16,0,0.011493333925803503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,4,64,0,1,float16,float16,0,0.011893333246310553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,2,64,128,1,float16,fp8,0,0.012240000069141388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,2,64,128,1,fp8,fp8,0,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,2,64,0,1,float16,fp8,0,0.012122667084137598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,2,64,0,1,fp8,fp8,0,0.01794133335351944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,4,64,128,1,float16,float16,0,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,4,64,128,1,float16,fp8,0,0.012485332787036896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,4,64,128,1,fp8,fp8,0,0.017797333498795826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,4,64,0,1,float16,fp8,0,0.012159999459981918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,4,64,0,1,fp8,fp8,0,0.01809599995613098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,8,1,64,128,1,float16,float16,0,0.7311200300852457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,8,1,64,128,1,float16,fp8,0,0.717034657796224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,8,1,64,128,1,fp8,fp8,0,0.9513866901397705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,8,2,64,128,1,float16,float16,0,0.744592030843099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,8,1,64,0,1,float16,float16,0,4.98637326558431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,8,2,64,128,1,float16,fp8,0,0.7413973013559977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,8,2,64,128,1,fp8,fp8,0,0.969866673151652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,8,1,64,0,1,fp8,fp8,0,4.404527982076009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,8,1,64,0,1,float16,fp8,0,4.881829261779785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,8,4,64,128,1,float16,float16,0,0.7651627063751221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,8,4,64,128,1,float16,fp8,0,0.7559519608815511
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,8,4,64,128,1,fp8,fp8,0,0.9970453580220541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,8,2,64,0,1,float16,float16,0,4.935957272847493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,8,2,64,0,1,float16,fp8,0,4.812538782755534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,8,64,128,1,float16,float16,0,0.414741317431132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,8,64,128,1,float16,fp8,0,0.41496535142262775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,8,2,64,0,1,fp8,fp8,0,4.4102827707926435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,8,64,128,1,fp8,fp8,0,0.5529546737670898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,8,4,64,0,1,float16,float16,0,4.917130788167317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,1,64,128,1,float16,float16,0,0.38309868176778156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,8,64,0,1,float16,float16,0,2.4864746729532876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,8,64,0,1,float16,fp8,0,2.475173314412435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,8,4,64,0,1,fp8,fp8,0,4.459328015645345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,8,4,64,0,1,float16,fp8,0,4.928042729695638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,1,64,128,1,float16,fp8,0,0.37778135140736896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,8,64,0,1,fp8,fp8,0,2.2983253796895347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,1,64,128,1,fp8,fp8,0,0.5046559969584147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,1,64,0,1,float16,float16,0,2.468186696370443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,2,64,128,1,float16,float16,0,0.3913280169169108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,1,64,0,1,float16,fp8,0,2.499663988749186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,1,64,0,1,fp8,fp8,0,2.2834879557291665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,2,64,128,1,float16,fp8,0,0.3901866674423218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,2,64,128,1,fp8,fp8,0,0.519594669342041
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,2,64,0,1,float16,float16,0,2.497034708658854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,4,64,128,1,float16,float16,0,0.4021013180414836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,2,64,0,1,float16,fp8,0,2.442848046620687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,2,64,0,1,fp8,fp8,0,2.2791360219319663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,4,64,128,1,float16,fp8,0,0.39762667814890545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,4,64,128,1,fp8,fp8,0,0.5272693236668905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,4,64,0,1,float16,fp8,0,2.4894773165384927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,8,64,128,1,float16,float16,0,0.22868800163269043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,4,64,0,1,float16,float16,0,2.485952059427897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,8,64,0,1,float16,float16,0,1.3078719774882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,4,64,0,1,fp8,fp8,0,2.3117653528849282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,8,64,128,1,float16,fp8,0,0.23332266012827554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,8,64,128,1,fp8,fp8,0,0.30461867650349933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,1,64,128,1,float16,float16,0,0.21548267205556235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,8,64,0,1,float16,fp8,0,1.3224159876505535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,8,64,0,1,fp8,fp8,0,1.1316213607788086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,1,64,128,1,float16,fp8,0,0.214303990205129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,1,64,128,1,fp8,fp8,0,0.2836906711260478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,1,64,0,1,float16,float16,0,1.3003253142038982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,2,64,128,1,float16,float16,0,0.21719467639923096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,1,64,0,1,float16,fp8,0,1.2955946922302246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,1,64,0,1,fp8,fp8,0,1.110906680425008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,2,64,0,1,float16,fp8,0,1.2871039708455403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,2,64,128,1,float16,fp8,0,0.2163626750310262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,2,64,0,1,float16,float16,0,1.2994080384572346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,2,64,128,1,fp8,fp8,0,0.2897973259290059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,4,64,128,1,float16,float16,0,0.2207733392715454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,2,64,0,1,fp8,fp8,0,1.121509313583374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,4,64,128,1,float16,fp8,0,0.22190399964650473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,4,64,128,1,fp8,fp8,0,0.2941173315048218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,4,64,0,1,float16,float16,0,1.3061227003733318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,4,64,0,1,float16,fp8,0,1.2925493717193604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,8,64,128,1,float16,float16,0,0.15833066900571188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,4,64,0,1,fp8,fp8,0,1.1269226868947346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,8,64,0,1,float16,float16,0,0.653989315032959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,8,64,128,1,float16,fp8,0,0.15915733575820923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,8,64,128,1,fp8,fp8,0,0.19723733266194662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,8,64,0,1,float16,fp8,0,0.6525439818700155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,8,64,0,1,fp8,fp8,0,0.6102026700973511
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,1,64,128,1,float16,float16,0,0.15786666671435037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,1,64,0,1,float16,float16,0,0.6595146656036377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,1,64,128,1,float16,fp8,0,0.15806933244069418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,1,64,128,1,fp8,fp8,0,0.1946986714998881
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,1,64,0,1,float16,fp8,0,0.6603413422902426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,2,64,128,1,float16,float16,0,0.15648000439008078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,1,64,0,1,fp8,fp8,0,0.6047733227411906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,2,64,0,1,float16,float16,0,0.6586666504542033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,2,64,128,1,float16,fp8,0,0.15875732898712158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,2,64,128,1,fp8,fp8,0,0.19554666678110758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,2,64,0,1,float16,fp8,0,0.6576586564381918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,4,64,128,1,float16,float16,0,0.1574026644229889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,2,64,0,1,fp8,fp8,0,0.6054133176803589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,4,64,0,1,float16,float16,0,0.6547306776046753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,4,64,128,1,float16,fp8,0,0.15718400478363037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,4,64,128,1,fp8,fp8,0,0.19693867365519205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,4,64,0,1,float16,fp8,0,0.6591466665267944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,4,64,0,1,fp8,fp8,0,0.6114666859308878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,8,1,64,128,1,float16,float16,0,0.5507040023803711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,8,1,64,128,1,float16,fp8,0,0.5402559836705526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,8,1,64,128,1,fp8,fp8,0,0.7222613493601481
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,8,1,64,0,1,float16,float16,0,2.841871897379557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,8,2,64,128,1,float16,float16,0,0.5669440031051636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,8,1,64,0,1,float16,fp8,0,2.8464905420939126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,8,1,64,0,1,fp8,fp8,0,2.6011679967244468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,8,2,64,128,1,float16,fp8,0,0.5611519813537598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,8,2,64,128,1,fp8,fp8,0,0.7366399765014648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,8,2,64,0,1,float16,float16,0,2.872447967529297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,8,4,64,128,1,float16,float16,0,0.5788319905598959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,8,4,64,128,1,fp8,fp8,0,0.7574133078257242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,8,4,64,128,1,float16,fp8,0,0.5767146746317545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,8,2,64,0,1,float16,fp8,0,2.807648022969564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,8,2,64,0,1,fp8,fp8,0,2.636026700337728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,8,64,128,1,float16,float16,0,0.3134933312733968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,8,4,64,0,1,float16,float16,0,2.8439839680989585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,8,64,128,1,float16,fp8,0,0.3157493273417155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,8,64,128,1,fp8,fp8,0,0.41974933942159015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,8,64,0,1,float16,float16,0,1.4794665972391765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,8,4,64,0,1,float16,fp8,0,2.8412532806396484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,8,4,64,0,1,fp8,fp8,0,2.651583989461263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,1,64,128,1,float16,float16,0,0.2914080023765564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,8,64,0,1,float16,fp8,0,1.4648426373799641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,8,64,0,1,fp8,fp8,0,1.3141012986501057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,1,64,128,1,float16,fp8,0,0.2858826716740926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,1,64,0,1,float16,float16,0,1.4566720326741536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,1,64,128,1,fp8,fp8,0,0.3843839963277181
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,1,64,0,1,float16,fp8,0,1.4497653643290203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,2,64,0,1,float16,float16,0,1.4596746762593586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,2,64,128,1,float16,float16,0,0.2963786721229553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,1,64,0,1,fp8,fp8,0,1.2914293607076008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,2,64,128,1,float16,fp8,0,0.2943626642227173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,2,64,128,1,fp8,fp8,0,0.39285866419474286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,4,64,128,1,float16,float16,0,0.3009439905484517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,2,64,0,1,float16,fp8,0,1.4571733474731445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,4,64,128,1,float16,fp8,0,0.30077866713205975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,2,64,0,1,fp8,fp8,0,1.287168025970459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,4,64,0,1,fp8,fp8,0,1.2985226313273113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,4,64,128,1,fp8,fp8,0,0.40725334485371906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,4,64,0,1,float16,float16,0,1.4548479715983074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,8,64,128,1,float16,float16,0,0.17764800786972046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,8,64,128,1,float16,fp8,0,0.18013334274291992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,4,64,0,1,float16,fp8,0,1.4639040629069011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,8,64,0,1,float16,float16,0,0.7290453116099039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,8,64,128,1,fp8,fp8,0,0.23838400840759277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,8,64,0,1,float16,fp8,0,0.7284479935963949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,8,64,0,1,fp8,fp8,0,0.6743893623352051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,1,64,128,1,float16,float16,0,0.16787733634312949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,1,64,128,1,float16,fp8,0,0.16524799664815268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,1,64,0,1,float16,float16,0,0.7148213386535645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,1,64,128,1,fp8,fp8,0,0.22189867496490479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,1,64,0,1,float16,fp8,0,0.7126293182373047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,2,64,128,1,float16,float16,0,0.16779732704162598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,1,64,0,1,fp8,fp8,0,0.6638453404108683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,2,64,0,1,float16,float16,0,0.719258705774943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,2,64,128,1,float16,fp8,0,0.16884267330169678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,2,64,128,1,fp8,fp8,0,0.22316267093022665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,2,64,0,1,float16,fp8,0,0.7212693691253662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,2,64,0,1,fp8,fp8,0,0.6608800093332926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,4,64,128,1,float16,float16,0,0.1733013391494751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,4,64,0,1,float16,float16,0,0.7220959663391113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,4,64,128,1,float16,fp8,0,0.17303466796875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,4,64,128,1,fp8,fp8,0,0.2292799949645996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,8,64,128,1,float16,float16,0,0.12426666418711345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,4,64,0,1,float16,fp8,0,0.718074639638265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,4,64,0,1,fp8,fp8,0,0.6712266604105631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,8,64,0,1,float16,float16,0,0.3964373270670573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,8,64,128,1,float16,fp8,0,0.12589866916338602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,8,64,128,1,fp8,fp8,0,0.15729600191116333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,8,64,0,1,float16,fp8,0,0.39882131417592365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,1,64,128,1,float16,float16,0,0.12157866358757019
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,8,64,0,1,fp8,fp8,0,0.3712906837463379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,1,64,0,1,float16,float16,0,0.39561065038045246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,1,64,128,1,float16,fp8,0,0.12327466408411662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,1,64,128,1,fp8,fp8,0,0.14714133739471436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,1,64,0,1,float16,fp8,0,0.39821334679921466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,1,64,0,1,fp8,fp8,0,0.3700480063756307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,2,64,128,1,float16,float16,0,0.12285332878430684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,2,64,0,1,float16,float16,0,0.39722665150960285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,2,64,128,1,float16,fp8,0,0.12341866890589397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,2,64,128,1,fp8,fp8,0,0.15319466590881348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,2,64,0,1,float16,fp8,0,0.39740268389383954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,2,64,0,1,fp8,fp8,0,0.36769600709279376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,4,64,128,1,float16,float16,0,0.12401599685351054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,4,64,0,1,float16,float16,0,0.39804800351460773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,4,64,128,1,float16,fp8,0,0.12433066964149475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,4,64,128,1,fp8,fp8,0,0.15588800112406412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,4,64,0,1,float16,fp8,0,0.39579200744628906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,4,64,0,1,fp8,fp8,0,0.370197335879008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,8,1,64,128,1,float16,float16,0,0.4575466712315877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,8,1,64,128,1,float16,fp8,0,0.4509066740671794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,8,1,64,128,1,fp8,fp8,0,0.6113440195719401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,8,1,64,0,1,fp8,fp8,0,1.8699092864990234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,8,1,64,0,1,float16,float16,0,2.0056959788004556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,8,2,64,128,1,float16,float16,0,0.4716693162918091
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,8,1,64,0,1,float16,fp8,0,2.0106453895568848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,8,2,64,128,1,float16,fp8,0,0.46876800060272217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,8,2,64,128,1,fp8,fp8,0,0.6224106550216675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,8,2,64,0,1,float16,float16,0,2.022186597188314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,8,2,64,0,1,float16,fp8,0,2.0214719772338867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,8,4,64,128,1,float16,float16,0,0.4817440112431844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,8,2,64,0,1,fp8,fp8,0,1.8806719779968262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,8,4,64,128,1,float16,fp8,0,0.480074683825175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,8,4,64,0,1,float16,float16,0,2.032538731892904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,8,4,64,128,1,fp8,fp8,0,0.6376586755116781
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,8,64,128,1,float16,float16,0,0.2632906635602315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,8,64,128,1,float16,fp8,0,0.2675306598345439
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,8,64,0,1,float16,float16,0,1.0614986419677734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,8,4,64,0,1,float16,fp8,0,2.040191968282064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,8,64,128,1,fp8,fp8,0,0.35711999734242755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,8,4,64,0,1,fp8,fp8,0,1.8961280186971028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,8,64,0,1,fp8,fp8,0,0.9449600378672282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,1,64,128,1,float16,float16,0,0.24554133415222168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,8,64,0,1,float16,fp8,0,1.0606773694356282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,1,64,128,1,float16,fp8,0,0.24268800020217896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,1,64,0,1,fp8,fp8,0,0.9161279996236166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,1,64,0,1,float16,float16,0,1.0510133107503254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,1,64,128,1,fp8,fp8,0,0.32410667339960736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,2,64,128,1,float16,float16,0,0.24889600276947021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,1,64,0,1,float16,fp8,0,1.0408480167388916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,2,64,128,1,float16,fp8,0,0.24665600061416626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,2,64,128,1,fp8,fp8,0,0.33084267377853394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,2,64,0,1,float16,float16,0,1.0573493639628093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,2,64,0,1,float16,fp8,0,1.0409493446350098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,2,64,0,1,fp8,fp8,0,0.9197386900583903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,4,64,128,1,float16,float16,0,0.2539733250935872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,4,64,0,1,float16,float16,0,1.0602400302886963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,4,64,128,1,float16,fp8,0,0.2543413241704305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,4,64,128,1,fp8,fp8,0,0.3402666648228963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,4,64,0,1,float16,fp8,0,1.0541760126749675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,8,64,128,1,float16,float16,0,0.15082666277885437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,4,64,0,1,fp8,fp8,0,0.9322026570638021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,8,64,0,1,float16,float16,0,0.5268106857935587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,8,64,128,1,float16,fp8,0,0.15381866693496704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,8,64,128,1,fp8,fp8,0,0.2047626574834188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,8,64,0,1,float16,fp8,0,0.5247626701990763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,8,64,0,1,fp8,fp8,0,0.4911786715189616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,1,64,128,1,float16,float16,0,0.14180800318717957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,1,64,128,1,float16,fp8,0,0.14166399836540222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,1,64,0,1,float16,float16,0,0.5149866739908854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,1,64,128,1,fp8,fp8,0,0.191103994846344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,1,64,0,1,float16,fp8,0,0.5173759857813517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,1,64,0,1,fp8,fp8,0,0.4808906714121501
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,2,64,128,1,float16,float16,0,0.1444906691710154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,2,64,128,1,float16,fp8,0,0.14401599764823914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,2,64,0,1,float16,float16,0,0.5151413281758627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,2,64,128,1,fp8,fp8,0,0.19156799713770548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,2,64,0,1,float16,fp8,0,0.520250678062439
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,2,64,0,1,fp8,fp8,0,0.48285333315531415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,4,64,128,1,float16,float16,0,0.14724799990653992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,4,64,128,1,float16,fp8,0,0.14780799547831217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,8,64,128,1,float16,float16,0,0.10750400026639302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,4,64,0,1,float16,float16,0,0.5201013485590616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,4,64,128,1,fp8,fp8,0,0.19709332784016928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,4,64,0,1,float16,fp8,0,0.5200586716334025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,4,64,0,1,fp8,fp8,0,0.4864746729532878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,8,64,0,1,float16,float16,0,0.29850133260091144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,8,64,128,1,float16,fp8,0,0.10702932874361674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,8,64,128,1,fp8,fp8,0,0.13645866513252258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,8,64,0,1,float16,fp8,0,0.2964853247006734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,8,64,0,1,fp8,fp8,0,0.27558932701746625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,1,64,128,1,float16,float16,0,0.10354133447011311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,1,64,0,1,float16,float16,0,0.2963520089785258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,1,64,128,1,float16,fp8,0,0.10388799508412679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,1,64,128,1,fp8,fp8,0,0.12643200159072876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,1,64,0,1,float16,fp8,0,0.29639999071757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,1,64,0,1,fp8,fp8,0,0.2732693354288737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,2,64,128,1,float16,float16,0,0.10392533739407857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,4,64,128,1,float16,float16,0,0.10647466778755188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,2,64,0,1,float16,float16,0,0.2946773370107015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,2,64,128,1,float16,fp8,0,0.10358933607737224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,2,64,128,1,fp8,fp8,0,0.12802132964134216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,2,64,0,1,float16,fp8,0,0.29660266637802124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,2,64,0,1,fp8,fp8,0,0.27391467491785687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,4,64,0,1,float16,float16,0,0.2956533432006836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,4,64,128,1,float16,fp8,0,0.10504532853762309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,4,64,128,1,fp8,fp8,0,0.13519466916720072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,4,64,0,1,float16,fp8,0,0.2960853377978007
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,4,64,0,1,fp8,fp8,0,0.27638399600982666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,8,1,64,128,1,float16,float16,0,0.713263988494873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,8,1,64,128,1,float16,fp8,0,0.7083573341369629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,8,1,64,0,1,float16,float16,0,2.658202648162842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,8,1,64,128,1,fp8,fp8,0,0.9354453086853027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,8,2,64,128,1,float16,float16,0,0.735637346903483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,8,1,64,0,1,float16,fp8,0,2.60916805267334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,8,1,64,0,1,fp8,fp8,0,2.439589341481527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,8,2,64,128,1,float16,fp8,0,0.7276159922281901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,8,2,64,0,1,float16,float16,0,2.6558666229248047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,8,2,64,128,1,fp8,fp8,0,0.9638506571451823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,8,4,64,128,1,float16,float16,0,0.7541173299153646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,8,4,64,0,1,float16,float16,0,2.6480587323506675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,8,2,64,0,1,float16,fp8,0,2.6680212020874023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,8,4,64,128,1,float16,fp8,0,0.7505119641621908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,8,2,64,0,1,fp8,fp8,0,2.4439786275227866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,8,4,64,128,1,fp8,fp8,0,0.9827199776967367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,8,64,128,1,float16,float16,0,0.4056160052617391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,8,64,128,1,float16,fp8,0,0.4087466796239217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,8,64,0,1,float16,float16,0,1.3740906715393066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,1,64,128,1,float16,float16,0,0.3666293223698934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,8,4,64,0,1,float16,fp8,0,2.6888373692830405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,8,4,64,0,1,fp8,fp8,0,2.4838666915893555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,8,64,128,1,fp8,fp8,0,0.5370933214823405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,8,64,0,1,float16,fp8,0,1.375040054321289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,8,64,0,1,fp8,fp8,0,1.2895946502685547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,1,64,128,1,float16,fp8,0,0.35897600650787354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,1,64,128,1,fp8,fp8,0,0.4883893330891927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,1,64,0,1,float16,float16,0,1.3604747454325359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,2,64,128,1,float16,float16,0,0.37768534819285077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,1,64,0,1,float16,fp8,0,1.346735954284668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,1,64,0,1,fp8,fp8,0,1.2537493705749512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,4,64,128,1,float16,float16,0,0.38703465461730957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,2,64,128,1,float16,fp8,0,0.3729439973831177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,2,64,0,1,float16,float16,0,1.3476959864298503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,2,64,128,1,fp8,fp8,0,0.4993973175684611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,2,64,0,1,float16,fp8,0,1.3425226211547852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,2,64,0,1,fp8,fp8,0,1.2639946937561035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,4,64,128,1,float16,fp8,0,0.3851253191630046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,4,64,128,1,fp8,fp8,0,0.5154720147450765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,4,64,0,1,float16,float16,0,1.3552533785502117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,4,64,0,1,float16,fp8,0,1.3597440719604492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,8,64,128,1,float16,float16,0,0.213210662206014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,4,64,0,1,fp8,fp8,0,1.2710560162862141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,8,64,0,1,float16,float16,0,0.7180853684743246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,8,64,128,1,float16,fp8,0,0.21837866306304932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,8,64,128,1,fp8,fp8,0,0.29120532671610516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,8,64,0,1,float16,fp8,0,0.7262506484985352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,8,64,0,1,fp8,fp8,0,0.6356853246688843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,1,64,128,1,float16,float16,0,0.20069332917531332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,1,64,128,1,float16,fp8,0,0.19722133874893188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,1,64,0,1,float16,float16,0,0.7042933305104574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,1,64,128,1,fp8,fp8,0,0.26637333631515503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,1,64,0,1,float16,fp8,0,0.7091413338979086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,2,64,128,1,float16,float16,0,0.20419732729593912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,1,64,0,1,fp8,fp8,0,0.6188960075378418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,2,64,128,1,float16,fp8,0,0.20153067509333292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,2,64,0,1,float16,float16,0,0.7097173531850179
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,2,64,128,1,fp8,fp8,0,0.2723840077718099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,2,64,0,1,float16,fp8,0,0.7064800262451172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,2,64,0,1,fp8,fp8,0,0.6225119829177856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,4,64,128,1,float16,float16,0,0.2066719929377238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,4,64,0,1,float16,float16,0,0.7188959916432699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,4,64,128,1,float16,fp8,0,0.20875734090805054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,4,64,128,1,fp8,fp8,0,0.27933865785598755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,4,64,0,1,float16,fp8,0,0.713530699412028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,4,64,0,1,fp8,fp8,0,0.6258666515350342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,8,64,128,1,float16,float16,0,0.12610133488972983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,8,64,0,1,float16,float16,0,0.3596266508102417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,8,64,128,1,float16,fp8,0,0.12811199824015299
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,8,64,128,1,fp8,fp8,0,0.17128000656763712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,8,64,0,1,float16,fp8,0,0.3589973449707031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,8,64,0,1,fp8,fp8,0,0.3378346761067708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,1,64,128,1,float16,float16,0,0.11755733688672383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,1,64,0,1,float16,float16,0,0.3487946589787801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,1,64,128,1,float16,fp8,0,0.11774933338165283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,1,64,128,1,fp8,fp8,0,0.1578933298587799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,1,64,0,1,float16,fp8,0,0.3526453177134196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,1,64,0,1,fp8,fp8,0,0.3251306613286336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,2,64,128,1,float16,float16,0,0.11878400047620137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,2,64,0,1,float16,float16,0,0.35261865456899005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,2,64,128,1,float16,fp8,0,0.11818666259447734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,2,64,128,1,fp8,fp8,0,0.16084266702334085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,2,64,0,1,float16,fp8,0,0.35306668281555176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,2,64,0,1,fp8,fp8,0,0.331386665503184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,4,64,128,1,float16,float16,0,0.12205333511034648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,4,64,0,1,float16,float16,0,0.3555999994277954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,4,64,128,1,float16,fp8,0,0.12146133184432983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,4,64,128,1,fp8,fp8,0,0.1650879979133606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,4,64,0,1,float16,fp8,0,0.3532426754633586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,4,64,0,1,fp8,fp8,0,0.33352001508076984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,8,64,128,1,float16,float16,0,0.09051199754079182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,8,64,0,1,float16,float16,0,0.20596800247828165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,8,64,128,1,float16,fp8,0,0.09162132938702901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,8,64,128,1,fp8,fp8,0,0.11530666550000508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,8,64,0,1,float16,fp8,0,0.2041226625442505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,8,64,0,1,fp8,fp8,0,0.1929759979248047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,1,64,128,1,float16,float16,0,0.08839999636014302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,1,64,0,1,float16,float16,0,0.20355733235677084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,1,64,128,1,float16,fp8,0,0.0888159970442454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,1,64,128,1,fp8,fp8,0,0.10582933823267619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,1,64,0,1,float16,fp8,0,0.20320000251134238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,1,64,0,1,fp8,fp8,0,0.19117865959803262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,2,64,128,1,float16,float16,0,0.08801066875457764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,2,64,0,1,float16,float16,0,0.20267200469970703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,2,64,128,1,float16,fp8,0,0.08834667007128398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,2,64,128,1,fp8,fp8,0,0.10685867071151733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,2,64,0,1,float16,fp8,0,0.2042293349901835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,2,64,0,1,fp8,fp8,0,0.19140267372131348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,4,64,128,1,float16,float16,0,0.0888426701227824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,4,64,0,1,float16,float16,0,0.20406933625539145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,4,64,128,1,float16,fp8,0,0.08797333637873332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,4,64,128,1,fp8,fp8,0,0.10943999886512756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,4,64,0,1,float16,fp8,0,0.20362132787704468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,4,64,0,1,fp8,fp8,0,0.19201600551605225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,8,1,64,128,1,float16,float16,0,0.5384746789932251
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,8,1,64,128,1,float16,fp8,0,0.5280426740646362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,8,1,64,128,1,fp8,fp8,0,0.7161760330200195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,8,1,64,0,1,float16,float16,0,1.581376075744629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,8,2,64,128,1,float16,float16,0,0.5554666519165039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,8,1,64,0,1,float16,fp8,0,1.5684266090393066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,8,1,64,0,1,fp8,fp8,0,1.4632852872212727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,8,2,64,0,1,float16,fp8,0,1.5969759623209636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,8,2,64,128,1,float16,fp8,0,0.5528106689453125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,8,2,64,0,1,float16,float16,0,1.6006827354431152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,8,2,64,128,1,fp8,fp8,0,0.7268640200297037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,8,4,64,128,1,float16,float16,0,0.5693493286768595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,8,2,64,0,1,fp8,fp8,0,1.4923413594563801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,8,4,64,128,1,float16,fp8,0,0.5649866660435995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,8,4,64,128,1,fp8,fp8,0,0.7504213651021322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,8,4,64,0,1,float16,float16,0,1.6083626747131348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,8,64,128,1,float16,float16,0,0.30529600381851196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,8,4,64,0,1,float16,fp8,0,1.6101226806640625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,8,4,64,0,1,fp8,fp8,0,1.50873597462972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,8,64,128,1,float16,fp8,0,0.3052000006039937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,8,64,0,1,float16,float16,0,0.8359306653340658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,8,64,128,1,fp8,fp8,0,0.4093386729558309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,8,64,0,1,float16,fp8,0,0.8462613423665365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,1,64,128,1,float16,float16,0,0.2791733344395955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,8,64,0,1,fp8,fp8,0,0.7574293613433838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,1,64,128,1,float16,fp8,0,0.27611732482910156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,1,64,0,1,float16,float16,0,0.8157920042673746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,1,64,128,1,fp8,fp8,0,0.37557868162790936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,2,64,128,1,float16,float16,0,0.2863466739654541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,1,64,0,1,float16,fp8,0,0.8088106314341227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,1,64,0,1,fp8,fp8,0,0.7224960327148438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,2,64,128,1,float16,fp8,0,0.28143999973932904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,2,64,0,1,float16,float16,0,0.818453311920166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,2,64,128,1,fp8,fp8,0,0.3824959993362427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,2,64,0,1,float16,fp8,0,0.8111146291097006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,4,64,128,1,float16,float16,0,0.2911146680514018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,2,64,0,1,fp8,fp8,0,0.7363519668579102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,4,64,128,1,float16,fp8,0,0.2900533278783162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,4,64,0,1,float16,float16,0,0.822096029917399
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,4,64,128,1,fp8,fp8,0,0.39213867982228595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,4,64,0,1,float16,fp8,0,0.82369065284729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,8,64,128,1,float16,float16,0,0.1662879983584086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,4,64,0,1,fp8,fp8,0,0.7447253068288168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,8,64,0,1,float16,float16,0,0.41840533415476483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,8,64,128,1,float16,fp8,0,0.17029867569605509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,8,64,128,1,fp8,fp8,0,0.2245546579360962
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,8,64,0,1,float16,fp8,0,0.41834131876627606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,8,64,0,1,fp8,fp8,0,0.39576534430185956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,1,64,128,1,float16,float16,0,0.15475733081499735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,1,64,0,1,float16,float16,0,0.403711994489034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,1,64,128,1,float16,fp8,0,0.15361066659291586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,1,64,128,1,fp8,fp8,0,0.21090133984883627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,1,64,0,1,float16,fp8,0,0.4012213150660197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,1,64,0,1,fp8,fp8,0,0.37615466117858887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,2,64,128,1,float16,float16,0,0.15679466724395752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,2,64,0,1,float16,float16,0,0.405290683110555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,2,64,128,1,float16,fp8,0,0.1560479998588562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,2,64,128,1,fp8,fp8,0,0.2125920057296753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,2,64,0,1,float16,fp8,0,0.40699199835459393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,2,64,0,1,fp8,fp8,0,0.3800426721572876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,4,64,128,1,float16,float16,0,0.1611733337243398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,4,64,0,1,float16,float16,0,0.4090293248494466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,4,64,128,1,float16,fp8,0,0.16200533509254456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,4,64,128,1,fp8,fp8,0,0.21818667650222778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,4,64,0,1,float16,fp8,0,0.4113493363062541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,8,64,128,1,float16,float16,0,0.10011733571688335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,4,64,0,1,fp8,fp8,0,0.387386679649353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,8,64,0,1,float16,float16,0,0.2239840030670166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,8,64,128,1,float16,fp8,0,0.10109333197275798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,8,64,128,1,fp8,fp8,0,0.13708266615867615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,8,64,0,1,float16,fp8,0,0.22529600063959757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,8,64,0,1,fp8,fp8,0,0.21492266654968262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,1,64,128,1,float16,float16,0,0.09224533041318257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,1,64,0,1,float16,float16,0,0.21749866008758545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,1,64,128,1,float16,fp8,0,0.09198932846387227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,1,64,128,1,fp8,fp8,0,0.11894933382670085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,1,64,0,1,float16,fp8,0,0.2160159945487976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,1,64,0,1,fp8,fp8,0,0.20485333601633707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,2,64,128,1,float16,float16,0,0.09318932890892029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,2,64,0,1,float16,float16,0,0.21910399198532104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,2,64,128,1,float16,fp8,0,0.09303466478983562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,2,64,128,1,fp8,fp8,0,0.12449600299199422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,2,64,0,1,float16,fp8,0,0.21735999981562296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,2,64,0,1,fp8,fp8,0,0.20332799355189005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,4,64,128,1,float16,float16,0,0.09587732950846355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,4,64,0,1,float16,float16,0,0.221178670724233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,4,64,128,1,float16,fp8,0,0.09674132863680522
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,4,64,128,1,fp8,fp8,0,0.13095466295878092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,4,64,0,1,float16,fp8,0,0.2204213341077169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,4,64,0,1,fp8,fp8,0,0.2099519968032837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,8,64,128,1,float16,float16,0,0.07019733389218648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,8,64,0,1,float16,float16,0,0.13767466942469278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,8,64,128,1,float16,fp8,0,0.0698880006869634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,8,64,128,1,fp8,fp8,0,0.09107733766237895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,8,64,0,1,float16,fp8,0,0.1381119986375173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,8,64,0,1,fp8,fp8,0,0.12963199615478516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,1,64,128,1,float16,float16,0,0.06937600175539653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,1,64,0,1,float16,float16,0,0.1381013294061025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,1,64,128,1,float16,fp8,0,0.06972266733646393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,1,64,128,1,fp8,fp8,0,0.08776000142097473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,1,64,0,1,float16,fp8,0,0.13703466455141702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,1,64,0,1,fp8,fp8,0,0.12899733583132425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,2,64,128,1,float16,float16,0,0.07042133311430614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,2,64,0,1,float16,float16,0,0.13699199755986533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,2,64,128,1,float16,fp8,0,0.06970666845639546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,2,64,128,1,fp8,fp8,0,0.08698667089144389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,2,64,0,1,float16,fp8,0,0.1371999979019165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,2,64,0,1,fp8,fp8,0,0.12934933106104532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,4,64,128,1,float16,float16,0,0.06969066460927327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,4,64,0,1,float16,float16,0,0.1372266709804535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,4,64,128,1,float16,fp8,0,0.07070933282375336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,4,64,128,1,fp8,fp8,0,0.0876639982064565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,4,64,0,1,float16,fp8,0,0.1367733379205068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,4,64,0,1,fp8,fp8,0,0.12940266728401184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,8,1,64,128,1,float16,float16,0,0.7114346822102865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,8,1,64,128,1,float16,fp8,0,0.702186663945516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,8,1,64,0,1,float16,float16,0,1.5438507397969563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,8,1,64,128,1,fp8,fp8,0,0.9307093620300293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,8,1,64,0,1,fp8,fp8,0,1.4504000345865886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,8,1,64,0,1,float16,fp8,0,1.5414719581604004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,8,2,64,128,1,float16,float16,0,0.7317333221435547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,8,2,64,128,1,float16,fp8,0,0.7229812939961752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,8,2,64,0,1,float16,float16,0,1.5731520652770996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,8,2,64,128,1,fp8,fp8,0,0.9506293137868246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,8,4,64,128,1,float16,float16,0,0.7538080215454102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,8,2,64,0,1,float16,fp8,0,1.5565013885498047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,8,64,128,1,float16,float16,0,0.4044853448867798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,8,2,64,0,1,fp8,fp8,0,1.472453276316325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,8,4,64,128,1,float16,fp8,0,0.7431306838989258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,8,4,64,0,1,float16,float16,0,1.5947306950887044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,8,4,64,128,1,fp8,fp8,0,0.9755520025889078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,8,4,64,0,1,float16,fp8,0,1.5895733833312988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,8,4,64,0,1,fp8,fp8,0,1.4874293009440105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,8,64,128,1,float16,fp8,0,0.39997867743174237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,8,64,0,1,float16,float16,0,0.8271733125050863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,8,64,128,1,fp8,fp8,0,0.5295679966608683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,8,64,0,1,float16,fp8,0,0.8244746526082357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,8,64,0,1,fp8,fp8,0,0.7923786640167236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,1,64,128,1,float16,float16,0,0.3612746795018514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,1,64,128,1,float16,fp8,0,0.3545973300933838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,1,64,0,1,fp8,fp8,0,0.7426933447519938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,1,64,0,1,float16,float16,0,0.7927520275115967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,1,64,128,1,fp8,fp8,0,0.48315731684366864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,1,64,0,1,float16,fp8,0,0.782426675160726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,2,64,128,1,float16,float16,0,0.3708000183105469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,2,64,128,1,float16,fp8,0,0.36775465806325275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,2,64,0,1,float16,float16,0,0.8010186354319254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,2,64,128,1,fp8,fp8,0,0.495034654935201
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,2,64,0,1,float16,fp8,0,0.7981812953948975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,4,64,128,1,float16,float16,0,0.3812319835027059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,2,64,0,1,fp8,fp8,0,0.7584693431854248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,4,64,0,1,float16,float16,0,0.8133440017700195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,4,64,128,1,float16,fp8,0,0.37726934750874835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,4,64,128,1,fp8,fp8,0,0.5110559860865275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,4,64,0,1,float16,fp8,0,0.8016853332519531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,8,64,128,1,float16,float16,0,0.21050133307774863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,4,64,0,1,fp8,fp8,0,0.77019731203715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,8,64,0,1,float16,float16,0,0.4326080083847046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,8,64,128,1,float16,fp8,0,0.21095999081929526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,8,64,128,1,fp8,fp8,0,0.28287466367085773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,8,64,0,1,float16,fp8,0,0.43458131949106854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,8,64,0,1,fp8,fp8,0,0.39085865020751953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,1,64,128,1,float16,float16,0,0.19380799929300943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,1,64,0,1,float16,float16,0,0.41873598098754883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,1,64,128,1,float16,fp8,0,0.19153066476186117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,1,64,128,1,fp8,fp8,0,0.25934932629267377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,1,64,0,1,float16,fp8,0,0.41464531421661377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,1,64,0,1,fp8,fp8,0,0.37090134620666504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,2,64,128,1,float16,float16,0,0.19650665918986002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,2,64,0,1,float16,float16,0,0.4217439889907837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,2,64,128,1,float16,fp8,0,0.19562133153279623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,2,64,128,1,fp8,fp8,0,0.2654293378194173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,2,64,0,1,float16,fp8,0,0.4171626567840576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,4,64,128,1,fp8,fp8,0,0.2707359989484151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,2,64,0,1,fp8,fp8,0,0.3739893436431885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,4,64,128,1,float16,float16,0,0.2012959917386373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,4,64,128,1,float16,fp8,0,0.2026240030924479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,4,64,0,1,float16,float16,0,0.4233706792195638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,4,64,0,1,float16,fp8,0,0.42499200503031415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,4,64,0,1,fp8,fp8,0,0.38102932771046955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,8,64,128,1,float16,float16,0,0.11880532900492351
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,8,64,0,1,float16,float16,0,0.2179093360900879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,8,64,128,1,float16,fp8,0,0.12143466869990031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,8,64,128,1,fp8,fp8,0,0.16351999839146933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,8,64,0,1,float16,fp8,0,0.22140800952911377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,8,64,0,1,fp8,fp8,0,0.21171732743581137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,1,64,128,1,float16,float16,0,0.11143466830253601
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,1,64,0,1,float16,float16,0,0.20891733964284262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,1,64,128,1,float16,fp8,0,0.10791466633478801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,1,64,128,1,fp8,fp8,0,0.14724266529083252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,1,64,0,1,float16,fp8,0,0.20963199933369955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,1,64,0,1,fp8,fp8,0,0.1982133388519287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,2,64,128,1,float16,float16,0,0.11186666289965312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,2,64,0,1,float16,float16,0,0.21145067612330118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,2,64,128,1,float16,fp8,0,0.11084799965222676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,2,64,128,1,fp8,fp8,0,0.1518400013446808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,2,64,0,1,float16,fp8,0,0.21120532353719076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,2,64,0,1,fp8,fp8,0,0.2007733384768168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,4,64,128,1,float16,float16,0,0.11424533526102702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,4,64,0,1,float16,float16,0,0.21345067024230957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,4,64,128,1,float16,fp8,0,0.1130560040473938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,4,64,128,1,fp8,fp8,0,0.15634666879971823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,4,64,0,1,float16,fp8,0,0.21470399697621664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,4,64,0,1,fp8,fp8,0,0.20465066035588583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,8,64,128,1,float16,float16,0,0.0737013320128123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,8,64,0,1,float16,float16,0,0.12155200044314067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,8,64,128,1,float16,fp8,0,0.07426133255163829
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,8,64,128,1,fp8,fp8,0,0.10168533523877461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,8,64,0,1,float16,fp8,0,0.12220266461372375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,8,64,0,1,fp8,fp8,0,0.1179200013478597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,1,64,128,1,float16,float16,0,0.06856533388296764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,1,64,0,1,float16,float16,0,0.11663466691970825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,1,64,128,1,float16,fp8,0,0.06870399912198384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,1,64,128,1,fp8,fp8,0,0.08597333232561748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,1,64,0,1,float16,fp8,0,0.11739200353622437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,1,64,0,1,fp8,fp8,0,0.11091732978820801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,2,64,128,1,float16,float16,0,0.06814399858315785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,2,64,0,1,float16,float16,0,0.1172160009543101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,2,64,128,1,float16,fp8,0,0.06914666791756947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,2,64,128,1,fp8,fp8,0,0.08622933427492778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,2,64,0,1,float16,fp8,0,0.11786666512489319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,2,64,0,1,fp8,fp8,0,0.1104906698067983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,4,64,128,1,float16,float16,0,0.06989333530267079
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,4,64,0,1,float16,float16,0,0.11921599507331848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,4,64,128,1,float16,fp8,0,0.06950399776299794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,4,64,128,1,fp8,fp8,0,0.09569600224494934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,4,64,0,1,float16,fp8,0,0.11823466420173645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,4,64,0,1,fp8,fp8,0,0.11225066582361858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,8,64,128,1,float16,float16,0,0.05439466734727224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,8,64,0,1,float16,float16,0,0.07683200140794118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,8,64,128,1,float16,fp8,0,0.05448000133037567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,8,64,128,1,fp8,fp8,0,0.06549866497516632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,8,64,0,1,float16,fp8,0,0.07653333246707916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,8,64,0,1,fp8,fp8,0,0.0735040009021759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,1,64,128,1,float16,float16,0,0.053957333167394005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,1,64,0,1,float16,float16,0,0.07593599955240886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,1,64,128,1,float16,fp8,0,0.05442666510740916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,1,64,128,1,fp8,fp8,0,0.0654666672150294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,1,64,0,1,float16,fp8,0,0.07688533266385396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,1,64,0,1,fp8,fp8,0,0.07352533439795177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,2,64,128,1,float16,float16,0,0.053557331363360085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,2,64,0,1,float16,float16,0,0.07674133280913036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,2,64,128,1,float16,fp8,0,0.05438933273156484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,2,64,128,1,fp8,fp8,0,0.06451199948787689
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,2,64,0,1,float16,fp8,0,0.07714133461316426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,2,64,0,1,fp8,fp8,0,0.07293333113193512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,4,64,128,1,float16,float16,0,0.054144000013669334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,4,64,0,1,float16,float16,0,0.07762133578459422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,4,64,128,1,float16,fp8,0,0.054101333022117615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,4,64,128,1,fp8,fp8,0,0.06552533308664958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,4,64,0,1,float16,fp8,0,0.07701333363850911
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,4,64,0,1,fp8,fp8,0,0.07347733279069264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,8,1,64,128,1,float16,float16,0,0.531274676322937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,8,1,64,0,1,float16,float16,0,0.964026689529419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,8,1,64,128,1,float16,fp8,0,0.5248533487319946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,8,1,64,128,1,fp8,fp8,0,0.7019146283467611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,8,1,64,0,1,float16,fp8,0,0.9535520076751709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,8,2,64,128,1,float16,float16,0,0.5449920097986857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,8,1,64,0,1,fp8,fp8,0,0.9043413003285726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,8,2,64,0,1,float16,float16,0,0.9748106797536215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,8,2,64,128,1,float16,fp8,0,0.5361653168996176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,8,2,64,128,1,fp8,fp8,0,0.7148959636688232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,8,2,64,0,1,float16,fp8,0,0.9743466377258301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,8,4,64,128,1,float16,float16,0,0.5570186773935953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,8,2,64,0,1,fp8,fp8,0,0.9155413309733073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,8,4,64,128,1,float16,fp8,0,0.551962653795878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,8,4,64,0,1,float16,float16,0,0.9921920299530029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,8,4,64,128,1,fp8,fp8,0,0.7338240146636963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,8,4,64,0,1,float16,fp8,0,0.9881866772969564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,8,64,128,1,float16,float16,0,0.3046506643295288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,8,4,64,0,1,fp8,fp8,0,0.940229336420695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,8,64,0,1,float16,float16,0,0.5236853361129761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,8,64,128,1,float16,fp8,0,0.302346666653951
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,8,64,128,1,fp8,fp8,0,0.401589314142863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,8,64,0,1,float16,fp8,0,0.5261280139287313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,8,64,0,1,fp8,fp8,0,0.4889333248138428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,1,64,128,1,float16,float16,0,0.2757226626078288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,1,64,0,1,float16,float16,0,0.49979201952616376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,1,64,128,1,float16,fp8,0,0.2714879910151164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,1,64,128,1,fp8,fp8,0,0.36766401926676434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,1,64,0,1,float16,fp8,0,0.4941920042037964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,1,64,0,1,fp8,fp8,0,0.45147732893625897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,2,64,128,1,float16,float16,0,0.28193066517512005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,2,64,0,1,fp8,fp8,0,0.4596266746520996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,2,64,0,1,float16,float16,0,0.5042453209559122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,2,64,128,1,float16,fp8,0,0.27772267659505206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,2,64,128,1,fp8,fp8,0,0.3760799964269002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,2,64,0,1,float16,fp8,0,0.5013013283411661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,4,64,128,1,float16,float16,0,0.2856266697247823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,4,64,0,1,float16,float16,0,0.5075573523839315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,4,64,128,1,float16,fp8,0,0.2858026623725891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,4,64,128,1,fp8,fp8,0,0.38439468542734784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,4,64,0,1,float16,fp8,0,0.5048640171686808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,4,64,0,1,fp8,fp8,0,0.47067201137542725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,8,64,128,1,float16,float16,0,0.16318399707476297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,8,64,0,1,float16,float16,0,0.26555200417836505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,8,64,128,1,float16,fp8,0,0.16587733229001364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,8,64,128,1,fp8,fp8,0,0.22076799472173056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,8,64,0,1,float16,fp8,0,0.267685333887736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,1,64,0,1,fp8,fp8,0,0.23640533288319907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,8,64,0,1,fp8,fp8,0,0.25464532772699994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,1,64,128,1,float16,float16,0,0.14936000108718872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,1,64,0,1,float16,float16,0,0.2510346571604411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,1,64,128,1,float16,fp8,0,0.1478506624698639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,1,64,128,1,fp8,fp8,0,0.20275733868281046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,1,64,0,1,float16,fp8,0,0.2477653423945109
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,2,64,128,1,float16,float16,0,0.15252799789110819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,2,64,0,1,float16,float16,0,0.2510453263918559
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,2,64,128,1,float16,fp8,0,0.15096533298492432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,2,64,128,1,fp8,fp8,0,0.20830400784810385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,2,64,0,1,float16,fp8,0,0.2512586712837219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,2,64,0,1,fp8,fp8,0,0.24129066864649454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,4,64,128,1,float16,float16,0,0.1567359964052836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,4,64,0,1,float16,float16,0,0.2555946707725525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,4,64,128,1,float16,fp8,0,0.15636266271273294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,4,64,128,1,fp8,fp8,0,0.2106293241182963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,4,64,0,1,float16,fp8,0,0.25649599234263104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,4,64,0,1,fp8,fp8,0,0.24662399291992188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,8,64,128,1,float16,float16,0,0.09496532877286275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,8,64,0,1,float16,float16,0,0.14316800236701965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,8,64,128,1,float16,fp8,0,0.09551999966303508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,8,64,128,1,fp8,fp8,0,0.13249066472053528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,8,64,0,1,float16,fp8,0,0.14402666687965393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,8,64,0,1,fp8,fp8,0,0.139957328637441
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,1,64,128,1,float16,float16,0,0.0864586631457011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,1,64,0,1,float16,float16,0,0.1355839967727661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,1,64,128,1,float16,fp8,0,0.08585600058237712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,1,64,128,1,fp8,fp8,0,0.11065600315729777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,1,64,0,1,float16,fp8,0,0.13489066561063132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,1,64,0,1,fp8,fp8,0,0.12868799765904745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,2,64,128,1,float16,float16,0,0.08916800220807393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,2,64,0,1,float16,float16,0,0.13496533036231995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,2,64,128,1,float16,fp8,0,0.08745066324869792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,2,64,128,1,fp8,fp8,0,0.12134400010108948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,2,64,0,1,float16,fp8,0,0.13684800267219543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,2,64,0,1,fp8,fp8,0,0.13198399543762207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,4,64,128,1,float16,float16,0,0.08988266189893086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,4,64,0,1,float16,float16,0,0.13849066694577536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,4,64,128,1,float16,fp8,0,0.09171199798583984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,4,64,128,1,fp8,fp8,0,0.12441600362459819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,4,64,0,1,float16,fp8,0,0.13935466607411703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,4,64,0,1,fp8,fp8,0,0.1349493364493052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,8,64,128,1,float16,float16,0,0.05717866619427999
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,8,64,0,1,float16,float16,0,0.08333866794904073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,8,64,128,1,float16,fp8,0,0.056688000758488975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,8,64,128,1,fp8,fp8,0,0.07923200229803722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,8,64,0,1,float16,fp8,0,0.08332799871762593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,8,64,0,1,fp8,fp8,0,0.08025066554546356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,1,64,128,1,float16,float16,0,0.05509866774082184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,1,64,0,1,float16,float16,0,0.08141866823037465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,1,64,128,1,float16,fp8,0,0.055455997586250305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,1,64,128,1,fp8,fp8,0,0.07256533205509186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,1,64,0,1,float16,fp8,0,0.08102400104204814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,1,64,0,1,fp8,fp8,0,0.07717333237330119
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,2,64,0,1,float16,fp8,0,0.08085333307584126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,2,64,128,1,float16,float16,0,0.05495466788609823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,2,64,0,1,float16,float16,0,0.08133866886297862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,2,64,128,1,float16,fp8,0,0.055786664287249245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,2,64,128,1,fp8,fp8,0,0.07414933542410533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,2,64,0,1,fp8,fp8,0,0.07743999858697255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,4,64,128,1,float16,float16,0,0.05552533268928528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,4,64,0,1,fp8,fp8,0,0.0784746656815211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,4,64,0,1,float16,float16,0,0.08085333307584126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,4,64,128,1,float16,fp8,0,0.055434669057528176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,8,64,128,1,float16,fp8,0,0.044778664906819664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,4,64,128,1,fp8,fp8,0,0.075162669022878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,4,64,0,1,float16,fp8,0,0.0823413332303365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,8,64,128,1,float16,float16,0,0.04853333532810211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,8,64,0,1,float16,float16,0,0.05830933153629303
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,8,64,128,1,fp8,fp8,0,0.05031999945640564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,8,64,0,1,float16,fp8,0,0.05816533168156942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,8,64,0,1,fp8,fp8,0,0.05658666789531708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,1,64,128,1,float16,float16,0,0.04807466765244802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,1,64,0,1,float16,float16,0,0.05805333455403646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,1,64,128,1,float16,fp8,0,0.0452159990866979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,1,64,128,1,fp8,fp8,0,0.05061866839726766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,1,64,0,1,float16,fp8,0,0.057487999399503074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,1,64,0,1,fp8,fp8,0,0.056464001536369324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,2,64,128,1,float16,float16,0,0.04479999840259552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,2,64,0,1,float16,float16,0,0.058005332946777344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,2,64,128,1,float16,fp8,0,0.04435733457406362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,4,64,128,1,float16,fp8,0,0.0447573314110438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,2,64,128,1,fp8,fp8,0,0.050885334610939026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,2,64,0,1,float16,fp8,0,0.05788800120353699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,4,64,128,1,float16,float16,0,0.04644800225893656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,2,64,0,1,fp8,fp8,0,0.05670933425426483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,4,64,0,1,float16,float16,0,0.05771199862162272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,4,64,128,1,fp8,fp8,0,0.05090666810671488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,4,64,0,1,float16,fp8,0,0.057818666100502014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,8,1,64,0,1,float16,float16,0,1.0130346616109211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,4,64,0,1,fp8,fp8,0,0.05706666906674703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,8,1,64,128,1,float16,float16,0,0.6960000197092692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,8,1,64,128,1,float16,fp8,0,0.6872959931691488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,8,1,64,128,1,fp8,fp8,0,0.9089972972869873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,8,1,64,0,1,float16,fp8,0,0.9960266749064127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,8,1,64,0,1,fp8,fp8,0,0.9406399726867676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,8,2,64,128,1,float16,float16,0,0.7099893093109131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,8,2,64,0,1,float16,float16,0,1.0241119861602783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,8,2,64,128,1,float16,fp8,0,0.7006133397420248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,8,2,64,128,1,fp8,fp8,0,0.9225653012593588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,8,2,64,0,1,float16,fp8,0,1.0158507029215496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,8,4,64,128,1,float16,float16,0,0.7248693307240804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,8,2,64,0,1,fp8,fp8,0,0.9622080326080322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,8,4,64,0,1,float16,float16,0,1.0346613725026448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,8,4,64,128,1,float16,fp8,0,0.7190240224202474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,8,4,64,128,1,fp8,fp8,0,0.9503520329793295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,8,64,128,1,float16,float16,0,0.3930399815241496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,8,4,64,0,1,float16,fp8,0,1.0308852990468342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,8,4,64,0,1,fp8,fp8,0,0.9813973108927408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,8,64,0,1,float16,float16,0,0.5510186751683553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,8,64,128,1,float16,fp8,0,0.39583468437194824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,1,64,0,1,float16,float16,0,0.5225493510564169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,8,64,128,1,fp8,fp8,0,0.5236320098241171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,8,64,0,1,float16,fp8,0,0.5559200048446655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,8,64,0,1,fp8,fp8,0,0.5373599926630656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,1,64,128,1,float16,float16,0,0.36103467146555585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,1,64,128,1,float16,fp8,0,0.3528960148493449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,1,64,128,1,fp8,fp8,0,0.4697120189666748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,1,64,0,1,float16,fp8,0,0.5156106551488241
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,1,64,0,1,fp8,fp8,0,0.48685868581136066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,2,64,128,1,float16,float16,0,0.3646453221638997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,2,64,0,1,float16,float16,0,0.5234080155690511
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,2,64,128,1,float16,fp8,0,0.36186667283376056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,4,64,128,1,float16,fp8,0,0.3696533441543579
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,2,64,128,1,fp8,fp8,0,0.48231999079386395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,2,64,0,1,float16,fp8,0,0.5219519933064779
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,2,64,0,1,fp8,fp8,0,0.4930453300476074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,4,64,128,1,float16,float16,0,0.3721546729405721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,4,64,0,1,float16,float16,0,0.5317173401514689
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,4,64,128,1,fp8,fp8,0,0.4931573470433553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,8,64,128,1,fp8,fp8,0,0.27831466992696124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,4,64,0,1,float16,fp8,0,0.5301119883855184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,4,64,0,1,fp8,fp8,0,0.5045973459879557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,8,64,128,1,float16,float16,0,0.20725866158803305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,8,64,0,1,float16,float16,0,0.28938132524490356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,8,64,128,1,float16,fp8,0,0.20901866753896078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,8,64,0,1,float16,fp8,0,0.2903359929720561
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,8,64,0,1,fp8,fp8,0,0.2712799906730652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,1,64,128,1,float16,float16,0,0.18980799118677774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,1,64,0,1,float16,float16,0,0.27532800038655597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,1,64,128,1,float16,fp8,0,0.18771199385325113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,1,64,128,1,fp8,fp8,0,0.2558666666348775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,1,64,0,1,float16,fp8,0,0.2720959981282552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,1,64,0,1,fp8,fp8,0,0.24889065821965536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,2,64,128,1,float16,float16,0,0.1941386659940084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,2,64,0,1,float16,float16,0,0.2775040070215861
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,2,64,128,1,float16,fp8,0,0.19219734271367392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,2,64,128,1,fp8,fp8,0,0.2619946599006653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,2,64,0,1,float16,fp8,0,0.274725337823232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,2,64,0,1,fp8,fp8,0,0.254095991452535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,4,64,128,1,float16,float16,0,0.19663467009862265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,4,64,0,1,float16,float16,0,0.28149332602818805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,4,64,128,1,float16,fp8,0,0.19639466206232706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,4,64,128,1,fp8,fp8,0,0.26705066363016766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,4,64,0,1,float16,fp8,0,0.28281599283218384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,8,64,128,1,fp8,fp8,0,0.15979733069737753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,4,64,0,1,fp8,fp8,0,0.25893332560857135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,8,64,128,1,float16,float16,0,0.11565867066383362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,8,64,0,1,float16,float16,0,0.14948800206184387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,8,64,128,1,float16,fp8,0,0.11781866351763408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,8,64,0,1,float16,fp8,0,0.15176533659299216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,8,64,0,1,fp8,fp8,0,0.14827199776967367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,1,64,128,1,float16,float16,0,0.10620266199111938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,1,64,0,1,float16,float16,0,0.13967999815940857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,1,64,128,1,float16,fp8,0,0.10393599669138591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,1,64,128,1,fp8,fp8,0,0.14358933766682944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,1,64,0,1,float16,fp8,0,0.13853333393732706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,1,64,0,1,fp8,fp8,0,0.13429333766301474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,2,64,128,1,float16,float16,0,0.10733333230018616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,2,64,0,1,float16,float16,0,0.1420906682809194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,4,64,0,1,float16,float16,0,0.14417066176732382
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,2,64,128,1,float16,fp8,0,0.10706133643786113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,2,64,128,1,fp8,fp8,0,0.1467519998550415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,2,64,0,1,float16,fp8,0,0.13874666889508566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,2,64,0,1,fp8,fp8,0,0.13638400038083395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,4,64,128,1,float16,float16,0,0.11023466785748799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,4,64,128,1,float16,fp8,0,0.11105599999427795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,4,64,128,1,fp8,fp8,0,0.15266666809717813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,4,64,0,1,float16,fp8,0,0.1439786652723948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,4,64,0,1,fp8,fp8,0,0.1409333348274231
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,8,64,128,1,float16,float16,0,0.06850133339564006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,8,64,0,1,float16,float16,0,0.08348799745241801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,8,64,128,1,float16,fp8,0,0.07028799752394359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,8,64,128,1,fp8,fp8,0,0.09851200381914775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,8,64,0,1,float16,fp8,0,0.08538666367530823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,8,64,0,1,fp8,fp8,0,0.08526933193206787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,1,64,128,1,float16,float16,0,0.0647573322057724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,1,64,0,1,float16,float16,0,0.07970666885375977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,1,64,128,1,float16,fp8,0,0.06337599953015645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,1,64,128,1,fp8,fp8,0,0.08054399987061818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,1,64,0,1,float16,fp8,0,0.07881066699822743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,1,64,0,1,fp8,fp8,0,0.07680533329645793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,2,64,128,1,float16,float16,0,0.0651093324025472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,2,64,0,1,float16,float16,0,0.08003200093905131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,2,64,128,1,float16,fp8,0,0.06506133576234181
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,2,64,128,1,fp8,fp8,0,0.08206933240095775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,2,64,0,1,float16,fp8,0,0.07923733194669087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,2,64,0,1,fp8,fp8,0,0.0759626676638921
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,4,64,128,1,float16,float16,0,0.06634133557478587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,4,64,0,1,float16,float16,0,0.08072533210118611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,4,64,128,1,float16,fp8,0,0.06756266454855601
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,4,64,128,1,fp8,fp8,0,0.08538132905960083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,8,64,128,1,fp8,fp8,0,0.05750399827957153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,4,64,0,1,float16,fp8,0,0.08152000109354655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,4,64,0,1,fp8,fp8,0,0.07949333389600118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,8,64,128,1,float16,float16,0,0.044437333941459656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,8,64,0,1,float16,float16,0,0.050794666012128196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,8,64,128,1,float16,fp8,0,0.045941332976023354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,8,64,0,1,float16,fp8,0,0.05189866820971171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,8,64,0,1,fp8,fp8,0,0.049733335773150124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,1,64,128,1,float16,float16,0,0.04409599800904592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,1,64,0,1,float16,float16,0,0.05068266888459524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,1,64,128,1,float16,fp8,0,0.04469866553942362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,1,64,128,1,fp8,fp8,0,0.05483733117580414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,1,64,0,1,float16,fp8,0,0.0497920016447703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,1,64,0,1,fp8,fp8,0,0.048298666874567665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,2,64,128,1,float16,float16,0,0.04455466568470001
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,2,64,0,1,float16,float16,0,0.05049600203831991
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,2,64,128,1,float16,fp8,0,0.04452799757321676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,2,64,128,1,fp8,fp8,0,0.055946667989095054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,2,64,0,1,float16,fp8,0,0.05072000126043955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,2,64,0,1,fp8,fp8,0,0.04806933303674062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,4,64,128,1,float16,float16,0,0.04465066889921824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,4,64,0,1,float16,float16,0,0.05053333441416422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,4,64,128,1,float16,fp8,0,0.045194665590922035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,4,64,128,1,fp8,fp8,0,0.05650666852792104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,4,64,0,1,float16,fp8,0,0.051455999414126076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,4,64,0,1,fp8,fp8,0,0.050186668833096824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,8,64,128,1,float16,float16,0,0.03483733286460241
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,8,64,0,1,float16,float16,0,0.04423466821511587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,8,64,128,1,float16,fp8,0,0.033957332372665405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,8,64,128,1,fp8,fp8,0,0.0407679999868075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,8,64,0,1,float16,fp8,0,0.044351999958356224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,8,64,0,1,fp8,fp8,0,0.04351999859015147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,1,64,128,1,float16,float16,0,0.0341333324710528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,1,64,0,1,float16,float16,0,0.044490665197372437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,1,64,128,1,float16,fp8,0,0.03468266626199087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,1,64,128,1,fp8,fp8,0,0.04060266663630804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,1,64,0,1,float16,fp8,0,0.044069334864616394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,1,64,0,1,fp8,fp8,0,0.04307200014591217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,2,64,128,1,float16,float16,0,0.03398400048414866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,2,64,0,1,float16,float16,0,0.044922664761543274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,2,64,128,1,float16,fp8,0,0.034517332911491394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,2,64,128,1,fp8,fp8,0,0.04053333401679993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,2,64,0,1,float16,fp8,0,0.044826666514078774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,2,64,0,1,fp8,fp8,0,0.043322667479515076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,4,64,0,1,fp8,fp8,0,0.04397333165009817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,4,64,128,1,float16,float16,0,0.03410666684309641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,4,64,0,1,float16,float16,0,0.044218664367993675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,4,64,128,1,float16,fp8,0,0.034458667039871216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,4,64,128,1,fp8,fp8,0,0.041333332657814026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,4,64,0,1,float16,fp8,0,0.04496000210444132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,8,1,64,128,1,float16,float16,0,0.5265920162200928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,8,1,64,0,1,float16,float16,0,0.6674346923828125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,8,1,64,128,1,float16,fp8,0,0.5133119821548462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,8,1,64,128,1,fp8,fp8,0,0.6794293721516927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,8,1,64,0,1,float16,fp8,0,0.6603413422902426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,8,1,64,0,1,fp8,fp8,0,0.619973341623942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,8,2,64,128,1,float16,float16,0,0.5335733493169149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,8,2,64,0,1,float16,float16,0,0.6779680252075195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,8,2,64,128,1,float16,fp8,0,0.5234933296839396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,8,2,64,128,1,fp8,fp8,0,0.69705597559611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,8,2,64,0,1,float16,fp8,0,0.6672586599985758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,8,2,64,0,1,fp8,fp8,0,0.6404053370157877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,8,4,64,128,1,float16,float16,0,0.543018658955892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,8,4,64,0,1,float16,float16,0,0.6882879734039307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,8,64,128,1,float16,fp8,0,0.30221333106358844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,8,4,64,128,1,float16,fp8,0,0.5440426667531332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,8,4,64,128,1,fp8,fp8,0,0.722330649693807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,8,4,64,0,1,float16,fp8,0,0.6840799649556478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,8,64,128,1,float16,float16,0,0.2991039951642354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,8,4,64,0,1,fp8,fp8,0,0.6609599987665812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,8,64,0,1,float16,float16,0,0.37361598014831543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,8,64,128,1,fp8,fp8,0,0.40214399496714276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,8,64,0,1,float16,fp8,0,0.37466665108998615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,8,64,0,1,fp8,fp8,0,0.3583306471506755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,1,64,128,1,float16,float16,0,0.2729439934094747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,1,64,0,1,float16,float16,0,0.3481066624323527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,1,64,128,1,float16,fp8,0,0.26943467060724896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,1,64,128,1,fp8,fp8,0,0.36027201016743976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,1,64,0,1,float16,fp8,0,0.343664010365804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,1,64,0,1,fp8,fp8,0,0.31886933247248334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,2,64,128,1,float16,float16,0,0.27793065706888836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,2,64,0,1,float16,float16,0,0.3527146577835083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,2,64,128,1,float16,fp8,0,0.27617067098617554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,2,64,128,1,fp8,fp8,0,0.3668160041173299
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,2,64,0,1,float16,fp8,0,0.34902934233347577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,2,64,0,1,fp8,fp8,0,0.32445333401362103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,4,64,128,1,float16,float16,0,0.2848586638768514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,4,64,0,1,float16,float16,0,0.3571893374125163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,4,64,128,1,float16,fp8,0,0.28355733553568524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,4,64,128,1,fp8,fp8,0,0.380298654238383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,4,64,0,1,float16,fp8,0,0.35626665751139325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,4,64,0,1,fp8,fp8,0,0.3350293238957723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,8,64,128,1,float16,float16,0,0.16056533654530844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,8,64,0,1,float16,float16,0,0.19508800903956094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,8,64,128,1,float16,fp8,0,0.16420800487200418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,8,64,128,1,fp8,fp8,0,0.21954667568206787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,8,64,0,1,float16,fp8,0,0.19763733943303427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,8,64,0,1,fp8,fp8,0,0.18793066342671713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,1,64,128,1,float16,float16,0,0.14850133657455444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,1,64,0,1,float16,float16,0,0.17539199193318686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,1,64,128,1,float16,fp8,0,0.14468266566594443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,1,64,128,1,fp8,fp8,0,0.19954667488733926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,1,64,0,1,float16,fp8,0,0.1748853325843811
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,1,64,0,1,fp8,fp8,0,0.1705440084139506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,2,64,128,1,float16,float16,0,0.14939199884732565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,2,64,0,1,float16,float16,0,0.1788853406906128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,2,64,128,1,float16,fp8,0,0.14891200264294943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,2,64,128,1,fp8,fp8,0,0.2018346587816874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,2,64,0,1,float16,fp8,0,0.17799466848373413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,2,64,0,1,fp8,fp8,0,0.17255467176437378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,4,64,128,1,float16,float16,0,0.15429332852363586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,4,64,0,1,float16,float16,0,0.18245333433151245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,4,64,128,1,float16,fp8,0,0.15453333655993143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,4,64,128,1,fp8,fp8,0,0.20868800083796182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,4,64,0,1,float16,fp8,0,0.18292266130447388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,4,64,0,1,fp8,fp8,0,0.17841066916783652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,8,64,128,1,float16,float16,0,0.0918933351834615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,8,64,0,1,float16,float16,0,0.10405332843462627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,8,64,128,1,float16,fp8,0,0.09321600198745728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,8,64,128,1,fp8,fp8,0,0.12854400277137756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,8,64,0,1,float16,fp8,0,0.10593066612879436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,8,64,0,1,fp8,fp8,0,0.10475200414657593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,1,64,128,1,float16,float16,0,0.08187733093897502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,1,64,0,1,float16,float16,0,0.09497599800427754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,1,64,128,1,float16,fp8,0,0.08090133468310039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,1,64,128,1,fp8,fp8,0,0.10790933171908061
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,1,64,0,1,float16,fp8,0,0.09502399961153667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,2,64,0,1,fp8,fp8,0,0.09430932998657227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,1,64,0,1,fp8,fp8,0,0.09274133046468098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,2,64,128,1,float16,float16,0,0.08416533470153809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,2,64,0,1,float16,float16,0,0.09599467118581136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,2,64,128,1,float16,fp8,0,0.08365333080291748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,2,64,128,1,fp8,fp8,0,0.11171199878056844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,2,64,0,1,float16,fp8,0,0.0957973301410675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,4,64,128,1,float16,float16,0,0.08763200044631958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,4,64,0,1,float16,float16,0,0.09927999973297119
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,4,64,128,1,float16,fp8,0,0.08752533793449402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,4,64,128,1,fp8,fp8,0,0.1221440037091573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,4,64,0,1,float16,fp8,0,0.09883200128873189
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,8,64,0,1,float16,fp8,0,0.061306665341059365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,4,64,0,1,fp8,fp8,0,0.09986666838328044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,8,64,128,1,float16,float16,0,0.05296533306439718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,8,64,0,1,float16,float16,0,0.05972266693909963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,8,64,128,1,float16,fp8,0,0.0547626664241155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,8,64,128,1,fp8,fp8,0,0.07171733180681865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,8,64,0,1,fp8,fp8,0,0.060965334375699363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,1,64,128,1,float16,float16,0,0.05114666620890299
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,1,64,0,1,float16,float16,0,0.05821866790453593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,1,64,128,1,float16,fp8,0,0.05121066669623057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,1,64,128,1,fp8,fp8,0,0.06989866495132446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,1,64,0,1,float16,fp8,0,0.05827199916044871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,1,64,0,1,fp8,fp8,0,0.05602133274078369
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,2,64,128,1,float16,float16,0,0.05233600238958994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,2,64,0,1,float16,float16,0,0.058362667759259544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,2,64,128,1,float16,fp8,0,0.052341332038243614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,2,64,128,1,fp8,fp8,0,0.06878933310508728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,2,64,0,1,float16,fp8,0,0.05823466678460439
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,2,64,0,1,fp8,fp8,0,0.05723733206590017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,4,64,128,1,float16,float16,0,0.052144000927607216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,4,64,0,1,float16,float16,0,0.058917333682378135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,4,64,128,1,float16,fp8,0,0.05260799825191498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,4,64,128,1,fp8,fp8,0,0.06737066805362701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,8,64,128,1,float16,fp8,0,0.037946666280428566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,4,64,0,1,float16,fp8,0,0.0591893345117569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,4,64,0,1,fp8,fp8,0,0.05829333265622457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,8,64,128,1,float16,float16,0,0.03773866593837738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,8,64,0,1,float16,float16,0,0.040005333721637726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,8,64,128,1,fp8,fp8,0,0.04468800127506256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,8,64,0,1,float16,fp8,0,0.04091733445723852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,8,64,0,1,fp8,fp8,0,0.03827200084924698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,1,64,128,1,float16,float16,0,0.038405333956082664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,1,64,0,1,float16,float16,0,0.0397173340121905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,1,64,128,1,float16,fp8,0,0.03711466739575068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,1,64,128,1,fp8,fp8,0,0.04389866689840952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,1,64,0,1,float16,fp8,0,0.03900266687075297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,1,64,0,1,fp8,fp8,0,0.036933332681655884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,2,64,128,1,float16,float16,0,0.03755199909210205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,2,64,0,1,float16,float16,0,0.03944533318281174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,2,64,128,1,float16,fp8,0,0.039135999977588654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,2,64,128,1,fp8,fp8,0,0.044293334086736046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,2,64,0,1,float16,fp8,0,0.03986666599909464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,2,64,0,1,fp8,fp8,0,0.03794133414824804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,4,64,128,1,float16,float16,0,0.036501333117485046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,4,64,0,1,float16,float16,0,0.03928533444801966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,4,64,128,1,float16,fp8,0,0.03759466608365377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,4,64,128,1,fp8,fp8,0,0.04377066592375437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,4,64,0,1,float16,fp8,0,0.03973866750796636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,4,64,0,1,fp8,fp8,0,0.038191998998324074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,8,64,128,1,float16,float16,0,0.03141866624355316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,8,64,0,1,float16,float16,0,0.03792533278465271
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,8,64,128,1,float16,fp8,0,0.03084266682465871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,8,64,128,1,fp8,fp8,0,0.03746666759252548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,8,64,0,1,float16,fp8,0,0.03792533278465271
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,8,64,0,1,fp8,fp8,0,0.03666666646798452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,1,64,128,1,float16,float16,0,0.030805334448814392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,1,64,0,1,float16,float16,0,0.03736533224582672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,1,64,128,1,float16,fp8,0,0.031370667119820915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,1,64,128,1,fp8,fp8,0,0.038032000263532005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,1,64,0,1,float16,fp8,0,0.03702933341264725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,1,64,0,1,fp8,fp8,0,0.03585600107908249
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,2,64,128,1,float16,float16,0,0.03127466638882955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,2,64,0,1,float16,float16,0,0.03771200031042099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,2,64,128,1,float16,fp8,0,0.030933332939942677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,2,64,128,1,fp8,fp8,0,0.03803733239571253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,2,64,0,1,float16,fp8,0,0.037589333951473236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,2,64,0,1,fp8,fp8,0,0.036618667344252266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,4,64,128,1,float16,float16,0,0.03057066599527995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,4,64,0,1,float16,float16,0,0.03689600030581156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,4,64,128,1,float16,fp8,0,0.03160533308982849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,4,64,128,1,fp8,fp8,0,0.038106667498747505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,4,64,0,1,float16,fp8,0,0.03697066754102707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,4,64,0,1,fp8,fp8,0,0.03682666768630346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,8,1,64,128,1,float16,float16,0,0.6200053294499716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,8,1,64,0,1,float16,float16,0,0.7052160104115804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,8,1,64,0,1,fp8,fp8,0,0.6501919825871786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,8,1,64,128,1,float16,fp8,0,0.6222506761550903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,8,1,64,128,1,fp8,fp8,0,0.8371840318044027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,8,1,64,0,1,float16,fp8,0,0.6994400024414062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,8,2,64,128,1,float16,float16,0,0.6219573418299357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,8,2,64,0,1,float16,float16,0,0.703167994817098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,8,2,64,128,1,float16,fp8,0,0.6215733289718628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,8,2,64,0,1,float16,fp8,0,0.7038026650746664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,8,2,64,128,1,fp8,fp8,0,0.849232037862142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,8,2,64,0,1,fp8,fp8,0,0.6641120115915934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,8,4,64,128,1,float16,float16,0,0.6379040082295736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,8,64,128,1,float16,float16,0,0.35233600934346515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,8,4,64,0,1,float16,float16,0,0.719210704167684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,8,4,64,128,1,float16,fp8,0,0.6371573209762573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,8,4,64,128,1,fp8,fp8,0,0.8821120262145996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,8,4,64,0,1,float16,fp8,0,0.7159039974212646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,8,64,0,1,float16,fp8,0,0.3904373248418172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,8,4,64,0,1,fp8,fp8,0,0.6903253396352133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,8,64,0,1,float16,float16,0,0.39377601941426593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,8,64,128,1,float16,fp8,0,0.34697067737579346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,8,64,128,1,fp8,fp8,0,0.46642132600148517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,8,64,0,1,fp8,fp8,0,0.36505067348480225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,1,64,128,1,float16,float16,0,0.3225066661834717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,1,64,0,1,float16,float16,0,0.36347198486328125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,1,64,128,1,float16,fp8,0,0.32464534044265747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,1,64,128,1,fp8,fp8,0,0.43290666739145917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,1,64,0,1,float16,fp8,0,0.36376531918843585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,1,64,0,1,fp8,fp8,0,0.3392053445180257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,2,64,128,1,float16,float16,0,0.3280053337415059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,2,64,0,1,float16,float16,0,0.36600534121195477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,2,64,128,1,float16,fp8,0,0.3263466755549113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,2,64,128,1,fp8,fp8,0,0.43696534633636475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,2,64,0,1,float16,fp8,0,0.3672746817270915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,2,64,0,1,fp8,fp8,0,0.34275201956431073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,4,64,128,1,float16,float16,0,0.33343998591105145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,4,64,0,1,float16,float16,0,0.3760746717453003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,4,64,128,1,float16,fp8,0,0.3322719931602478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,4,64,128,1,fp8,fp8,0,0.4450399875640869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,4,64,0,1,float16,fp8,0,0.37225067615509033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,4,64,0,1,fp8,fp8,0,0.3520853519439697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,8,64,128,1,float16,float16,0,0.19110933939615884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,8,64,0,1,float16,float16,0,0.2116373380025228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,8,64,128,1,float16,fp8,0,0.18941332896550497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,8,64,128,1,fp8,fp8,0,0.24693334102630615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,8,64,0,1,float16,fp8,0,0.21044800678888956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,8,64,0,1,fp8,fp8,0,0.187391996383667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,1,64,128,1,float16,float16,0,0.17622933785120645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,1,64,0,1,float16,float16,0,0.19611199696858725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,1,64,128,1,float16,fp8,0,0.1738133430480957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,1,64,128,1,fp8,fp8,0,0.23784534136454263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,1,64,0,1,float16,fp8,0,0.19729600350062051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,1,64,0,1,fp8,fp8,0,0.17805866400400797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,2,64,128,1,float16,float16,0,0.1786880095799764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,2,64,0,1,float16,float16,0,0.1989013353983561
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,2,64,128,1,float16,fp8,0,0.17697600523630777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,2,64,128,1,fp8,fp8,0,0.23834667603174844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,2,64,0,1,float16,fp8,0,0.19857066869735718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,2,64,0,1,fp8,fp8,0,0.18080532550811768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,4,64,128,1,float16,float16,0,0.18036266167958578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,4,64,0,1,float16,float16,0,0.20282665888468424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,4,64,128,1,float16,fp8,0,0.18068800369898477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,4,64,128,1,fp8,fp8,0,0.24380266666412354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,4,64,0,1,float16,fp8,0,0.2021119991938273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,4,64,0,1,fp8,fp8,0,0.18255466222763062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,8,64,128,1,float16,float16,0,0.1085599958896637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,8,64,0,1,float16,float16,0,0.11317867040634155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,8,64,128,1,float16,fp8,0,0.10920533537864685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,8,64,128,1,fp8,fp8,0,0.14229866862297058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,8,64,0,1,float16,fp8,0,0.1127306620279948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,8,64,0,1,fp8,fp8,0,0.10427199800809224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,1,64,128,1,float16,float16,0,0.09844799836476643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,1,64,0,1,float16,float16,0,0.10356799761454265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,1,64,128,1,float16,fp8,0,0.09723732868830363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,1,64,128,1,fp8,fp8,0,0.13541866342226663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,1,64,0,1,float16,fp8,0,0.10385066270828247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,1,64,0,1,fp8,fp8,0,0.0976586639881134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,2,64,128,1,float16,float16,0,0.0997226635615031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,2,64,0,1,float16,float16,0,0.10443733135859172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,2,64,128,1,float16,fp8,0,0.10016000270843506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,2,64,128,1,fp8,fp8,0,0.13542399803797403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,2,64,0,1,float16,fp8,0,0.10612799723943074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,2,64,0,1,fp8,fp8,0,0.09973333279291789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,4,64,128,1,float16,float16,0,0.10339732964833577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,4,64,0,1,float16,float16,0,0.10867733756701152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,4,64,128,1,float16,fp8,0,0.10291199882825215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,4,64,128,1,fp8,fp8,0,0.13770133256912231
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,4,64,0,1,float16,fp8,0,0.10899733503659566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,4,64,0,1,fp8,fp8,0,0.10038933157920837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,8,64,128,1,float16,float16,0,0.06379200021425883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,8,64,0,1,float16,float16,0,0.06453866759936015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,8,64,128,1,float16,fp8,0,0.06226666768391927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,8,64,128,1,fp8,fp8,0,0.0860586663087209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,8,64,0,1,float16,fp8,0,0.06377600133419037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,8,64,0,1,fp8,fp8,0,0.06200533111890157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,1,64,128,1,float16,float16,0,0.05871999760468801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,1,64,0,1,float16,float16,0,0.05930666625499725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,1,64,128,1,float16,fp8,0,0.058229332168896995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,1,64,128,1,fp8,fp8,0,0.07657066484292348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,1,64,0,1,float16,fp8,0,0.059263999263445534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,1,64,0,1,fp8,fp8,0,0.05611200133959452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,2,64,128,1,float16,float16,0,0.058890665570894875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,2,64,0,1,float16,float16,0,0.06020799775918325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,2,64,128,1,float16,fp8,0,0.059279998143514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,2,64,128,1,fp8,fp8,0,0.0766133318344752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,2,64,0,1,float16,fp8,0,0.06055466830730438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,2,64,0,1,fp8,fp8,0,0.057130664587020874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,4,64,128,1,float16,float16,0,0.060773332913716636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,4,64,0,1,float16,float16,0,0.06186666587988535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,4,64,128,1,float16,fp8,0,0.060378665725390114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,4,64,128,1,fp8,fp8,0,0.08125866452852885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,4,64,0,1,float16,fp8,0,0.061893333991368614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,4,64,0,1,fp8,fp8,0,0.058693334460258484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,8,64,128,1,float16,float16,0,0.04142399877309799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,8,64,0,1,float16,float16,0,0.0403413325548172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,8,64,128,1,float16,fp8,0,0.040720000863075256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,8,64,128,1,fp8,fp8,0,0.05180799961090088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,8,64,0,1,float16,fp8,0,0.04008000095685323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,8,64,0,1,fp8,fp8,0,0.0367999995748202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,1,64,128,1,float16,float16,0,0.039594667653242745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,1,64,0,1,float16,float16,0,0.038021333515644073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,1,64,128,1,float16,fp8,0,0.03955200066169103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,1,64,128,1,fp8,fp8,0,0.050885334610939026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,1,64,0,1,float16,fp8,0,0.03752533346414566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,1,64,0,1,fp8,fp8,0,0.03558400024970373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,2,64,128,1,float16,float16,0,0.039546666045983635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,2,64,0,1,float16,float16,0,0.03804266701141993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,2,64,128,1,float16,fp8,0,0.039818666875362396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,2,64,128,1,fp8,fp8,0,0.051114668448766075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,2,64,0,1,float16,fp8,0,0.038405333956082664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,2,64,0,1,fp8,fp8,0,0.03605333218971888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,4,64,128,1,float16,float16,0,0.04026666780312856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,4,64,0,1,float16,float16,0,0.039162665605545044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,4,64,128,1,float16,fp8,0,0.04040000090996424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,4,64,128,1,fp8,fp8,0,0.0513973335425059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,8,64,128,1,float16,fp8,0,0.028309332827727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,4,64,0,1,float16,fp8,0,0.0394400010506312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,4,64,0,1,fp8,fp8,0,0.035749333600203194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,8,64,128,1,float16,float16,0,0.027434666951497395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,8,64,0,1,float16,float16,0,0.029461334149042766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,8,64,128,1,fp8,fp8,0,0.034245334565639496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,8,64,0,1,float16,fp8,0,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,8,64,0,1,fp8,fp8,0,0.028650666276613872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,1,64,128,1,float16,float16,0,0.026895999908447266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,1,64,0,1,float16,float16,0,0.028223998844623566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,1,64,128,1,float16,fp8,0,0.02701333413521449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,2,64,128,1,float16,fp8,0,0.026714667677879333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,1,64,128,1,fp8,fp8,0,0.032613334556420646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,1,64,0,1,float16,fp8,0,0.028602667152881622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,1,64,0,1,fp8,fp8,0,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,2,64,128,1,float16,float16,0,0.02651199946800868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,2,64,0,1,float16,float16,0,0.02897600084543228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,2,64,128,1,fp8,fp8,0,0.034559999903043113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,2,64,0,1,float16,fp8,0,0.028789333999156952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,2,64,0,1,fp8,fp8,0,0.0278613343834877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,4,64,128,1,float16,float16,0,0.026863999664783478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,4,64,0,1,float16,float16,0,0.028650666276613872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,4,64,128,1,float16,fp8,0,0.027797333896160126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,4,64,128,1,fp8,fp8,0,0.03390933324893316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,4,64,0,1,float16,fp8,0,0.029861333469549816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,4,64,0,1,fp8,fp8,0,0.028250666956106823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,8,64,128,1,float16,float16,0,0.024693332612514496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,8,64,0,1,float16,float16,0,0.027999999622503918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,8,64,128,1,float16,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,8,64,128,1,fp8,fp8,0,0.031146667897701263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,8,64,0,1,float16,fp8,0,0.028101332485675812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,8,64,0,1,fp8,fp8,0,0.02678400029738744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,1,64,128,1,float16,float16,0,0.02479466547568639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,1,64,0,1,float16,float16,0,0.026965332527955372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,1,64,128,1,float16,fp8,0,0.024703999360402424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,1,64,128,1,fp8,fp8,0,0.03151999910672506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,1,64,0,1,float16,fp8,0,0.027285332481066387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,1,64,0,1,fp8,fp8,0,0.025888000925381977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,2,64,128,1,float16,float16,0,0.024586667617162068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,2,64,0,1,float16,float16,0,0.02735466758410136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,2,64,128,1,float16,fp8,0,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,2,64,128,1,fp8,fp8,0,0.0314026673634847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,2,64,0,1,float16,fp8,0,0.027263998985290527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,2,64,0,1,fp8,fp8,0,0.02619733413060506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,4,64,128,1,float16,float16,0,0.02456533412138621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,4,64,0,1,float16,float16,0,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,4,64,128,1,float16,fp8,0,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,4,64,128,1,fp8,fp8,0,0.031514666974544525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,4,64,0,1,float16,fp8,0,0.028213332096735638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,4,64,0,1,fp8,fp8,0,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,8,1,64,128,1,float16,float16,0,0.5912533203760783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,8,1,64,0,1,float16,float16,0,0.5758399963378906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,8,2,64,0,1,float16,float16,0,0.5837013324101766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,8,1,64,128,1,float16,fp8,0,0.589845339457194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,8,1,64,128,1,fp8,fp8,0,0.7884266376495361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,8,1,64,0,1,float16,fp8,0,0.5759413242340088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,8,1,64,0,1,fp8,fp8,0,0.5311359961827596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,8,2,64,128,1,float16,float16,0,0.5994079907735189
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,8,2,64,128,1,float16,fp8,0,0.5920106569925944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,8,2,64,128,1,fp8,fp8,0,0.7944107055664062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,8,2,64,0,1,float16,fp8,0,0.583413322766622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,8,2,64,0,1,fp8,fp8,0,0.539898673693339
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,8,4,64,128,1,float16,float16,0,0.6097546815872192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,8,4,64,0,1,float16,float16,0,0.5935253302256266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,8,4,64,128,1,float16,fp8,0,0.6086613337198893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,8,4,64,128,1,fp8,fp8,0,0.8291359742482504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,8,4,64,0,1,float16,fp8,0,0.5947893460591634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,8,4,64,0,1,fp8,fp8,0,0.5669920047124227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,8,64,128,1,float16,float16,0,0.3371093273162842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,8,64,0,1,float16,float16,0,0.3269546627998352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,8,64,128,1,float16,fp8,0,0.33372799555460614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,8,64,128,1,fp8,fp8,0,0.4350399971008301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,8,64,0,1,float16,fp8,0,0.3246240019798279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,8,64,0,1,fp8,fp8,0,0.3021226723988851
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,1,64,128,1,float16,float16,0,0.30746134122212726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,1,64,0,1,float16,float16,0,0.29872000217437744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,1,64,128,1,float16,fp8,0,0.3094346721967061
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,1,64,128,1,fp8,fp8,0,0.4078933397928874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,1,64,0,1,float16,fp8,0,0.30105600754419964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,1,64,0,1,fp8,fp8,0,0.27405865987141925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,2,64,128,1,float16,float16,0,0.31256532669067383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,2,64,0,1,float16,float16,0,0.30372265974680585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,2,64,128,1,float16,fp8,0,0.3104693293571472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,2,64,0,1,float16,fp8,0,0.30171199639638263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,2,64,128,1,fp8,fp8,0,0.41384534041086835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,4,64,0,1,float16,fp8,0,0.3079520066579183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,2,64,0,1,fp8,fp8,0,0.2794453303019206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,4,64,128,1,float16,float16,0,0.3178666631380717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,4,64,0,1,float16,float16,0,0.3091040054957072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,4,64,128,1,float16,fp8,0,0.3182719945907593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,4,64,128,1,fp8,fp8,0,0.4232960144678752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,4,64,0,1,fp8,fp8,0,0.28674133618672687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,8,64,128,1,float16,float16,0,0.1846133271853129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,8,64,0,1,float16,float16,0,0.17683732509613037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,8,64,128,1,float16,fp8,0,0.18020800749460855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,8,64,128,1,fp8,fp8,0,0.23412267367045084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,8,64,0,1,float16,fp8,0,0.17633599042892456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,8,64,0,1,fp8,fp8,0,0.15877866744995117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,1,64,128,1,float16,float16,0,0.16852267583211264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,1,64,0,1,float16,float16,0,0.16210132837295532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,1,64,128,1,float16,fp8,0,0.1679733395576477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,1,64,128,1,fp8,fp8,0,0.22257065773010254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,1,64,0,1,float16,fp8,0,0.16271467010180155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,1,64,0,1,fp8,fp8,0,0.1483626663684845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,2,64,128,1,float16,float16,0,0.1707520087560018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,2,64,0,1,float16,float16,0,0.16485333442687988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,4,64,128,1,float16,float16,0,0.17448532581329346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,2,64,128,1,float16,fp8,0,0.16995733976364136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,2,64,128,1,fp8,fp8,0,0.22586133082707724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,2,64,0,1,float16,fp8,0,0.16309866309165955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,2,64,0,1,fp8,fp8,0,0.15050666530927023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,4,64,0,1,float16,float16,0,0.16901334126790366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,4,64,128,1,float16,fp8,0,0.17452265818913779
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,4,64,0,1,float16,fp8,0,0.1676959991455078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,4,64,128,1,fp8,fp8,0,0.22847465674082437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,4,64,0,1,fp8,fp8,0,0.15067199865976968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,8,64,128,1,float16,float16,0,0.10418132940928142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,8,64,0,1,float16,float16,0,0.09830400347709656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,8,64,128,1,float16,fp8,0,0.10430399576822917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,8,64,128,1,fp8,fp8,0,0.13387200236320496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,8,64,0,1,float16,fp8,0,0.0972106655438741
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,8,64,0,1,fp8,fp8,0,0.09014933307965596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,1,64,128,1,float16,float16,0,0.09353599945704143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,1,64,0,1,float16,float16,0,0.08963200449943542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,1,64,128,1,float16,fp8,0,0.09431999921798706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,1,64,128,1,fp8,fp8,0,0.12682666381200156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,1,64,0,1,float16,fp8,0,0.08965866764386494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,1,64,0,1,fp8,fp8,0,0.0828906645377477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,2,64,128,1,float16,float16,0,0.09573333462079366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,2,64,0,1,float16,float16,0,0.09075733025868733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,2,64,128,1,float16,fp8,0,0.09573866923650105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,2,64,128,1,fp8,fp8,0,0.12621866663297018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,2,64,0,1,float16,fp8,0,0.09051199754079182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,2,64,0,1,fp8,fp8,0,0.08340799808502197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,4,64,128,1,float16,float16,0,0.09894933303197224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,4,64,0,1,float16,float16,0,0.0939466655254364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,4,64,128,1,float16,fp8,0,0.1002346674601237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,4,64,128,1,fp8,fp8,0,0.12783466776212057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,4,64,0,1,float16,fp8,0,0.0928053359190623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,8,64,0,1,float16,fp8,0,0.056032001972198486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,4,64,0,1,fp8,fp8,0,0.08634133140246074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,8,64,128,1,float16,float16,0,0.0613919993241628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,8,64,0,1,float16,float16,0,0.05696000158786774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,8,64,128,1,float16,fp8,0,0.060826669136683144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,8,64,128,1,fp8,fp8,0,0.08481599887212117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,8,64,0,1,fp8,fp8,0,0.05282666782538096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,1,64,128,1,float16,float16,0,0.05609600245952606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,2,64,0,1,float16,float16,0,0.05192000170548757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,1,64,0,1,float16,float16,0,0.05223466455936432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,1,64,128,1,float16,fp8,0,0.05633600056171417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,1,64,128,1,fp8,fp8,0,0.07451733450094859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,1,64,0,1,float16,fp8,0,0.05165866514046987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,1,64,0,1,fp8,fp8,0,0.04763199885686239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,4,64,128,1,float16,float16,0,0.057914664347966514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,2,64,128,1,float16,float16,0,0.05702400207519531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,2,64,128,1,float16,fp8,0,0.05712000032265981
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,2,64,128,1,fp8,fp8,0,0.0727893312772115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,2,64,0,1,float16,fp8,0,0.0531626691420873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,2,64,0,1,fp8,fp8,0,0.048170665899912514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,4,64,0,1,float16,float16,0,0.054101333022117615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,8,64,0,1,float16,float16,0,0.03579733272393545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,8,64,128,1,float16,fp8,0,0.0418453315893809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,4,64,128,1,float16,fp8,0,0.05782933533191681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,4,64,128,1,fp8,fp8,0,0.07554666697978973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,4,64,0,1,float16,fp8,0,0.05422399938106537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,4,64,0,1,fp8,fp8,0,0.05029866596062978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,8,64,128,1,float16,float16,0,0.04060266663630804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,8,64,128,1,fp8,fp8,0,0.05128000179926554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,8,64,0,1,float16,fp8,0,0.03572800010442734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,8,64,0,1,fp8,fp8,0,0.032373333970705666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,1,64,128,1,float16,float16,0,0.0388373335202535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,1,64,0,1,float16,float16,0,0.03279466678698858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,1,64,128,1,float16,fp8,0,0.03908266623814901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,1,64,128,1,fp8,fp8,0,0.0507893313964208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,1,64,0,1,float16,fp8,0,0.03262399882078171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,1,64,0,1,fp8,fp8,0,0.031157332162062328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,2,64,128,1,float16,float16,0,0.03965333352486292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,2,64,0,1,float16,float16,0,0.03345066557327906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,2,64,128,1,float16,fp8,0,0.03978666663169861
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,2,64,128,1,fp8,fp8,0,0.0506933331489563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,2,64,0,1,float16,fp8,0,0.03369600077470144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,2,64,0,1,fp8,fp8,0,0.03165333221356074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,4,64,128,1,float16,float16,0,0.039701332648595176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,4,64,0,1,float16,float16,0,0.034586665530999504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,4,64,128,1,float16,fp8,0,0.04090133309364319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,4,64,128,1,fp8,fp8,0,0.050442665815353394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,4,64,0,1,float16,fp8,0,0.03467733412981033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,4,64,0,1,fp8,fp8,0,0.03205333401759466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,8,64,128,1,float16,float16,0,0.027306665976842243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,8,64,0,1,float16,float16,0,0.025514667232831318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,8,64,128,1,float16,fp8,0,0.0273333340883255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,8,64,128,1,fp8,fp8,0,0.0344106654326121
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,8,64,0,1,float16,fp8,0,0.025850666066010792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,8,64,0,1,fp8,fp8,0,0.023893333971500397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,1,64,128,1,float16,float16,0,0.026005332668622334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,1,64,0,1,float16,float16,0,0.024149333437283833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,1,64,128,1,float16,fp8,0,0.026778665681680042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,1,64,128,1,fp8,fp8,0,0.033285332222779594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,1,64,0,1,float16,fp8,0,0.02420266717672348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,1,64,0,1,fp8,fp8,0,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,2,64,128,1,float16,float16,0,0.026928000152111053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,2,64,0,1,float16,float16,0,0.024245334168275196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,2,64,128,1,float16,fp8,0,0.026895999908447266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,2,64,128,1,fp8,fp8,0,0.033157333731651306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,2,64,0,1,float16,fp8,0,0.024698667228221893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,2,64,0,1,fp8,fp8,0,0.02348266790310542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,4,64,128,1,float16,float16,0,0.02717333287000656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,4,64,0,1,float16,float16,0,0.02502399931351344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,4,64,128,1,float16,fp8,0,0.026911998788515728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,4,64,128,1,fp8,fp8,0,0.034341332813103996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,4,64,0,1,float16,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,4,64,0,1,fp8,fp8,0,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,8,64,128,1,float16,float16,0,0.024362665911515553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,8,64,0,1,float16,float16,0,0.022442666192849476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,8,64,128,1,float16,fp8,0,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,8,64,128,1,fp8,fp8,0,0.030762667457262676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,8,64,0,1,float16,fp8,0,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,1,64,128,1,fp8,fp8,0,0.03196800003449122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,1,64,0,1,float16,fp8,0,0.02239466706911723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,8,64,0,1,fp8,fp8,0,0.021770666042963665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,1,64,128,1,float16,float16,0,0.024325333535671234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,1,64,0,1,float16,float16,0,0.022064000368118286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,1,64,128,1,float16,fp8,0,0.024106666445732117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,1,64,0,1,fp8,fp8,0,0.021744000415007275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,2,64,128,1,float16,float16,0,0.02387733260790507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,2,64,0,1,float16,float16,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,4,64,0,1,float16,float16,0,0.19362666209538779
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,2,64,128,1,float16,fp8,0,0.02420799930890401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,2,64,128,1,fp8,fp8,0,0.0313226655125618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,2,64,0,1,float16,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,2,64,0,1,fp8,fp8,0,0.021770666042963665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,4,64,128,1,float16,float16,0,0.024138666689395905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,4,64,128,1,float16,fp8,0,0.024538666009902954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,4,64,128,1,fp8,fp8,0,0.03209066639343897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,4,64,0,1,float16,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,4,64,0,1,fp8,fp8,0,0.021551998953024547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,8,64,128,1,float16,float16,0,0.02367999901374181
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,8,64,0,1,float16,float16,0,0.022202665607134502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,8,64,128,1,float16,fp8,0,0.023792001108328503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,8,64,128,1,fp8,fp8,0,0.03033066789309184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,8,64,0,1,float16,fp8,0,0.02181333303451538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,8,64,0,1,fp8,fp8,0,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,1,64,128,1,float16,float16,0,0.023610666394233704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,1,64,0,1,float16,float16,0,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,1,64,128,1,float16,fp8,0,0.024330665667851765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,1,64,128,1,fp8,fp8,0,0.03107200066248576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,2,64,128,1,fp8,fp8,0,0.030239999294281006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,1,64,0,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,1,64,0,1,fp8,fp8,0,0.020746666938066483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,2,64,128,1,float16,float16,0,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,2,64,0,1,float16,float16,0,0.02203733225663503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,2,64,128,1,float16,fp8,0,0.024197332561016083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,2,64,0,1,float16,fp8,0,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,2,64,0,1,fp8,fp8,0,0.021226666867733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,4,64,128,1,float16,float16,0,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,4,64,0,1,float16,float16,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,4,64,128,1,float16,fp8,0,0.0242399995525678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,4,64,128,1,fp8,fp8,0,0.03035199890534083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,4,64,0,1,float16,fp8,0,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,4,64,0,1,fp8,fp8,0,0.020746666938066483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,8,1,64,128,1,float16,float16,0,0.26503467559814453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,8,1,64,0,1,float16,float16,0,0.25754666328430176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,8,1,64,128,1,float16,fp8,0,0.26386133829752606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,8,1,64,128,1,fp8,fp8,0,0.3484479983647664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,8,1,64,0,1,float16,fp8,0,0.25728533665339154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,8,1,64,0,1,fp8,fp8,0,0.23884799083073935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,8,2,64,128,1,float16,float16,0,0.2702293395996094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,8,2,64,0,1,float16,float16,0,0.2630186676979065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,8,2,64,128,1,float16,fp8,0,0.26847465833028156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,8,2,64,128,1,fp8,fp8,0,0.3503306706746419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,8,2,64,0,1,float16,fp8,0,0.2623093326886495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,8,2,64,0,1,fp8,fp8,0,0.24231467644373575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,8,4,64,0,1,float16,float16,0,0.27319467067718506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,8,4,64,128,1,float16,float16,0,0.2787040074666341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,8,4,64,128,1,float16,fp8,0,0.27830400069554645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,8,4,64,128,1,fp8,fp8,0,0.35947732130686444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,8,4,64,0,1,float16,fp8,0,0.2725546757380168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,8,64,128,1,float16,float16,0,0.16479466358820596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,8,4,64,0,1,fp8,fp8,0,0.2504319945971171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,8,64,0,1,float16,float16,0,0.16220800081888834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,8,64,128,1,float16,fp8,0,0.16149333119392395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,8,64,128,1,fp8,fp8,0,0.193615992863973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,8,64,0,1,float16,fp8,0,0.1604746679464976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,8,64,0,1,fp8,fp8,0,0.14071466525395712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,1,64,128,1,float16,float16,0,0.14235732952753702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,1,64,0,1,float16,float16,0,0.13869333267211914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,1,64,128,1,float16,fp8,0,0.1411786675453186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,1,64,128,1,fp8,fp8,0,0.18600533405939737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,1,64,0,1,float16,fp8,0,0.13858667016029358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,1,64,0,1,fp8,fp8,0,0.13171733419100443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,2,64,128,1,float16,float16,0,0.14620799819628397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,2,64,0,1,float16,float16,0,0.14312533537546793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,2,64,128,1,float16,fp8,0,0.14509866635004678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,2,64,128,1,fp8,fp8,0,0.18761066595713297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,2,64,0,1,float16,fp8,0,0.14268799622853598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,2,64,0,1,fp8,fp8,0,0.13354667027791342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,4,64,128,1,float16,float16,0,0.15227199594179788
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,4,64,0,1,float16,float16,0,0.14973333477973938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,4,64,128,1,float16,fp8,0,0.15124266346295676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,4,64,128,1,fp8,fp8,0,0.19009600083033243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,4,64,0,1,float16,fp8,0,0.14909332990646362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,8,64,128,1,float16,float16,0,0.09193600217501323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,4,64,0,1,fp8,fp8,0,0.1366933286190033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,8,64,0,1,float16,float16,0,0.09115200241406758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,8,64,128,1,float16,fp8,0,0.09078933795293172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,8,64,128,1,fp8,fp8,0,0.10781866312026978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,1,64,128,1,float16,fp8,0,0.07515199979146321
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,8,64,0,1,float16,fp8,0,0.0902346670627594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,8,64,0,1,fp8,fp8,0,0.08116266628106435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,1,64,128,1,float16,float16,0,0.07470400134722392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,1,64,0,1,float16,float16,0,0.0740479975938797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,1,64,128,1,fp8,fp8,0,0.10038933157920837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,1,64,0,1,float16,fp8,0,0.07438399891058604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,1,64,0,1,fp8,fp8,0,0.07321600119272868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,2,64,0,1,fp8,fp8,0,0.07625600198904674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,2,64,128,1,float16,float16,0,0.07751466830571492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,4,64,128,1,float16,float16,0,0.08156799773375194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,2,64,0,1,float16,float16,0,0.07630399862925212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,4,64,128,1,float16,fp8,0,0.08102933565775554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,4,64,128,1,fp8,fp8,0,0.10435199737548828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,2,64,128,1,float16,fp8,0,0.07688533266385396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,2,64,128,1,fp8,fp8,0,0.10186133782068889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,2,64,0,1,float16,fp8,0,0.07613333563009898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,4,64,0,1,float16,float16,0,0.08105599880218506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,4,64,0,1,float16,fp8,0,0.07904533545176189
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,4,64,0,1,fp8,fp8,0,0.07776533563931783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,8,64,128,1,float16,float16,0,0.048101335763931274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,8,64,0,1,float16,float16,0,0.04698666433493296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,8,64,128,1,float16,fp8,0,0.04725333551565806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,8,64,128,1,fp8,fp8,0,0.06266133487224579
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,8,64,0,1,float16,fp8,0,0.046442667643229164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,1,64,0,1,fp8,fp8,0,0.04345066845417023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,8,64,0,1,fp8,fp8,0,0.04866666595141093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,2,64,0,1,float16,float16,0,0.04294399917125702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,1,64,128,1,float16,float16,0,0.042165334026018776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,1,64,0,1,float16,float16,0,0.041797334949175514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,1,64,128,1,float16,fp8,0,0.043178667624791466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,1,64,128,1,fp8,fp8,0,0.05669333537419637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,1,64,0,1,float16,fp8,0,0.041984001795450844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,2,64,128,1,float16,float16,0,0.04354133208592733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,2,64,128,1,float16,fp8,0,0.044026667873064675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,2,64,128,1,fp8,fp8,0,0.05682133138179779
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,2,64,0,1,float16,fp8,0,0.04248533149560293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,2,64,0,1,fp8,fp8,0,0.044346665342648826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,4,64,128,1,float16,float16,0,0.04529066880544027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,4,64,0,1,float16,float16,0,0.04456000030040741
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,4,64,128,1,float16,fp8,0,0.04461333155632019
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,4,64,128,1,fp8,fp8,0,0.05850133299827576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,4,64,0,1,float16,fp8,0,0.04460800190766653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,4,64,0,1,fp8,fp8,0,0.04623466730117798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,8,64,128,1,float16,float16,0,0.03341866781314214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,8,64,0,1,float16,float16,0,0.033471999069054924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,8,64,128,1,float16,fp8,0,0.033520000676314034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,8,64,128,1,fp8,fp8,0,0.03656533360481262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,1,64,128,1,fp8,fp8,0,0.03568533311287562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,8,64,0,1,float16,fp8,0,0.03260799994071325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,8,64,0,1,fp8,fp8,0,0.029839999973773956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,1,64,128,1,float16,float16,0,0.03121600051720937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,1,64,0,1,float16,float16,0,0.030938667555650074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,1,64,128,1,float16,fp8,0,0.031343999008337654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,1,64,0,1,float16,fp8,0,0.03073066721359889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,1,64,0,1,fp8,fp8,0,0.028938665986061096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,2,64,128,1,float16,float16,0,0.03219733387231827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,2,64,0,1,float16,float16,0,0.03073599934577942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,2,64,128,1,float16,fp8,0,0.03194666653871536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,4,64,128,1,float16,fp8,0,0.03311999887228012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,2,64,128,1,fp8,fp8,0,0.0359253336985906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,2,64,0,1,float16,fp8,0,0.03173866619666418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,2,64,0,1,fp8,fp8,0,0.029088000456492107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,4,64,128,1,float16,float16,0,0.03286933402220408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,8,64,0,1,float16,float16,0,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,4,64,0,1,float16,float16,0,0.03215466688076655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,4,64,128,1,fp8,fp8,0,0.03642133375008901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,4,64,0,1,float16,fp8,0,0.032127998769283295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,4,64,0,1,fp8,fp8,0,0.029711998999118805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,8,64,128,1,float16,float16,0,0.021717332303524017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,8,64,128,1,float16,fp8,0,0.02142400046189626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,8,64,128,1,fp8,fp8,0,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,8,64,0,1,float16,fp8,0,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,8,64,0,1,fp8,fp8,0,0.02161066730817159
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,1,64,128,1,float16,float16,0,0.01960533360640208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,1,64,0,1,float16,float16,0,0.01978133370478948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,1,64,128,1,float16,fp8,0,0.020303999384244282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,1,64,128,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,1,64,0,1,float16,fp8,0,0.019706666469573975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,1,64,0,1,fp8,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,2,64,128,1,float16,float16,0,0.02027200038234393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,2,64,0,1,float16,float16,0,0.01972266659140587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,2,64,128,1,float16,fp8,0,0.02024000013868014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,2,64,128,1,fp8,fp8,0,0.024661332368850708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,2,64,0,1,float16,fp8,0,0.02041600023706754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,2,64,0,1,fp8,fp8,0,0.021520001192887623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,4,64,128,1,float16,float16,0,0.020410666863123577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,4,64,0,1,float16,float16,0,0.02070933332045873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,4,64,128,1,float16,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,4,64,128,1,fp8,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,4,64,0,1,float16,fp8,0,0.020538666596015293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,4,64,0,1,fp8,fp8,0,0.02145066608985265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,8,64,128,1,float16,float16,0,0.017871999492247898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,8,64,0,1,float16,float16,0,0.018042666216691334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,8,64,128,1,float16,fp8,0,0.017509333789348602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,8,64,128,1,fp8,fp8,0,0.02346666653951009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,8,64,0,1,float16,fp8,0,0.01752000053723653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,8,64,0,1,fp8,fp8,0,0.01956266661485036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,1,64,128,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,1,64,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,1,64,128,1,float16,fp8,0,0.017797333498795826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,1,64,128,1,fp8,fp8,0,0.02274666726589203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,1,64,0,1,float16,fp8,0,0.017557332913080852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,1,64,0,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,2,64,128,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,2,64,0,1,float16,float16,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,2,64,128,1,float16,fp8,0,0.01764800027012825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,2,64,128,1,fp8,fp8,0,0.0236160010099411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,2,64,0,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,2,64,0,1,fp8,fp8,0,0.019695999721686046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,4,64,128,1,float16,float16,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,4,64,0,1,float16,float16,0,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,4,64,128,1,float16,fp8,0,0.017557332913080852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,4,64,128,1,fp8,fp8,0,0.023226665953795116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,4,64,0,1,float16,fp8,0,0.017749333133300144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,4,64,0,1,fp8,fp8,0,0.019802667200565338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,8,64,128,1,float16,float16,0,0.016373333831628162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,8,64,0,1,float16,float16,0,0.015749332805474598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,8,64,128,1,float16,fp8,0,0.016074666132529575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,8,64,128,1,fp8,fp8,0,0.022154666483402252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,8,64,0,1,float16,fp8,0,0.016586666305859882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,1,64,0,1,float16,fp8,0,0.015978666643301647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,8,64,0,1,fp8,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,1,64,128,1,float16,float16,0,0.015637333194414776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,1,64,0,1,float16,float16,0,0.015861333658297855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,1,64,128,1,float16,fp8,0,0.01669866715868314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,1,64,128,1,fp8,fp8,0,0.021984001000722248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,1,64,0,1,fp8,fp8,0,0.01855466639002164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,2,64,128,1,float16,float16,0,0.017514667163292568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,2,64,0,1,float16,float16,0,0.015717333803574245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,2,64,128,1,float16,fp8,0,0.016597333053747814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,2,64,128,1,fp8,fp8,0,0.02197866638501485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,2,64,0,1,float16,fp8,0,0.016250666230916977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,2,64,0,1,fp8,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,4,64,128,1,float16,float16,0,0.01591466615597407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,4,64,0,1,float16,float16,0,0.016261332978804905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,4,64,128,1,float16,fp8,0,0.016229332735141117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,4,64,128,1,fp8,fp8,0,0.02231466770172119
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,4,64,0,1,float16,fp8,0,0.0164533331990242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,4,64,0,1,fp8,fp8,0,0.018138666947682697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,8,64,128,1,float16,float16,0,0.015439999600251516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,8,64,0,1,float16,float16,0,0.015664000064134598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,8,64,128,1,float16,fp8,0,0.015541333705186844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,8,64,128,1,fp8,fp8,0,0.022090665996074677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,8,64,0,1,float16,fp8,0,0.016085332880417507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,8,64,0,1,fp8,fp8,0,0.017557332913080852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,1,64,128,1,float16,float16,0,0.01579733317097028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,1,64,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,1,64,128,1,float16,fp8,0,0.015962666521469753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,1,64,128,1,fp8,fp8,0,0.022106667359670002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,1,64,0,1,float16,fp8,0,0.015423999478419622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,1,64,0,1,fp8,fp8,0,0.01810666670401891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,2,64,128,1,float16,float16,0,0.015770666301250458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,2,64,0,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,2,64,128,1,float16,fp8,0,0.016437333077192307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,2,64,128,1,fp8,fp8,0,0.021882665654023487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,2,64,0,1,float16,fp8,0,0.01588800052801768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,2,64,0,1,fp8,fp8,0,0.01830400029818217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,4,64,128,1,float16,float16,0,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,4,64,0,1,float16,float16,0,0.015791999797026317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,4,64,128,1,float16,fp8,0,0.01584533353646596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,4,64,128,1,fp8,fp8,0,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,4,64,0,1,float16,fp8,0,0.016202667107184727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,4,64,0,1,fp8,fp8,0,0.017877332866191864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,8,1,64,128,1,float16,float16,0,0.10437333583831787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,8,1,64,0,1,float16,float16,0,0.10431466499964397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,8,1,64,128,1,float16,fp8,0,0.10387733578681946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,8,1,64,128,1,fp8,fp8,0,0.13004266222318014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,8,1,64,0,1,float16,fp8,0,0.10403199990590413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,8,1,64,0,1,fp8,fp8,0,0.1281066636244456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,8,2,64,128,1,float16,float16,0,0.10708799958229065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,8,2,64,0,1,float16,float16,0,0.1071519951025645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,8,2,64,128,1,float16,fp8,0,0.10629866520563762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,8,2,64,128,1,fp8,fp8,0,0.13000532984733582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,8,2,64,0,1,float16,fp8,0,0.10559999942779541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,8,2,64,0,1,fp8,fp8,0,0.12983466188112894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,8,4,64,0,1,float16,float16,0,0.11286399761835735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,8,4,64,128,1,float16,float16,0,0.1129866639773051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,8,4,64,128,1,float16,fp8,0,0.11168000102043152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,8,64,128,1,float16,fp8,0,0.07050666709740956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,8,4,64,128,1,fp8,fp8,0,0.1377226710319519
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,8,4,64,0,1,float16,fp8,0,0.11126400033632915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,8,64,128,1,float16,float16,0,0.07260266443093617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,8,4,64,0,1,fp8,fp8,0,0.13846400380134583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,8,64,0,1,float16,float16,0,0.07283199826876323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,8,64,128,1,fp8,fp8,0,0.07932266592979431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,8,64,0,1,float16,fp8,0,0.07106666763623555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,8,64,0,1,fp8,fp8,0,0.0793333351612091
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,1,64,128,1,float16,float16,0,0.05613866448402405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,1,64,0,1,float16,float16,0,0.05680533250172933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,1,64,128,1,float16,fp8,0,0.056159997979799904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,1,64,128,1,fp8,fp8,0,0.07342400153477986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,1,64,0,1,float16,fp8,0,0.05653866628805796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,1,64,0,1,fp8,fp8,0,0.07336000104745229
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,2,64,128,1,float16,float16,0,0.059061333537101746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,2,64,0,1,fp8,fp8,0,0.07398933172225952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,2,64,0,1,float16,float16,0,0.0580320010582606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,2,64,128,1,float16,fp8,0,0.05827199916044871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,2,64,128,1,fp8,fp8,0,0.0738453318675359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,2,64,0,1,float16,fp8,0,0.058304001887639366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,4,64,128,1,float16,float16,0,0.06406933565934499
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,4,64,0,1,float16,float16,0,0.0637546678384145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,4,64,128,1,float16,fp8,0,0.06248533229033152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,4,64,128,1,fp8,fp8,0,0.07790933549404144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,4,64,0,1,float16,fp8,0,0.06244266529877981
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,4,64,0,1,fp8,fp8,0,0.07776000102361043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,8,64,128,1,float16,float16,0,0.0391839991013209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,8,64,0,1,float16,float16,0,0.03783999880154928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,1,64,0,1,float16,float16,0,0.03320533285538355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,8,64,128,1,float16,fp8,0,0.038005332152048744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,8,64,128,1,fp8,fp8,0,0.04867733518282572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,8,64,0,1,float16,fp8,0,0.0369759996732076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,8,64,0,1,fp8,fp8,0,0.04788800080617269
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,1,64,128,1,float16,float16,0,0.03313066562016805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,1,64,128,1,float16,fp8,0,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,1,64,128,1,fp8,fp8,0,0.04301333427429199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,1,64,0,1,float16,fp8,0,0.03317866722742716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,1,64,0,1,fp8,fp8,0,0.04303466777006785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,2,64,128,1,float16,float16,0,0.03422933320204417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,2,64,0,1,float16,float16,0,0.03390933324893316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,2,64,128,1,float16,fp8,0,0.03383466601371765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,2,64,128,1,fp8,fp8,0,0.04359466830889384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,2,64,0,1,float16,fp8,0,0.0335359995563825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,2,64,0,1,fp8,fp8,0,0.043578664461771645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,4,64,128,1,float16,float16,0,0.035589332381884255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,4,64,0,1,float16,float16,0,0.03545066714286804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,4,64,128,1,float16,fp8,0,0.035717333356539406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,8,64,128,1,float16,fp8,0,0.02518933266401291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,4,64,128,1,fp8,fp8,0,0.04600533346335093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,4,64,0,1,float16,fp8,0,0.03469333300987879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,4,64,0,1,fp8,fp8,0,0.04586133360862732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,8,64,128,1,float16,float16,0,0.02531733363866806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,8,64,0,1,float16,float16,0,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,8,64,128,1,fp8,fp8,0,0.029738667110602062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,8,64,0,1,float16,fp8,0,0.02498133232196172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,8,64,0,1,fp8,fp8,0,0.02938133229811986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,1,64,128,1,float16,float16,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,1,64,0,1,float16,float16,0,0.022970666488011677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,1,64,128,1,float16,fp8,0,0.023760000864664715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,1,64,128,1,fp8,fp8,0,0.028938665986061096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,1,64,0,1,float16,fp8,0,0.023472001155217487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,1,64,0,1,fp8,fp8,0,0.02903999884923299
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,2,64,128,1,float16,float16,0,0.023408000667889912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,2,64,0,1,float16,float16,0,0.023541333774725597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,2,64,128,1,float16,fp8,0,0.023685333629449207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,2,64,128,1,fp8,fp8,0,0.028698667883872986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,2,64,0,1,float16,fp8,0,0.023887999355793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,2,64,0,1,fp8,fp8,0,0.029045333464940388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,4,64,128,1,float16,float16,0,0.02474133421977361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,4,64,0,1,float16,float16,0,0.024933333198229473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,4,64,128,1,float16,fp8,0,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,4,64,128,1,fp8,fp8,0,0.029909332593282063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,4,64,0,1,float16,fp8,0,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,4,64,0,1,fp8,fp8,0,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,8,64,128,1,float16,float16,0,0.018229333062966663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,8,64,0,1,float16,float16,0,0.018863999595244724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,8,64,128,1,float16,fp8,0,0.018735999862353008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,8,64,128,1,fp8,fp8,0,0.02239466706911723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,8,64,0,1,float16,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,8,64,0,1,fp8,fp8,0,0.021642667551835377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,1,64,128,1,float16,float16,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,1,64,0,1,float16,float16,0,0.017450666675964992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,1,64,128,1,float16,fp8,0,0.018207999567190807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,1,64,128,1,fp8,fp8,0,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,1,64,0,1,float16,fp8,0,0.018186666071414948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,1,64,0,1,fp8,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,2,64,128,1,float16,float16,0,0.01788266624013583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,2,64,0,1,float16,float16,0,0.017386666188637417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,2,64,128,1,float16,fp8,0,0.017504000415404636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,2,64,128,1,fp8,fp8,0,0.0207893339296182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,2,64,0,1,float16,fp8,0,0.01844800015290578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,2,64,0,1,fp8,fp8,0,0.02142400046189626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,4,64,128,1,float16,float16,0,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,4,64,0,1,float16,float16,0,0.018613333503405254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,4,64,128,1,float16,fp8,0,0.01860800012946129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,4,64,128,1,fp8,fp8,0,0.02143466720978419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,4,64,0,1,float16,fp8,0,0.01806933308641116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,4,64,0,1,fp8,fp8,0,0.021514666577180225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,8,64,128,1,float16,float16,0,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,8,64,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,8,64,128,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,8,64,128,1,fp8,fp8,0,0.02011200040578842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,8,64,0,1,float16,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,8,64,0,1,fp8,fp8,0,0.01939733326435089
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,1,64,128,1,float16,float16,0,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,1,64,0,1,float16,float16,0,0.014282666146755219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,1,64,128,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,1,64,128,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,1,64,0,1,float16,fp8,0,0.014639999717473984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,1,64,0,1,fp8,fp8,0,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,2,64,128,1,float16,float16,0,0.014560000350077948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,2,64,0,1,float16,float16,0,0.014373333503802618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,2,64,128,1,float16,fp8,0,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,2,64,128,1,fp8,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,2,64,0,1,float16,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,2,64,0,1,fp8,fp8,0,0.019621333728233974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,4,64,128,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,4,64,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,4,64,128,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,4,64,128,1,fp8,fp8,0,0.019509332875410717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,4,64,0,1,float16,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,4,64,0,1,fp8,fp8,0,0.019600000232458115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,8,64,128,1,float16,float16,0,0.013983999689420065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,8,64,0,1,float16,float16,0,0.013893333574136099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,8,64,128,1,float16,fp8,0,0.014378666877746582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,8,64,128,1,fp8,fp8,0,0.018474667022625606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,8,64,0,1,float16,fp8,0,0.013866666704416275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,8,64,0,1,fp8,fp8,0,0.018645333747069042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,1,64,128,1,float16,float16,0,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,1,64,0,1,float16,float16,0,0.013936000565687815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,1,64,128,1,float16,fp8,0,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,1,64,128,1,fp8,fp8,0,0.0185759998857975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,2,64,128,1,float16,fp8,0,0.014384000251690546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,1,64,0,1,float16,fp8,0,0.01403733342885971
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,1,64,0,1,fp8,fp8,0,0.01821333294113477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,2,64,128,1,float16,float16,0,0.013679999858140945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,2,64,0,1,float16,float16,0,0.013573333621025085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,2,64,128,1,fp8,fp8,0,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,2,64,0,1,float16,fp8,0,0.014661333213249842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,2,64,0,1,fp8,fp8,0,0.01851733277241389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,4,64,128,1,float16,float16,0,0.013786666095256805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,4,64,0,1,float16,float16,0,0.01368533323208491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,4,64,128,1,float16,fp8,0,0.013637332866589228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,4,64,128,1,fp8,fp8,0,0.018432000031073887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,4,64,0,1,float16,fp8,0,0.014405333747466406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,4,64,0,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,8,64,128,1,float16,float16,0,0.013722666849692663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,8,64,0,1,float16,float16,0,0.013343999783198038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,8,64,128,1,float16,fp8,0,0.013562666873137156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,8,64,128,1,fp8,fp8,0,0.017743999759356182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,8,64,0,1,float16,fp8,0,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,8,64,0,1,fp8,fp8,0,0.017781333376963932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,1,64,128,1,float16,float16,0,0.013605333864688873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,1,64,0,1,float16,float16,0,0.01351999988158544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,1,64,128,1,float16,fp8,0,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,1,64,128,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,1,64,0,1,float16,fp8,0,0.01360000049074491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,1,64,0,1,fp8,fp8,0,0.017514667163292568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,2,64,128,1,float16,float16,0,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,2,64,0,1,float16,float16,0,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,2,64,128,1,float16,fp8,0,0.014127999544143677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,2,64,128,1,fp8,fp8,0,0.01815466706951459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,2,64,0,1,float16,fp8,0,0.014314666390419006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,2,64,0,1,fp8,fp8,0,0.017925333231687546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,4,64,128,1,float16,float16,0,0.01320533330241839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,4,64,0,1,float16,float16,0,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,4,64,128,1,float16,fp8,0,0.013349333157142004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,4,64,128,1,fp8,fp8,0,0.017765333255132038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,4,64,0,1,float16,fp8,0,0.014053333550691605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,4,64,0,1,fp8,fp8,0,0.018016000588734944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,8,64,128,1,float16,float16,0,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,8,64,0,1,float16,float16,0,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,8,64,128,1,float16,fp8,0,0.01350933313369751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,8,64,128,1,fp8,fp8,0,0.017557332913080852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,8,64,0,1,float16,fp8,0,0.013237333546082178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,1,64,0,1,float16,fp8,0,0.013717333475748697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,8,64,0,1,fp8,fp8,0,0.017445333302021027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,1,64,128,1,float16,float16,0,0.013237333546082178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,1,64,0,1,float16,float16,0,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,1,64,128,1,float16,fp8,0,0.013461332768201828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,1,64,128,1,fp8,fp8,0,0.017583999782800674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,1,64,0,1,fp8,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,2,64,128,1,float16,float16,0,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,2,64,0,1,float16,float16,0,0.013359999905029932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,2,64,128,1,float16,fp8,0,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,2,64,128,1,fp8,fp8,0,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,2,64,0,1,float16,fp8,0,0.013855999956528345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,2,64,0,1,fp8,fp8,0,0.018250666558742523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,4,64,128,1,float16,float16,0,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,4,64,0,1,float16,float16,0,0.012602667013804117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,4,64,128,1,float16,fp8,0,0.013477332890033722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,4,64,128,1,fp8,fp8,0,0.017797333498795826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,4,64,0,1,float16,fp8,0,0.01322666679819425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,4,64,0,1,fp8,fp8,0,0.018042666216691334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,8,1,64,128,1,float16,float16,0,0.05379199981689453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,8,1,64,0,1,float16,float16,0,0.05346666773160299
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,8,1,64,128,1,float16,fp8,0,0.05332266787687937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,8,1,64,128,1,fp8,fp8,0,0.0939520001411438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,8,1,64,0,1,float16,fp8,0,0.05309333403905233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,8,1,64,0,1,fp8,fp8,0,0.09537067015965779
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,8,2,64,128,1,float16,float16,0,0.05508266886075338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,8,2,64,0,1,float16,float16,0,0.05454400181770325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,8,2,64,128,1,float16,fp8,0,0.054885332783063255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,8,2,64,128,1,fp8,fp8,0,0.09670399626096089
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,8,2,64,0,1,float16,fp8,0,0.055215999484062195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,8,2,64,0,1,fp8,fp8,0,0.0953546663125356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,8,4,64,128,1,float16,float16,0,0.059664001067479454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,8,4,64,0,1,float16,float16,0,0.059690664211908974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,8,4,64,128,1,float16,fp8,0,0.05820799867312113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,8,4,64,128,1,fp8,fp8,0,0.09985066453615825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,8,4,64,0,1,float16,fp8,0,0.05819199979305267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,8,64,128,1,fp8,fp8,0,0.05896000067392985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,8,4,64,0,1,fp8,fp8,0,0.10122133294741313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,8,64,128,1,float16,float16,0,0.03621866554021835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,8,64,0,1,float16,float16,0,0.03570666660865148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,8,64,128,1,float16,fp8,0,0.0352960005402565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,8,64,0,1,float16,fp8,0,0.0355679988861084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,8,64,0,1,fp8,fp8,0,0.058821335434913635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,1,64,128,1,float16,float16,0,0.03180266668399175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,1,64,0,1,float16,float16,0,0.032272001107533775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,1,64,128,1,float16,fp8,0,0.03206400076548258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,1,64,128,1,fp8,fp8,0,0.05354666709899902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,1,64,0,1,float16,fp8,0,0.03172266731659571
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,1,64,0,1,fp8,fp8,0,0.05403733253479004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,2,64,128,1,float16,float16,0,0.03259200106064478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,2,64,0,1,float16,float16,0,0.032058666149775185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,4,64,128,1,float16,float16,0,0.033626665671666466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,2,64,128,1,float16,fp8,0,0.03293866664171219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,2,64,128,1,fp8,fp8,0,0.05467733244101206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,2,64,0,1,float16,fp8,0,0.03233066697915395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,2,64,0,1,fp8,fp8,0,0.05435733497142792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,4,64,0,1,float16,float16,0,0.03364799916744232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,8,64,128,1,float16,float16,0,0.022613334159056347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,4,64,128,1,float16,fp8,0,0.032816000282764435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,4,64,128,1,fp8,fp8,0,0.0561653325955073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,4,64,0,1,float16,fp8,0,0.03402666747570038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,4,64,0,1,fp8,fp8,0,0.05712000032265981
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,8,64,0,1,float16,float16,0,0.02219199885924657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,8,64,128,1,float16,fp8,0,0.02276800076166789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,8,64,128,1,fp8,fp8,0,0.03497066597143809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,8,64,0,1,float16,fp8,0,0.021754667162895203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,8,64,0,1,fp8,fp8,0,0.035232000052928925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,1,64,128,1,float16,float16,0,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,1,64,0,1,float16,float16,0,0.021562665700912476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,1,64,128,1,float16,fp8,0,0.020682666450738907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,1,64,128,1,fp8,fp8,0,0.034287999073664345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,1,64,0,1,float16,fp8,0,0.021402666966120403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,1,64,0,1,fp8,fp8,0,0.03382399926582972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,2,64,128,1,float16,float16,0,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,2,64,0,1,float16,float16,0,0.02201066662867864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,2,64,128,1,float16,fp8,0,0.022042666872342426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,2,64,128,1,fp8,fp8,0,0.03428266694148382
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,2,64,0,1,float16,fp8,0,0.021104000508785248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,2,64,0,1,fp8,fp8,0,0.03453333427508672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,4,64,128,1,float16,float16,0,0.021935999393463135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,4,64,0,1,float16,float16,0,0.02233600119749705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,8,64,0,1,float16,float16,0,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,8,64,128,1,float16,fp8,0,0.016693333784739178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,4,64,128,1,float16,fp8,0,0.0222080002228419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,4,64,128,1,fp8,fp8,0,0.0352960005402565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,8,64,0,1,fp8,fp8,0,0.024586667617162068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,4,64,0,1,float16,fp8,0,0.022800001005331676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,4,64,0,1,fp8,fp8,0,0.034661332766215004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,8,64,128,1,float16,float16,0,0.016554666062196095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,1,64,128,1,fp8,fp8,0,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,8,64,128,1,fp8,fp8,0,0.024933333198229473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,8,64,0,1,float16,fp8,0,0.01783466711640358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,1,64,128,1,float16,float16,0,0.016384000579516094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,1,64,0,1,float16,float16,0,0.016693333784739178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,1,64,128,1,float16,fp8,0,0.0164533331990242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,1,64,0,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,1,64,0,1,fp8,fp8,0,0.023685333629449207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,2,64,128,1,float16,float16,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,2,64,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,2,64,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,2,64,128,1,fp8,fp8,0,0.023925334215164185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,4,64,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,2,64,0,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,2,64,0,1,fp8,fp8,0,0.023775999744733173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,8,64,128,1,float16,float16,0,0.013946666071812311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,4,64,128,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,4,64,0,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,4,64,128,1,float16,fp8,0,0.017450666675964992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,4,64,128,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,4,64,0,1,fp8,fp8,0,0.024570666253566742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,8,64,0,1,float16,float16,0,0.013872000078360239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,8,64,128,1,float16,fp8,0,0.014032000054915747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,8,64,128,1,fp8,fp8,0,0.02027733375628789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,8,64,0,1,float16,fp8,0,0.01393066719174385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,8,64,0,1,fp8,fp8,0,0.01952533299724261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,1,64,0,1,fp8,fp8,0,0.019808000574509304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,1,64,128,1,float16,float16,0,0.013738666971524557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,1,64,0,1,float16,float16,0,0.013909333695967993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,1,64,128,1,float16,fp8,0,0.0136266661187013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,1,64,128,1,fp8,fp8,0,0.019424000134070713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,2,64,0,1,float16,fp8,0,0.014490666488806406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,1,64,0,1,float16,fp8,0,0.013951999445756277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,2,64,128,1,float16,float16,0,0.013770667215188345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,2,64,0,1,float16,float16,0,0.013610667238632837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,2,64,128,1,float16,fp8,0,0.01422400027513504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,2,64,128,1,fp8,fp8,0,0.0199946661790212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,2,64,0,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,4,64,128,1,float16,float16,0,0.013861333330472311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,4,64,0,1,float16,float16,0,0.013888000200192133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,4,64,128,1,float16,fp8,0,0.014090667168299357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,4,64,128,1,fp8,fp8,0,0.020117333779732387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,4,64,0,1,float16,fp8,0,0.013909333695967993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,4,64,0,1,fp8,fp8,0,0.019871999820073444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,8,64,128,1,float16,float16,0,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,8,64,0,1,float16,float16,0,0.011834666132926941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,8,64,128,1,float16,fp8,0,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,8,64,128,1,fp8,fp8,0,0.018373332917690277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,8,64,0,1,float16,fp8,0,0.012661332885424295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,8,64,0,1,fp8,fp8,0,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,1,64,128,1,float16,float16,0,0.011994666109482447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,1,64,0,1,float16,float16,0,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,1,64,128,1,float16,fp8,0,0.012346666306257248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,1,64,128,1,fp8,fp8,0,0.018522666146357853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,1,64,0,1,float16,fp8,0,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,1,64,0,1,fp8,fp8,0,0.018085333208243053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,2,64,128,1,float16,float16,0,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,2,64,0,1,float16,float16,0,0.012026666353146235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,2,64,128,1,float16,fp8,0,0.01339200014869372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,2,64,128,1,fp8,fp8,0,0.017925333231687546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,2,64,0,1,float16,fp8,0,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,2,64,0,1,fp8,fp8,0,0.0179626668492953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,4,64,128,1,float16,float16,0,0.012351999680201212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,4,64,0,1,float16,float16,0,0.012538666526476542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,4,64,128,1,float16,fp8,0,0.0124746672809124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,4,64,128,1,fp8,fp8,0,0.018698666244745255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,4,64,0,1,float16,fp8,0,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,4,64,0,1,fp8,fp8,0,0.01848000039656957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,8,64,128,1,float16,float16,0,0.011813333878914515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,8,64,0,1,float16,float16,0,0.011461333682139715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,8,64,128,1,float16,fp8,0,0.012213333199421564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,8,64,128,1,fp8,fp8,0,0.017525333911180496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,8,64,0,1,float16,fp8,0,0.012543999900420507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,8,64,0,1,fp8,fp8,0,0.01764800027012825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,1,64,128,1,float16,float16,0,0.011893333246310553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,1,64,0,1,float16,float16,0,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,1,64,128,1,float16,fp8,0,0.012181332955757776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,1,64,128,1,fp8,fp8,0,0.017792000124851864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,1,64,0,1,float16,fp8,0,0.012309333930412928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,1,64,0,1,fp8,fp8,0,0.017818666994571686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,2,64,128,1,float16,float16,0,0.012165332833925882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,2,64,0,1,float16,float16,0,0.011909333368142446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,2,64,128,1,float16,fp8,0,0.012565333396196365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,2,64,128,1,fp8,fp8,0,0.018021332720915478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,2,64,0,1,float16,fp8,0,0.012282667060693106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,2,64,0,1,fp8,fp8,0,0.017717332889636356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,4,64,128,1,float16,float16,0,0.011887999872366587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,4,64,0,1,float16,float16,0,0.012053333222866058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,4,64,128,1,float16,fp8,0,0.012554666648308435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,4,64,128,1,fp8,fp8,0,0.01770666614174843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,4,64,0,1,float16,fp8,0,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,4,64,0,1,fp8,fp8,0,0.01812800019979477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,8,64,128,1,float16,float16,0,0.01163200040658315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,8,64,0,1,float16,float16,0,0.011626667032639185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,8,64,128,1,float16,fp8,0,0.011616000284751257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,8,64,128,1,fp8,fp8,0,0.017845333864291508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,8,64,0,1,float16,fp8,0,0.012527999778588613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,8,64,0,1,fp8,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,1,64,128,1,float16,float16,0,0.012272000312805176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,1,64,0,1,float16,float16,0,0.01221866657336553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,1,64,128,1,float16,fp8,0,0.011999999483426413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,1,64,128,1,fp8,fp8,0,0.017594666530688603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,1,64,0,1,float16,fp8,0,0.011973333855470022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,2,64,0,1,fp8,fp8,0,0.017637333522240322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,1,64,0,1,fp8,fp8,0,0.01777600000301997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,2,64,128,1,float16,float16,0,0.012309333930412928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,2,64,0,1,float16,float16,0,0.011503999431928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,2,64,128,1,float16,fp8,0,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,2,64,128,1,fp8,fp8,0,0.017557332913080852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,2,64,0,1,float16,fp8,0,0.01211200033624967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,4,64,128,1,float16,float16,0,0.011877333124478659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,4,64,0,1,float16,float16,0,0.01219733307758967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,4,64,128,1,float16,fp8,0,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,4,64,128,1,fp8,fp8,0,0.017925333231687546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,4,64,0,1,float16,fp8,0,0.012096000214417776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,4,64,0,1,fp8,fp8,0,0.017935999979575474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,8,64,128,1,float16,float16,0,0.011578666667143503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,8,64,0,1,float16,float16,0,0.011285333583752314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,8,64,128,1,float16,fp8,0,0.011920000116030375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,8,64,128,1,fp8,fp8,0,0.017605333278576534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,8,64,0,1,float16,fp8,0,0.012448000411192576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,8,64,0,1,fp8,fp8,0,0.017957333475351334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,1,64,128,1,float16,float16,0,0.011551999797423681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,1,64,0,1,float16,float16,0,0.011754666765530905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,1,64,128,1,float16,fp8,0,0.012357333054145178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,1,64,128,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,1,64,0,1,float16,fp8,0,0.012069333344697952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,1,64,0,1,fp8,fp8,0,0.017701332767804463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,2,64,128,1,float16,float16,0,0.01231466606259346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,2,64,0,1,float16,float16,0,0.012117333710193634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,2,64,128,1,float16,fp8,0,0.011994666109482447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,2,64,128,1,fp8,fp8,0,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,2,64,0,1,float16,fp8,0,0.012506666282812754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,2,64,0,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,4,64,128,1,float16,float16,0,0.011701333026091257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,4,64,0,1,float16,float16,0,0.011813333878914515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,8,1,64,128,1,float16,float16,0,0.034490667283535004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,4,64,128,1,float16,fp8,0,0.012282667060693106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,8,1,64,128,1,float16,fp8,0,0.03431999931732813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,4,64,128,1,fp8,fp8,0,0.017610666652520496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,4,64,0,1,float16,fp8,0,0.011866666376590729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,4,64,0,1,fp8,fp8,0,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,8,1,64,0,1,float16,float16,0,0.03390933324893316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,8,1,64,128,1,fp8,fp8,0,0.07840000092983246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,8,1,64,0,1,float16,fp8,0,0.03425599883000056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,8,1,64,0,1,fp8,fp8,0,0.07889600098133087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,8,2,64,128,1,float16,float16,0,0.03479466587305069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,8,2,64,0,1,float16,float16,0,0.03498133271932602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,8,2,64,128,1,float16,fp8,0,0.03426666557788849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,8,2,64,128,1,fp8,fp8,0,0.07905599971612294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,8,2,64,0,1,float16,fp8,0,0.034416000048319496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,8,2,64,0,1,fp8,fp8,0,0.07912000020345052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,8,4,64,128,1,float16,float16,0,0.0363520011305809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,8,4,64,0,1,float16,fp8,0,0.03594133257865906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,8,4,64,0,1,float16,float16,0,0.03651199986537298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,8,4,64,128,1,float16,fp8,0,0.0356480007370313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,8,4,64,128,1,fp8,fp8,0,0.08092799782752991
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,8,4,64,0,1,fp8,fp8,0,0.08099199831485748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,8,64,128,1,float16,float16,0,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,8,64,0,1,float16,float16,0,0.0236160010099411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,8,64,128,1,float16,fp8,0,0.023589332898457844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,8,64,128,1,fp8,fp8,0,0.048751999934514366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,8,64,0,1,float16,fp8,0,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,8,64,0,1,fp8,fp8,0,0.04783466458320618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,1,64,128,1,float16,float16,0,0.022426667312781017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,1,64,0,1,float16,float16,0,0.022309333086013794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,1,64,128,1,float16,fp8,0,0.022997332115968067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,1,64,128,1,fp8,fp8,0,0.04773866633574168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,1,64,0,1,float16,fp8,0,0.022789334257443745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,1,64,0,1,fp8,fp8,0,0.04747200012207031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,2,64,128,1,float16,float16,0,0.022570667167504627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,2,64,0,1,float16,float16,0,0.02274666726589203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,2,64,128,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,2,64,128,1,fp8,fp8,0,0.04795733094215393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,4,64,128,1,float16,fp8,0,0.023557332654794056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,2,64,0,1,float16,fp8,0,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,2,64,0,1,fp8,fp8,0,0.047925333182017006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,4,64,128,1,float16,float16,0,0.02370133250951767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,4,64,0,1,float16,float16,0,0.023226665953795116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,4,64,128,1,fp8,fp8,0,0.048565333088239036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,4,64,0,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,4,64,0,1,fp8,fp8,0,0.04837333162625631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,8,64,128,1,float16,float16,0,0.016069332758585613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,8,64,0,1,float16,float16,0,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,8,64,128,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,8,64,128,1,fp8,fp8,0,0.031114667654037476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,8,64,0,1,float16,fp8,0,0.016410666207472484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,8,64,0,1,fp8,fp8,0,0.03140799949566523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,1,64,128,1,float16,float16,0,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,1,64,0,1,float16,float16,0,0.015978666643301647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,1,64,128,1,float16,fp8,0,0.015840000162522
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,1,64,128,1,fp8,fp8,0,0.030565333863099415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,1,64,0,1,float16,fp8,0,0.016517333686351776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,1,64,0,1,fp8,fp8,0,0.03036266565322876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,2,64,128,1,float16,float16,0,0.01578666642308235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,2,64,0,1,float16,float16,0,0.016261332978804905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,2,64,128,1,float16,fp8,0,0.016384000579516094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,2,64,128,1,fp8,fp8,0,0.030906667311986286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,2,64,0,1,float16,fp8,0,0.01613866661985715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,2,64,0,1,fp8,fp8,0,0.031045332551002502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,4,64,128,1,float16,float16,0,0.01643199970324834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,4,64,0,1,float16,float16,0,0.016693333784739178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,4,64,128,1,float16,fp8,0,0.016341333587964375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,4,64,128,1,fp8,fp8,0,0.03154666721820831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,4,64,0,1,float16,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,4,64,0,1,fp8,fp8,0,0.031557333966096245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,8,64,128,1,float16,float16,0,0.013594667116800943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,8,64,0,1,float16,float16,0,0.013760000467300415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,8,64,128,1,float16,fp8,0,0.014165333161751429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,8,64,128,1,fp8,fp8,0,0.02359466751416524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,1,64,128,1,fp8,fp8,0,0.022858666876951855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,8,64,0,1,float16,fp8,0,0.01350933313369751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,8,64,0,1,fp8,fp8,0,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,1,64,128,1,float16,float16,0,0.013904000322024027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,1,64,0,1,float16,float16,0,0.013877333452304205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,1,64,128,1,float16,fp8,0,0.013503999759753546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,1,64,0,1,float16,fp8,0,0.014282666146755219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,1,64,0,1,fp8,fp8,0,0.023002666731675465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,2,64,128,1,float16,float16,0,0.013951999445756277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,2,64,0,1,float16,float16,0,0.013359999905029932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,2,64,128,1,float16,fp8,0,0.014197333405415217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,2,64,128,1,fp8,fp8,0,0.02327466756105423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,2,64,0,1,float16,fp8,0,0.014554666976133982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,2,64,0,1,fp8,fp8,0,0.02293866624434789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,4,64,128,1,float16,float16,0,0.014138666292031607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,4,64,0,1,float16,float16,0,0.014346666634082794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,4,64,128,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,4,64,128,1,fp8,fp8,0,0.022885332504908245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,4,64,0,1,float16,fp8,0,0.014357333381970724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,4,64,0,1,fp8,fp8,0,0.023525332411130268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,8,64,128,1,float16,float16,0,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,8,64,0,1,float16,float16,0,0.01180800050497055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,8,64,128,1,float16,fp8,0,0.012624000509579977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,8,64,128,1,fp8,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,8,64,0,1,float16,fp8,0,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,8,64,0,1,fp8,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,1,64,128,1,float16,float16,0,0.01240533341964086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,1,64,0,1,float16,float16,0,0.012298667182525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,1,64,128,1,float16,fp8,0,0.013343999783198038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,2,64,128,1,float16,fp8,0,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,1,64,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,1,64,0,1,float16,fp8,0,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,1,64,0,1,fp8,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,2,64,128,1,float16,float16,0,0.012714666624863943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,2,64,0,1,float16,float16,0,0.01232533281048139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,2,64,128,1,fp8,fp8,0,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,2,64,0,1,float16,fp8,0,0.013354666531085968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,2,64,0,1,fp8,fp8,0,0.019541333119074505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,4,64,128,1,float16,float16,0,0.012165332833925882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,4,64,0,1,float16,float16,0,0.012527999778588613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,4,64,128,1,float16,fp8,0,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,4,64,128,1,fp8,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,4,64,0,1,float16,fp8,0,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,4,64,0,1,fp8,fp8,0,0.019424000134070713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,8,64,128,1,float16,float16,0,0.011727999895811081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,8,64,0,1,float16,float16,0,0.011594666788975397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,8,64,128,1,float16,fp8,0,0.01202133297920227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,8,64,128,1,fp8,fp8,0,0.02812800059715907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,8,64,0,1,float16,fp8,0,0.011706666400035223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,8,64,0,1,fp8,fp8,0,0.018735999862353008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,1,64,128,1,float16,float16,0,0.012122667084137598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,1,64,0,1,float16,float16,0,0.011855999628702799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,1,64,128,1,float16,fp8,0,0.012159999459981918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,1,64,128,1,fp8,fp8,0,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,1,64,0,1,float16,fp8,0,0.012335999558369318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,1,64,0,1,fp8,fp8,0,0.01798933371901512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,2,64,128,1,float16,float16,0,0.012074666718641916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,2,64,0,1,float16,float16,0,0.012026666353146235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,2,64,128,1,float16,fp8,0,0.012533333152532578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,2,64,128,1,fp8,fp8,0,0.018058666338523228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,2,64,0,1,float16,fp8,0,0.012597333639860153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,2,64,0,1,fp8,fp8,0,0.018415999909241993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,4,64,128,1,float16,float16,0,0.011695999652147293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,4,64,0,1,float16,float16,0,0.011850666254758835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,4,64,128,1,float16,fp8,0,0.01259200026591619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,8,64,0,1,float16,float16,0,0.01139733319481214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,4,64,128,1,fp8,fp8,0,0.018511999398469925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,4,64,0,1,float16,fp8,0,0.012063999970753988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,4,64,0,1,fp8,fp8,0,0.01868266612291336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,8,64,128,1,float16,float16,0,0.01190399999419848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,8,64,128,1,float16,fp8,0,0.01180800050497055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,8,64,128,1,fp8,fp8,0,0.018405333161354065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,8,64,0,1,float16,fp8,0,0.011770666887362799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,8,64,0,1,fp8,fp8,0,0.018021332720915478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,1,64,128,1,float16,float16,0,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,1,64,0,1,float16,float16,0,0.011770666887362799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,1,64,128,1,float16,fp8,0,0.012373333175977072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,1,64,128,1,fp8,fp8,0,0.01754666616519292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,1,64,0,1,float16,fp8,0,0.01239466667175293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,1,64,0,1,fp8,fp8,0,0.017690667261679966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,2,64,128,1,float16,float16,0,0.011850666254758835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,2,64,0,1,float16,float16,0,0.011770666887362799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,2,64,128,1,float16,fp8,0,0.0116799995303154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,2,64,128,1,fp8,fp8,0,0.017903999735911686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,2,64,0,1,float16,fp8,0,0.012293333808581034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,4,64,0,1,float16,fp8,0,0.011765333513418833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,2,64,0,1,fp8,fp8,0,0.017792000124851864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,4,64,128,1,float16,float16,0,0.012037333101034164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,4,64,0,1,float16,float16,0,0.01138666644692421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,4,64,128,1,float16,fp8,0,0.01221866657336553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,4,64,128,1,fp8,fp8,0,0.017914666483799618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,4,64,0,1,fp8,fp8,0,0.017653333644072216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,8,64,128,1,float16,float16,0,0.011605333536863327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,8,64,0,1,float16,float16,0,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,8,64,128,1,float16,fp8,0,0.01211200033624967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,8,64,128,1,fp8,fp8,0,0.018245333184798557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,8,64,0,1,float16,fp8,0,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,8,64,0,1,fp8,fp8,0,0.01995733380317688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,1,64,128,1,float16,float16,0,0.01156266654531161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,1,64,0,1,float16,float16,0,0.011514666179815928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,1,64,128,1,float16,fp8,0,0.01181866725285848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,1,64,128,1,fp8,fp8,0,0.01743999992807706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,1,64,0,1,float16,fp8,0,0.012304000556468964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,2,64,0,1,float16,fp8,0,0.012074666718641916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,2,64,0,1,fp8,fp8,0,0.017802666872739792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,1,64,0,1,fp8,fp8,0,0.01802666609485944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,2,64,128,1,float16,float16,0,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,2,64,0,1,float16,float16,0,0.011461333682139715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,2,64,128,1,float16,fp8,0,0.012154666086037954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,2,64,128,1,fp8,fp8,0,0.017909333109855652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,4,64,128,1,float16,float16,0,0.011973333855470022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,4,64,0,1,float16,float16,0,0.012117333710193634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,4,64,128,1,float16,fp8,0,0.011749333391586939
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,4,64,128,1,fp8,fp8,0,0.018218666315078735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,4,64,0,1,float16,fp8,0,0.011541333049535751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,4,64,0,1,fp8,fp8,0,0.018058666338523228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,8,64,128,1,float16,float16,0,0.011370666325092316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,8,64,0,1,float16,float16,0,0.011194666226704916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,8,64,128,1,float16,fp8,0,0.011861333002646765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,8,64,128,1,fp8,fp8,0,0.018170667191346485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,8,64,0,1,float16,fp8,0,0.011349332829316458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,8,64,0,1,fp8,fp8,0,0.017903999735911686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,1,64,128,1,float16,float16,0,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,1,64,0,1,float16,float16,0,0.011343999455372492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,1,64,128,1,float16,fp8,0,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,1,64,128,1,fp8,fp8,0,0.017866666118303936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,1,64,0,1,float16,fp8,0,0.011834666132926941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,1,64,0,1,fp8,fp8,0,0.01785600061217944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,2,64,128,1,float16,float16,0,0.011498666057984034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,2,64,0,1,float16,float16,0,0.01166933278242747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,2,64,128,1,float16,fp8,0,0.011839999506870905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,2,64,128,1,fp8,fp8,0,0.017898666361967724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,2,64,0,1,float16,fp8,0,0.012335999558369318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,2,64,0,1,fp8,fp8,0,0.018272000054518383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,4,64,128,1,float16,float16,0,0.011530666301647821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,4,64,0,1,float16,float16,0,0.011551999797423681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,4,64,128,1,float16,fp8,0,0.0120319997270902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,4,64,128,1,fp8,fp8,0,0.01815466706951459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,4,64,0,1,float16,fp8,0,0.01184533288081487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,4,64,0,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,8,1,64,128,1,float16,float16,0,0.028346667687098186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,8,1,64,0,1,float16,float16,0,0.027952000498771667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,8,1,64,128,1,float16,fp8,0,0.02787200113137563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,8,1,64,128,1,fp8,fp8,0,0.07235200206438701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,8,1,64,0,1,float16,fp8,0,0.02824000020821889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,8,1,64,0,1,fp8,fp8,0,0.07125866909821828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,8,2,64,128,1,float16,float16,0,0.02869333326816559
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,8,2,64,0,1,float16,float16,0,0.028538666665554047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,8,2,64,128,1,float16,fp8,0,0.028138667345046997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,8,2,64,128,1,fp8,fp8,0,0.0713973343372345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,8,2,64,0,1,float16,fp8,0,0.028207999964555103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,8,4,64,128,1,float16,float16,0,0.028549333413441975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,8,2,64,0,1,fp8,fp8,0,0.07292266686757405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,8,4,64,0,1,float16,float16,0,0.02922133356332779
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,8,4,64,128,1,float16,fp8,0,0.028880000114440918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,8,4,64,128,1,fp8,fp8,0,0.07358933488527934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,8,4,64,0,1,float16,fp8,0,0.028970666229724884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,8,4,64,0,1,fp8,fp8,0,0.07238399982452393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,8,64,128,1,float16,float16,0,0.01877333347996076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,8,64,0,1,float16,float16,0,0.01966933285196622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,8,64,128,1,float16,fp8,0,0.019424000134070713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,8,64,128,1,fp8,fp8,0,0.04516266783078512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,8,64,0,1,float16,fp8,0,0.019813333948453266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,8,64,0,1,fp8,fp8,0,0.044938668608665466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,1,64,128,1,float16,float16,0,0.018650667121013004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,1,64,0,1,float16,float16,0,0.018650667121013004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,1,64,128,1,float16,fp8,0,0.020154666155576706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,1,64,128,1,fp8,fp8,0,0.04426133135954539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,1,64,0,1,float16,fp8,0,0.019733333339293797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,1,64,0,1,fp8,fp8,0,0.043951998154322304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,2,64,128,1,float16,float16,0,0.01937599976857503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,2,64,0,1,float16,float16,0,0.01878400022784869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,2,64,128,1,float16,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,2,64,128,1,fp8,fp8,0,0.04473066826661428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,2,64,0,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,2,64,0,1,fp8,fp8,0,0.04483200112978617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,4,64,128,1,float16,float16,0,0.01941866676012675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,4,64,0,1,float16,float16,0,0.020090666910012562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,4,64,128,1,float16,fp8,0,0.0200853335360686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,4,64,128,1,fp8,fp8,0,0.04453866680463155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,4,64,0,1,float16,fp8,0,0.01952533299724261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,4,64,0,1,fp8,fp8,0,0.045226668318112694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,8,64,128,1,float16,float16,0,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,8,64,0,1,float16,float16,0,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,8,64,128,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,1,64,128,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,8,64,128,1,fp8,fp8,0,0.030074665943781536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,8,64,0,1,float16,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,8,64,0,1,fp8,fp8,0,0.02974933385848999
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,1,64,128,1,float16,float16,0,0.014111999422311783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,1,64,0,1,float16,float16,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,1,64,128,1,fp8,fp8,0,0.029839999973773956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,1,64,0,1,float16,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,1,64,0,1,fp8,fp8,0,0.02957333376010259
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,2,64,128,1,float16,float16,0,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,2,64,0,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,2,64,128,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,2,64,128,1,fp8,fp8,0,0.029498666524887085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,2,64,0,1,float16,fp8,0,0.015637333194414776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,2,64,0,1,fp8,fp8,0,0.029850666721661884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,4,64,128,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,4,64,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,4,64,128,1,float16,fp8,0,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,4,64,128,1,fp8,fp8,0,0.02997333308060964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,8,64,128,1,fp8,fp8,0,0.022789334257443745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,4,64,0,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,4,64,0,1,fp8,fp8,0,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,8,64,128,1,float16,float16,0,0.013290667285521826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,8,64,0,1,float16,float16,0,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,8,64,128,1,float16,fp8,0,0.013557333499193192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,8,64,0,1,float16,fp8,0,0.013669333110253016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,8,64,0,1,fp8,fp8,0,0.022613334159056347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,1,64,128,1,float16,float16,0,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,1,64,0,1,float16,float16,0,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,1,64,128,1,float16,fp8,0,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,1,64,128,1,fp8,fp8,0,0.02306666721900304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,2,64,0,1,float16,fp8,0,0.01320533330241839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,1,64,0,1,float16,fp8,0,0.013637332866589228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,1,64,0,1,fp8,fp8,0,0.02236266682545344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,2,64,128,1,float16,float16,0,0.013552000125249227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,2,64,0,1,float16,float16,0,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,2,64,128,1,float16,fp8,0,0.013749333719412485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,2,64,128,1,fp8,fp8,0,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,2,64,0,1,fp8,fp8,0,0.022842665513356526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,4,64,128,1,float16,float16,0,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,4,64,0,1,float16,float16,0,0.013359999905029932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,4,64,128,1,float16,fp8,0,0.013349333157142004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,4,64,128,1,fp8,fp8,0,0.022970666488011677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,4,64,0,1,float16,fp8,0,0.013797332843144735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,4,64,0,1,fp8,fp8,0,0.022783999641736347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,8,64,128,1,float16,float16,0,0.01156266654531161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,8,64,0,1,float16,float16,0,0.011952000359694162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,8,64,128,1,float16,fp8,0,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,8,64,128,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,8,64,0,1,float16,fp8,0,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,8,64,0,1,fp8,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,1,64,128,1,float16,float16,0,0.012485332787036896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,1,64,0,1,float16,float16,0,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,1,64,128,1,float16,fp8,0,0.011989332735538483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,1,64,128,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,1,64,0,1,float16,fp8,0,0.01250133290886879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,1,64,0,1,fp8,fp8,0,0.01882133384545644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,2,64,0,1,fp8,fp8,0,0.01829333355029424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,2,64,128,1,float16,float16,0,0.01211200033624967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,2,64,0,1,float16,float16,0,0.012298667182525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,2,64,128,1,float16,fp8,0,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,2,64,128,1,fp8,fp8,0,0.018437333405017853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,2,64,0,1,float16,fp8,0,0.01328533391157786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,4,64,128,1,float16,float16,0,0.011978667229413986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,4,64,0,1,float16,float16,0,0.011600000162919363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,4,64,128,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,4,64,128,1,fp8,fp8,0,0.018629333625237148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,4,64,0,1,float16,fp8,0,0.012602667013804117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,4,64,0,1,fp8,fp8,0,0.018719999740521114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,8,64,128,1,float16,float16,0,0.012042666474978128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,8,64,0,1,float16,float16,0,0.011823999385039011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,8,64,128,1,float16,fp8,0,0.01156266654531161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,8,64,128,1,fp8,fp8,0,0.01841066653529803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,8,64,0,1,float16,fp8,0,0.012138667205969492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,8,64,0,1,fp8,fp8,0,0.018186666071414948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,1,64,128,1,float16,float16,0,0.011658667276302973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,1,64,0,1,float16,float16,0,0.01221866657336553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,1,64,128,1,float16,fp8,0,0.01257066677014033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,1,64,128,1,fp8,fp8,0,0.017914666483799618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,1,64,0,1,float16,fp8,0,0.012133333832025528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,1,64,0,1,fp8,fp8,0,0.01830400029818217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,2,64,128,1,float16,float16,0,0.011770666887362799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,2,64,0,1,float16,float16,0,0.011317333827416102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,2,64,128,1,float16,fp8,0,0.012383999923865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,2,64,128,1,fp8,fp8,0,0.018394666413466137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,2,64,0,1,float16,fp8,0,0.011823999385039011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,2,64,0,1,fp8,fp8,0,0.01848000039656957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,4,64,128,1,float16,float16,0,0.012058666596810022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,4,64,0,1,float16,float16,0,0.011610666910807291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,4,64,128,1,float16,fp8,0,0.011616000284751257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,4,64,128,1,fp8,fp8,0,0.018789333601792652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,4,64,0,1,float16,fp8,0,0.012341332932313284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,4,64,0,1,fp8,fp8,0,0.018357332795858383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,8,64,128,1,float16,float16,0,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,8,64,0,1,float16,float16,0,0.011658667276302973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,8,64,128,1,float16,fp8,0,0.012074666718641916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,8,64,128,1,fp8,fp8,0,0.017653333644072216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,8,64,0,1,float16,fp8,0,0.012245333443085352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,8,64,0,1,fp8,fp8,0,0.017957333475351334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,1,64,128,1,float16,float16,0,0.011861333002646765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,1,64,0,1,float16,float16,0,0.011610666910807291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,2,64,0,1,float16,float16,0,0.011674666156371435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,2,64,128,1,float16,fp8,0,0.012128000458081564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,1,64,128,1,float16,fp8,0,0.01156266654531161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,1,64,128,1,fp8,fp8,0,0.018058666338523228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,1,64,0,1,float16,fp8,0,0.012240000069141388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,1,64,0,1,fp8,fp8,0,0.01807466646035512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,2,64,128,1,float16,float16,0,0.01126933346192042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,2,64,128,1,fp8,fp8,0,0.01770666614174843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,2,64,0,1,float16,fp8,0,0.012069333344697952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,2,64,0,1,fp8,fp8,0,0.01815466706951459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,4,64,128,1,float16,float16,0,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,4,64,0,1,float16,float16,0,0.01201066623131434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,4,64,128,1,float16,fp8,0,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,4,64,128,1,fp8,fp8,0,0.018464000274737675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,4,64,0,1,float16,fp8,0,0.01210133358836174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,4,64,0,1,fp8,fp8,0,0.018053332964579265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,8,64,0,1,fp8,fp8,0,0.01823466643691063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,8,64,128,1,float16,float16,0,0.011402666568756104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,8,64,0,1,float16,float16,0,0.011546666423479715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,8,64,128,1,float16,fp8,0,0.011429333438475927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,8,64,128,1,fp8,fp8,0,0.018133333573738735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,8,64,0,1,float16,fp8,0,0.011941333611806234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,1,64,128,1,float16,float16,0,0.011477333803971609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,1,64,0,1,float16,float16,0,0.01198400060335795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,1,64,128,1,float16,fp8,0,0.012149333953857422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,1,64,128,1,fp8,fp8,0,0.018351999421914417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,2,64,128,1,fp8,fp8,0,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,1,64,0,1,float16,fp8,0,0.012250666817029318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,1,64,0,1,fp8,fp8,0,0.01762666677435239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,2,64,128,1,float16,float16,0,0.011930666863918304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,2,64,0,1,float16,float16,0,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,2,64,128,1,float16,fp8,0,0.01210133358836174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,2,64,0,1,float16,fp8,0,0.012234666695197424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,2,64,0,1,fp8,fp8,0,0.018645333747069042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,4,64,128,1,float16,float16,0,0.011893333246310553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,4,64,0,1,float16,float16,0,0.012074666718641916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,8,64,0,1,float16,float16,0,0.011663999408483505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,4,64,128,1,float16,fp8,0,0.011962667107582092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,4,64,128,1,fp8,fp8,0,0.018042666216691334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,4,64,0,1,float16,fp8,0,0.012202666451533636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,4,64,0,1,fp8,fp8,0,0.017968000223239262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,8,64,128,1,float16,float16,0,0.011546666423479715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,8,64,128,1,float16,fp8,0,0.012330666184425354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,8,64,128,1,fp8,fp8,0,0.018565333137909572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,8,64,0,1,float16,fp8,0,0.011760000139474869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,8,64,0,1,fp8,fp8,0,0.01749333366751671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,1,64,128,1,float16,float16,0,0.011855999628702799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,1,64,0,1,float16,float16,0,0.011994666109482447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,1,64,128,1,float16,fp8,0,0.011472000430027643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,1,64,128,1,fp8,fp8,0,0.01815466706951459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,1,64,0,1,float16,fp8,0,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,1,64,0,1,fp8,fp8,0,0.01841066653529803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,2,64,128,1,float16,float16,0,0.011653333902359009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,2,64,0,1,fp8,fp8,0,0.020165332903464634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,2,64,0,1,float16,float16,0,0.011317333827416102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,2,64,128,1,float16,fp8,0,0.012149333953857422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,2,64,128,1,fp8,fp8,0,0.01781333362062772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,2,64,0,1,float16,fp8,0,0.012319999436537424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,4,64,128,1,float16,float16,0,0.011637333780527115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,4,64,0,1,float16,float16,0,0.01191466674208641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,4,64,128,1,float16,fp8,0,0.01202133297920227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,4,64,128,1,fp8,fp8,0,0.017370666066805523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,4,64,0,1,float16,fp8,0,0.011839999506870905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,4,64,0,1,fp8,fp8,0,0.01953599974513054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,4,1,64,128,1,float16,float16,0,0.3829866647720337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,4,1,64,128,1,float16,fp8,0,0.37861335277557373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,4,1,64,128,1,fp8,fp8,0,0.5032906532287598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,4,2,64,128,1,float16,float16,0,0.39602665106455487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,4,1,64,0,1,float16,float16,0,2.4768640200297036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,4,2,64,128,1,float16,fp8,0,0.39316801230112713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,4,2,64,128,1,fp8,fp8,0,0.5160799821217855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,4,1,64,0,1,float16,fp8,0,2.4364479382832847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,4,1,64,0,1,fp8,fp8,0,2.2591306368509927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,4,4,64,128,1,float16,float16,0,0.22057066361109415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,4,2,64,0,1,float16,float16,0,2.4851840337117515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,4,4,64,128,1,float16,fp8,0,0.22138667106628418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,4,4,64,128,1,fp8,fp8,0,0.294597327709198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,4,2,64,0,1,float16,fp8,0,2.4650826454162598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,4,4,64,0,1,float16,float16,0,1.290005366007487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,4,1,64,128,1,float16,float16,0,0.20501333475112915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,4,2,64,0,1,fp8,fp8,0,2.278341293334961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,4,4,64,0,1,float16,fp8,0,1.2935413519541423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,4,1,64,128,1,float16,fp8,0,0.20428800582885742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,4,1,64,0,1,float16,float16,0,1.2850240071614583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,4,4,64,0,1,fp8,fp8,0,1.1345439751942952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,4,1,64,128,1,fp8,fp8,0,0.27354133129119873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,4,2,64,128,1,float16,float16,0,0.21162132422129312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,4,1,64,0,1,float16,fp8,0,1.281215985616048
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,4,2,64,128,1,float16,fp8,0,0.21158399184544882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,4,1,64,0,1,fp8,fp8,0,1.1114239692687988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,4,2,64,128,1,fp8,fp8,0,0.284986674785614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,4,2,64,0,1,float16,float16,0,1.3003093401590984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,4,4,64,128,1,float16,float16,0,0.12761066357294717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,4,2,64,0,1,float16,fp8,0,1.2997600237528484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,4,2,64,0,1,fp8,fp8,0,1.1089333693186443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,4,4,64,0,1,float16,fp8,0,0.6290506521860758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,4,4,64,0,1,fp8,fp8,0,0.5879253149032593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,4,4,64,0,1,float16,float16,0,0.626261313756307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,4,4,64,128,1,float16,fp8,0,0.13029332955678305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,4,4,64,128,1,fp8,fp8,0,0.17478932936986288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,4,1,64,128,1,float16,float16,0,0.12035733461380005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,4,1,64,128,1,float16,fp8,0,0.12037332852681477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,4,1,64,0,1,float16,float16,0,0.6256639957427979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,4,1,64,128,1,fp8,fp8,0,0.16222932934761047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,4,1,64,0,1,fp8,fp8,0,0.569109320640564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,4,1,64,0,1,float16,fp8,0,0.6216906706492106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,4,2,64,128,1,float16,float16,0,0.12358933687210083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,4,2,64,128,1,float16,fp8,0,0.12372799714406331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,4,2,64,0,1,float16,float16,0,0.6187359889348348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,4,2,64,128,1,fp8,fp8,0,0.1666719913482666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,4,4,64,128,1,float16,float16,0,0.09191999832789104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,4,2,64,0,1,float16,fp8,0,0.6234453519185384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,4,2,64,0,1,fp8,fp8,0,0.5791999896367391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,4,4,64,0,1,float16,float16,0,0.3370453516642253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,4,4,64,128,1,float16,fp8,0,0.09216533104578654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,4,4,64,128,1,fp8,fp8,0,0.11756267150243123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,4,4,64,0,1,float16,fp8,0,0.3384586572647095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,4,1,64,128,1,float16,float16,0,0.08878399928410848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,4,4,64,0,1,fp8,fp8,0,0.31601067384084064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,4,1,64,128,1,float16,fp8,0,0.08956799904505412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,4,1,64,0,1,float16,float16,0,0.33857067426045734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,4,1,64,128,1,fp8,fp8,0,0.10674666364987691
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,4,1,64,0,1,float16,fp8,0,0.33855998516082764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,4,1,64,0,1,fp8,fp8,0,0.31593066453933716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,4,2,64,128,1,float16,float16,0,0.0902453362941742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,4,2,64,0,1,float16,float16,0,0.336458683013916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,4,2,64,128,1,float16,fp8,0,0.08915733297665913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,4,2,64,128,1,fp8,fp8,0,0.110944002866745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,4,2,64,0,1,float16,fp8,0,0.33910401662190753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,4,2,64,0,1,fp8,fp8,0,0.3168053428332011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,4,1,64,128,1,float16,float16,0,0.2919999957084656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,4,1,64,128,1,float16,fp8,0,0.28757333755493164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,4,1,64,128,1,fp8,fp8,0,0.3845440149307251
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,4,1,64,0,1,float16,float16,0,1.4527146021525066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,4,2,64,128,1,float16,float16,0,0.3046506643295288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,4,1,64,0,1,fp8,fp8,0,1.2803893089294434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,4,1,64,0,1,float16,fp8,0,1.4425066312154133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,4,2,64,128,1,float16,fp8,0,0.30144532521565753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,4,2,64,128,1,fp8,fp8,0,0.3980960051218669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,4,2,64,0,1,float16,float16,0,1.469370683034261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,4,4,64,128,1,float16,float16,0,0.17037866512934366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,4,2,64,0,1,float16,fp8,0,1.4493865966796875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,4,4,64,128,1,float16,fp8,0,0.17196265856424967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,4,2,64,0,1,fp8,fp8,0,1.2870240211486816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,4,4,64,0,1,float16,float16,0,0.7198186715443929
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,4,4,64,128,1,fp8,fp8,0,0.23018133640289307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,4,1,64,0,1,float16,float16,0,0.7026293277740479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,4,1,64,128,1,float16,float16,0,0.15876799821853638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,4,4,64,0,1,float16,fp8,0,0.7184426784515381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,4,4,64,0,1,fp8,fp8,0,0.6741120020548502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,4,1,64,128,1,float16,fp8,0,0.15709867080052695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,4,1,64,128,1,fp8,fp8,0,0.2139893372853597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,4,1,64,0,1,float16,fp8,0,0.7087360223134359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,4,1,64,0,1,fp8,fp8,0,0.6547306776046753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,4,2,64,128,1,float16,float16,0,0.16158933440844217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,4,2,64,128,1,float16,fp8,0,0.16378666957219443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,4,2,64,0,1,float16,float16,0,0.7088053226470947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,4,2,64,128,1,fp8,fp8,0,0.2214720050493876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,4,2,64,0,1,float16,fp8,0,0.711733341217041
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,4,4,64,128,1,float16,float16,0,0.10194133718808492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,4,2,64,0,1,fp8,fp8,0,0.6645546754201254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,4,4,64,0,1,float16,float16,0,0.3718133370081584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,4,4,64,128,1,float16,fp8,0,0.10334400335947673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,4,4,64,128,1,fp8,fp8,0,0.13784000277519226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,4,4,64,0,1,float16,fp8,0,0.37413867314656574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,4,4,64,0,1,fp8,fp8,0,0.35363201300303143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,4,1,64,128,1,float16,float16,0,0.09549867113431294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,4,1,64,0,1,float16,float16,0,0.37033601601918537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,4,1,64,128,1,float16,fp8,0,0.09400000174840291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,4,1,64,128,1,fp8,fp8,0,0.12530666589736938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,4,1,64,0,1,float16,fp8,0,0.36640000343322754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,4,1,64,0,1,fp8,fp8,0,0.3423680067062378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,4,2,64,128,1,float16,float16,0,0.09698667128880818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,4,2,64,0,1,float16,fp8,0,0.3729706605275472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,4,2,64,0,1,float16,float16,0,0.3720000187555949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,4,2,64,128,1,float16,fp8,0,0.0977280040582021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,4,2,64,128,1,fp8,fp8,0,0.13160533706347147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,4,2,64,0,1,fp8,fp8,0,0.3469546635945638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,4,4,64,128,1,float16,float16,0,0.07204266885916392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,4,4,64,0,1,float16,float16,0,0.2202720046043396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,4,4,64,128,1,float16,fp8,0,0.07249600191911061
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,4,4,64,128,1,fp8,fp8,0,0.09150933225949605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,4,4,64,0,1,float16,fp8,0,0.22131200631459555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,4,4,64,0,1,fp8,fp8,0,0.2039146622021993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,4,1,64,128,1,float16,float16,0,0.07277333239714305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,4,1,64,0,1,float16,float16,0,0.22076267004013062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,4,1,64,128,1,float16,fp8,0,0.07202133536338806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,4,1,64,128,1,fp8,fp8,0,0.08885866403579712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,4,1,64,0,1,float16,fp8,0,0.22221332788467407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,4,1,64,0,1,fp8,fp8,0,0.20435200134913126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,4,2,64,128,1,float16,float16,0,0.0713866651058197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,4,2,64,128,1,float16,fp8,0,0.07187200089295705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,4,2,64,0,1,float16,float16,0,0.22110400597254434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,4,2,64,128,1,fp8,fp8,0,0.0886400043964386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,4,2,64,0,1,float16,fp8,0,0.22020266453425089
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,4,2,64,0,1,fp8,fp8,0,0.20552533864974976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,4,1,64,128,1,float16,float16,0,0.2445440093676249
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,4,1,64,128,1,float16,fp8,0,0.24077334006627402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,4,1,64,128,1,fp8,fp8,0,0.32737600803375244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,4,1,64,0,1,float16,float16,0,1.045082648595174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,4,2,64,128,1,float16,float16,0,0.2553706765174866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,4,2,64,128,1,fp8,fp8,0,0.33683733145395917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,4,1,64,0,1,float16,fp8,0,1.044368028640747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,4,1,64,0,1,fp8,fp8,0,0.9138933022816976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,4,2,64,128,1,float16,fp8,0,0.255023996035258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,4,2,64,0,1,float16,float16,0,1.062549352645874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,4,4,64,128,1,float16,float16,0,0.14568533500035605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,4,2,64,0,1,float16,fp8,0,1.0540640354156494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,4,4,64,128,1,float16,fp8,0,0.14702399571736655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,4,4,64,0,1,float16,float16,0,0.5162933270136515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,4,2,64,0,1,fp8,fp8,0,0.9303092956542969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,4,4,64,128,1,fp8,fp8,0,0.19669334093729654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,4,4,64,0,1,float16,fp8,0,0.5221386750539144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,4,4,64,0,1,fp8,fp8,0,0.48872001965840656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,4,1,64,128,1,float16,float16,0,0.13552000125249228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,4,1,64,0,1,float16,float16,0,0.5072853167851766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,4,1,64,128,1,float16,fp8,0,0.13473066687583923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,4,1,64,128,1,fp8,fp8,0,0.1837760011355082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,4,1,64,0,1,float16,fp8,0,0.5105439821879069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,4,2,64,128,1,float16,float16,0,0.13865066568056741
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,4,1,64,0,1,fp8,fp8,0,0.47200532754262287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,4,2,64,128,1,float16,fp8,0,0.13966400424639383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,4,2,64,0,1,float16,float16,0,0.5158666769663492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,4,2,64,128,1,fp8,fp8,0,0.19038933515548706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,4,2,64,0,1,float16,fp8,0,0.5140159924825033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,4,2,64,0,1,fp8,fp8,0,0.4817599852879842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,4,1,64,128,1,float16,float16,0,0.08185066779454549
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,4,4,64,128,1,float16,float16,0,0.08729599912961324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,4,4,64,0,1,float16,float16,0,0.27821866671244305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,4,4,64,128,1,float16,fp8,0,0.08900800347328186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,4,4,64,128,1,fp8,fp8,0,0.12026666601498921
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,4,4,64,0,1,float16,fp8,0,0.27988799413045246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,4,4,64,0,1,fp8,fp8,0,0.26145599285761517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,4,1,64,128,1,float16,fp8,0,0.08164266745249431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,4,1,64,0,1,float16,float16,0,0.2720426718393962
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,4,1,64,128,1,fp8,fp8,0,0.10617066423098247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,4,1,64,0,1,float16,fp8,0,0.2737920085589091
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,4,1,64,0,1,fp8,fp8,0,0.2521653374036153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,4,2,64,128,1,float16,float16,0,0.08428266644477844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,4,2,64,0,1,float16,float16,0,0.27436800797780353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,4,2,64,128,1,float16,fp8,0,0.08472533027331035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,4,2,64,128,1,fp8,fp8,0,0.114656001329422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,4,2,64,0,1,float16,fp8,0,0.2744800051053365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,4,2,64,0,1,fp8,fp8,0,0.2563146750132243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,4,4,64,128,1,float16,float16,0,0.06554666658242543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,4,4,64,0,1,float16,float16,0,0.16477866967519125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,4,4,64,128,1,float16,fp8,0,0.06522666911284129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,4,4,64,128,1,fp8,fp8,0,0.0774239997069041
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,4,4,64,0,1,float16,fp8,0,0.16448533535003662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,4,4,64,0,1,fp8,fp8,0,0.15363199512163797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,4,1,64,128,1,float16,float16,0,0.06471466521422069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,4,1,64,0,1,float16,float16,0,0.16566399733225504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,4,1,64,128,1,float16,fp8,0,0.06524799764156342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,4,1,64,128,1,fp8,fp8,0,0.0753653347492218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,4,1,64,0,1,float16,fp8,0,0.16531733671824136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,4,1,64,0,1,fp8,fp8,0,0.15413866440455118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,4,2,64,128,1,float16,float16,0,0.06493866443634033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,4,2,64,0,1,float16,float16,0,0.16454933087031046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,4,2,64,128,1,float16,fp8,0,0.06469333171844482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,4,2,64,128,1,fp8,fp8,0,0.07554133236408234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,4,2,64,0,1,float16,fp8,0,0.16421332955360413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,4,2,64,0,1,fp8,fp8,0,0.15385599931081137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,4,1,64,128,1,float16,float16,0,0.376581350962321
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,4,1,64,128,1,float16,fp8,0,0.372869332631429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,4,1,64,128,1,fp8,fp8,0,0.4986293315887451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,4,1,64,0,1,float16,float16,0,1.3558932940165203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,4,2,64,128,1,float16,float16,0,0.3907359838485718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,4,1,64,0,1,float16,fp8,0,1.353600025177002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,4,1,64,0,1,fp8,fp8,0,1.252618630727132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,4,2,64,128,1,float16,fp8,0,0.3874986569086711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,4,2,64,128,1,fp8,fp8,0,0.5134933392206827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,4,2,64,0,1,float16,float16,0,1.3653705914815266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,4,4,64,128,1,float16,float16,0,0.21707733472188315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,4,2,64,0,1,float16,fp8,0,1.367919921875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,4,4,64,128,1,float16,fp8,0,0.21606934070587158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,4,2,64,0,1,fp8,fp8,0,1.2713759740193684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,4,4,64,0,1,float16,float16,0,0.7222293217976888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,4,1,64,128,1,float16,float16,0,0.19962133963902792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,4,4,64,128,1,fp8,fp8,0,0.2892213265101115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,4,4,64,0,1,float16,fp8,0,0.7196693420410156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,4,4,64,0,1,fp8,fp8,0,0.6375840107599894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,4,1,64,128,1,float16,fp8,0,0.1975626746813456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,4,1,64,0,1,float16,float16,0,0.7043253580729166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,4,1,64,128,1,fp8,fp8,0,0.26794666051864624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,4,1,64,0,1,float16,fp8,0,0.7076799869537354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,4,2,64,128,1,float16,float16,0,0.20680000384648642
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,4,1,64,0,1,fp8,fp8,0,0.6194186607996622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,4,2,64,128,1,float16,fp8,0,0.2067199945449829
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,4,2,64,0,1,float16,fp8,0,0.7146773338317871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,4,2,64,0,1,float16,float16,0,0.7161493301391602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,4,2,64,128,1,fp8,fp8,0,0.2773120005925496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,4,4,64,128,1,float16,float16,0,0.1216266651948293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,4,2,64,0,1,fp8,fp8,0,0.6284106572469076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,4,4,64,0,1,float16,float16,0,0.35385600725809735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,4,4,64,128,1,float16,fp8,0,0.12355200449625652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,4,4,64,128,1,fp8,fp8,0,0.16698133945465088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,4,4,64,0,1,float16,fp8,0,0.35499731699625653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,4,1,64,128,1,float16,fp8,0,0.11348799864451091
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,4,1,64,0,1,float16,float16,0,0.3468426863352458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,4,4,64,0,1,fp8,fp8,0,0.33355732758839923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,4,1,64,128,1,float16,float16,0,0.11346667011578877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,4,1,64,128,1,fp8,fp8,0,0.15439466635386148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,4,1,64,0,1,float16,fp8,0,0.34621334075927734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,4,2,64,128,1,float16,float16,0,0.1162559986114502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,4,1,64,0,1,fp8,fp8,0,0.32313599189122516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,4,2,64,0,1,float16,float16,0,0.34939201672871906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,4,2,64,128,1,float16,fp8,0,0.11612266302108765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,4,2,64,128,1,fp8,fp8,0,0.15957333644231161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,4,2,64,0,1,float16,fp8,0,0.3474453290303548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,4,4,64,128,1,float16,float16,0,0.07442133128643036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,4,2,64,0,1,fp8,fp8,0,0.32923734188079834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,4,4,64,0,1,float16,float16,0,0.18870933850606283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,4,4,64,128,1,float16,fp8,0,0.07583466668923695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,4,4,64,128,1,fp8,fp8,0,0.10436266660690308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,4,4,64,0,1,float16,fp8,0,0.1882773240407308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,4,4,64,0,1,fp8,fp8,0,0.18210132916768393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,4,1,64,128,1,float16,float16,0,0.07072000205516815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,4,1,64,0,1,float16,float16,0,0.18542399009068808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,4,1,64,128,1,float16,fp8,0,0.07028266787528992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,4,1,64,128,1,fp8,fp8,0,0.08850133419036865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,4,1,64,0,1,float16,fp8,0,0.18639999628067017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,4,1,64,0,1,fp8,fp8,0,0.17539199193318686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,4,2,64,128,1,float16,float16,0,0.0709386666615804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,4,2,64,0,1,float16,float16,0,0.18578133980433145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,4,2,64,128,1,float16,fp8,0,0.07196266452471416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,4,2,64,128,1,fp8,fp8,0,0.09834667046864827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,4,2,64,0,1,float16,fp8,0,0.18655999501546225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,4,2,64,0,1,fp8,fp8,0,0.1756053368250529
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,4,4,64,128,1,float16,float16,0,0.05593066910902659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,4,4,64,0,1,float16,float16,0,0.11103999614715576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,4,4,64,128,1,float16,fp8,0,0.05531733234723409
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,4,4,64,128,1,fp8,fp8,0,0.06644266843795776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,4,4,64,0,1,float16,fp8,0,0.1109279990196228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,4,4,64,0,1,fp8,fp8,0,0.10539199908574422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,4,1,64,128,1,float16,float16,0,0.05545066793759664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,4,1,64,0,1,float16,float16,0,0.11058666308720906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,4,1,64,128,1,float16,fp8,0,0.055685331424077354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,4,1,64,128,1,fp8,fp8,0,0.06609599788983662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,4,1,64,0,1,float16,fp8,0,0.11079466342926025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,4,1,64,0,1,fp8,fp8,0,0.1048906644185384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,4,2,64,128,1,float16,float16,0,0.05539733171463013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,4,2,64,0,1,float16,float16,0,0.11077333490053813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,4,2,64,128,1,float16,fp8,0,0.055589333176612854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,4,2,64,128,1,fp8,fp8,0,0.06588266789913177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,4,2,64,0,1,float16,fp8,0,0.11160533626874287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,4,2,64,0,1,fp8,fp8,0,0.10524800419807434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,4,1,64,128,1,float16,float16,0,0.2858346700668335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,4,1,64,128,1,float16,fp8,0,0.2820746699968974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,4,1,64,0,1,float16,float16,0,0.8170133431752523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,4,1,64,128,1,fp8,fp8,0,0.38091198603312176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,4,1,64,0,1,float16,fp8,0,0.8176320393880209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,4,1,64,0,1,fp8,fp8,0,0.7353066603342692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,4,2,64,128,1,float16,float16,0,0.3000906705856323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,4,2,64,128,1,float16,fp8,0,0.2982880075772603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,4,4,64,128,1,float16,float16,0,0.16708266735076904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,4,2,64,0,1,float16,float16,0,0.8361226717631022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,4,2,64,128,1,fp8,fp8,0,0.3934933344523112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,4,2,64,0,1,float16,fp8,0,0.8295093377431234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,4,2,64,0,1,fp8,fp8,0,0.7482133706410726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,4,4,64,0,1,float16,float16,0,0.416101336479187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,4,4,64,128,1,float16,fp8,0,0.16792533795038858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,4,4,64,128,1,fp8,fp8,0,0.22525866826375326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,4,4,64,0,1,float16,fp8,0,0.41813333829243976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,4,4,64,0,1,fp8,fp8,0,0.3920746644337972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,4,1,64,128,1,float16,float16,0,0.15448000033696493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,4,1,64,0,1,float16,float16,0,0.40628798802693683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,4,1,64,128,1,float16,fp8,0,0.15331733226776123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,4,1,64,128,1,fp8,fp8,0,0.2084266742070516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,4,1,64,0,1,float16,fp8,0,0.40219732125600177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,4,1,64,0,1,fp8,fp8,0,0.37785065174102783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,4,2,64,128,1,float16,float16,0,0.1591093341509501
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,4,2,64,0,1,float16,float16,0,0.409226655960083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,4,2,64,128,1,float16,fp8,0,0.1602666676044464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,4,2,64,128,1,fp8,fp8,0,0.2179093360900879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,4,2,64,0,1,float16,fp8,0,0.4058346748352051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,4,2,64,0,1,fp8,fp8,0,0.3866026798884074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,4,4,64,128,1,float16,float16,0,0.09646933277448018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,4,4,64,0,1,float16,float16,0,0.21849600474039713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,4,4,64,128,1,float16,fp8,0,0.09662399689356486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,4,4,64,128,1,fp8,fp8,0,0.13300800323486328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,4,4,64,0,1,float16,fp8,0,0.22259199619293213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,4,4,64,0,1,fp8,fp8,0,0.20996799071629843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,4,1,64,128,1,float16,float16,0,0.08937600255012512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,4,1,64,0,1,float16,float16,0,0.2140586574872335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,4,1,64,128,1,float16,fp8,0,0.08821333448092143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,4,1,64,128,1,fp8,fp8,0,0.12271466851234436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,4,2,64,0,1,float16,float16,0,0.21645333369572958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,4,1,64,0,1,float16,fp8,0,0.2148053248723348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,4,1,64,0,1,fp8,fp8,0,0.2018453280131022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,4,2,64,128,1,float16,float16,0,0.0922986666361491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,4,2,64,128,1,float16,fp8,0,0.09195733070373535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,4,2,64,128,1,fp8,fp8,0,0.12680533528327942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,4,2,64,0,1,float16,fp8,0,0.2158986727396647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,4,2,64,0,1,fp8,fp8,0,0.2060640056927999
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,4,4,64,128,1,float16,float16,0,0.057258665561676025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,4,4,64,0,1,float16,float16,0,0.1255626678466797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,4,4,64,128,1,float16,fp8,0,0.05774933099746704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,4,4,64,128,1,fp8,fp8,0,0.07829866806666057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,4,4,64,0,1,float16,fp8,0,0.12494933605194092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,4,4,64,0,1,fp8,fp8,0,0.11973866820335388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,4,1,64,128,1,float16,float16,0,0.05600533386071523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,4,1,64,0,1,float16,float16,0,0.12447999914487202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,4,1,64,128,1,float16,fp8,0,0.05695466697216034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,4,1,64,128,1,fp8,fp8,0,0.0718453327814738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,4,1,64,0,1,float16,fp8,0,0.12359999616940816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,4,1,64,0,1,fp8,fp8,0,0.11633599797884624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,4,2,64,128,1,float16,float16,0,0.05699199934800466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,4,2,64,0,1,float16,float16,0,0.1237600048383077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,4,2,64,128,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,4,2,64,128,1,fp8,fp8,0,0.07610133290290833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,4,2,64,0,1,float16,fp8,0,0.12468266487121582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,4,2,64,0,1,fp8,fp8,0,0.11803199847539265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,4,4,64,128,1,float16,float16,0,0.04890666902065277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,4,4,64,128,1,float16,fp8,0,0.04582933088143667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,4,4,64,0,1,float16,float16,0,0.08522666494051616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,4,4,64,128,1,fp8,fp8,0,0.051642666260401406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,4,4,64,0,1,float16,fp8,0,0.085125337044398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,4,4,64,0,1,fp8,fp8,0,0.08215466638406117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,4,1,64,128,1,float16,float16,0,0.048810665806134544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,4,1,64,0,1,float16,float16,0,0.08515200018882751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,4,1,64,128,1,float16,fp8,0,0.046069333950678505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,4,1,64,128,1,fp8,fp8,0,0.05178666611512502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,4,1,64,0,1,float16,fp8,0,0.08530666430791219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,4,1,64,0,1,fp8,fp8,0,0.08160000046094258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,4,2,64,128,1,float16,float16,0,0.04628799855709076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,4,2,64,0,1,float16,float16,0,0.08598933617273967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,4,2,64,128,1,float16,fp8,0,0.04781866570313772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,4,2,64,128,1,fp8,fp8,0,0.05213866631189982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,4,2,64,0,1,float16,fp8,0,0.08505066235860188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,4,2,64,0,1,fp8,fp8,0,0.08169599870840709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,4,1,64,128,1,float16,float16,0,0.3803946574529012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,4,1,64,128,1,float16,fp8,0,0.3739519913991292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,4,1,64,0,1,float16,float16,0,0.8110133012135824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,4,1,64,128,1,fp8,fp8,0,0.4978346824645996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,4,1,64,0,1,float16,fp8,0,0.7984480063120524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,4,2,64,128,1,float16,float16,0,0.39209600289662677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,4,1,64,0,1,fp8,fp8,0,0.7558133602142334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,4,2,64,128,1,float16,fp8,0,0.3887253204981486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,4,2,64,0,1,float16,fp8,0,0.8224319616953532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,4,4,64,128,1,float16,float16,0,0.21526400248209634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,4,2,64,0,1,float16,float16,0,0.818394660949707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,4,2,64,128,1,fp8,fp8,0,0.5161013205846151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,4,2,64,0,1,fp8,fp8,0,0.7712480227152506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,4,4,64,0,1,float16,float16,0,0.43641066551208496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,4,4,64,128,1,float16,fp8,0,0.21573867400487265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,4,4,64,128,1,fp8,fp8,0,0.28678399324417114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,4,1,64,128,1,float16,fp8,0,0.19590399662653604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,4,4,64,0,1,float16,fp8,0,0.43729066848754883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,4,1,64,128,1,fp8,fp8,0,0.26523200670878094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,4,4,64,0,1,fp8,fp8,0,0.3943519989649455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,4,1,64,128,1,float16,float16,0,0.19732266664505005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,4,1,64,0,1,float16,float16,0,0.42071465651194256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,4,1,64,0,1,float16,fp8,0,0.4203999837239583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,4,2,64,128,1,float16,float16,0,0.20624534289042154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,4,1,64,0,1,fp8,fp8,0,0.37564265727996826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,4,2,64,0,1,float16,float16,0,0.4271786610285441
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,4,2,64,128,1,float16,fp8,0,0.20382400353749594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,4,2,64,128,1,fp8,fp8,0,0.2749066750208537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,4,2,64,0,1,float16,fp8,0,0.4296213388442993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,4,2,64,0,1,fp8,fp8,0,0.38598934809366864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,4,4,64,128,1,float16,float16,0,0.11809066931406657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,4,4,64,0,1,float16,float16,0,0.21781333287556967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,4,4,64,128,1,float16,fp8,0,0.1197653313477834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,4,4,64,128,1,fp8,fp8,0,0.16204800208409628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,4,4,64,0,1,float16,fp8,0,0.22010666131973267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,4,4,64,0,1,fp8,fp8,0,0.21064533789952597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,4,1,64,0,1,float16,fp8,0,0.20884267489115396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,4,1,64,128,1,float16,float16,0,0.10973333319028218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,4,1,64,0,1,float16,float16,0,0.21136534214019775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,4,1,64,128,1,float16,fp8,0,0.1090186635653178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,4,1,64,128,1,fp8,fp8,0,0.15065600474675497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,4,1,64,0,1,fp8,fp8,0,0.19783467054367065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,4,2,64,128,1,float16,float16,0,0.11316266655921936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,4,2,64,0,1,float16,float16,0,0.2125599980354309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,4,2,64,128,1,float16,fp8,0,0.1127839982509613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,4,2,64,128,1,fp8,fp8,0,0.15471999843915304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,4,2,64,0,1,float16,fp8,0,0.21223467588424683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,4,2,64,0,1,fp8,fp8,0,0.2023413379987081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,4,4,64,128,1,float16,float16,0,0.07101866602897644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,4,4,64,0,1,float16,float16,0,0.1185706655184428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,4,4,64,128,1,float16,fp8,0,0.07183466851711273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,4,4,64,128,1,fp8,fp8,0,0.09995200236638387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,4,4,64,0,1,float16,fp8,0,0.12127466996510823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,4,4,64,0,1,fp8,fp8,0,0.11505599816640218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,4,1,64,0,1,float16,fp8,0,0.11414933204650879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,4,1,64,128,1,float16,float16,0,0.06554133196671803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,4,1,64,0,1,float16,float16,0,0.11518399914105733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,4,1,64,128,1,float16,fp8,0,0.06593066453933716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,4,1,64,128,1,fp8,fp8,0,0.08357866605122884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,4,1,64,0,1,fp8,fp8,0,0.10885333021481831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,4,2,64,128,1,float16,float16,0,0.06799999872843425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,4,2,64,0,1,float16,float16,0,0.11555199821790059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,4,2,64,128,1,float16,fp8,0,0.06756799916426341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,4,2,64,128,1,fp8,fp8,0,0.09220799803733826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,4,2,64,0,1,float16,fp8,0,0.1158026655515035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,4,2,64,0,1,fp8,fp8,0,0.10971732934315999
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,4,4,64,128,1,float16,float16,0,0.045696000258127846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,4,4,64,0,1,float16,float16,0,0.06881066660086314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,4,4,64,128,1,float16,fp8,0,0.0469706654548645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,4,4,64,128,1,fp8,fp8,0,0.05885333319505056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,4,4,64,0,1,float16,fp8,0,0.06896000107129414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,4,4,64,0,1,fp8,fp8,0,0.06635733445485432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,4,1,64,128,1,float16,float16,0,0.04507199923197428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,4,1,64,0,1,float16,float16,0,0.06816000243028005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,4,1,64,128,1,float16,fp8,0,0.04531733194986979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,4,1,64,128,1,fp8,fp8,0,0.05669333537419637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,4,1,64,0,1,float16,fp8,0,0.06858133276303609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,4,1,64,0,1,fp8,fp8,0,0.06592000027497609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,4,2,64,128,1,float16,float16,0,0.04567466676235199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,4,2,64,0,1,float16,float16,0,0.06865066786607106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,4,2,64,128,1,float16,fp8,0,0.04540266593297323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,4,2,64,128,1,fp8,fp8,0,0.057589332262674965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,4,2,64,0,1,float16,fp8,0,0.06807999809583028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,4,2,64,0,1,fp8,fp8,0,0.06587733328342438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,4,4,64,128,1,float16,float16,0,0.035349334279696144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,4,4,64,0,1,float16,float16,0,0.06238399942715963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,4,4,64,128,1,float16,fp8,0,0.0356480007370313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,4,4,64,128,1,fp8,fp8,0,0.04219200213750204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,4,4,64,0,1,float16,fp8,0,0.06287999947865804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,4,4,64,0,1,fp8,fp8,0,0.06127466758092245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,4,1,64,128,1,float16,float16,0,0.03461333364248276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,4,1,64,0,1,float16,float16,0,0.06289066871007283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,4,1,64,128,1,float16,fp8,0,0.03586133321126302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,4,1,64,128,1,fp8,fp8,0,0.041749333341916404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,4,1,64,0,1,float16,fp8,0,0.062218666076660156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,4,1,64,0,1,fp8,fp8,0,0.06025599936644236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,4,2,64,128,1,float16,float16,0,0.03501333296298981
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,4,2,64,0,1,float16,float16,0,0.0626453310251236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,4,2,64,128,1,float16,fp8,0,0.03499733408292135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,4,2,64,128,1,fp8,fp8,0,0.041738669077555336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,4,1,64,0,1,float16,float16,0,0.5078773498535156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,4,2,64,0,1,float16,fp8,0,0.06289066871007283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,4,2,64,0,1,fp8,fp8,0,0.0612960010766983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,4,1,64,128,1,float16,float16,0,0.2849280039469401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,4,1,64,128,1,float16,fp8,0,0.2815413276354472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,4,1,64,128,1,fp8,fp8,0,0.3762986660003662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,4,1,64,0,1,float16,fp8,0,0.5019946495691935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,4,1,64,0,1,fp8,fp8,0,0.46058134237925213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,4,2,64,128,1,float16,float16,0,0.29286932945251465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,4,2,64,0,1,float16,float16,0,0.5167359908421835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,4,2,64,128,1,float16,fp8,0,0.291706661383311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,4,2,64,128,1,fp8,fp8,0,0.3878186543782552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,4,2,64,0,1,float16,fp8,0,0.5147573153177897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,4,4,64,128,1,float16,float16,0,0.16598400473594666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,4,2,64,0,1,fp8,fp8,0,0.47430400053660077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,4,4,64,0,1,float16,float16,0,0.2672906716664632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,4,4,64,128,1,float16,fp8,0,0.16583466529846191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,4,4,64,128,1,fp8,fp8,0,0.22322134176890054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,4,4,64,0,1,float16,fp8,0,0.26822932561238605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,4,1,64,128,1,fp8,fp8,0,0.20737600326538086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,4,1,64,0,1,float16,fp8,0,0.25198932488759357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,4,4,64,0,1,fp8,fp8,0,0.25779734055201214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,4,1,64,128,1,float16,float16,0,0.15401599804560342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,4,1,64,0,1,float16,float16,0,0.2522346576054891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,4,1,64,128,1,float16,fp8,0,0.15238400300343832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,4,1,64,0,1,fp8,fp8,0,0.24152000745137533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,4,2,64,128,1,fp8,fp8,0,0.21264000733693442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,4,2,64,128,1,float16,float16,0,0.1577173372109731
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,4,2,64,0,1,float16,float16,0,0.2567360003789266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,4,2,64,128,1,float16,fp8,0,0.1562399963537852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,4,2,64,0,1,float16,fp8,0,0.25617067019144696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,4,2,64,0,1,fp8,fp8,0,0.24764267603556314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,4,4,64,128,1,float16,float16,0,0.09425066908200581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,4,4,64,0,1,float16,float16,0,0.14282133181889853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,4,4,64,128,1,float16,fp8,0,0.09598400195439656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,4,4,64,128,1,fp8,fp8,0,0.1318186620871226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,4,4,64,0,1,float16,fp8,0,0.14441066980361938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,4,4,64,0,1,fp8,fp8,0,0.14030933380126953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,4,1,64,128,1,float16,float16,0,0.08733333150545756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,4,1,64,0,1,float16,float16,0,0.13590932885805765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,4,1,64,128,1,float16,fp8,0,0.08642133076985677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,4,1,64,128,1,fp8,fp8,0,0.11939199765523274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,4,1,64,0,1,float16,fp8,0,0.13449066877365112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,4,1,64,0,1,fp8,fp8,0,0.12982400258382162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,4,2,64,128,1,float16,float16,0,0.08918933073679607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,4,2,64,0,1,float16,float16,0,0.13924800356229147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,4,2,64,128,1,float16,fp8,0,0.09011200070381165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,4,2,64,128,1,fp8,fp8,0,0.12505066394805908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,4,2,64,0,1,float16,fp8,0,0.1397226651509603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,4,2,64,0,1,fp8,fp8,0,0.1351626714070638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,4,4,64,128,1,float16,float16,0,0.05474133292833964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,4,4,64,0,1,float16,float16,0,0.08022400240103404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,4,4,64,128,1,float16,fp8,0,0.0557226687669754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,4,4,64,128,1,fp8,fp8,0,0.07622933387756348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,4,4,64,0,1,float16,fp8,0,0.08126399914423625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,4,4,64,0,1,fp8,fp8,0,0.08046400050322215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,4,1,64,128,1,float16,float16,0,0.05312533179918925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,4,1,64,0,1,float16,float16,0,0.07888533174991608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,4,1,64,128,1,float16,fp8,0,0.05264533559481303
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,4,1,64,128,1,fp8,fp8,0,0.07238933444023132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,4,1,64,0,1,float16,fp8,0,0.07880533238252004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,4,1,64,0,1,fp8,fp8,0,0.07613866527875264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,4,2,64,128,1,float16,float16,0,0.05457599957784017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,4,2,64,0,1,float16,float16,0,0.07922666768232982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,4,2,64,128,1,float16,fp8,0,0.05324266850948334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,4,2,64,128,1,fp8,fp8,0,0.07005866865317027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,4,2,64,0,1,float16,fp8,0,0.08046933511892955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,4,2,64,0,1,fp8,fp8,0,0.07700266440709432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,4,4,64,128,1,float16,float16,0,0.04091199984153112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,4,4,64,0,1,float16,float16,0,0.05260799825191498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,4,4,64,128,1,float16,fp8,0,0.03888533264398575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,4,4,64,128,1,fp8,fp8,0,0.045328001181284584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,4,4,64,0,1,fp8,fp8,0,0.05121066669623057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,4,4,64,0,1,float16,fp8,0,0.052522664268811546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,4,1,64,128,1,float16,float16,0,0.039781334499518074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,4,1,64,0,1,float16,float16,0,0.05226133267084757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,4,1,64,128,1,float16,fp8,0,0.03841066608826319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,4,1,64,128,1,fp8,fp8,0,0.04509866734345754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,4,1,64,0,1,float16,fp8,0,0.05230933427810669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,4,1,64,0,1,fp8,fp8,0,0.05040533343950907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,4,2,64,128,1,float16,float16,0,0.04181866844495138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,4,2,64,0,1,float16,float16,0,0.052373334765434265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,4,2,64,128,1,float16,fp8,0,0.038917332887649536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,4,2,64,128,1,fp8,fp8,0,0.04540266593297323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,4,2,64,0,1,float16,fp8,0,0.05235200126965841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,4,2,64,0,1,fp8,fp8,0,0.051301335295041404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,4,4,64,128,1,float16,float16,0,0.03249066571394602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,4,4,64,0,1,float16,float16,0,0.05116266508897146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,4,4,64,128,1,float16,fp8,0,0.0322773332397143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,4,4,64,128,1,fp8,fp8,0,0.03916800022125244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,4,4,64,0,1,float16,fp8,0,0.051594664653142296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,4,4,64,0,1,fp8,fp8,0,0.05064000189304352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,4,1,64,128,1,float16,float16,0,0.032170665760835014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,4,1,64,0,1,float16,float16,0,0.05147733290990194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,4,1,64,128,1,float16,fp8,0,0.03223466624816259
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,4,1,64,128,1,fp8,fp8,0,0.0390079990029335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,4,1,64,0,1,float16,fp8,0,0.05120533208052317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,4,1,64,0,1,fp8,fp8,0,0.050373335679372154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,4,2,64,128,1,float16,float16,0,0.032058666149775185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,4,2,64,0,1,float16,float16,0,0.051455999414126076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,4,2,64,128,1,float16,fp8,0,0.03219199925661087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,4,2,64,128,1,fp8,fp8,0,0.03885333240032196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,4,2,64,0,1,float16,fp8,0,0.05115200082461039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,4,2,64,0,1,fp8,fp8,0,0.04976533353328705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,4,1,64,128,1,float16,float16,0,0.36416534582773846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,4,1,64,128,1,float16,fp8,0,0.361077348391215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,4,1,64,0,1,float16,float16,0,0.5244373480478922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,4,1,64,128,1,fp8,fp8,0,0.4793706734975179
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,4,1,64,0,1,float16,fp8,0,0.5220533212025961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,4,1,64,0,1,fp8,fp8,0,0.49354132016499835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,4,2,64,128,1,float16,float16,0,0.3763466676076253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,4,2,64,0,1,float16,float16,0,0.535370667775472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,4,2,64,128,1,float16,fp8,0,0.3745280106862386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,4,2,64,128,1,fp8,fp8,0,0.503050684928894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,4,2,64,0,1,float16,fp8,0,0.5332213242848715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,4,2,64,0,1,fp8,fp8,0,0.5138560136159261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,4,4,64,128,1,float16,float16,0,0.21091200908025107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,4,4,64,0,1,float16,float16,0,0.29236799478530884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,4,4,64,128,1,float16,fp8,0,0.213919997215271
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,4,4,64,128,1,fp8,fp8,0,0.28429333368937176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,4,4,64,0,1,float16,fp8,0,0.2957226634025574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,4,4,64,0,1,fp8,fp8,0,0.2775146762530009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,4,1,64,128,1,float16,float16,0,0.19553599754969278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,4,1,64,0,1,float16,float16,0,0.2808213432629903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,4,1,64,128,1,float16,fp8,0,0.19296000401178995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,4,1,64,128,1,fp8,fp8,0,0.25835732618967694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,4,1,64,0,1,float16,fp8,0,0.2771466573079427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,4,1,64,0,1,fp8,fp8,0,0.2521013418833415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,4,2,64,128,1,float16,float16,0,0.2029119928677877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,4,2,64,0,1,float16,float16,0,0.2829759915669759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,4,2,64,0,1,fp8,fp8,0,0.2615306576093038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,4,2,64,128,1,float16,fp8,0,0.20153067509333292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,4,2,64,128,1,fp8,fp8,0,0.26920533180236816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,4,2,64,0,1,float16,fp8,0,0.2837653358777364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,4,4,64,128,1,float16,float16,0,0.11635200182596843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,4,4,64,0,1,float16,float16,0,0.1508853336175283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,4,4,64,128,1,float16,fp8,0,0.11932266751925151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,4,4,64,128,1,fp8,fp8,0,0.15897066394488016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,4,4,64,0,1,float16,fp8,0,0.1534346640110016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,4,4,64,0,1,fp8,fp8,0,0.1495466629664103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,4,1,64,128,1,float16,float16,0,0.10915733377138774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,4,1,64,0,1,float16,float16,0,0.14197867115338644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,4,1,64,128,1,float16,fp8,0,0.10883200168609619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,4,1,64,128,1,fp8,fp8,0,0.14738667011260986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,4,1,64,0,1,float16,fp8,0,0.14141333103179932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,4,1,64,0,1,fp8,fp8,0,0.13773866494496664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,4,2,64,128,1,float16,float16,0,0.11090667049090068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,4,2,64,0,1,float16,float16,0,0.14365333318710327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,4,2,64,128,1,float16,fp8,0,0.11125866572062175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,4,2,64,128,1,fp8,fp8,0,0.15385066469510397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,4,2,64,0,1,float16,fp8,0,0.1436746617158254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,4,2,64,0,1,fp8,fp8,0,0.14255999525388083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,4,4,64,128,1,float16,float16,0,0.07004799942175548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,4,4,64,0,1,float16,float16,0,0.08359466989835103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,4,4,64,128,1,float16,fp8,0,0.07092266778151195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,4,4,64,128,1,fp8,fp8,0,0.09856533010800679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,4,4,64,0,1,float16,fp8,0,0.08601599931716919
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,4,4,64,0,1,fp8,fp8,0,0.08543999989827473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,4,1,64,128,1,float16,float16,0,0.06491200129191081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,4,1,64,0,1,float16,float16,0,0.08041066428025563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,4,1,64,128,1,float16,fp8,0,0.06427200138568878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,4,1,64,128,1,fp8,fp8,0,0.08193066716194153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,4,1,64,0,1,float16,fp8,0,0.07974400122960408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,4,1,64,0,1,fp8,fp8,0,0.07798933486143748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,4,2,64,128,1,float16,float16,0,0.06612800061702728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,4,2,64,0,1,float16,float16,0,0.0809440016746521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,4,2,64,128,1,float16,fp8,0,0.06675200164318085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,4,2,64,128,1,fp8,fp8,0,0.08618666728337605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,4,4,64,128,1,fp8,fp8,0,0.05795733133951823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,4,2,64,0,1,float16,fp8,0,0.08166933556397755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,4,2,64,0,1,fp8,fp8,0,0.08009600142637889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,4,4,64,128,1,float16,float16,0,0.04422399898370107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,4,4,64,0,1,float16,float16,0,0.050757333636283875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,4,4,64,128,1,float16,fp8,0,0.045221333702405296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,4,4,64,0,1,float16,fp8,0,0.05141866703828176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,4,4,64,0,1,fp8,fp8,0,0.048991998036702476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,4,2,64,128,1,float16,float16,0,0.043621331453323364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,4,1,64,128,1,float16,float16,0,0.04316799839337667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,4,1,64,0,1,float16,float16,0,0.04975999891757965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,4,1,64,128,1,float16,fp8,0,0.04349866509437561
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,4,1,64,128,1,fp8,fp8,0,0.054661333560943604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,4,1,64,0,1,float16,fp8,0,0.04959466556708018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,4,1,64,0,1,fp8,fp8,0,0.04791999856630961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,4,2,64,128,1,float16,fp8,0,0.04391466577847799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,4,2,64,0,1,float16,float16,0,0.049322664737701416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,4,2,64,128,1,fp8,fp8,0,0.054976001381874084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,4,2,64,0,1,float16,fp8,0,0.05013866722583771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,4,2,64,0,1,fp8,fp8,0,0.048170665899912514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,4,4,64,128,1,float16,float16,0,0.030479999879995983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,4,4,64,0,1,float16,float16,0,0.041034666200478874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,4,4,64,128,1,float16,fp8,0,0.031114667654037476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,4,1,64,128,1,fp8,fp8,0,0.03749866783618927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,4,4,64,128,1,fp8,fp8,0,0.037578667203585304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,4,4,64,0,1,float16,fp8,0,0.041264000038305916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,4,4,64,0,1,fp8,fp8,0,0.04020266731580099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,4,1,64,128,1,float16,float16,0,0.030917334059874218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,4,1,64,0,1,float16,float16,0,0.04078399886687597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,4,1,64,128,1,float16,fp8,0,0.03038399914900462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,4,1,64,0,1,float16,fp8,0,0.040933333337306976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,4,1,64,0,1,fp8,fp8,0,0.04002666721741358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,4,2,64,128,1,float16,float16,0,0.030453334252039593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,4,2,64,0,1,float16,float16,0,0.040778666734695435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,4,2,64,128,1,float16,fp8,0,0.03067733347415924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,4,2,64,128,1,fp8,fp8,0,0.037087999284267426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,4,2,64,0,1,float16,fp8,0,0.04083200047413508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,4,2,64,0,1,fp8,fp8,0,0.039477333426475525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,4,4,64,128,1,float16,float16,0,0.029152000943819683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,4,4,64,0,1,float16,float16,0,0.039994666973749794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,4,4,64,128,1,float16,fp8,0,0.02900800108909607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,4,4,64,128,1,fp8,fp8,0,0.035936000446478523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,4,4,64,0,1,float16,fp8,0,0.040031999349594116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,4,4,64,0,1,fp8,fp8,0,0.038917332887649536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,4,1,64,128,1,float16,float16,0,0.02951466788848241
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,4,1,64,0,1,float16,float16,0,0.039093332986036934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,4,1,64,128,1,float16,fp8,0,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,4,1,64,128,1,fp8,fp8,0,0.036117332677046456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,4,1,64,0,1,float16,fp8,0,0.03978666663169861
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,4,1,64,0,1,fp8,fp8,0,0.03874133278926214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,4,2,64,0,1,fp8,fp8,0,0.03869866579771042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,4,2,64,128,1,float16,float16,0,0.029093332588672638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,4,2,64,0,1,float16,float16,0,0.03997333347797394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,4,2,64,128,1,float16,fp8,0,0.029450667401154835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,4,2,64,128,1,fp8,fp8,0,0.035536001125971474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,4,2,64,0,1,float16,fp8,0,0.039818666875362396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,4,1,64,128,1,float16,float16,0,0.30339733759562176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,4,1,64,0,1,float16,float16,0,0.3841386636098226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,4,1,64,128,1,float16,fp8,0,0.29736000299453735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,4,1,64,128,1,fp8,fp8,0,0.38646399974823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,4,1,64,0,1,float16,fp8,0,0.37271467844645184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,4,1,64,0,1,fp8,fp8,0,0.3412799835205078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,4,2,64,128,1,float16,float16,0,0.3078773419062297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,4,2,64,0,1,float16,float16,0,0.3849173386891683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,4,2,64,128,1,float16,fp8,0,0.307861328125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,4,2,64,128,1,fp8,fp8,0,0.3932480017344157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,4,2,64,0,1,float16,fp8,0,0.3798559904098511
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,4,2,64,0,1,fp8,fp8,0,0.3505706787109375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,4,4,64,128,1,float16,float16,0,0.1720106601715088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,4,4,64,0,1,float16,float16,0,0.20625599225362143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,4,4,64,128,1,float16,fp8,0,0.1718719998995463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,4,4,64,128,1,fp8,fp8,0,0.22512000799179077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,4,4,64,0,1,float16,fp8,0,0.2023573319117228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,4,4,64,0,1,fp8,fp8,0,0.19562133153279623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,4,1,64,128,1,float16,float16,0,0.16065067052841187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,4,1,64,0,1,float16,float16,0,0.1842986742655436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,4,1,64,128,1,float16,fp8,0,0.15569600462913513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,4,1,64,128,1,fp8,fp8,0,0.2109760046005249
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,4,1,64,0,1,float16,fp8,0,0.1822986602783203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,4,1,64,0,1,fp8,fp8,0,0.17931199073791504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,4,2,64,128,1,float16,float16,0,0.1643946667512258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,4,2,64,0,1,float16,float16,0,0.1905440092086792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,4,2,64,128,1,float16,fp8,0,0.16267200311024985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,4,2,64,128,1,fp8,fp8,0,0.21588265895843506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,4,4,64,128,1,float16,fp8,0,0.09585600097974141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,4,2,64,0,1,float16,fp8,0,0.18819733460744223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,4,2,64,0,1,fp8,fp8,0,0.1883466641108195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,4,4,64,128,1,float16,float16,0,0.09572800000508626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,4,4,64,0,1,float16,float16,0,0.10508267084757487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,4,4,64,128,1,fp8,fp8,0,0.12984533111254373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,4,4,64,0,1,float16,fp8,0,0.10700800021489461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,4,4,64,0,1,fp8,fp8,0,0.10895466804504395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,4,1,64,128,1,float16,float16,0,0.08479467034339905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,4,1,64,0,1,float16,float16,0,0.09964799880981445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,4,1,64,128,1,float16,fp8,0,0.08453866839408875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,4,1,64,128,1,fp8,fp8,0,0.11828266580899556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,4,1,64,0,1,float16,fp8,0,0.09643200039863586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,4,1,64,0,1,fp8,fp8,0,0.09660266836484273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,4,2,64,128,1,float16,float16,0,0.09046399593353271
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,4,2,64,0,1,float16,float16,0,0.1016426682472229
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,4,2,64,128,1,float16,fp8,0,0.09092799822489421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,4,2,64,128,1,fp8,fp8,0,0.12346667051315308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,4,2,64,0,1,float16,fp8,0,0.10105599959691365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,4,2,64,0,1,fp8,fp8,0,0.10231999556223552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,4,4,64,128,1,float16,float16,0,0.05561600128809611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,4,4,64,0,1,float16,float16,0,0.06173333525657654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,4,1,64,0,1,float16,float16,0,0.059936001896858215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,4,4,64,128,1,float16,fp8,0,0.05566399792830149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,4,4,64,128,1,fp8,fp8,0,0.07773866752783458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,4,4,64,0,1,float16,fp8,0,0.062447999914487205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,4,4,64,0,1,fp8,fp8,0,0.06048533320426941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,4,1,64,128,1,float16,float16,0,0.05277866621812185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,4,1,64,128,1,float16,fp8,0,0.05209066470464071
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,4,1,64,128,1,fp8,fp8,0,0.06954666475454967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,4,1,64,0,1,float16,fp8,0,0.0591786652803421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,4,1,64,0,1,fp8,fp8,0,0.05759466687838236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,4,2,64,128,1,float16,float16,0,0.053157334526379905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,4,2,64,0,1,float16,float16,0,0.0603359987338384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,4,2,64,128,1,float16,fp8,0,0.05366933345794678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,4,2,64,128,1,fp8,fp8,0,0.07113066812356313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,4,2,64,0,1,float16,fp8,0,0.060175999999046326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,4,2,64,0,1,fp8,fp8,0,0.05909866591294607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,4,4,64,128,1,float16,float16,0,0.037151999771595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,4,4,64,0,1,float16,float16,0,0.040133332212766014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,4,4,64,128,1,float16,fp8,0,0.038005332152048744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,4,4,64,128,1,fp8,fp8,0,0.045706664522488914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,4,4,64,0,1,float16,fp8,0,0.04031999905904134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,4,4,64,0,1,fp8,fp8,0,0.03889599939187368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,4,1,64,128,1,float16,float16,0,0.03893866638342539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,4,1,64,0,1,float16,float16,0,0.03985599925120672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,4,1,64,128,1,float16,fp8,0,0.0372533326347669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,4,1,64,128,1,fp8,fp8,0,0.0433599998553594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,4,1,64,0,1,float16,fp8,0,0.038959999879201256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,4,2,64,0,1,float16,fp8,0,0.040037333965301514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,4,1,64,0,1,fp8,fp8,0,0.03756800045569738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,4,2,64,128,1,float16,float16,0,0.03711999952793121
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,4,2,64,0,1,float16,float16,0,0.039850667119026184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,4,2,64,128,1,float16,fp8,0,0.039642666776975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,4,2,64,128,1,fp8,fp8,0,0.04468800127506256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,4,2,64,0,1,fp8,fp8,0,0.03807999938726425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,4,4,64,128,1,float16,float16,0,0.029253333806991577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,4,4,64,0,1,float16,float16,0,0.03552533437808355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,4,4,64,128,1,float16,fp8,0,0.029648000995318096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,4,4,64,128,1,fp8,fp8,0,0.03557866563399633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,4,4,64,0,1,float16,fp8,0,0.03547733277082443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,4,4,64,0,1,fp8,fp8,0,0.03457599878311157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,4,1,64,128,1,float16,float16,0,0.028927999238173168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,4,2,64,128,1,float16,float16,0,0.02900800108909607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,4,1,64,0,1,float16,float16,0,0.035114665826161705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,4,1,64,128,1,float16,fp8,0,0.02921066681543986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,4,2,64,128,1,float16,fp8,0,0.02922133356332779
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,4,1,64,128,1,fp8,fp8,0,0.03579200059175491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,4,1,64,0,1,float16,fp8,0,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,4,1,64,0,1,fp8,fp8,0,0.03401066611210505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,4,2,64,0,1,float16,float16,0,0.03518400092919668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,4,2,64,128,1,fp8,fp8,0,0.035562666753927864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,4,2,64,0,1,float16,fp8,0,0.03515200068553289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,4,2,64,0,1,fp8,fp8,0,0.034304000437259674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,4,4,64,128,1,float16,float16,0,0.028277332584063213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,4,4,64,0,1,float16,float16,0,0.03410666684309641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,4,4,64,128,1,float16,fp8,0,0.027664000789324444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,4,4,64,128,1,fp8,fp8,0,0.03405333310365677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,4,4,64,0,1,float16,fp8,0,0.034117333590984344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,4,4,64,0,1,fp8,fp8,0,0.03312533348798752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,4,1,64,128,1,float16,float16,0,0.028021333118279774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,4,1,64,0,1,float16,float16,0,0.0342399999499321
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,4,1,64,128,1,float16,fp8,0,0.02743999908367793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,4,1,64,128,1,fp8,fp8,0,0.034458667039871216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,4,1,64,0,1,float16,fp8,0,0.03383466601371765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,4,1,64,0,1,fp8,fp8,0,0.0329066663980484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,4,2,64,128,1,float16,float16,0,0.027952000498771667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,4,2,64,0,1,float16,float16,0,0.034373333056767784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,4,2,64,128,1,float16,fp8,0,0.02807466685771942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,4,2,64,128,1,fp8,fp8,0,0.03440533330043157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,4,2,64,0,1,float16,fp8,0,0.03404266635576884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,4,2,64,0,1,fp8,fp8,0,0.032730666299661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,4,1,64,128,1,float16,float16,0,0.32972800731658936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,4,1,64,0,1,float16,float16,0,0.3689546585083008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,4,1,64,128,1,float16,fp8,0,0.3278719981511434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,4,1,64,128,1,fp8,fp8,0,0.43885334332784015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,4,1,64,0,1,float16,fp8,0,0.37038934230804443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,4,1,64,0,1,fp8,fp8,0,0.3429439862569173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,4,2,64,128,1,float16,float16,0,0.3349813222885132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,4,2,64,0,1,float16,float16,0,0.37757333119710285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,4,2,64,128,1,float16,fp8,0,0.3340640068054199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,4,2,64,0,1,fp8,fp8,0,0.3516853253046672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,4,2,64,128,1,fp8,fp8,0,0.44305066267649335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,4,2,64,0,1,float16,fp8,0,0.37778135140736896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,4,4,64,128,1,float16,float16,0,0.1923840045928955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,4,4,64,0,1,fp8,fp8,0,0.1895786722501119
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,4,4,64,0,1,float16,float16,0,0.21129600207010904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,4,4,64,128,1,float16,fp8,0,0.18898133436838785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,4,4,64,128,1,fp8,fp8,0,0.24860266844431558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,4,4,64,0,1,float16,fp8,0,0.2121280034383138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,4,1,64,0,1,float16,float16,0,0.20014933745066324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,4,1,64,128,1,float16,float16,0,0.17898666858673096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,4,1,64,128,1,float16,fp8,0,0.17765865723292032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,4,2,64,128,1,float16,fp8,0,0.1822133262952169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,4,1,64,128,1,fp8,fp8,0,0.23894399404525757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,4,1,64,0,1,float16,fp8,0,0.1991306742032369
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,4,1,64,0,1,fp8,fp8,0,0.1811786691347758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,4,2,64,128,1,float16,float16,0,0.1826080083847046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,4,2,64,0,1,float16,float16,0,0.2055786649386088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,4,2,64,128,1,fp8,fp8,0,0.24267200628916422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,4,2,64,0,1,float16,fp8,0,0.20429333051045737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,4,2,64,0,1,fp8,fp8,0,0.1839253306388855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,4,4,64,128,1,float16,float16,0,0.10889599720637004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,4,4,64,0,1,float16,float16,0,0.11317333579063416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,4,4,64,128,1,float16,fp8,0,0.10646933317184448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,4,4,64,128,1,fp8,fp8,0,0.14328533411026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,4,4,64,0,1,float16,fp8,0,0.11169067025184631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,4,4,64,0,1,fp8,fp8,0,0.10596266388893127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,4,1,64,128,1,float16,float16,0,0.09986666838328044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,4,2,64,128,1,float16,float16,0,0.10373333096504211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,4,1,64,0,1,float16,float16,0,0.10594666997591655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,4,1,64,128,1,float16,fp8,0,0.10056533416112264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,4,1,64,128,1,fp8,fp8,0,0.13457600275675455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,4,1,64,0,1,float16,fp8,0,0.10533333818117778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,4,1,64,0,1,fp8,fp8,0,0.0995253324508667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,4,2,64,0,1,float16,float16,0,0.10734400153160095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,4,2,64,128,1,float16,fp8,0,0.10345600048700969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,4,2,64,128,1,fp8,fp8,0,0.13740266362826029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,4,2,64,0,1,float16,fp8,0,0.10889066259066264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,4,2,64,0,1,fp8,fp8,0,0.10122133294741313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,4,4,64,0,1,fp8,fp8,0,0.062074666221936546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,4,4,64,128,1,float16,float16,0,0.06357333560784657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,4,4,64,0,1,float16,float16,0,0.06387199958165486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,4,4,64,128,1,float16,fp8,0,0.06262933214505513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,4,4,64,128,1,fp8,fp8,0,0.08629866441090901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,4,4,64,0,1,float16,fp8,0,0.06428266565004985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,4,1,64,0,1,fp8,fp8,0,0.05713599920272827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,4,1,64,128,1,float16,float16,0,0.060218666990598045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,4,1,64,0,1,float16,float16,0,0.061119998494784035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,4,1,64,128,1,float16,fp8,0,0.05993066728115082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,4,1,64,128,1,fp8,fp8,0,0.07659199833869934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,4,1,64,0,1,float16,fp8,0,0.06121066709359487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,4,2,64,128,1,float16,float16,0,0.06149866680304209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,4,2,64,0,1,float16,float16,0,0.06170133252938589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,4,4,64,0,1,float16,float16,0,0.04031466692686081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,4,2,64,128,1,float16,fp8,0,0.06061866879463196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,4,2,64,128,1,fp8,fp8,0,0.07843199868996938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,4,2,64,0,1,float16,fp8,0,0.06170133252938589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,4,2,64,0,1,fp8,fp8,0,0.05842666824658712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,4,4,64,128,1,float16,float16,0,0.041519999504089355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,4,4,64,128,1,float16,fp8,0,0.04219200213750204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,4,4,64,128,1,fp8,fp8,0,0.052757332722345986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,4,4,64,0,1,float16,fp8,0,0.040522667268911995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,4,4,64,0,1,fp8,fp8,0,0.03775466730197271
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,4,1,64,128,1,float16,float16,0,0.04032533367474874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,4,1,64,0,1,float16,float16,0,0.03931200007597605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,4,1,64,128,1,float16,fp8,0,0.04098666707674662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,4,1,64,128,1,fp8,fp8,0,0.05116266508897146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,4,1,64,0,1,float16,fp8,0,0.03924266745646795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,4,1,64,0,1,fp8,fp8,0,0.03644266724586487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,4,2,64,128,1,float16,float16,0,0.04075733323891958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,4,2,64,0,1,float16,float16,0,0.03959999978542328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,4,4,64,128,1,float16,float16,0,0.027727998793125153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,4,2,64,128,1,float16,fp8,0,0.041759997606277466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,4,2,64,128,1,fp8,fp8,0,0.0521919975678126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,4,2,64,0,1,float16,fp8,0,0.038719999293486275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,4,2,64,0,1,fp8,fp8,0,0.037045332292715706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,4,4,64,0,1,float16,float16,0,0.030293333033720653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,4,4,64,128,1,float16,fp8,0,0.028346667687098186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,4,4,64,128,1,fp8,fp8,0,0.035114665826161705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,4,1,64,128,1,float16,fp8,0,0.027658666173617046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,4,4,64,0,1,float16,fp8,0,0.031285333136717476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,4,4,64,0,1,fp8,fp8,0,0.02998399982849757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,4,1,64,128,1,float16,float16,0,0.027066667874654133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,4,1,64,0,1,float16,float16,0,0.02958933264017105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,4,1,64,128,1,fp8,fp8,0,0.034245334565639496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,4,2,64,128,1,float16,fp8,0,0.028010666370391846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,4,1,64,0,1,float16,fp8,0,0.029818666477998097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,4,1,64,0,1,fp8,fp8,0,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,4,2,64,128,1,float16,float16,0,0.02773333340883255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,4,2,64,0,1,float16,float16,0,0.029818666477998097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,4,2,64,128,1,fp8,fp8,0,0.0341333324710528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,4,2,64,0,1,float16,fp8,0,0.029813334345817566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,4,2,64,0,1,fp8,fp8,0,0.028794666131337483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,4,4,64,128,1,float16,float16,0,0.025557334224383037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,4,4,64,0,1,float16,float16,0,0.02779199928045273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,4,4,64,128,1,float16,fp8,0,0.02628266563018163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,4,4,64,128,1,fp8,fp8,0,0.03241066634654999
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,4,4,64,0,1,float16,fp8,0,0.02826133370399475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,4,4,64,0,1,fp8,fp8,0,0.026954665780067444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,4,1,64,128,1,float16,float16,0,0.024800000091393787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,4,1,64,0,1,float16,float16,0,0.027877333263556164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,4,1,64,128,1,float16,fp8,0,0.025498665869235992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,4,1,64,128,1,fp8,fp8,0,0.03213333338499069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,4,1,64,0,1,float16,fp8,0,0.028373333315054577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,4,1,64,0,1,fp8,fp8,0,0.026922665536403656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,4,2,64,128,1,float16,float16,0,0.02571733295917511
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,4,2,64,0,1,float16,float16,0,0.027376001079877216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,4,2,64,128,1,float16,fp8,0,0.025983999172846477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,4,2,64,128,1,fp8,fp8,0,0.03199466566244761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,4,2,64,0,1,float16,fp8,0,0.028421332438786823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,4,2,64,0,1,fp8,fp8,0,0.027450665831565857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,4,4,64,128,1,float16,float16,0,0.024656000236670177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,4,4,64,0,1,float16,float16,0,0.02735466758410136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,4,1,64,0,1,float16,float16,0,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,4,4,64,128,1,float16,fp8,0,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,4,4,64,128,1,fp8,fp8,0,0.03182933231194814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,4,1,64,0,1,float16,fp8,0,0.027679999669392902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,4,4,64,0,1,float16,fp8,0,0.02714666724205017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,4,4,64,0,1,fp8,fp8,0,0.02624000112215678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,4,1,64,128,1,float16,float16,0,0.02498133232196172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,4,2,64,128,1,float16,fp8,0,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,4,2,64,128,1,fp8,fp8,0,0.03134933362404505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,4,1,64,128,1,float16,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,4,1,64,128,1,fp8,fp8,0,0.03169066707293192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,4,1,64,0,1,fp8,fp8,0,0.025744001070658367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,4,2,64,128,1,float16,float16,0,0.024330665667851765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,4,2,64,0,1,float16,float16,0,0.027258666853109997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,4,2,64,0,1,float16,fp8,0,0.02773866554101308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,4,1,64,128,1,fp8,fp8,0,0.4116533199946086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,4,2,64,0,1,fp8,fp8,0,0.02606933315594991
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,4,1,64,128,1,float16,float16,0,0.3163786729176839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,4,1,64,0,1,float16,float16,0,0.30805333455403644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,4,1,64,128,1,float16,fp8,0,0.3136319915453593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,4,1,64,0,1,float16,fp8,0,0.30595733722050983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,4,1,64,0,1,fp8,fp8,0,0.2818079988161723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,4,2,64,128,1,float16,float16,0,0.32381866375605267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,4,4,64,128,1,float16,float16,0,0.18383999665578207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,4,2,64,0,1,float16,float16,0,0.31357866525650024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,4,2,64,128,1,float16,fp8,0,0.3203306595484416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,4,2,64,128,1,fp8,fp8,0,0.4217333396275838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,4,2,64,0,1,float16,fp8,0,0.3129813273747762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,4,2,64,0,1,fp8,fp8,0,0.2854880094528198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,4,4,64,0,1,float16,float16,0,0.17798399925231934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,4,4,64,128,1,float16,fp8,0,0.1816693345705668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,4,4,64,128,1,fp8,fp8,0,0.23721067110697427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,4,1,64,128,1,float16,fp8,0,0.16986133654912314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,4,4,64,0,1,float16,fp8,0,0.175818661848704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,4,4,64,0,1,fp8,fp8,0,0.15963733196258545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,4,1,64,128,1,float16,float16,0,0.17202132940292358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,4,1,64,0,1,float16,float16,0,0.16662399967511496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,4,1,64,128,1,fp8,fp8,0,0.22747733195622763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,4,1,64,0,1,float16,fp8,0,0.1647040049235026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,4,1,64,0,1,fp8,fp8,0,0.1518933375676473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,4,2,64,128,1,float16,float16,0,0.17558934291203818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,4,2,64,0,1,float16,float16,0,0.16962667306264242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,4,2,64,128,1,float16,fp8,0,0.17351466417312622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,4,2,64,128,1,fp8,fp8,0,0.22822932402292886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,4,2,64,0,1,float16,fp8,0,0.16954133907953897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,4,2,64,0,1,fp8,fp8,0,0.15474133690198263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,4,4,64,128,1,float16,float16,0,0.10430933038393657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,4,4,64,0,1,float16,float16,0,0.09856533010800679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,4,4,64,128,1,float16,fp8,0,0.10392000277837117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,4,4,64,128,1,fp8,fp8,0,0.13481600085894266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,4,4,64,0,1,float16,fp8,0,0.09672533472379048
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,4,4,64,0,1,fp8,fp8,0,0.09084799885749817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,4,1,64,128,1,float16,float16,0,0.09570667147636414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,4,1,64,0,1,float16,float16,0,0.09126933415730794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,4,1,64,128,1,float16,fp8,0,0.09736532966295879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,4,1,64,128,1,fp8,fp8,0,0.13009066383043924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,4,1,64,0,1,float16,fp8,0,0.09185600280761719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,4,1,64,0,1,fp8,fp8,0,0.08362133304278056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,4,2,64,128,1,fp8,fp8,0,0.1302880048751831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,4,2,64,128,1,float16,float16,0,0.09830400347709656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,4,2,64,0,1,float16,float16,0,0.09366933504740398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,4,2,64,128,1,float16,fp8,0,0.09815466403961182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,4,4,64,128,1,float16,fp8,0,0.06098666787147522
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,4,2,64,0,1,float16,fp8,0,0.09292266766230266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,4,2,64,0,1,fp8,fp8,0,0.08612266182899475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,4,4,64,128,1,float16,float16,0,0.06117333471775055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,4,4,64,0,1,float16,float16,0,0.05715199808279673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,4,4,64,128,1,fp8,fp8,0,0.0798773318529129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,4,1,64,128,1,float16,fp8,0,0.05727999905745188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,4,4,64,0,1,float16,fp8,0,0.0572266678015391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,4,4,64,0,1,fp8,fp8,0,0.052255998055140175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,4,1,64,128,1,float16,float16,0,0.057477335135142006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,4,1,64,0,1,float16,float16,0,0.05355200171470642
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,4,1,64,128,1,fp8,fp8,0,0.07444266478220622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,4,1,64,0,1,float16,fp8,0,0.05383466680844625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,4,1,64,0,1,fp8,fp8,0,0.04868799944718679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,4,2,64,128,1,float16,float16,0,0.05886933207511902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,4,2,64,0,1,float16,float16,0,0.053690666953722634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,4,2,64,128,1,float16,fp8,0,0.05829866727193197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,4,2,64,128,1,fp8,fp8,0,0.07608533402283986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,4,2,64,0,1,float16,fp8,0,0.054655998945236206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,4,2,64,0,1,fp8,fp8,0,0.05037866532802582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,4,4,64,128,1,float16,float16,0,0.041237334410349526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,4,4,64,0,1,float16,float16,0,0.03623999903599421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,4,4,64,128,1,float16,fp8,0,0.04221866528193156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,4,1,64,0,1,float16,float16,0,0.03409600009520849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,4,4,64,128,1,fp8,fp8,0,0.05268799761931101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,4,4,64,0,1,float16,fp8,0,0.036277333895365395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,4,4,64,0,1,fp8,fp8,0,0.03331733246644338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,4,1,64,0,1,fp8,fp8,0,0.032058666149775185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,4,1,64,128,1,float16,float16,0,0.040175999204317726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,4,1,64,128,1,float16,fp8,0,0.04015466570854187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,4,1,64,128,1,fp8,fp8,0,0.051258668303489685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,4,1,64,0,1,float16,fp8,0,0.03429333368937174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,4,2,64,0,1,float16,fp8,0,0.03551999976237615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,4,2,64,128,1,float16,float16,0,0.04038399954636892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,4,2,64,0,1,float16,float16,0,0.03515733281771342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,4,2,64,128,1,float16,fp8,0,0.04016000032424927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,4,2,64,128,1,fp8,fp8,0,0.0517493337392807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,4,2,64,0,1,fp8,fp8,0,0.03263466556866964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,4,4,64,128,1,float16,float16,0,0.027850667635599773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,4,4,64,0,1,float16,float16,0,0.026362667481104534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,4,4,64,128,1,float16,fp8,0,0.027802666028340656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,4,4,64,128,1,fp8,fp8,0,0.03516799956560135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,4,4,64,0,1,float16,fp8,0,0.02640533447265625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,4,4,64,0,1,fp8,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,4,1,64,128,1,float16,float16,0,0.027263998985290527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,4,1,64,0,1,float16,float16,0,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,4,1,64,128,1,float16,fp8,0,0.02749866743882497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,4,1,64,128,1,fp8,fp8,0,0.034202667574087776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,4,1,64,0,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,4,1,64,0,1,fp8,fp8,0,0.024154665569464367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,4,2,64,128,1,float16,float16,0,0.02743999908367793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,4,2,64,0,1,float16,float16,0,0.025600001215934753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,4,2,64,128,1,float16,fp8,0,0.02811199923356374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,4,4,64,128,1,fp8,fp8,0,0.032618666688601174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,4,2,64,128,1,fp8,fp8,0,0.034602666894594826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,4,2,64,0,1,float16,fp8,0,0.025797332326571148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,4,2,64,0,1,fp8,fp8,0,0.024693332612514496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,4,4,64,128,1,float16,float16,0,0.02489600082238515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,4,4,64,0,1,float16,float16,0,0.0232640008131663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,4,4,64,128,1,float16,fp8,0,0.026186667382717133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,4,4,64,0,1,float16,fp8,0,0.023941333095232647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,4,4,64,0,1,fp8,fp8,0,0.02292799949645996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,4,1,64,128,1,float16,float16,0,0.025050667424996693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,4,1,64,0,1,float16,float16,0,0.02258133391539256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,4,1,64,128,1,float16,fp8,0,0.025653332471847534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,4,1,64,128,1,fp8,fp8,0,0.031370667119820915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,4,1,64,0,1,float16,fp8,0,0.023573334018389385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,4,1,64,0,1,fp8,fp8,0,0.022672000030676525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,4,2,64,128,1,float16,float16,0,0.025487999121348064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,4,2,64,0,1,float16,float16,0,0.02386133372783661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,4,2,64,128,1,float16,fp8,0,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,4,2,64,128,1,fp8,fp8,0,0.031744000812371574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,4,2,64,0,1,float16,fp8,0,0.024090667565663654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,4,2,64,0,1,fp8,fp8,0,0.022309333086013794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,4,4,64,128,1,float16,float16,0,0.02481599897146225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,4,4,64,0,1,float16,float16,0,0.02269333352645238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,4,4,64,128,1,float16,fp8,0,0.02526933451493581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,4,4,64,128,1,fp8,fp8,0,0.03182400017976761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,4,4,64,0,1,float16,fp8,0,0.02292266736427943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,4,4,64,0,1,fp8,fp8,0,0.022570667167504627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,4,1,64,128,1,float16,float16,0,0.024666666984558105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,4,1,64,0,1,float16,float16,0,0.02203733225663503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,4,1,64,128,1,float16,fp8,0,0.025493333737055462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,4,2,64,128,1,float16,fp8,0,0.024735999604066212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,4,1,64,128,1,fp8,fp8,0,0.03141866624355316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,4,1,64,0,1,float16,fp8,0,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,4,1,64,0,1,fp8,fp8,0,0.02197866638501485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,4,2,64,128,1,float16,float16,0,0.023984000086784363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,4,2,64,0,1,float16,float16,0,0.022511998812357586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,4,2,64,128,1,fp8,fp8,0,0.031018666923046112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,4,2,64,0,1,float16,fp8,0,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,4,2,64,0,1,fp8,fp8,0,0.02187199890613556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,4,4,64,128,1,float16,float16,0,0.024133334557215374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,4,4,64,0,1,float16,float16,0,0.022341333329677582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,4,4,64,128,1,float16,fp8,0,0.02478400121132533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,4,4,64,128,1,fp8,fp8,0,0.030458666384220123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,4,4,64,0,1,float16,fp8,0,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,4,4,64,0,1,fp8,fp8,0,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,4,1,64,128,1,float16,float16,0,0.023573334018389385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,4,1,64,0,1,float16,float16,0,0.02160533269246419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,4,1,64,128,1,float16,fp8,0,0.024608001112937927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,4,1,64,128,1,fp8,fp8,0,0.03108799954255422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,4,1,64,0,1,float16,fp8,0,0.021856000026067097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,4,1,64,0,1,fp8,fp8,0,0.020938667158285778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,4,2,64,128,1,float16,float16,0,0.02422400067249934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,4,2,64,0,1,fp8,fp8,0,0.02045866722861926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,4,2,64,0,1,float16,float16,0,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,4,2,64,128,1,float16,fp8,0,0.024085332949956257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,4,2,64,128,1,fp8,fp8,0,0.03068800022204717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,4,2,64,0,1,float16,fp8,0,0.022005334496498108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,4,1,64,128,1,fp8,fp8,0,0.18716265757878622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,4,1,64,128,1,float16,float16,0,0.14421332875887552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,4,1,64,0,1,float16,float16,0,0.14056533575057983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,4,1,64,128,1,float16,fp8,0,0.14364266395568848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,4,1,64,0,1,float16,fp8,0,0.14061333735783896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,4,1,64,0,1,fp8,fp8,0,0.13266666730244955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,4,2,64,128,1,float16,float16,0,0.151418666044871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,4,2,64,0,1,float16,float16,0,0.148799995581309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,4,2,64,128,1,float16,fp8,0,0.14907733599344888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,4,2,64,128,1,fp8,fp8,0,0.18864534298578897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,4,2,64,0,1,float16,fp8,0,0.14601066708564758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,4,2,64,0,1,fp8,fp8,0,0.1343946655591329
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,4,4,64,128,1,float16,float16,0,0.09041600426038106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,4,4,64,0,1,float16,float16,0,0.0897173285484314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,4,4,64,128,1,float16,fp8,0,0.0892693301041921
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,4,4,64,128,1,fp8,fp8,0,0.10791466633478801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,4,4,64,0,1,float16,fp8,0,0.08753599723180135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,4,4,64,0,1,fp8,fp8,0,0.08075733482837677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,4,1,64,0,1,fp8,fp8,0,0.07666133344173431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,4,1,64,128,1,float16,float16,0,0.07667733232180278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,4,1,64,0,1,float16,float16,0,0.0745066652695338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,4,1,64,128,1,float16,fp8,0,0.07618133227030437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,4,2,64,128,1,fp8,fp8,0,0.10283733407656352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,4,1,64,128,1,fp8,fp8,0,0.102101335922877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,4,1,64,0,1,float16,fp8,0,0.07516799867153168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,4,2,64,128,1,float16,float16,0,0.08018133540948232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,4,2,64,0,1,float16,float16,0,0.07861333092053731
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,4,2,64,128,1,float16,fp8,0,0.07940799991289775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,4,2,64,0,1,float16,fp8,0,0.07723199824492137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,4,2,64,0,1,fp8,fp8,0,0.0775679995616277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,4,4,64,128,1,float16,float16,0,0.04761599997679392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,4,4,64,0,1,float16,float16,0,0.04640533526738485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,4,4,64,128,1,float16,fp8,0,0.047413334250450134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,4,4,64,128,1,fp8,fp8,0,0.06249066690603892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,4,4,64,0,1,float16,fp8,0,0.04644800225893656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,4,4,64,0,1,fp8,fp8,0,0.04818666477998098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,4,1,64,128,1,float16,float16,0,0.042165334026018776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,4,1,64,0,1,float16,float16,0,0.04232533276081085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,4,1,64,128,1,float16,fp8,0,0.04388799766699473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,4,1,64,128,1,fp8,fp8,0,0.05715199808279673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,4,1,64,0,1,float16,fp8,0,0.042405332128206887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,4,1,64,0,1,fp8,fp8,0,0.04410133262475332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,4,2,64,128,1,float16,float16,0,0.04437866806983948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,4,2,64,0,1,float16,float16,0,0.043562665581703186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,4,2,64,128,1,float16,fp8,0,0.04318400224049886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,4,2,64,128,1,fp8,fp8,0,0.058133333921432495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,4,2,64,0,1,float16,fp8,0,0.043578664461771645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,4,2,64,0,1,fp8,fp8,0,0.044997334480285645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,4,4,64,128,1,float16,float16,0,0.03412266572316488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,4,4,64,0,1,float16,float16,0,0.033615998923778534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,4,4,64,128,1,float16,fp8,0,0.03402666747570038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,4,4,64,128,1,fp8,fp8,0,0.03700799991687139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,4,4,64,0,1,float16,fp8,0,0.033301333586374916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,4,4,64,0,1,fp8,fp8,0,0.0306986669699351
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,4,1,64,0,1,fp8,fp8,0,0.029504001140594482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,4,1,64,128,1,float16,float16,0,0.032138665517171226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,4,1,64,0,1,float16,float16,0,0.031850665807724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,4,1,64,128,1,float16,fp8,0,0.03199466566244761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,4,1,64,128,1,fp8,fp8,0,0.03631466627120972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,4,2,64,0,1,fp8,fp8,0,0.03017599880695343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,4,1,64,0,1,float16,fp8,0,0.03207999964555105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,4,2,64,128,1,float16,float16,0,0.0330079992612203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,4,2,64,0,1,float16,float16,0,0.03235200047492981
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,4,2,64,128,1,float16,fp8,0,0.03306133300065994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,4,2,64,128,1,fp8,fp8,0,0.03623999903599421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,4,2,64,0,1,float16,fp8,0,0.032560000816980995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,4,4,64,128,1,float16,float16,0,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,4,4,64,0,1,float16,float16,0,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,4,4,64,128,1,float16,fp8,0,0.022672000030676525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,4,4,64,128,1,fp8,fp8,0,0.026608000199000042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,4,4,64,0,1,float16,fp8,0,0.0220266655087471
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,4,4,64,0,1,fp8,fp8,0,0.02293866624434789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,4,1,64,128,1,float16,float16,0,0.02070933332045873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,4,1,64,0,1,float16,float16,0,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,4,1,64,128,1,float16,fp8,0,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,4,1,64,128,1,fp8,fp8,0,0.025744001070658367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,4,1,64,0,1,float16,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,4,1,64,0,1,fp8,fp8,0,0.022304000953833263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,4,2,64,128,1,float16,float16,0,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,4,2,64,0,1,float16,float16,0,0.02160533269246419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,4,2,64,128,1,float16,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,4,2,64,128,1,fp8,fp8,0,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,4,2,64,0,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,4,2,64,0,1,fp8,fp8,0,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,4,4,64,128,1,float16,float16,0,0.018266666680574417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,4,4,64,0,1,float16,float16,0,0.018122666825850803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,4,4,64,128,1,float16,fp8,0,0.018394666413466137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,4,4,64,128,1,fp8,fp8,0,0.024080000817775726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,4,4,64,0,1,float16,fp8,0,0.01859733338157336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,4,4,64,0,1,fp8,fp8,0,0.020101333657900494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,4,1,64,128,1,float16,float16,0,0.01809599995613098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,4,1,64,0,1,float16,float16,0,0.01754666616519292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,4,1,64,128,1,float16,fp8,0,0.017957333475351334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,4,1,64,128,1,fp8,fp8,0,0.023749334116776783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,4,1,64,0,1,float16,fp8,0,0.01786133274435997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,4,1,64,0,1,fp8,fp8,0,0.02053333322207133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,4,2,64,128,1,float16,float16,0,0.017498667041460674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,4,2,64,0,1,float16,float16,0,0.01773333301146825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,4,2,64,128,1,float16,fp8,0,0.01826133330663045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,4,2,64,128,1,fp8,fp8,0,0.023999998966852825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,4,2,64,0,1,float16,fp8,0,0.01812800019979477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,4,2,64,0,1,fp8,fp8,0,0.02042666698495547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,4,4,64,128,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,4,4,64,0,1,float16,float16,0,0.01643199970324834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,4,4,64,128,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,4,4,64,128,1,fp8,fp8,0,0.02342933416366577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,4,4,64,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,4,4,64,0,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,4,1,64,128,1,float16,float16,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,4,1,64,0,1,float16,float16,0,0.016336000214020412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,4,1,64,128,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,4,1,64,128,1,fp8,fp8,0,0.022976001103719074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,4,1,64,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,4,2,64,128,1,fp8,fp8,0,0.023120000958442688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,4,2,64,0,1,float16,fp8,0,0.016751999656359356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,4,1,64,0,1,fp8,fp8,0,0.019359999646743137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,4,2,64,128,1,float16,float16,0,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,4,2,64,0,1,float16,float16,0,0.016666666915019352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,4,2,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,4,2,64,0,1,fp8,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,4,4,64,0,1,fp8,fp8,0,0.02181333303451538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,4,4,64,128,1,float16,float16,0,0.0162773331006368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,4,4,64,0,1,float16,float16,0,0.016106666376193363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,4,4,64,128,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,4,4,64,128,1,fp8,fp8,0,0.022970666488011677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,4,4,64,0,1,float16,fp8,0,0.016336000214020412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,4,1,64,128,1,float16,float16,0,0.01597333326935768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,4,1,64,0,1,float16,float16,0,0.01551466683546702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,4,1,64,128,1,float16,fp8,0,0.016650666793187458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,4,1,64,128,1,fp8,fp8,0,0.0227360005180041
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,4,1,64,0,1,float16,fp8,0,0.016261332978804905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,4,1,64,0,1,fp8,fp8,0,0.01882133384545644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,4,2,64,128,1,float16,float16,0,0.015781333049138386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,4,2,64,0,1,float16,float16,0,0.015743999431530636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,4,2,64,128,1,float16,fp8,0,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,4,2,64,128,1,fp8,fp8,0,0.022837333381175995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,4,2,64,0,1,float16,fp8,0,0.016399999459584553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,4,2,64,0,1,fp8,fp8,0,0.018863999595244724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,4,4,64,0,1,fp8,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,4,4,64,128,1,float16,float16,0,0.015344000111023584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,4,4,64,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,4,4,64,128,1,float16,fp8,0,0.01552533358335495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,4,4,64,128,1,fp8,fp8,0,0.02195200075705846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,4,4,64,0,1,float16,fp8,0,0.01613333324591319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,4,1,64,128,1,float16,float16,0,0.015466666469971338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,4,1,64,0,1,float16,float16,0,0.015365333606799444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,4,1,64,128,1,float16,fp8,0,0.01617066686352094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,4,1,64,128,1,fp8,fp8,0,0.022341333329677582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,4,1,64,0,1,float16,fp8,0,0.015824000040690105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,4,1,64,0,1,fp8,fp8,0,0.018250666558742523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,4,2,64,128,1,float16,float16,0,0.015802666544914246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,4,2,64,0,1,float16,float16,0,0.015930666277805965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,4,2,64,128,1,float16,fp8,0,0.01590399940808614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,4,2,64,128,1,fp8,fp8,0,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,4,2,64,0,1,float16,fp8,0,0.016074666132529575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,4,2,64,0,1,fp8,fp8,0,0.018826667219400406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,4,1,64,128,1,float16,float16,0,0.05871999760468801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,4,1,64,0,1,float16,float16,0,0.05807999769846598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,4,1,64,128,1,float16,fp8,0,0.05824000140031179
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,4,1,64,128,1,fp8,fp8,0,0.07525866727034251
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,4,1,64,0,1,float16,fp8,0,0.05750399827957153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,4,1,64,0,1,fp8,fp8,0,0.07446399827798207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,4,2,64,128,1,float16,float16,0,0.062319998939832054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,4,2,64,0,1,float16,float16,0,0.06249066690603892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,4,2,64,128,1,float16,fp8,0,0.06225066880385081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,4,2,64,128,1,fp8,fp8,0,0.07710400223731995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,4,2,64,0,1,float16,fp8,0,0.06155199805895487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,4,2,64,0,1,fp8,fp8,0,0.07718933125336964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,4,4,64,128,1,float16,float16,0,0.03875733415285746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,4,4,64,0,1,float16,float16,0,0.03875733415285746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,4,4,64,128,1,float16,fp8,0,0.03688533355792364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,4,4,64,128,1,fp8,fp8,0,0.048122664292653404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,4,4,64,0,1,float16,fp8,0,0.03772266705830892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,4,4,64,0,1,fp8,fp8,0,0.047770669062932335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,4,1,64,128,1,float16,float16,0,0.03409600009520849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,4,1,64,0,1,float16,float16,0,0.03389333436886469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,4,1,64,128,1,float16,fp8,0,0.03420799970626831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,4,1,64,128,1,fp8,fp8,0,0.044064000248909
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,4,1,64,0,1,float16,fp8,0,0.03404266635576884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,4,1,64,0,1,fp8,fp8,0,0.044112001856168113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,4,2,64,128,1,float16,float16,0,0.03607466568549474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,4,2,64,0,1,float16,float16,0,0.035216001172860466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,4,2,64,128,1,float16,fp8,0,0.035173334181308746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,4,2,64,128,1,fp8,fp8,0,0.045279999574025474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,4,2,64,0,1,float16,fp8,0,0.035455999275048576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,4,2,64,0,1,fp8,fp8,0,0.045935998360315956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,4,4,64,128,1,float16,float16,0,0.025642665723959606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,4,4,64,0,1,float16,float16,0,0.026330667237440746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,4,4,64,128,1,float16,fp8,0,0.0262773334980011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,4,4,64,128,1,fp8,fp8,0,0.03048533449570338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,4,4,64,0,1,float16,fp8,0,0.025792000194390614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,4,4,64,0,1,fp8,fp8,0,0.03044266750415166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,4,1,64,128,1,float16,float16,0,0.024586667617162068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,4,1,64,0,1,float16,float16,0,0.024245334168275196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,4,1,64,128,1,float16,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,4,1,64,128,1,fp8,fp8,0,0.029445332785447437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,4,1,64,0,1,float16,fp8,0,0.024527999262015026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,4,1,64,0,1,fp8,fp8,0,0.02945599953333537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,4,2,64,128,1,float16,float16,0,0.025248001019159954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,4,2,64,0,1,float16,float16,0,0.02492800106604894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,4,2,64,128,1,float16,fp8,0,0.02513066679239273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,4,2,64,128,1,fp8,fp8,0,0.02980799973011017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,4,2,64,0,1,float16,fp8,0,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,4,4,64,128,1,fp8,fp8,0,0.022490667800108593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,4,2,64,0,1,fp8,fp8,0,0.029685333371162415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,4,4,64,0,1,fp8,fp8,0,0.022170667846997578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,4,1,64,128,1,float16,float16,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,4,1,64,0,1,float16,float16,0,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,4,4,64,128,1,float16,float16,0,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,4,4,64,0,1,float16,float16,0,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,4,4,64,128,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,4,2,64,128,1,float16,float16,0,0.018800000349680584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,4,4,64,0,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,4,1,64,128,1,float16,fp8,0,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,4,1,64,128,1,fp8,fp8,0,0.021930667261282604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,4,1,64,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,4,1,64,0,1,fp8,fp8,0,0.02207999924818675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,4,2,64,0,1,float16,float16,0,0.01876266673207283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,4,4,64,128,1,float16,float16,0,0.01551466683546702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,4,4,64,0,1,float16,float16,0,0.01580799991885821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,4,2,64,128,1,float16,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,4,2,64,128,1,fp8,fp8,0,0.022277332842350006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,4,2,64,0,1,float16,fp8,0,0.019482667247454327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,4,2,64,0,1,fp8,fp8,0,0.022277332842350006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,4,4,64,128,1,float16,fp8,0,0.01590399940808614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,4,1,64,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,4,4,64,128,1,fp8,fp8,0,0.02021866664290428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,4,4,64,0,1,float16,fp8,0,0.01632000009218852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,4,4,64,0,1,fp8,fp8,0,0.02250666668017705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,4,1,64,128,1,float16,float16,0,0.01545599972208341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,4,1,64,128,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,4,1,64,128,1,fp8,fp8,0,0.020442667106787365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,4,1,64,0,1,float16,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,4,1,64,0,1,fp8,fp8,0,0.019695999721686046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,4,2,64,128,1,float16,float16,0,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,4,4,64,128,1,float16,float16,0,0.014090667168299357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,4,2,64,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,4,4,64,128,1,float16,fp8,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,4,2,64,128,1,float16,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,4,2,64,128,1,fp8,fp8,0,0.020714666694402695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,4,2,64,0,1,float16,fp8,0,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,4,2,64,0,1,fp8,fp8,0,0.020703999946514767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,4,4,64,0,1,float16,float16,0,0.014117332796255747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,4,4,64,128,1,fp8,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,4,4,64,0,1,float16,fp8,0,0.014560000350077948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,4,4,64,0,1,fp8,fp8,0,0.01966933285196622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,4,1,64,128,1,float16,float16,0,0.013989333063364029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,4,2,64,128,1,float16,float16,0,0.01402666668097178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,4,1,64,0,1,float16,float16,0,0.014474666366974512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,4,1,64,128,1,float16,fp8,0,0.014229333649079004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,4,1,64,128,1,fp8,fp8,0,0.019738666713237762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,4,1,64,0,1,float16,fp8,0,0.014607999473810196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,4,1,64,0,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,4,2,64,0,1,float16,float16,0,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,4,2,64,128,1,float16,fp8,0,0.014560000350077948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,4,2,64,128,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,4,4,64,128,1,fp8,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,4,2,64,0,1,float16,fp8,0,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,4,1,64,128,1,float16,float16,0,0.013893333574136099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,4,2,64,0,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,4,4,64,128,1,float16,float16,0,0.014032000054915747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,4,4,64,0,1,float16,float16,0,0.013904000322024027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,4,4,64,128,1,float16,fp8,0,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,4,4,64,0,1,float16,fp8,0,0.013904000322024027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,4,4,64,0,1,fp8,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,4,1,64,0,1,float16,float16,0,0.013621332744757334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,4,1,64,128,1,float16,fp8,0,0.014218666901191076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,4,1,64,128,1,fp8,fp8,0,0.018826667219400406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,4,1,64,0,1,float16,fp8,0,0.014117332796255747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,4,1,64,0,1,fp8,fp8,0,0.018789333601792652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,4,2,64,128,1,float16,float16,0,0.013781332721312841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,4,2,64,0,1,float16,float16,0,0.014090667168299357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,4,2,64,128,1,float16,fp8,0,0.014432000617186228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,4,2,64,128,1,fp8,fp8,0,0.01858666663368543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,4,2,64,0,1,float16,fp8,0,0.014325333138306936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,4,2,64,0,1,fp8,fp8,0,0.018746666610240936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,4,4,64,128,1,float16,float16,0,0.013562666873137156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,4,4,64,0,1,float16,float16,0,0.013354666531085968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,4,4,64,128,1,float16,fp8,0,0.01394133393963178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,4,4,64,128,1,fp8,fp8,0,0.018602666755517323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,4,4,64,0,1,float16,fp8,0,0.014229333649079004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,4,4,64,0,1,fp8,fp8,0,0.018186666071414948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,4,1,64,128,1,float16,float16,0,0.013674666484196981
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,4,1,64,0,1,float16,float16,0,0.01379199946920077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,4,1,64,128,1,float16,fp8,0,0.014127999544143677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,4,1,64,128,1,fp8,fp8,0,0.018218666315078735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,4,1,64,0,1,float16,fp8,0,0.014032000054915747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,4,1,64,0,1,fp8,fp8,0,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,4,2,64,128,1,float16,float16,0,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,4,2,64,0,1,float16,float16,0,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,4,2,64,128,1,float16,fp8,0,0.01431999976436297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,4,2,64,128,1,fp8,fp8,0,0.018719999740521114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,4,2,64,0,1,float16,fp8,0,0.014256000518798828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,4,2,64,0,1,fp8,fp8,0,0.018245333184798557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,4,4,64,128,1,float16,float16,0,0.013503999759753546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,4,4,64,0,1,float16,float16,0,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,4,4,64,128,1,float16,fp8,0,0.013962666193644205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,4,4,64,128,1,fp8,fp8,0,0.018768000106016796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,4,1,64,128,1,float16,fp8,0,0.013557333499193192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,4,4,64,0,1,float16,fp8,0,0.013738666971524557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,4,1,64,0,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,4,4,64,0,1,fp8,fp8,0,0.018415999909241993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,4,1,64,128,1,float16,float16,0,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,4,1,64,0,1,float16,float16,0,0.013466666142145792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,4,1,64,128,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,4,1,64,0,1,fp8,fp8,0,0.01838933303952217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,4,2,64,128,1,float16,float16,0,0.01333333303531011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,4,2,64,0,1,float16,float16,0,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,4,2,64,128,1,float16,fp8,0,0.014042666802803675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,4,2,64,128,1,fp8,fp8,0,0.018453333526849747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,4,2,64,0,1,float16,fp8,0,0.013936000565687815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,4,2,64,0,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,4,1,64,128,1,float16,float16,0,0.03358400116364161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,4,1,64,0,1,float16,float16,0,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,4,1,64,128,1,float16,fp8,0,0.033589333295822144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,4,1,64,128,1,fp8,fp8,0,0.0554613322019577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,4,1,64,0,1,float16,fp8,0,0.033173332611719765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,4,1,64,0,1,fp8,fp8,0,0.05550399919350942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,4,2,64,128,1,float16,float16,0,0.034128000338872276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,4,2,64,0,1,float16,float16,0,0.03401600072781245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,4,2,64,128,1,float16,fp8,0,0.03373866776625315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,4,2,64,128,1,fp8,fp8,0,0.05745600163936615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,4,2,64,0,1,float16,fp8,0,0.033743999898433685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,4,2,64,0,1,fp8,fp8,0,0.057328000664711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,4,4,64,128,1,float16,float16,0,0.02342933416366577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,4,4,64,0,1,float16,float16,0,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,4,4,64,0,1,fp8,fp8,0,0.036101333796978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,4,4,64,128,1,float16,fp8,0,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,4,4,64,128,1,fp8,fp8,0,0.03589333345492681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,4,4,64,0,1,float16,fp8,0,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,4,1,64,128,1,float16,float16,0,0.022197333474953968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,4,1,64,0,1,float16,float16,0,0.02229333420594533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,4,1,64,128,1,float16,fp8,0,0.02257599929968516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,4,1,64,128,1,fp8,fp8,0,0.035002666215101876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,4,1,64,0,1,float16,fp8,0,0.022815999885400135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,4,1,64,0,1,fp8,fp8,0,0.03487999985615412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,4,2,64,128,1,float16,float16,0,0.022863999009132385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,4,2,64,0,1,float16,float16,0,0.022485333184401195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,4,2,64,128,1,float16,fp8,0,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,4,2,64,128,1,fp8,fp8,0,0.035317334036032356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,4,2,64,0,1,float16,fp8,0,0.02327466756105423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,4,2,64,0,1,fp8,fp8,0,0.0354666660229365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,4,4,64,128,1,float16,float16,0,0.017637333522240322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,4,4,64,0,1,float16,float16,0,0.017994667092959087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,4,4,64,128,1,float16,fp8,0,0.018320000420014065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,4,4,64,128,1,fp8,fp8,0,0.025562666356563568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,4,4,64,0,1,float16,fp8,0,0.017845333864291508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,4,4,64,0,1,fp8,fp8,0,0.02531733363866806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,4,1,64,128,1,float16,float16,0,0.017514667163292568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,4,1,64,0,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,4,1,64,128,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,4,1,64,128,1,fp8,fp8,0,0.025616000096003216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,4,1,64,0,1,float16,fp8,0,0.01798933371901512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,4,1,64,0,1,fp8,fp8,0,0.02463999887307485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,4,2,64,128,1,float16,float16,0,0.01777600000301997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,4,2,64,0,1,float16,float16,0,0.017450666675964992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,4,2,64,128,1,float16,fp8,0,0.018090666582187016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,4,2,64,128,1,fp8,fp8,0,0.02568000058333079
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,4,2,64,0,1,float16,fp8,0,0.017680000513792038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,4,2,64,0,1,fp8,fp8,0,0.025333332518736523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,4,4,64,128,1,float16,float16,0,0.014607999473810196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,4,4,64,0,1,float16,float16,0,0.014511999984582266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,4,4,64,128,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,4,4,64,128,1,fp8,fp8,0,0.020634666085243225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,4,4,64,0,1,float16,fp8,0,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,4,4,64,0,1,fp8,fp8,0,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,4,1,64,128,1,float16,float16,0,0.014117332796255747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,4,1,64,0,1,float16,float16,0,0.014565333724021912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,4,1,64,128,1,float16,fp8,0,0.014629332969586054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,4,1,64,128,1,fp8,fp8,0,0.02019199977318446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,4,1,64,0,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,4,1,64,0,1,fp8,fp8,0,0.02027733375628789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,4,2,64,128,1,float16,float16,0,0.014197333405415217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,4,2,64,0,1,float16,float16,0,0.014479999740918478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,4,2,64,128,1,float16,fp8,0,0.014629332969586054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,4,4,64,128,1,float16,fp8,0,0.013376000026861826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,4,2,64,128,1,fp8,fp8,0,0.020453333854675293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,4,4,64,0,1,float16,fp8,0,0.013338666409254074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,4,2,64,0,1,float16,fp8,0,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,4,2,64,0,1,fp8,fp8,0,0.020495999604463577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,4,4,64,128,1,float16,float16,0,0.01328533391157786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,4,4,64,0,1,float16,float16,0,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,4,4,64,128,1,fp8,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,4,4,64,0,1,fp8,fp8,0,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,4,1,64,128,1,float16,float16,0,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,4,1,64,0,1,float16,float16,0,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,4,1,64,128,1,float16,fp8,0,0.013493333011865616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,4,1,64,128,1,fp8,fp8,0,0.019413333386182785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,4,1,64,0,1,float16,fp8,0,0.013616000612576803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,4,1,64,0,1,fp8,fp8,0,0.019413333386182785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,4,2,64,0,1,float16,fp8,0,0.01333333303531011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,4,2,64,128,1,float16,float16,0,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,4,2,64,0,1,float16,float16,0,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,4,2,64,128,1,float16,fp8,0,0.013327999661366144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,4,2,64,128,1,fp8,fp8,0,0.01937599976857503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,4,2,64,0,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,4,4,64,0,1,float16,fp8,0,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,4,4,64,128,1,float16,float16,0,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,4,4,64,0,1,float16,float16,0,0.012293333808581034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,4,4,64,128,1,float16,fp8,0,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,4,4,64,128,1,fp8,fp8,0,0.018309333672126133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,4,4,64,0,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,4,1,64,128,1,float16,float16,0,0.012282667060693106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,4,1,64,0,1,float16,float16,0,0.012709333250919977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,4,1,64,128,1,float16,fp8,0,0.013370666652917862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,4,1,64,128,1,fp8,fp8,0,0.0186666672428449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,4,1,64,0,1,float16,fp8,0,0.01320533330241839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,4,1,64,0,1,fp8,fp8,0,0.018437333405017853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,4,2,64,128,1,float16,float16,0,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,4,2,64,0,1,float16,float16,0,0.012469333906968435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,4,2,64,128,1,float16,fp8,0,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,4,2,64,128,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,4,2,64,0,1,float16,fp8,0,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,4,2,64,0,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,4,4,64,128,1,float16,float16,0,0.012442667037248611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,4,4,64,0,1,float16,float16,0,0.012538666526476542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,4,4,64,128,1,float16,fp8,0,0.01246400053302447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,4,4,64,128,1,fp8,fp8,0,0.01858666663368543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,4,4,64,0,1,float16,fp8,0,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,4,4,64,0,1,fp8,fp8,0,0.018709332992633183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,4,1,64,128,1,float16,float16,0,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,4,1,64,0,1,float16,float16,0,0.012479999413092932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,4,1,64,128,1,float16,fp8,0,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,4,1,64,128,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,4,1,64,0,1,float16,fp8,0,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,4,1,64,0,1,fp8,fp8,0,0.018672000616788864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,4,2,64,0,1,fp8,fp8,0,0.018320000420014065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,4,2,64,128,1,float16,float16,0,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,4,2,64,0,1,float16,float16,0,0.012543999900420507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,4,2,64,128,1,float16,fp8,0,0.012693333129088083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,4,2,64,128,1,fp8,fp8,0,0.018650667121013004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,4,2,64,0,1,float16,fp8,0,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,4,4,64,128,1,float16,float16,0,0.01221866657336553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,4,4,64,0,1,float16,float16,0,0.012015999605258306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,4,4,64,128,1,float16,fp8,0,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,4,4,64,128,1,fp8,fp8,0,0.018207999567190807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,4,4,64,0,1,float16,fp8,0,0.012421333541472753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,4,4,64,0,1,fp8,fp8,0,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,4,1,64,128,1,float16,float16,0,0.012335999558369318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,4,1,64,0,1,float16,float16,0,0.01250133290886879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,4,1,64,128,1,float16,fp8,0,0.01259200026591619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,4,1,64,128,1,fp8,fp8,0,0.018170667191346485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,4,1,64,0,1,float16,fp8,0,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,4,1,64,0,1,fp8,fp8,0,0.018133333573738735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,4,2,64,128,1,float16,float16,0,0.012639999389648438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,4,2,64,0,1,float16,float16,0,0.012165332833925882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,4,2,64,128,1,float16,fp8,0,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,4,2,64,128,1,fp8,fp8,0,0.018191999445358913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,4,2,64,0,1,float16,fp8,0,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,4,2,64,0,1,fp8,fp8,0,0.018565333137909572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,4,4,64,128,1,float16,float16,0,0.012106666962305704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,4,4,64,0,1,float16,float16,0,0.011978667229413986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,4,4,64,128,1,float16,fp8,0,0.012650666137536367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,4,1,64,128,1,float16,fp8,0,0.012533333152532578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,4,4,64,128,1,fp8,fp8,0,0.01794133335351944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,4,4,64,0,1,float16,fp8,0,0.012335999558369318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,4,4,64,0,1,fp8,fp8,0,0.01851733277241389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,4,1,64,128,1,float16,float16,0,0.01219733307758967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,4,1,64,0,1,float16,float16,0,0.012250666817029318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,4,1,64,128,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,4,1,64,0,1,float16,fp8,0,0.012527999778588613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,4,1,64,0,1,fp8,fp8,0,0.01820266619324684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,4,2,64,128,1,float16,float16,0,0.012378666549921036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,4,2,64,0,1,float16,float16,0,0.012495999534924826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,4,1,64,128,1,float16,float16,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,4,2,64,128,1,float16,fp8,0,0.012522666404644648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,4,2,64,128,1,fp8,fp8,0,0.018618666877349217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,4,2,64,0,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,4,2,64,0,1,fp8,fp8,0,0.01793066660563151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,4,1,64,0,1,float16,float16,0,0.023402666052182514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,4,1,64,128,1,float16,fp8,0,0.023845332364241283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,4,1,64,128,1,fp8,fp8,0,0.048570667703946434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,4,1,64,0,1,float16,fp8,0,0.02334933231274287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,4,1,64,0,1,fp8,fp8,0,0.04822400212287903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,4,2,64,128,1,float16,float16,0,0.02418133368094762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,4,2,64,0,1,float16,float16,0,0.02404800057411194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,4,2,64,128,1,float16,fp8,0,0.023813332120577495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,4,2,64,128,1,fp8,fp8,0,0.048751999934514366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,4,2,64,0,1,float16,fp8,0,0.024245334168275196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,4,2,64,0,1,fp8,fp8,0,0.048938666780789696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,4,4,64,128,1,float16,float16,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,4,4,64,0,1,float16,float16,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,4,4,64,128,1,float16,fp8,0,0.017583999782800674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,4,4,64,128,1,fp8,fp8,0,0.032085334261258446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,4,4,64,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,4,4,64,0,1,fp8,fp8,0,0.03243733445803324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,4,1,64,128,1,float16,float16,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,4,1,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,4,1,64,128,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,4,1,64,128,1,fp8,fp8,0,0.031727999448776245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,4,1,64,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,4,1,64,0,1,fp8,fp8,0,0.031311998764673867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,4,2,64,128,1,float16,float16,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,4,2,64,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,4,2,64,128,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,4,2,64,128,1,fp8,fp8,0,0.031930667658646904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,4,2,64,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,4,2,64,0,1,fp8,fp8,0,0.03105599929889043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,4,4,64,128,1,float16,float16,0,0.01451733335852623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,4,4,64,0,1,float16,float16,0,0.014405333747466406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,4,4,64,128,1,float16,fp8,0,0.014293332894643148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,4,4,64,128,1,fp8,fp8,0,0.026554666459560394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,4,4,64,0,1,float16,fp8,0,0.01443733274936676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,4,4,64,0,1,fp8,fp8,0,0.024138666689395905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,4,1,64,128,1,float16,float16,0,0.014085333794355392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,4,1,64,0,1,float16,float16,0,0.014165333161751429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,4,1,64,128,1,float16,fp8,0,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,4,1,64,128,1,fp8,fp8,0,0.02346666653951009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,4,1,64,0,1,float16,fp8,0,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,4,1,64,0,1,fp8,fp8,0,0.02372266600529353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,4,2,64,128,1,float16,float16,0,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,4,2,64,0,1,float16,float16,0,0.014495999862750372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,4,2,64,128,1,float16,fp8,0,0.014671999961137772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,4,2,64,128,1,fp8,fp8,0,0.023743999501069386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,4,2,64,0,1,float16,fp8,0,0.014560000350077948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,4,2,64,0,1,fp8,fp8,0,0.023749334116776783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,4,4,64,128,1,float16,float16,0,0.012506666282812754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,4,4,64,0,1,float16,float16,0,0.012655999511480331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,4,4,64,128,1,float16,fp8,0,0.013237333546082178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,4,4,64,128,1,fp8,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,4,4,64,0,1,float16,fp8,0,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,4,4,64,0,1,fp8,fp8,0,0.019434666881958645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,4,1,64,128,1,float16,float16,0,0.012666666259368261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,4,1,64,0,1,float16,float16,0,0.012560000022252401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,4,1,64,128,1,float16,fp8,0,0.013525333255529404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,4,1,64,128,1,fp8,fp8,0,0.019589333484570186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,4,1,64,0,1,float16,fp8,0,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,4,1,64,0,1,fp8,fp8,0,0.019632000476121902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,4,2,64,128,1,float16,float16,0,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,4,2,64,0,1,float16,float16,0,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,4,2,64,128,1,float16,fp8,0,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,4,2,64,128,1,fp8,fp8,0,0.02236266682545344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,4,2,64,0,1,float16,fp8,0,0.013546666751305262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,4,4,64,0,1,float16,fp8,0,0.012309333930412928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,4,2,64,0,1,fp8,fp8,0,0.019893333315849304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,4,4,64,128,1,float16,float16,0,0.011727999895811081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,4,4,64,0,1,float16,float16,0,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,4,4,64,128,1,float16,fp8,0,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,4,4,64,128,1,fp8,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,4,4,64,0,1,fp8,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,4,1,64,128,1,float16,float16,0,0.012069333344697952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,4,1,64,0,1,float16,float16,0,0.012159999459981918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,4,1,64,128,1,float16,fp8,0,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,4,1,64,128,1,fp8,fp8,0,0.018538666268189747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,4,1,64,0,1,float16,fp8,0,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,4,1,64,0,1,fp8,fp8,0,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,4,2,64,128,1,float16,float16,0,0.011936000237862269
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,4,2,64,0,1,float16,float16,0,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,4,2,64,128,1,float16,fp8,0,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,4,2,64,128,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,4,2,64,0,1,float16,fp8,0,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,4,2,64,0,1,fp8,fp8,0,0.01850133389234543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,4,4,64,128,1,float16,float16,0,0.012063999970753988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,4,4,64,0,1,float16,float16,0,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,4,4,64,128,1,float16,fp8,0,0.011968000481526056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,4,4,64,128,1,fp8,fp8,0,0.018629333625237148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,4,4,64,0,1,float16,fp8,0,0.012149333953857422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,4,4,64,0,1,fp8,fp8,0,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,4,1,64,128,1,float16,float16,0,0.012085333466529846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,4,1,64,0,1,float16,float16,0,0.012026666353146235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,4,1,64,128,1,float16,fp8,0,0.012213333199421564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,4,1,64,128,1,fp8,fp8,0,0.018458666900793713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,4,1,64,0,1,float16,fp8,0,0.012634667257467905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,4,1,64,0,1,fp8,fp8,0,0.018496000518401463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,4,2,64,128,1,float16,float16,0,0.012053333222866058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,4,2,64,0,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,4,2,64,128,1,float16,fp8,0,0.012373333175977072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,4,2,64,128,1,fp8,fp8,0,0.018522666146357853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,4,2,64,0,1,float16,fp8,0,0.012272000312805176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,4,2,64,0,1,fp8,fp8,0,0.01844800015290578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,4,4,64,128,1,float16,float16,0,0.011962667107582092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,4,4,64,0,1,float16,float16,0,0.011541333049535751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,4,4,64,128,1,float16,fp8,0,0.012298667182525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,4,4,64,128,1,fp8,fp8,0,0.018346666047970455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,4,4,64,0,1,float16,fp8,0,0.01198400060335795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,4,4,64,0,1,fp8,fp8,0,0.01828266680240631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,4,1,64,128,1,float16,float16,0,0.01166933278242747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,4,1,64,0,1,float16,float16,0,0.011861333002646765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,4,1,64,128,1,float16,fp8,0,0.012618667135636011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,4,1,64,128,1,fp8,fp8,0,0.018005333840847015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,4,1,64,0,1,float16,fp8,0,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,4,1,64,0,1,fp8,fp8,0,0.01859733338157336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,4,2,64,128,1,float16,float16,0,0.011770666887362799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,4,2,64,0,1,float16,float16,0,0.01211200033624967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,4,2,64,128,1,float16,fp8,0,0.0144213338692983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,4,2,64,128,1,fp8,fp8,0,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,4,2,64,0,1,float16,fp8,0,0.012666666259368261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,4,4,64,128,1,fp8,fp8,0,0.01807466646035512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,4,2,64,0,1,fp8,fp8,0,0.01820266619324684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,4,4,64,128,1,float16,float16,0,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,4,4,64,0,1,float16,float16,0,0.0116799995303154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,4,4,64,128,1,float16,fp8,0,0.012378666549921036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,4,4,64,0,1,float16,fp8,0,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,4,4,64,0,1,fp8,fp8,0,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,4,1,64,128,1,float16,float16,0,0.012005332857370377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,4,1,64,0,1,float16,float16,0,0.011829332758982977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,4,1,64,128,1,float16,fp8,0,0.012442667037248611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,4,1,64,128,1,fp8,fp8,0,0.018394666413466137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,4,1,64,0,1,float16,fp8,0,0.012106666962305704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,4,1,64,0,1,fp8,fp8,0,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,4,2,64,128,1,float16,float16,0,0.01184533288081487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,4,2,64,0,1,float16,float16,0,0.01202133297920227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,4,2,64,128,1,float16,fp8,0,0.012373333175977072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,4,2,64,128,1,fp8,fp8,0,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,4,2,64,0,1,float16,fp8,0,0.012527999778588613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,4,2,64,0,1,fp8,fp8,0,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,4,4,64,128,1,float16,float16,0,0.01179733375708262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,4,4,64,0,1,float16,float16,0,0.01145600030819575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,4,4,64,128,1,float16,fp8,0,0.012063999970753988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,4,4,64,128,1,fp8,fp8,0,0.018394666413466137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,4,4,64,0,1,float16,fp8,0,0.01198400060335795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,4,4,64,0,1,fp8,fp8,0,0.01838933303952217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,4,1,64,128,1,float16,float16,0,0.011450666934251785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,4,1,64,0,1,float16,float16,0,0.011909333368142446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,4,1,64,128,1,float16,fp8,0,0.011936000237862269
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,4,1,64,128,1,fp8,fp8,0,0.018058666338523228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,4,1,64,0,1,float16,fp8,0,0.012293333808581034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,4,1,64,0,1,fp8,fp8,0,0.017808000246683758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,4,2,64,128,1,float16,float16,0,0.01180800050497055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,4,2,64,0,1,float16,float16,0,0.011482667177915573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,4,2,64,128,1,float16,fp8,0,0.01209066684047381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,4,2,64,128,1,fp8,fp8,0,0.017770666629076004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,4,2,64,0,1,float16,fp8,0,0.012410666793584824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,4,2,64,0,1,fp8,fp8,0,0.017973333597183228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,4,1,64,128,1,float16,float16,0,0.020074666788180668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,4,1,64,0,1,float16,float16,0,0.0198186660806338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,4,2,64,128,1,float16,float16,0,0.02012266715367635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,4,2,64,0,1,float16,float16,0,0.020026666422684986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,4,1,64,128,1,float16,fp8,0,0.02081599955757459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,4,1,64,128,1,fp8,fp8,0,0.04464533428351084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,4,1,64,0,1,float16,fp8,0,0.019941333681344986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,4,1,64,0,1,fp8,fp8,0,0.04487466812133789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,4,2,64,128,1,fp8,fp8,0,0.045141334335009255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,4,2,64,128,1,float16,fp8,0,0.02027200038234393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,4,2,64,0,1,float16,fp8,0,0.020293333878119785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,4,2,64,0,1,fp8,fp8,0,0.04674666623274485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,4,4,64,128,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,4,4,64,0,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,4,4,64,128,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,4,4,64,128,1,fp8,fp8,0,0.030229332546393078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,4,4,64,0,1,float16,fp8,0,0.015413332730531693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,4,4,64,0,1,fp8,fp8,0,0.03030399978160858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,4,1,64,128,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,4,1,64,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,4,1,64,128,1,float16,fp8,0,0.015461333096027374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,4,1,64,128,1,fp8,fp8,0,0.029738667110602062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,4,1,64,0,1,float16,fp8,0,0.015461333096027374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,4,1,64,0,1,fp8,fp8,0,0.02985599885384242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,4,2,64,128,1,float16,float16,0,0.015402667224407196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,4,2,64,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,4,2,64,128,1,float16,fp8,0,0.015509333461523056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,4,2,64,128,1,fp8,fp8,0,0.029839999973773956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,4,2,64,0,1,float16,fp8,0,0.01594666639963786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,4,2,64,0,1,fp8,fp8,0,0.030048000315825146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,4,4,64,128,1,float16,float16,0,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,4,4,64,0,1,float16,float16,0,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,4,4,64,128,1,float16,fp8,0,0.013616000612576803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,4,4,64,128,1,fp8,fp8,0,0.02274133265018463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,4,4,64,0,1,float16,fp8,0,0.013189333180586496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,4,4,64,0,1,fp8,fp8,0,0.022682666778564453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,4,1,64,128,1,float16,float16,0,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,4,1,64,0,1,float16,float16,0,0.013370666652917862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,4,1,64,128,1,float16,fp8,0,0.013850666582584381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,4,1,64,128,1,fp8,fp8,0,0.02290133386850357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,4,1,64,0,1,float16,fp8,0,0.013770667215188345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,4,1,64,0,1,fp8,fp8,0,0.022810667753219604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,4,2,64,128,1,float16,float16,0,0.013376000026861826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,4,2,64,0,1,float16,float16,0,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,4,4,64,0,1,float16,float16,0,0.012383999923865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,4,2,64,128,1,float16,fp8,0,0.013679999858140945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,4,2,64,128,1,fp8,fp8,0,0.022650666534900665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,4,2,64,0,1,float16,fp8,0,0.013514666507641474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,4,4,64,0,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,4,1,64,128,1,float16,float16,0,0.012330666184425354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,4,2,64,0,1,fp8,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,4,4,64,128,1,float16,float16,0,0.012309333930412928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,4,4,64,128,1,float16,fp8,0,0.012437333663304647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,4,4,64,128,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,4,2,64,128,1,float16,float16,0,0.012229333321253458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,4,1,64,0,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,4,4,64,0,1,float16,fp8,0,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,4,1,64,0,1,float16,float16,0,0.012047999848922094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,4,1,64,128,1,float16,fp8,0,0.012565333396196365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,4,1,64,128,1,fp8,fp8,0,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,4,1,64,0,1,float16,fp8,0,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,4,2,64,0,1,float16,float16,0,0.01232533281048139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,4,2,64,128,1,float16,fp8,0,0.01258133351802826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,4,2,64,128,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,4,2,64,0,1,float16,fp8,0,0.012506666282812754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,4,2,64,0,1,fp8,fp8,0,0.018789333601792652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,4,4,64,128,1,float16,float16,0,0.011695999652147293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,4,4,64,0,1,float16,float16,0,0.011829332758982977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,4,4,64,128,1,float16,fp8,0,0.012533333152532578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,4,4,64,128,1,fp8,fp8,0,0.0183146670460701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,4,4,64,0,1,float16,fp8,0,0.012485332787036896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,4,4,64,0,1,fp8,fp8,0,0.01825599993268649
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,4,1,64,128,1,float16,float16,0,0.012186666329701742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,4,1,64,0,1,float16,float16,0,0.012069333344697952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,4,1,64,128,1,float16,fp8,0,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,4,1,64,128,1,fp8,fp8,0,0.01883200059334437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,4,1,64,0,1,float16,fp8,0,0.012058666596810022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,4,1,64,0,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,4,2,64,128,1,float16,float16,0,0.012042666474978128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,4,2,64,0,1,float16,float16,0,0.012437333663304647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,4,2,64,128,1,float16,fp8,0,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,4,2,64,128,1,fp8,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,4,2,64,0,1,float16,fp8,0,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,4,2,64,0,1,fp8,fp8,0,0.01858666663368543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,4,4,64,128,1,float16,float16,0,0.011733333269755045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,4,4,64,0,1,float16,float16,0,0.011898666620254517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,4,4,64,128,1,float16,fp8,0,0.012293333808581034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,4,4,64,128,1,fp8,fp8,0,0.018181333939234417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,4,4,64,0,1,float16,fp8,0,0.012261333564917246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,4,4,64,0,1,fp8,fp8,0,0.018464000274737675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,4,1,64,128,1,float16,float16,0,0.012128000458081564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,4,1,64,0,1,float16,float16,0,0.011770666887362799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,4,1,64,128,1,float16,fp8,0,0.012400000045696894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,4,1,64,128,1,fp8,fp8,0,0.0184906671444575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,4,1,64,0,1,float16,fp8,0,0.012458667159080505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,4,1,64,0,1,fp8,fp8,0,0.020874666670958202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,4,2,64,128,1,float16,float16,0,0.011936000237862269
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,4,2,64,0,1,float16,float16,0,0.012213333199421564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,4,2,64,128,1,float16,fp8,0,0.012634667257467905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,4,2,64,128,1,fp8,fp8,0,0.018298666924238205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,4,2,64,0,1,fp8,fp8,0,0.018191999445358913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,4,2,64,0,1,float16,fp8,0,0.012485332787036896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,4,4,64,128,1,float16,float16,0,0.011578666667143503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,4,4,64,0,1,float16,float16,0,0.011770666887362799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,4,4,64,128,1,float16,fp8,0,0.011701333026091257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,4,1,64,128,1,fp8,fp8,0,0.018346666047970455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,4,1,64,0,1,float16,fp8,0,0.01201066623131434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,4,4,64,128,1,fp8,fp8,0,0.01830400029818217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,4,4,64,0,1,float16,fp8,0,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,4,4,64,0,1,fp8,fp8,0,0.018165333817402523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,4,1,64,128,1,float16,float16,0,0.011605333536863327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,4,1,64,0,1,float16,float16,0,0.011898666620254517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,4,1,64,128,1,float16,fp8,0,0.012080000092585882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,4,1,64,0,1,fp8,fp8,0,0.01814933369557063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,4,2,64,128,1,float16,float16,0,0.011589333415031433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,4,2,64,0,1,float16,float16,0,0.012186666329701742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,4,2,64,128,1,float16,fp8,0,0.012202666451533636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,4,2,64,128,1,fp8,fp8,0,0.017887999614079792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,4,2,64,0,1,float16,fp8,0,0.012005332857370377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,4,2,64,0,1,fp8,fp8,0,0.018309333672126133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,4,4,64,0,1,fp8,fp8,0,0.01794133335351944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,4,4,64,128,1,float16,float16,0,0.011322667201360067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,4,4,64,0,1,float16,float16,0,0.011333333949247995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,4,4,64,128,1,float16,fp8,0,0.01181866725285848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,4,4,64,128,1,fp8,fp8,0,0.017973333597183228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,4,4,64,0,1,float16,fp8,0,0.011968000481526056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,4,1,64,128,1,float16,float16,0,0.011530666301647821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,4,1,64,0,1,float16,float16,0,0.01184533288081487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,4,1,64,128,1,float16,fp8,0,0.011941333611806234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,4,1,64,128,1,fp8,fp8,0,0.01786133274435997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,4,1,64,0,1,float16,fp8,0,0.012047999848922094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,4,1,64,0,1,fp8,fp8,0,0.018207999567190807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,4,2,64,128,1,float16,float16,0,0.011637333780527115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,4,2,64,0,1,float16,float16,0,0.011760000139474869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,4,2,64,128,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,4,2,64,128,1,fp8,fp8,0,0.017898666361967724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,4,2,64,0,1,float16,fp8,0,0.011973333855470022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,4,2,64,0,1,fp8,fp8,0,0.01793066660563151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,4,4,64,128,1,float16,float16,0,0.01137599969903628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,4,4,64,0,1,float16,float16,0,0.011535999675591787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,4,4,64,128,1,float16,fp8,0,0.011722666521867117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,4,4,64,128,1,fp8,fp8,0,0.018239999810854595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,4,4,64,0,1,float16,fp8,0,0.011770666887362799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,4,4,64,0,1,fp8,fp8,0,0.017877332866191864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,4,1,64,128,1,float16,float16,0,0.011509332805871964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,4,1,64,0,1,float16,float16,0,0.011616000284751257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,4,1,64,128,1,float16,fp8,0,0.012213333199421564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,4,1,64,128,1,fp8,fp8,0,0.017903999735911686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,4,1,64,0,1,float16,fp8,0,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,4,1,64,0,1,fp8,fp8,0,0.018250666558742523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,4,2,64,128,1,float16,float16,0,0.01332266628742218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,4,2,64,0,1,float16,float16,0,0.011567999919255575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,4,2,64,128,1,float16,fp8,0,0.01210133358836174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,4,2,64,128,1,fp8,fp8,0,0.017829333742459614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,4,2,64,0,1,float16,fp8,0,0.012144000579913458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,4,2,64,0,1,fp8,fp8,0,0.017984000345071156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,2,1,64,128,1,float16,float16,0,0.20913066466649374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,2,1,64,128,1,float16,fp8,0,0.2078239917755127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,2,1,64,128,1,fp8,fp8,0,0.2765493392944336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,2,1,64,0,1,float16,float16,0,1.2883626619974773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,2,2,64,128,1,float16,float16,0,0.12449600299199422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,2,1,64,0,1,float16,fp8,0,1.2874666849772136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,2,1,64,0,1,fp8,fp8,0,1.1026826699574788
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,2,2,64,0,1,float16,float16,0,0.6238506635030111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,2,2,64,128,1,float16,fp8,0,0.12603200475374857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,2,2,64,128,1,fp8,fp8,0,0.16897066434224448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,2,2,64,0,1,float16,fp8,0,0.6235733429590861
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,2,2,64,0,1,fp8,fp8,0,0.5780160029729208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,2,1,64,128,1,float16,float16,0,0.11716266473134358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,2,1,64,128,1,float16,fp8,0,0.11806399623552959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,2,1,64,128,1,fp8,fp8,0,0.16132266322771707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,2,1,64,0,1,float16,float16,0,0.6127093235651652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,2,1,64,0,1,float16,fp8,0,0.6139039993286133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,2,1,64,0,1,fp8,fp8,0,0.5726293325424194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,2,2,64,128,1,float16,float16,0,0.07585600018501282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,2,2,64,0,1,float16,float16,0,0.32387200991312665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,2,2,64,128,1,float16,fp8,0,0.07658666869004567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,2,2,64,128,1,fp8,fp8,0,0.1051626702149709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,2,2,64,0,1,float16,fp8,0,0.32359466950098675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,2,2,64,0,1,fp8,fp8,0,0.3044480085372925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,2,1,64,128,1,float16,float16,0,0.0732479989528656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,2,1,64,0,1,float16,float16,0,0.32095466057459515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,2,1,64,128,1,float16,fp8,0,0.07319466769695282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,2,1,64,128,1,fp8,fp8,0,0.09831466277440389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,2,1,64,0,1,float16,fp8,0,0.3210560083389282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,2,1,64,0,1,fp8,fp8,0,0.2999573349952698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,2,2,64,128,1,float16,float16,0,0.05686933298905691
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,2,2,64,0,1,float16,float16,0,0.17941333850224814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,2,2,64,0,1,float16,fp8,0,0.18126932779947916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,2,2,64,128,1,float16,fp8,0,0.05663466453552246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,2,2,64,128,1,fp8,fp8,0,0.06817066669464111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,2,2,64,0,1,fp8,fp8,0,0.16820265849431357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,2,1,64,128,1,float16,float16,0,0.056048000852266945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,2,1,64,0,1,float16,float16,0,0.18021865685780844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,2,1,64,128,1,float16,fp8,0,0.05619200070699056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,2,1,64,0,1,fp8,fp8,0,0.16869332393010458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,2,1,64,128,1,fp8,fp8,0,0.06691733499368031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,2,1,64,0,1,float16,fp8,0,0.17930134137471518
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,2,1,64,128,1,float16,float16,0,0.16204266746838888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,2,1,64,128,1,float16,fp8,0,0.16130666931470236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,2,1,64,0,1,float16,float16,0,0.7103466987609863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,2,1,64,128,1,fp8,fp8,0,0.2187839945157369
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,2,2,64,128,1,float16,float16,0,0.09781333804130554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,2,1,64,0,1,float16,fp8,0,0.7126080195109049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,2,2,64,0,1,float16,float16,0,0.36800531546274823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,2,2,64,128,1,float16,fp8,0,0.09899200002352397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,2,1,64,0,1,fp8,fp8,0,0.6593120098114014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,2,2,64,128,1,fp8,fp8,0,0.13537599643071493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,2,2,64,0,1,float16,fp8,0,0.37249600887298584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,2,1,64,0,1,float16,float16,0,0.3656533161799113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,2,1,64,128,1,fp8,fp8,0,0.1288586656252543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,2,2,64,0,1,fp8,fp8,0,0.34881067276000977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,2,1,64,128,1,float16,float16,0,0.09330133597056071
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,2,1,64,128,1,float16,fp8,0,0.09277866284052531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,2,1,64,0,1,float16,fp8,0,0.36495999495188397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,2,2,64,128,1,float16,float16,0,0.06069866816202799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,2,1,64,0,1,fp8,fp8,0,0.34410667419433594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,2,2,64,0,1,float16,float16,0,0.20857600371042886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,2,2,64,128,1,float16,fp8,0,0.06091199815273285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,2,2,64,128,1,fp8,fp8,0,0.084197332461675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,2,2,64,0,1,float16,fp8,0,0.21090133984883627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,2,2,64,0,1,fp8,fp8,0,0.19533334175745645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,2,1,64,128,1,float16,float16,0,0.059205333391825356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,2,1,64,0,1,float16,float16,0,0.20746666193008423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,2,1,64,128,1,float16,fp8,0,0.05931200087070465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,2,2,64,128,1,float16,fp8,0,0.046757335464159645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,2,1,64,128,1,fp8,fp8,0,0.07691733539104462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,2,1,64,0,1,float16,fp8,0,0.2085813283920288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,2,1,64,0,1,fp8,fp8,0,0.19312532742818198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,2,2,64,128,1,float16,float16,0,0.045834665497144066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,2,2,64,0,1,float16,float16,0,0.13737600048383078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,2,2,64,128,1,fp8,fp8,0,0.052416001756985985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,2,2,64,0,1,float16,fp8,0,0.13782933354377747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,2,1,64,128,1,fp8,fp8,0,0.05235200126965841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,2,2,64,0,1,fp8,fp8,0,0.13045333822568259
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,2,1,64,128,1,float16,float16,0,0.04624533156553904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,2,1,64,0,1,float16,float16,0,0.1376213332017263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,2,1,64,128,1,float16,fp8,0,0.05012266834576925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,2,1,64,0,1,float16,fp8,0,0.13774399956067404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,2,1,64,0,1,fp8,fp8,0,0.130213330189387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,2,1,64,128,1,float16,float16,0,0.13734400272369385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,2,1,64,0,1,float16,float16,0,0.5112853447596232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,2,1,64,128,1,float16,fp8,0,0.1362506647904714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,2,1,64,128,1,fp8,fp8,0,0.18623467286427817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,2,1,64,0,1,float16,fp8,0,0.5144160191218058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,2,2,64,128,1,float16,float16,0,0.08371733625729878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,2,1,64,0,1,fp8,fp8,0,0.4742186864217122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,2,2,64,0,1,float16,float16,0,0.2723520000775655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,2,2,64,128,1,float16,fp8,0,0.08585066596666972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,2,2,64,128,1,fp8,fp8,0,0.11681600411732991
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,2,2,64,0,1,float16,fp8,0,0.2757813334465027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,2,2,64,0,1,fp8,fp8,0,0.25727999210357666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,2,1,64,128,1,float16,float16,0,0.08009600142637889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,2,1,64,0,1,float16,fp8,0,0.2717653314272563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,2,1,64,0,1,float16,float16,0,0.27081600824991864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,2,1,64,128,1,float16,fp8,0,0.07960000137488048
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,2,1,64,128,1,fp8,fp8,0,0.1050986647605896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,2,1,64,0,1,fp8,fp8,0,0.2518240014712016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,2,2,64,128,1,float16,float16,0,0.05358933409055074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,2,2,64,0,1,float16,float16,0,0.15396799643834433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,2,2,64,128,1,float16,fp8,0,0.05454400181770325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,2,2,64,128,1,fp8,fp8,0,0.06734933455785115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,2,2,64,0,1,float16,fp8,0,0.15467733144760132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,2,2,64,0,1,fp8,fp8,0,0.14498666922251383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,2,1,64,128,1,float16,float16,0,0.05288533369700114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,2,1,64,0,1,float16,float16,0,0.15350932876269022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,2,1,64,128,1,float16,fp8,0,0.05342400074005127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,2,1,64,128,1,fp8,fp8,0,0.0656160016854604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,2,1,64,0,1,float16,fp8,0,0.15375999609629312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,2,1,64,0,1,fp8,fp8,0,0.14333867033322653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,2,2,64,128,1,float16,float16,0,0.04310933252175649
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,2,2,64,0,1,float16,fp8,0,0.11734400192896526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,2,2,64,0,1,float16,float16,0,0.11777066191037495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,2,2,64,128,1,float16,fp8,0,0.04463466505209605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,2,2,64,128,1,fp8,fp8,0,0.04943466683228811
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,2,2,64,0,1,fp8,fp8,0,0.11266666650772095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,2,1,64,128,1,float16,float16,0,0.04781866570313772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,2,1,64,0,1,float16,float16,0,0.11667199929555257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,2,1,64,128,1,float16,fp8,0,0.043322667479515076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,2,1,64,128,1,fp8,fp8,0,0.049413333336512245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,2,1,64,0,1,float16,fp8,0,0.11701333522796631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,2,1,64,0,1,fp8,fp8,0,0.11195199688275655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,2,1,64,128,1,float16,float16,0,0.20694400866826376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,2,1,64,128,1,float16,fp8,0,0.20571200052897134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,2,1,64,0,1,float16,float16,0,0.713701327641805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,2,1,64,128,1,fp8,fp8,0,0.27668267488479614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,2,1,64,0,1,float16,fp8,0,0.7162240346272787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,2,2,64,128,1,float16,float16,0,0.12019200126330058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,2,1,64,0,1,fp8,fp8,0,0.6256693204243978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,2,2,64,0,1,float16,float16,0,0.3538293441136678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,2,2,64,128,1,float16,fp8,0,0.12070932984352112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,2,2,64,128,1,fp8,fp8,0,0.16564800341924033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,2,2,64,0,1,float16,fp8,0,0.35468800862630206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,2,1,64,128,1,float16,float16,0,0.11355200409889221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,2,2,64,0,1,fp8,fp8,0,0.3330613374710083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,2,1,64,0,1,float16,float16,0,0.34413333733876544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,2,1,64,128,1,float16,fp8,0,0.11447999874750774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,2,2,64,128,1,float16,float16,0,0.07131200035413106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,2,1,64,128,1,fp8,fp8,0,0.15575466553370157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,2,1,64,0,1,float16,fp8,0,0.3466026782989502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,2,1,64,0,1,fp8,fp8,0,0.3256053328514099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,2,2,64,0,1,float16,float16,0,0.18524267276128134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,2,2,64,128,1,float16,fp8,0,0.07222933570543925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,2,2,64,128,1,fp8,fp8,0,0.10036266843477885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,2,2,64,0,1,float16,fp8,0,0.18565332889556885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,2,2,64,0,1,fp8,fp8,0,0.1776533325513204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,2,1,64,128,1,float16,float16,0,0.06752533217271169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,2,1,64,0,1,float16,float16,0,0.18340800205866495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,2,1,64,128,1,float16,fp8,0,0.0674773355325063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,2,1,64,128,1,fp8,fp8,0,0.08680533369382222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,2,1,64,0,1,float16,fp8,0,0.18186666568120322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,2,1,64,0,1,fp8,fp8,0,0.17271467049916586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,2,2,64,0,1,fp8,fp8,0,0.0974720021088918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,2,2,64,128,1,float16,float16,0,0.046800002455711365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,2,1,64,128,1,float16,float16,0,0.04570133487383524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,2,2,64,0,1,float16,float16,0,0.10068266590436299
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,2,2,64,128,1,float16,fp8,0,0.04695466657479604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,2,2,64,128,1,fp8,fp8,0,0.059104000528653465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,2,2,64,0,1,float16,fp8,0,0.10244266192118327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,2,1,64,128,1,float16,fp8,0,0.046570668617884316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,2,1,64,0,1,float16,float16,0,0.10168533523877461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,2,1,64,0,1,float16,fp8,0,0.10227200388908386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,2,1,64,128,1,fp8,fp8,0,0.0572213331858317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,2,1,64,0,1,fp8,fp8,0,0.0960693359375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,2,2,64,128,1,float16,float16,0,0.03549333413441976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,2,2,64,0,1,float16,float16,0,0.09680533409118652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,2,2,64,128,1,float16,fp8,0,0.03583999971548716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,2,2,64,128,1,fp8,fp8,0,0.04257600009441376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,2,2,64,0,1,float16,fp8,0,0.09734933574994405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,2,2,64,0,1,fp8,fp8,0,0.0937653382619222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,2,1,64,128,1,float16,float16,0,0.03557866563399633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,2,1,64,0,1,float16,float16,0,0.09659733374913533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,2,1,64,128,1,float16,fp8,0,0.03608000030120214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,2,1,64,128,1,fp8,fp8,0,0.04191466669241587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,2,1,64,128,1,float16,fp8,0,0.15714133779207864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,2,1,64,0,1,float16,fp8,0,0.09751466910044353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,2,1,64,0,1,fp8,fp8,0,0.09303999940554301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,2,1,64,128,1,float16,float16,0,0.15901333093643188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,2,1,64,0,1,float16,float16,0,0.4063413143157959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,2,1,64,128,1,fp8,fp8,0,0.21454399824142456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,2,1,64,0,1,float16,fp8,0,0.4081333478291829
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,2,2,64,128,1,float16,float16,0,0.094842662413915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,2,1,64,0,1,fp8,fp8,0,0.38495465119679767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,2,2,64,0,1,float16,float16,0,0.218341330687205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,2,2,64,128,1,float16,fp8,0,0.09689600268999736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,2,2,64,128,1,fp8,fp8,0,0.1339946687221527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,2,2,64,0,1,float16,fp8,0,0.2187946637471517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,2,2,64,0,1,fp8,fp8,0,0.2100106676419576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,2,1,64,128,1,float16,float16,0,0.09085866808891296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,2,1,64,0,1,float16,float16,0,0.21526400248209634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,2,1,64,128,1,float16,fp8,0,0.09020800391832988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,2,1,64,128,1,fp8,fp8,0,0.1250933309396108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,2,1,64,0,1,float16,fp8,0,0.21503466367721558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,2,1,64,0,1,fp8,fp8,0,0.20269866784413657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,2,2,64,128,1,float16,float16,0,0.05599466462930044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,2,2,64,0,1,float16,float16,0,0.12125333150227864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,2,2,64,128,1,float16,fp8,0,0.05624533196290334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,2,2,64,128,1,fp8,fp8,0,0.07889600098133087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,2,2,64,0,1,float16,fp8,0,0.12379733721415202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,2,2,64,0,1,fp8,fp8,0,0.11801066994667053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,2,1,64,128,1,float16,float16,0,0.05364799996217092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,2,1,64,0,1,float16,float16,0,0.12226133545239766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,2,1,64,128,1,float16,fp8,0,0.054048001766204834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,2,1,64,128,1,fp8,fp8,0,0.07338133454322815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,2,1,64,0,1,float16,fp8,0,0.12114133437474568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,2,1,64,0,1,fp8,fp8,0,0.11611732840538025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,2,2,64,128,1,float16,float16,0,0.03843733419974645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,2,2,64,0,1,float16,float16,0,0.07793599863847096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,2,2,64,128,1,float16,fp8,0,0.04141866664091746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,2,2,64,128,1,fp8,fp8,0,0.04516266783078512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,2,2,64,0,1,float16,fp8,0,0.07792533437410991
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,2,2,64,0,1,fp8,fp8,0,0.07563200096289317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,2,1,64,128,1,float16,float16,0,0.04154133299986521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,2,1,64,0,1,float16,float16,0,0.07740266621112823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,2,1,64,128,1,float16,fp8,0,0.042250668009122215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,2,1,64,128,1,fp8,fp8,0,0.04491733511288961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,2,1,64,0,1,float16,fp8,0,0.07761066655317943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,2,1,64,0,1,fp8,fp8,0,0.07559466858704884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,2,2,64,128,1,float16,float16,0,0.03207999964555105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,2,2,64,0,1,float16,float16,0,0.07649600009123485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,2,2,64,128,1,float16,fp8,0,0.03172266731659571
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,2,2,64,128,1,fp8,fp8,0,0.039061332742373146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,2,1,64,128,1,float16,fp8,0,0.0317493329445521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,2,2,64,0,1,float16,fp8,0,0.0772213339805603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,2,2,64,0,1,fp8,fp8,0,0.07373866438865662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,2,1,64,128,1,float16,float16,0,0.03169066707293192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,2,1,64,0,1,float16,float16,0,0.07700799902280171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,2,1,64,128,1,fp8,fp8,0,0.03836799909671148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,2,1,64,0,1,float16,fp8,0,0.07666133344173431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,2,1,64,128,1,float16,fp8,0,0.20553600788116455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,2,1,64,0,1,float16,float16,0,0.42902934551239014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,2,1,64,0,1,fp8,fp8,0,0.0751146674156189
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,2,1,64,128,1,float16,float16,0,0.20690667629241943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,2,1,64,128,1,fp8,fp8,0,0.2753066619237264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,2,1,64,0,1,float16,fp8,0,0.4302613337834676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,2,2,64,128,1,float16,float16,0,0.11930132905642192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,2,2,64,0,1,float16,float16,0,0.21744000911712646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,2,1,64,0,1,fp8,fp8,0,0.38356268405914307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,2,2,64,128,1,float16,fp8,0,0.12140799562136333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,2,2,64,128,1,fp8,fp8,0,0.16337600350379944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,2,2,64,0,1,float16,fp8,0,0.22188266118367514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,2,2,64,0,1,fp8,fp8,0,0.210207998752594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,2,1,64,128,1,float16,float16,0,0.11360533038775127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,2,1,64,0,1,float16,float16,0,0.2109546661376953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,2,1,64,128,1,float16,fp8,0,0.11225600043932597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,2,1,64,128,1,fp8,fp8,0,0.1553600033124288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,2,1,64,0,1,float16,fp8,0,0.21194666624069214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,2,1,64,0,1,fp8,fp8,0,0.20282665888468424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,2,2,64,128,1,float16,float16,0,0.07020799815654755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,2,2,64,0,1,float16,float16,0,0.11733866731325786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,2,2,64,128,1,float16,fp8,0,0.07125333448251088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,2,2,64,128,1,fp8,fp8,0,0.09841600060462952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,2,2,64,0,1,float16,fp8,0,0.11901332934697469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,2,2,64,0,1,fp8,fp8,0,0.11414933204650879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,2,1,64,128,1,float16,float16,0,0.06563200056552887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,2,1,64,0,1,float16,float16,0,0.1144586702187856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,2,1,64,128,1,float16,fp8,0,0.06704000135262807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,2,1,64,128,1,fp8,fp8,0,0.08538666367530823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,2,1,64,0,1,float16,fp8,0,0.114847997824351
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,2,1,64,0,1,fp8,fp8,0,0.10842667023340861
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,2,2,64,128,1,float16,float16,0,0.04388799766699473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,2,2,64,0,1,float16,float16,0,0.06638933221499126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,2,2,64,128,1,float16,fp8,0,0.04484266539414724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,2,2,64,128,1,fp8,fp8,0,0.05683733522891998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,2,2,64,0,1,float16,fp8,0,0.06772266825040181
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,2,2,64,0,1,fp8,fp8,0,0.06442666550477345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,2,1,64,128,1,float16,float16,0,0.043231998880704246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,2,1,64,0,1,float16,float16,0,0.06520000100135803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,2,1,64,128,1,float16,fp8,0,0.04354666670163473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,2,2,64,128,1,float16,fp8,0,0.03053866575161616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,2,1,64,128,1,fp8,fp8,0,0.054933334390322365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,2,2,64,0,1,float16,fp8,0,0.05780800183614095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,2,1,64,0,1,float16,fp8,0,0.06577600042025249
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,2,1,64,0,1,fp8,fp8,0,0.06382933259010315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,2,2,64,128,1,float16,float16,0,0.03062933435042699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,2,1,64,128,1,float16,fp8,0,0.029919999341169994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,2,2,64,0,1,float16,float16,0,0.0572266678015391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,2,2,64,128,1,fp8,fp8,0,0.03686933219432831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,2,2,64,0,1,fp8,fp8,0,0.057061334451039634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,2,1,64,128,1,float16,float16,0,0.03050133337577184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,2,1,64,0,1,float16,float16,0,0.05749333401521047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,2,1,64,128,1,fp8,fp8,0,0.03711466739575068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,2,1,64,0,1,float16,fp8,0,0.057520002126693726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,2,1,64,0,1,fp8,fp8,0,0.05695466697216034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,2,2,64,128,1,float16,float16,0,0.029114666084448498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,2,2,64,0,1,float16,float16,0,0.05633600056171417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,2,2,64,128,1,float16,fp8,0,0.02864533414443334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,2,2,64,128,1,fp8,fp8,0,0.03572266548871994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,2,2,64,0,1,float16,fp8,0,0.05685866872469584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,2,2,64,0,1,fp8,fp8,0,0.055733333031336464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,2,1,64,128,1,float16,float16,0,0.028794666131337483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,2,1,64,0,1,float16,float16,0,0.05622399846712748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,2,1,64,128,1,float16,fp8,0,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,2,1,64,128,1,float16,fp8,0,0.16525866587956747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,2,1,64,128,1,fp8,fp8,0,0.0351946676770846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,2,1,64,0,1,float16,fp8,0,0.056426664193471275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,2,1,64,0,1,fp8,fp8,0,0.055402666330337524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,2,1,64,128,1,float16,float16,0,0.1689866582552592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,2,1,64,0,1,float16,float16,0,0.2704000075658162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,2,1,64,128,1,fp8,fp8,0,0.2172266642252604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,2,1,64,0,1,float16,fp8,0,0.2607626716295878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,2,2,64,128,1,float16,float16,0,0.09642133116722107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,2,1,64,0,1,fp8,fp8,0,0.2540160020192464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,2,2,64,0,1,float16,float16,0,0.14296000202496847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,2,2,64,128,1,float16,fp8,0,0.09869866569836934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,2,2,64,128,1,fp8,fp8,0,0.13318933049837747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,2,2,64,0,1,float16,fp8,0,0.14356799920399985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,2,2,64,0,1,fp8,fp8,0,0.14294933279355368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,2,1,64,128,1,float16,float16,0,0.09093333284060161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,2,1,64,0,1,float16,float16,0,0.1400266687075297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,2,1,64,128,1,float16,fp8,0,0.09019200007120769
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,2,1,64,128,1,fp8,fp8,0,0.1246453324953715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,2,1,64,0,1,float16,fp8,0,0.14058132966359457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,2,1,64,0,1,fp8,fp8,0,0.1341546674569448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,2,2,64,128,1,float16,float16,0,0.05575466652711233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,2,2,64,0,1,float16,float16,0,0.07906133433183034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,2,2,64,128,1,float16,fp8,0,0.0562666654586792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,2,2,64,128,1,fp8,fp8,0,0.07580266892910004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,2,2,64,0,1,float16,fp8,0,0.08205866813659668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,2,2,64,0,1,fp8,fp8,0,0.07918400069077809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,2,1,64,128,1,float16,float16,0,0.05259733398755392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,2,1,64,0,1,float16,float16,0,0.07855999966462453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,2,1,64,128,1,float16,fp8,0,0.052709331115086876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,2,1,64,128,1,fp8,fp8,0,0.07245866457621257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,2,1,64,0,1,float16,fp8,0,0.0795306662718455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,2,1,64,0,1,fp8,fp8,0,0.07713066538174947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,2,2,64,128,1,float16,float16,0,0.040549332896868386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,2,2,64,0,1,float16,float16,0,0.050714666644732155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,2,2,64,128,1,float16,fp8,0,0.037952000896135964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,2,1,64,0,1,float16,float16,0,0.05090666810671488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,2,1,64,128,1,float16,fp8,0,0.039690665900707245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,2,2,64,128,1,fp8,fp8,0,0.04435733457406362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,2,2,64,0,1,float16,fp8,0,0.05216533442338308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,2,2,64,0,1,fp8,fp8,0,0.05132266879081726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,2,1,64,128,1,float16,float16,0,0.039808000127474465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,2,1,64,128,1,fp8,fp8,0,0.04387733340263367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,2,1,64,0,1,float16,fp8,0,0.050613333781560264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,2,1,64,0,1,fp8,fp8,0,0.050250664353370667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,2,2,64,128,1,float16,float16,0,0.02831999957561493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,2,2,64,0,1,float16,float16,0,0.048357332746187844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,2,2,64,128,1,float16,fp8,0,0.029535998900731403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,2,2,64,128,1,fp8,fp8,0,0.03568533311287562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,2,2,64,0,1,float16,fp8,0,0.047770669062932335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,2,2,64,0,1,fp8,fp8,0,0.04667733112970988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,2,1,64,128,1,float16,float16,0,0.028853334486484528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,2,1,64,0,1,float16,float16,0,0.047930667797724404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,2,1,64,128,1,float16,fp8,0,0.028560000161329906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,2,1,64,128,1,fp8,fp8,0,0.03583466758330663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,2,1,64,0,1,float16,fp8,0,0.04784533381462097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,2,1,64,0,1,fp8,fp8,0,0.047050664822260536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,2,2,64,128,1,float16,float16,0,0.027642667293548584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,2,2,64,0,1,float16,float16,0,0.04715733230113983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,2,2,64,128,1,float16,fp8,0,0.027647999425729115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,2,2,64,128,1,fp8,fp8,0,0.03482133398453394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,2,2,64,0,1,float16,fp8,0,0.04696533580621084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,2,2,64,0,1,fp8,fp8,0,0.045781334241231285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,2,1,64,128,1,float16,float16,0,0.02777066578467687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,2,1,64,0,1,float16,float16,0,0.047007997830708824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,2,1,64,128,1,float16,fp8,0,0.028192001084486645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,2,1,64,0,1,float16,float16,0,0.28379199902216595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,2,1,64,128,1,fp8,fp8,0,0.03453866640726725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,2,1,64,0,1,float16,fp8,0,0.047040000557899475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,2,1,64,0,1,fp8,fp8,0,0.04604266583919525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,2,1,64,128,1,float16,float16,0,0.1986400087674459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,2,1,64,128,1,float16,fp8,0,0.19726399580637613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,2,1,64,128,1,fp8,fp8,0,0.26501866181691486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,2,1,64,0,1,float16,fp8,0,0.2820319930712382
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,2,1,64,0,1,fp8,fp8,0,0.262170672416687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,2,2,64,128,1,float16,float16,0,0.11570133765538533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,2,2,64,0,1,float16,float16,0,0.14666133125623068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,2,2,64,128,1,float16,fp8,0,0.11698666214942932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,2,2,64,128,1,fp8,fp8,0,0.15901866555213928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,2,2,64,0,1,float16,fp8,0,0.15212266643842062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,2,2,64,0,1,fp8,fp8,0,0.14776532848676047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,2,1,64,128,1,float16,float16,0,0.10995733737945557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,2,1,64,0,1,float16,float16,0,0.14392000436782837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,2,1,64,128,1,float16,fp8,0,0.11072533329327901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,2,1,64,128,1,fp8,fp8,0,0.1544319987297058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,2,1,64,0,1,float16,fp8,0,0.14332266648610434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,2,1,64,0,1,fp8,fp8,0,0.14056533575057983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,2,2,64,128,1,float16,float16,0,0.06970133384068807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,2,2,64,0,1,float16,float16,0,0.08442667126655579
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,2,2,64,128,1,float16,fp8,0,0.07126399874687195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,2,2,64,128,1,fp8,fp8,0,0.09807999928792317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,2,2,64,0,1,float16,fp8,0,0.08381332953770955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,2,2,64,0,1,fp8,fp8,0,0.08492799599965413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,2,1,64,128,1,float16,float16,0,0.06545066833496094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,2,1,64,0,1,float16,float16,0,0.08078933258851369
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,2,1,64,128,1,float16,fp8,0,0.06620266536871593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,2,1,64,128,1,fp8,fp8,0,0.08550933003425598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,2,2,64,128,1,fp8,fp8,0,0.056362668673197426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,2,1,64,0,1,float16,fp8,0,0.08142399787902832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,2,1,64,0,1,fp8,fp8,0,0.08028266827265422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,2,2,64,128,1,float16,float16,0,0.04460266729195913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,2,2,64,0,1,float16,float16,0,0.050613333781560264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,2,2,64,128,1,float16,fp8,0,0.04456000030040741
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,2,2,64,0,1,float16,fp8,0,0.05130666494369507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,2,2,64,0,1,fp8,fp8,0,0.04941866795221964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,2,1,64,128,1,float16,float16,0,0.043141335248947144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,2,1,64,0,1,float16,float16,0,0.049829334020614624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,2,1,64,128,1,float16,fp8,0,0.0440586656332016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,2,1,64,128,1,fp8,fp8,0,0.05538133283456167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,2,1,64,0,1,float16,fp8,0,0.050330668687820435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,2,1,64,0,1,fp8,fp8,0,0.048112000028292336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,2,2,64,128,1,float16,float16,0,0.029792000850041706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,2,2,64,0,1,float16,float16,0,0.03951466580231985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,2,2,64,128,1,float16,fp8,0,0.03018666555484136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,2,2,64,128,1,fp8,fp8,0,0.037205333511034645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,2,2,64,0,1,float16,fp8,0,0.03969600051641464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,2,2,64,0,1,fp8,fp8,0,0.039813332259655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,2,1,64,128,1,float16,float16,0,0.029765332738558452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,2,1,64,0,1,float16,float16,0,0.0396373321612676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,2,1,64,128,1,float16,fp8,0,0.028922667105992634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,2,1,64,128,1,fp8,fp8,0,0.03580799947182337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,2,1,64,0,1,float16,fp8,0,0.03972266614437103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,2,1,64,0,1,fp8,fp8,0,0.038719999293486275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,2,2,64,128,1,float16,float16,0,0.026842666169007618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,2,2,64,0,1,float16,float16,0,0.03801066676775614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,2,1,64,128,1,float16,float16,0,0.027290667096773785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,2,2,64,128,1,float16,fp8,0,0.027562665442625683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,2,2,64,128,1,fp8,fp8,0,0.03352533280849457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,2,2,64,0,1,float16,fp8,0,0.038218667109807335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,2,2,64,0,1,fp8,fp8,0,0.03711466739575068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,2,1,64,0,1,float16,float16,0,0.03751999884843826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,2,1,64,128,1,float16,fp8,0,0.027477333943049114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,2,1,64,128,1,fp8,fp8,0,0.033946665624777474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,2,1,64,0,1,float16,fp8,0,0.03805333375930786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,2,1,64,0,1,fp8,fp8,0,0.03614933292071024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,2,2,64,128,1,float16,float16,0,0.026538667579491932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,2,2,64,0,1,float16,float16,0,0.037274666130542755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,2,2,64,128,1,float16,fp8,0,0.02625600000222524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,2,2,64,128,1,fp8,fp8,0,0.033610666791598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,2,2,64,0,1,float16,fp8,0,0.03735466549793879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,2,2,64,0,1,fp8,fp8,0,0.03634133438269297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,2,1,64,128,1,float16,float16,0,0.025594666600227356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,2,1,64,0,1,float16,float16,0,0.03716800113519033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,2,1,64,128,1,float16,fp8,0,0.026885333160559338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,2,1,64,128,1,fp8,fp8,0,0.032511999209721885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,2,1,64,0,1,float16,fp8,0,0.03794133414824804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,2,1,64,0,1,fp8,fp8,0,0.03604800005753835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,2,1,64,128,1,fp8,fp8,0,0.21596799294153848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,2,1,64,128,1,float16,float16,0,0.16421866416931152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,2,1,64,0,1,float16,float16,0,0.18933866421381632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,2,1,64,128,1,float16,fp8,0,0.16215466459592184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,2,1,64,0,1,float16,fp8,0,0.18731733163197836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,2,1,64,0,1,fp8,fp8,0,0.1851360003153483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,2,2,64,128,1,float16,float16,0,0.09593600034713745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,2,2,64,0,1,float16,float16,0,0.104912002881368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,2,2,64,128,1,float16,fp8,0,0.09733333190282185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,2,2,64,128,1,fp8,fp8,0,0.12994133432706198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,2,2,64,0,1,float16,fp8,0,0.10603732864061992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,2,2,64,0,1,fp8,fp8,0,0.10790399710337321
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,2,1,64,128,1,float16,float16,0,0.09319466352462769
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,2,1,64,0,1,float16,float16,0,0.10345066587130229
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,2,1,64,128,1,float16,fp8,0,0.09221866726875305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,2,1,64,128,1,fp8,fp8,0,0.12214932839075725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,2,1,64,0,1,float16,fp8,0,0.10315733154614766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,2,1,64,0,1,fp8,fp8,0,0.10151466727256775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,2,2,64,128,1,float16,float16,0,0.05406933526198069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,2,2,64,0,1,float16,float16,0,0.06100266675154368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,2,2,64,128,1,float16,fp8,0,0.055120001236597695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,2,1,64,128,1,float16,fp8,0,0.053930665055910744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,2,2,64,128,1,fp8,fp8,0,0.07690133154392242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,2,2,64,0,1,float16,fp8,0,0.061424002051353455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,2,2,64,0,1,fp8,fp8,0,0.061333333452542625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,2,1,64,128,1,float16,float16,0,0.053904001911481224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,2,1,64,0,1,float16,float16,0,0.06044800082842509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,2,2,64,128,1,float16,fp8,0,0.040607998768488564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,2,1,64,128,1,fp8,fp8,0,0.07026666899522145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,2,1,64,0,1,float16,fp8,0,0.06089599927266439
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,2,1,64,0,1,fp8,fp8,0,0.058778668443361916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,2,1,64,128,1,float16,float16,0,0.037248000502586365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,2,2,64,128,1,float16,float16,0,0.041477332512537636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,2,2,64,0,1,float16,float16,0,0.039818666875362396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,2,2,64,128,1,fp8,fp8,0,0.04498666524887085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,2,2,64,0,1,float16,fp8,0,0.04065600037574768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,2,2,64,0,1,fp8,fp8,0,0.03890133400758108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,2,1,64,0,1,float16,float16,0,0.03986666599909464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,2,1,64,128,1,float16,fp8,0,0.03750933210055033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,2,1,64,128,1,fp8,fp8,0,0.04384533564249674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,2,1,64,0,1,float16,fp8,0,0.040074666341145836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,2,1,64,0,1,fp8,fp8,0,0.03779733429352442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,2,2,64,128,1,float16,float16,0,0.028437333802382152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,2,2,64,0,1,float16,float16,0,0.034629332522551216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,2,2,64,128,1,float16,fp8,0,0.02850666642189026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,2,2,64,128,1,fp8,fp8,0,0.035391998787721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,2,2,64,0,1,float16,fp8,0,0.03513066718975703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,2,2,64,0,1,fp8,fp8,0,0.03357866654793421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,2,1,64,128,1,float16,float16,0,0.028357334434986115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,2,1,64,0,1,float16,float16,0,0.03465066601832708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,2,1,64,128,1,float16,fp8,0,0.028351999819278717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,2,1,64,128,1,fp8,fp8,0,0.03497066597143809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,2,1,64,0,1,float16,fp8,0,0.034634667138258614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,2,1,64,0,1,fp8,fp8,0,0.0336053321758906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,2,2,64,128,1,float16,float16,0,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,2,2,64,0,1,float16,float16,0,0.033301333586374916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,2,2,64,128,1,float16,fp8,0,0.026554666459560394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,2,2,64,128,1,fp8,fp8,0,0.033471999069054924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,2,2,64,0,1,float16,fp8,0,0.033200000723203026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,2,2,64,0,1,fp8,fp8,0,0.03201066702604294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,2,1,64,128,1,float16,float16,0,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,2,1,64,0,1,float16,float16,0,0.03274133304754893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,2,1,64,128,1,float16,fp8,0,0.026346666117509205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,2,1,64,128,1,fp8,fp8,0,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,2,1,64,0,1,float16,fp8,0,0.03325333446264267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,2,2,64,128,1,fp8,fp8,0,0.03253333270549774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,2,1,64,0,1,fp8,fp8,0,0.0316746657093366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,2,2,64,0,1,fp8,fp8,0,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,2,2,64,128,1,float16,float16,0,0.02611200014750163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,2,2,64,0,1,float16,float16,0,0.03170666595300039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,2,2,64,128,1,float16,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,2,2,64,0,1,float16,fp8,0,0.03285333265860876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,2,1,64,128,1,float16,float16,0,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,2,1,64,0,1,float16,float16,0,0.03234666585922241
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,2,1,64,128,1,float16,fp8,0,0.02611733227968216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,2,1,64,128,1,fp8,fp8,0,0.03278933217128118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,2,1,64,0,1,float16,fp8,0,0.031930667658646904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,2,1,64,0,1,fp8,fp8,0,0.031023999055226643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,2,1,64,128,1,float16,float16,0,0.18039999405543009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,2,1,64,0,1,float16,float16,0,0.2016213337580363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,2,1,64,128,1,float16,fp8,0,0.18056533734003702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,2,2,64,128,1,float16,float16,0,0.10761066277821858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,2,1,64,128,1,fp8,fp8,0,0.2401919960975647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,2,1,64,0,1,float16,fp8,0,0.20082666476567587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,2,2,64,128,1,fp8,fp8,0,0.14283200105031332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,2,2,64,0,1,float16,fp8,0,0.11153067151705424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,2,1,64,0,1,fp8,fp8,0,0.18131200472513834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,2,2,64,0,1,float16,float16,0,0.11411733428637187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,2,2,64,128,1,float16,fp8,0,0.10847466190656026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,2,2,64,0,1,fp8,fp8,0,0.10655466715494792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,2,1,64,128,1,float16,float16,0,0.10289600491523743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,2,1,64,0,1,float16,float16,0,0.10854400197664897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,2,1,64,128,1,float16,fp8,0,0.102783997853597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,2,1,64,128,1,fp8,fp8,0,0.137445330619812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,2,1,64,0,1,float16,fp8,0,0.10838933785756429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,2,1,64,0,1,fp8,fp8,0,0.10269866387049358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,2,2,64,128,1,float16,float16,0,0.0634986658891042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,2,2,64,0,1,float16,float16,0,0.06491200129191081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,2,2,64,128,1,float16,fp8,0,0.06268266836802165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,2,2,64,128,1,fp8,fp8,0,0.08531733353932698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,2,2,64,0,1,float16,fp8,0,0.06443733473618825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,2,2,64,0,1,fp8,fp8,0,0.06146133442719778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,2,1,64,128,1,float16,float16,0,0.06054399907588959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,2,1,64,0,1,float16,float16,0,0.06222933530807495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,2,1,64,128,1,float16,fp8,0,0.0602453351020813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,2,1,64,128,1,fp8,fp8,0,0.08107199768225352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,2,1,64,0,1,float16,fp8,0,0.06241600215435028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,2,1,64,0,1,fp8,fp8,0,0.05830933153629303
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,2,2,64,128,1,float16,float16,0,0.04207466542720795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,2,2,64,0,1,fp8,fp8,0,0.03763733307520548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,2,2,64,0,1,float16,float16,0,0.04055999964475632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,2,2,64,128,1,float16,fp8,0,0.04147200038035711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,2,2,64,128,1,fp8,fp8,0,0.05253866811593374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,2,2,64,0,1,float16,fp8,0,0.04055999964475632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,2,1,64,128,1,float16,float16,0,0.04068800061941147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,2,1,64,0,1,float16,float16,0,0.03921066721280416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,2,1,64,128,1,float16,fp8,0,0.04147200038035711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,2,1,64,128,1,fp8,fp8,0,0.052522664268811546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,2,1,64,0,1,float16,fp8,0,0.039818666875362396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,2,1,64,0,1,fp8,fp8,0,0.0366239994764328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,2,2,64,128,1,float16,float16,0,0.02834133307139079
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,2,1,64,128,1,float16,float16,0,0.02701333413521449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,2,2,64,0,1,float16,float16,0,0.030410667260487873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,2,2,64,128,1,float16,fp8,0,0.02845333268245061
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,2,2,64,128,1,fp8,fp8,0,0.034847999612490334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,2,2,64,0,1,float16,fp8,0,0.03090133269627889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,2,2,64,0,1,fp8,fp8,0,0.029722665747006733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,2,1,64,0,1,float16,float16,0,0.029669334491093952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,2,1,64,128,1,float16,fp8,0,0.028304000695546467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,2,1,64,128,1,fp8,fp8,0,0.03398933261632919
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,2,1,64,0,1,float16,fp8,0,0.030154667794704437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,2,1,64,0,1,fp8,fp8,0,0.028815999627113342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,2,2,64,128,1,float16,float16,0,0.025045332809289295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,2,2,64,0,1,float16,float16,0,0.028181334336598713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,2,2,64,128,1,float16,fp8,0,0.026144000391165417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,2,2,64,128,1,fp8,fp8,0,0.03266133368015289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,2,2,64,0,1,float16,fp8,0,0.028394666810830433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,2,1,64,0,1,float16,fp8,0,0.028704000016053517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,2,2,64,0,1,fp8,fp8,0,0.02757866680622101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,2,1,64,128,1,float16,float16,0,0.025600001215934753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,2,1,64,0,1,float16,float16,0,0.02773866554101308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,2,1,64,128,1,float16,fp8,0,0.025487999121348064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,2,1,64,128,1,fp8,fp8,0,0.03219199925661087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,2,1,64,0,1,fp8,fp8,0,0.026928000152111053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,2,2,64,128,1,float16,float16,0,0.024832000335057575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,2,1,64,128,1,float16,float16,0,0.024490666886170704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,2,2,64,0,1,float16,float16,0,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,2,2,64,128,1,float16,fp8,0,0.025226667523384094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,2,2,64,128,1,fp8,fp8,0,0.031712000568707786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,2,2,64,0,1,float16,fp8,0,0.027050666511058807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,2,2,64,0,1,fp8,fp8,0,0.02624000112215678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,2,1,64,0,1,float16,float16,0,0.027290667096773785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,2,1,64,128,1,float16,fp8,0,0.025231999655564625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,2,1,64,128,1,fp8,fp8,0,0.031397332747777305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,2,1,64,0,1,float16,fp8,0,0.027493332823117573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,2,2,64,0,1,float16,fp8,0,0.02683199942111969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,2,1,64,0,1,fp8,fp8,0,0.026074667771657307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,2,2,64,128,1,float16,float16,0,0.024277334411938984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,2,2,64,0,1,float16,float16,0,0.026191999514897663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,2,2,64,128,1,float16,fp8,0,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,2,2,64,128,1,fp8,fp8,0,0.03138133386770884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,2,2,64,0,1,fp8,fp8,0,0.025519999365011852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,2,1,64,128,1,float16,float16,0,0.023936000963052113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,2,1,64,0,1,float16,float16,0,0.025946666797002155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,2,1,64,128,1,float16,fp8,0,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,2,1,64,128,1,fp8,fp8,0,0.030608000854651134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,2,1,64,0,1,float16,fp8,0,0.02679466704527537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,2,1,64,0,1,fp8,fp8,0,0.025263999899228413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,2,1,64,128,1,float16,float16,0,0.17197332779566446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,2,1,64,0,1,float16,float16,0,0.16851200660069784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,2,1,64,128,1,float16,fp8,0,0.1734559933344523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,2,1,64,128,1,fp8,fp8,0,0.2266826629638672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,2,1,64,0,1,float16,fp8,0,0.16922666629155478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,2,1,64,0,1,fp8,fp8,0,0.15174399813016257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,2,2,64,128,1,float16,float16,0,0.10445333520571391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,2,2,64,0,1,float16,float16,0,0.09773332873980205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,2,2,64,128,1,float16,fp8,0,0.10318932930628459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,2,2,64,128,1,fp8,fp8,0,0.13391466935475668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,2,2,64,0,1,float16,fp8,0,0.0971999963124593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,2,2,64,0,1,fp8,fp8,0,0.09082667032877605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,2,1,64,128,1,float16,float16,0,0.0977226694424947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,2,1,64,0,1,float16,float16,0,0.09252799550692241
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,2,1,64,128,1,float16,fp8,0,0.09806933005650838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,2,1,64,128,1,fp8,fp8,0,0.13331733147303262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,2,1,64,0,1,float16,fp8,0,0.09307199716567993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,2,1,64,0,1,fp8,fp8,0,0.08667199810345967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,2,2,64,128,1,float16,float16,0,0.06124266485373179
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,2,2,64,0,1,float16,float16,0,0.056757330894470215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,2,2,64,128,1,float16,fp8,0,0.06009600063165029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,2,2,64,128,1,fp8,fp8,0,0.08374933401743571
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,2,2,64,0,1,float16,fp8,0,0.0565280020236969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,2,2,64,0,1,fp8,fp8,0,0.05349866549173991
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,2,1,64,128,1,float16,float16,0,0.05826666454474131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,2,1,64,0,1,float16,float16,0,0.054197331269582115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,2,1,64,128,1,float16,fp8,0,0.0574186642964681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,2,1,64,128,1,fp8,fp8,0,0.0748586654663086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,2,1,64,0,1,float16,fp8,0,0.054197331269582115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,2,1,64,0,1,fp8,fp8,0,0.04930133124192556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,2,2,64,128,1,float16,float16,0,0.04154666761557261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,2,2,64,0,1,float16,float16,0,0.036346666514873505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,2,2,64,128,1,float16,fp8,0,0.04120533416668574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,2,1,64,0,1,float16,float16,0,0.03544000039498011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,2,2,64,128,1,fp8,fp8,0,0.05132266879081726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,2,2,64,0,1,float16,fp8,0,0.034976000587145485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,2,2,64,0,1,fp8,fp8,0,0.03328000009059906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,2,1,64,128,1,float16,float16,0,0.04041599979003271
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,2,1,64,128,1,float16,fp8,0,0.041034666200478874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,2,1,64,128,1,fp8,fp8,0,0.05178666611512502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,2,1,64,0,1,float16,fp8,0,0.035642666121323906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,2,1,64,0,1,fp8,fp8,0,0.031914666295051575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,2,2,64,128,1,float16,float16,0,0.027994667490323383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,2,2,64,0,1,float16,float16,0,0.026501332720120747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,2,2,64,128,1,float16,fp8,0,0.028666667640209198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,2,2,64,128,1,fp8,fp8,0,0.035375999907652535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,2,2,64,0,1,float16,fp8,0,0.026933332284291584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,2,1,64,128,1,float16,float16,0,0.027600000301996868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,2,2,64,0,1,fp8,fp8,0,0.02516799916823705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,2,1,64,0,1,float16,float16,0,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,2,1,64,128,1,float16,fp8,0,0.027957332630952198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,2,1,64,128,1,fp8,fp8,0,0.034501334031422935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,2,1,64,0,1,float16,fp8,0,0.0262773334980011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,2,1,64,0,1,fp8,fp8,0,0.024693332612514496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,2,2,64,128,1,float16,float16,0,0.025616000096003216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,2,2,64,0,1,float16,float16,0,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,2,2,64,128,1,float16,fp8,0,0.026021334032217663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,2,1,64,128,1,float16,fp8,0,0.02569066733121872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,2,2,64,128,1,fp8,fp8,0,0.03198933353026708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,2,2,64,0,1,float16,fp8,0,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,2,2,64,0,1,fp8,fp8,0,0.022965334355831146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,2,1,64,128,1,float16,float16,0,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,2,2,64,0,1,float16,float16,0,0.02254933367172877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,2,1,64,0,1,float16,float16,0,0.023530667026837666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,2,1,64,128,1,fp8,fp8,0,0.03156800071398417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,2,1,64,0,1,float16,fp8,0,0.023743999501069386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,2,1,64,0,1,fp8,fp8,0,0.02256533255179723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,2,2,64,128,1,float16,float16,0,0.02455466737349828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,2,2,64,128,1,float16,fp8,0,0.024821333587169647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,2,2,64,128,1,fp8,fp8,0,0.03166933357715607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,2,2,64,0,1,float16,fp8,0,0.023034666975339253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,2,2,64,0,1,fp8,fp8,0,0.021962667504946392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,2,1,64,128,1,float16,float16,0,0.024218666056791942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,2,1,64,0,1,float16,float16,0,0.022554665803909302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,2,2,64,0,1,float16,float16,0,0.022128000855445862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,2,1,64,128,1,float16,fp8,0,0.024671999116738636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,2,1,64,128,1,fp8,fp8,0,0.031125334401925404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,2,1,64,0,1,float16,fp8,0,0.022800001005331676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,2,1,64,0,1,fp8,fp8,0,0.021850667893886566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,2,2,64,128,1,float16,float16,0,0.023999998966852825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,2,2,64,128,1,float16,fp8,0,0.024192000428835552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,2,2,64,128,1,fp8,fp8,0,0.03070933371782303
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,2,2,64,0,1,float16,fp8,0,0.02237333357334137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,2,2,64,0,1,fp8,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,2,1,64,128,1,float16,float16,0,0.023525332411130268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,2,1,64,0,1,float16,float16,0,0.021722666919231415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,2,1,64,128,1,float16,fp8,0,0.02426133304834366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,2,1,64,128,1,fp8,fp8,0,0.03073066721359889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,2,1,64,0,1,float16,fp8,0,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,2,1,64,0,1,fp8,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,2,2,64,128,1,float16,float16,0,0.023946667710940044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,2,2,64,0,1,fp8,fp8,0,0.02067733307679494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,2,2,64,0,1,float16,float16,0,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,2,2,64,128,1,float16,fp8,0,0.023760000864664715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,2,2,64,128,1,fp8,fp8,0,0.03048533449570338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,2,2,64,0,1,float16,fp8,0,0.022064000368118286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,2,1,64,0,1,float16,fp8,0,0.02161066730817159
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,2,1,64,128,1,float16,float16,0,0.023743999501069386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,2,1,64,0,1,float16,float16,0,0.021920000513394673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,2,1,64,128,1,float16,fp8,0,0.024005333582560223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,2,1,64,128,1,fp8,fp8,0,0.030447999636332195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,2,1,64,128,1,float16,fp8,0,0.07860266665617625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,2,1,64,0,1,fp8,fp8,0,0.020938667158285778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,2,1,64,128,1,float16,float16,0,0.07979199786980946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,2,1,64,0,1,float16,float16,0,0.07871466875076294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,2,1,64,128,1,fp8,fp8,0,0.10482666889826457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,2,1,64,0,1,float16,fp8,0,0.07813866436481476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,2,2,64,128,1,float16,float16,0,0.047466665506362915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,2,1,64,0,1,fp8,fp8,0,0.07790933549404144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,2,2,64,0,1,float16,float16,0,0.04725333551565806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,2,2,64,128,1,float16,fp8,0,0.04778666794300079
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,2,2,64,128,1,fp8,fp8,0,0.06268266836802165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,2,2,64,0,1,float16,fp8,0,0.04742933313051859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,2,2,64,0,1,fp8,fp8,0,0.04827199876308441
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,2,1,64,128,1,float16,float16,0,0.04422399898370107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,2,1,64,0,1,float16,fp8,0,0.043882668018341064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,2,1,64,0,1,float16,float16,0,0.04452266792456309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,2,1,64,128,1,float16,fp8,0,0.04433600107828776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,2,1,64,128,1,fp8,fp8,0,0.057914664347966514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,2,1,64,0,1,fp8,fp8,0,0.0452159990866979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,2,2,64,128,1,float16,float16,0,0.03409600009520849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,2,2,64,0,1,float16,float16,0,0.03414933383464813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,2,2,64,128,1,float16,fp8,0,0.034416000048319496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,2,2,64,128,1,fp8,fp8,0,0.037445334096749626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,2,2,64,0,1,float16,fp8,0,0.03385599950949351
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,2,2,64,0,1,fp8,fp8,0,0.03050133337577184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,2,1,64,128,1,float16,float16,0,0.03321066747109095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,2,1,64,0,1,float16,float16,0,0.0323786661028862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,2,1,64,128,1,float16,fp8,0,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,2,1,64,128,1,fp8,fp8,0,0.03643200049797694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,2,1,64,0,1,float16,fp8,0,0.03335466732581457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,2,1,64,0,1,fp8,fp8,0,0.029813334345817566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,2,2,64,128,1,float16,float16,0,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,2,2,64,0,1,float16,float16,0,0.022266666094462078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,2,2,64,128,1,float16,fp8,0,0.022645334402720135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,2,2,64,128,1,fp8,fp8,0,0.02645866572856903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,2,2,64,0,1,float16,fp8,0,0.021957332889238994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,2,2,64,0,1,fp8,fp8,0,0.022837333381175995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,2,1,64,128,1,float16,float16,0,0.021695998807748158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,2,1,64,0,1,float16,float16,0,0.02164799968401591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,2,2,64,128,1,float16,float16,0,0.01828266680240631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,2,1,64,128,1,float16,fp8,0,0.022181332111358643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,2,2,64,128,1,fp8,fp8,0,0.024175999065240223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,2,1,64,128,1,fp8,fp8,0,0.02603200078010559
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,2,1,64,0,1,float16,fp8,0,0.02176533391078313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,2,1,64,0,1,fp8,fp8,0,0.02183466653029124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,2,2,64,0,1,float16,float16,0,0.018309333672126133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,2,2,64,128,1,float16,fp8,0,0.01842133328318596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,2,2,64,0,1,float16,fp8,0,0.018309333672126133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,2,1,64,0,1,float16,fp8,0,0.01812800019979477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,2,2,64,0,1,fp8,fp8,0,0.020703999946514767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,2,1,64,128,1,float16,float16,0,0.01823466643691063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,2,1,64,0,1,float16,float16,0,0.01754666616519292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,2,1,64,128,1,float16,fp8,0,0.017898666361967724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,2,1,64,128,1,fp8,fp8,0,0.02366400013367335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,2,1,64,0,1,fp8,fp8,0,0.02019199977318446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,2,2,64,128,1,float16,float16,0,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,2,2,64,0,1,float16,float16,0,0.01670933390657107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,2,2,64,128,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,2,2,64,128,1,fp8,fp8,0,0.023408000667889912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,2,2,64,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,2,2,64,0,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,2,1,64,128,1,float16,float16,0,0.016410666207472484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,2,1,64,0,1,float16,float16,0,0.01647466669480006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,2,1,64,128,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,2,1,64,128,1,fp8,fp8,0,0.022687998910744984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,2,1,64,0,1,float16,fp8,0,0.016677333662907284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,2,1,64,0,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,2,2,64,128,1,float16,float16,0,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,2,2,64,0,1,float16,float16,0,0.015791999797026317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,2,2,64,128,1,float16,fp8,0,0.016751999656359356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,2,2,64,128,1,fp8,fp8,0,0.02276800076166789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,2,2,64,0,1,float16,fp8,0,0.016261332978804905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,2,2,64,0,1,fp8,fp8,0,0.018709332992633183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,2,1,64,128,1,float16,float16,0,0.016000000139077503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,2,1,64,0,1,float16,float16,0,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,2,1,64,128,1,float16,fp8,0,0.01600533351302147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,2,1,64,128,1,fp8,fp8,0,0.022416000564893086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,2,1,64,0,1,float16,fp8,0,0.016437333077192307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,2,1,64,0,1,fp8,fp8,0,0.0185759998857975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,2,2,64,128,1,float16,float16,0,0.015392000476519266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,2,2,64,0,1,float16,float16,0,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,2,2,64,128,1,float16,fp8,0,0.015930666277805965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,2,2,64,128,1,fp8,fp8,0,0.022309333086013794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,2,2,64,0,1,float16,fp8,0,0.015413332730531693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,2,2,64,0,1,fp8,fp8,0,0.018640000373125076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,2,1,64,128,1,float16,float16,0,0.01595199977358182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,2,1,64,0,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,2,1,64,128,1,float16,fp8,0,0.015583999454975128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,2,1,64,128,1,fp8,fp8,0,0.02199466774861018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,2,1,64,0,1,float16,fp8,0,0.01600533351302147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,2,1,64,0,1,fp8,fp8,0,0.018629333625237148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,2,2,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,2,2,64,0,1,float16,float16,0,0.015781333049138386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,2,2,64,128,1,float16,fp8,0,0.01611199975013733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,2,2,64,128,1,fp8,fp8,0,0.022469334304332733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,2,2,64,0,1,float16,fp8,0,0.015493333339691162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,2,2,64,0,1,fp8,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,2,1,64,128,1,float16,float16,0,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,2,1,64,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,2,1,64,128,1,float16,fp8,0,0.016154666741689045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,2,1,64,128,1,fp8,fp8,0,0.02203733225663503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,2,1,64,0,1,float16,fp8,0,0.01586666703224182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,2,1,64,0,1,fp8,fp8,0,0.018794666975736618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,2,1,64,128,1,float16,float16,0,0.03618133316437403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,2,1,64,0,1,float16,float16,0,0.03604800005753835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,2,1,64,128,1,float16,fp8,0,0.03533333291610082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,2,1,64,128,1,fp8,fp8,0,0.04465599854787191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,2,1,64,0,1,float16,fp8,0,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,2,1,64,0,1,fp8,fp8,0,0.04562133550643921
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,2,2,64,128,1,float16,float16,0,0.026074667771657307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,2,2,64,0,1,float16,float16,0,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,2,2,64,128,1,float16,fp8,0,0.026186667382717133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,2,2,64,128,1,fp8,fp8,0,0.0305226668715477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,2,2,64,0,1,float16,fp8,0,0.026320000489552815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,2,2,64,0,1,fp8,fp8,0,0.030058667063713074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,2,1,64,128,1,float16,float16,0,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,2,1,64,0,1,float16,float16,0,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,2,1,64,128,1,float16,fp8,0,0.02499199906984965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,2,1,64,128,1,fp8,fp8,0,0.030181333422660828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,2,1,64,0,1,float16,fp8,0,0.02500266581773758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,2,1,64,0,1,fp8,fp8,0,0.02979733298222224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,2,2,64,128,1,float16,float16,0,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,2,2,64,0,1,float16,float16,0,0.019823999454577763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,2,2,64,128,1,float16,fp8,0,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,2,2,64,128,1,fp8,fp8,0,0.022474666436513264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,2,2,64,0,1,float16,fp8,0,0.01971199984351794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,2,2,64,0,1,fp8,fp8,0,0.0227360005180041
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,2,1,64,128,1,float16,float16,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,2,1,64,0,1,float16,float16,0,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,2,1,64,128,1,float16,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,2,1,64,128,1,fp8,fp8,0,0.021882665654023487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,2,1,64,0,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,2,1,64,0,1,fp8,fp8,0,0.022122666239738464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,2,2,64,128,1,float16,float16,0,0.015520000209410986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,2,2,64,0,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,2,2,64,128,1,float16,fp8,0,0.01626666635274887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,2,2,64,128,1,fp8,fp8,0,0.019989332805077236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,2,2,64,0,1,float16,fp8,0,0.015850666910409927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,2,2,64,0,1,fp8,fp8,0,0.019973333925008774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,2,1,64,128,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,2,1,64,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,2,1,64,128,1,float16,fp8,0,0.015802666544914246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,2,1,64,128,1,fp8,fp8,0,0.019776000330845516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,2,1,64,0,1,float16,fp8,0,0.016117333124081295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,2,1,64,0,1,fp8,fp8,0,0.019754666835069656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,2,2,64,128,1,float16,float16,0,0.01403733342885971
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,2,2,64,0,1,float16,float16,0,0.01440000037352244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,2,2,64,128,1,float16,fp8,0,0.014752000570297241
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,2,2,64,128,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,2,2,64,0,1,float16,fp8,0,0.014581333845853806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,2,2,64,0,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,2,1,64,128,1,float16,float16,0,0.014101333916187286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,2,1,64,0,1,float16,float16,0,0.014101333916187286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,2,2,64,0,1,float16,float16,0,0.013786666095256805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,2,1,64,128,1,float16,fp8,0,0.014554666976133982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,2,1,64,128,1,fp8,fp8,0,0.019381333142518997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,2,1,64,0,1,float16,fp8,0,0.014746667196353277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,2,1,64,0,1,fp8,fp8,0,0.01966933285196622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,2,2,64,128,1,float16,float16,0,0.022170667846997578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,2,2,64,128,1,float16,fp8,0,0.014010666559139887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,2,2,64,128,1,fp8,fp8,0,0.018373332917690277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,2,2,64,0,1,float16,fp8,0,0.013834666460752487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,2,2,64,0,1,fp8,fp8,0,0.018837332725524902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,2,1,64,128,1,float16,float16,0,0.013343999783198038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,2,1,64,0,1,float16,float16,0,0.013573333621025085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,2,1,64,128,1,float16,fp8,0,0.014453332871198654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,2,1,64,128,1,fp8,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,2,1,64,0,1,float16,fp8,0,0.014074667046467463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,2,1,64,0,1,fp8,fp8,0,0.01859733338157336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,2,2,64,128,1,float16,float16,0,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,2,2,64,0,1,float16,float16,0,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,2,2,64,128,1,float16,fp8,0,0.013658666362365087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,2,2,64,128,1,fp8,fp8,0,0.018058666338523228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,2,2,64,0,1,float16,fp8,0,0.013946666071812311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,2,2,64,0,1,fp8,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,2,2,64,128,1,float16,float16,0,0.013295999417702356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,2,1,64,128,1,float16,float16,0,0.013584000368913015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,2,1,64,0,1,float16,float16,0,0.013872000078360239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,2,1,64,128,1,float16,fp8,0,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,2,1,64,128,1,fp8,fp8,0,0.01878400022784869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,2,1,64,0,1,float16,fp8,0,0.013365333278973898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,2,1,64,0,1,fp8,fp8,0,0.01791999985774358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,2,2,64,0,1,float16,float16,0,0.013669333110253016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,2,2,64,128,1,float16,fp8,0,0.014138666292031607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,2,2,64,128,1,fp8,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,2,2,64,0,1,float16,fp8,0,0.01422400027513504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,2,2,64,0,1,fp8,fp8,0,0.01801066721479098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,2,1,64,128,1,float16,float16,0,0.013594667116800943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,2,1,64,0,1,float16,float16,0,0.012709333250919977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,2,1,64,128,1,float16,fp8,0,0.013658666362365087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,2,1,64,128,1,fp8,fp8,0,0.018735999862353008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,2,1,64,0,1,float16,fp8,0,0.014127999544143677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,2,1,64,0,1,fp8,fp8,0,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,2,2,64,0,1,float16,fp8,0,0.013754667093356451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,2,2,64,128,1,float16,float16,0,0.01328533391157786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,2,2,64,0,1,float16,float16,0,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,2,2,64,128,1,float16,fp8,0,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,2,2,64,128,1,fp8,fp8,0,0.017893332988023758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,2,2,64,0,1,fp8,fp8,0,0.018522666146357853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,2,1,64,128,1,float16,float16,0,0.013418667018413544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,2,1,64,0,1,float16,float16,0,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,2,1,64,128,1,float16,fp8,0,0.01368533323208491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,2,1,64,128,1,fp8,fp8,0,0.018383999665578205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,2,1,64,0,1,float16,fp8,0,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,2,1,64,0,1,fp8,fp8,0,0.018330667167901993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,2,1,64,128,1,float16,float16,0,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,2,1,64,0,1,float16,float16,0,0.02306666721900304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,2,1,64,128,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,2,1,64,128,1,fp8,fp8,0,0.03514133393764496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,2,1,64,0,1,float16,fp8,0,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,2,1,64,0,1,fp8,fp8,0,0.034927998979886375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,2,2,64,128,1,float16,float16,0,0.017946666727463405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,2,2,64,0,1,float16,float16,0,0.017808000246683758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,2,2,64,128,1,float16,fp8,0,0.01754133279124896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,2,2,64,128,1,fp8,fp8,0,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,2,2,64,0,1,float16,fp8,0,0.017690667261679966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,2,1,64,128,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,2,1,64,0,1,float16,fp8,0,0.017562666287024815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,2,2,64,0,1,fp8,fp8,0,0.025562666356563568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,2,1,64,0,1,float16,float16,0,0.017429333180189133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,2,1,64,128,1,float16,fp8,0,0.017642666896184284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,2,1,64,128,1,fp8,fp8,0,0.024634666740894318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,2,1,64,0,1,fp8,fp8,0,0.024357333779335022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,2,2,64,128,1,float16,float16,0,0.013631999492645264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,2,2,64,0,1,float16,float16,0,0.014261333892742792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,2,2,64,128,1,float16,fp8,0,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,2,2,64,128,1,fp8,fp8,0,0.019866666446129482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,2,2,64,0,1,float16,fp8,0,0.013807999591032663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,2,2,64,0,1,fp8,fp8,0,0.019823999454577763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,2,1,64,128,1,float16,float16,0,0.01393066719174385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,2,1,64,0,1,float16,float16,0,0.013749333719412485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,2,1,64,128,1,float16,fp8,0,0.013573333621025085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,2,1,64,128,1,fp8,fp8,0,0.020026666422684986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,2,1,64,0,1,float16,fp8,0,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,2,1,64,0,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,2,2,64,128,1,float16,float16,0,0.012410666793584824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,2,2,64,0,1,float16,float16,0,0.012426666915416718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,2,2,64,128,1,float16,fp8,0,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,2,2,64,128,1,fp8,fp8,0,0.018810667097568512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,2,2,64,0,1,float16,fp8,0,0.012346666306257248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,2,2,64,0,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,2,1,64,128,1,float16,float16,0,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,2,1,64,0,1,float16,float16,0,0.012432000289360682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,2,1,64,128,1,float16,fp8,0,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,2,1,64,128,1,fp8,fp8,0,0.018522666146357853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,2,1,64,0,1,float16,fp8,0,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,2,1,64,0,1,fp8,fp8,0,0.018351999421914417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,2,2,64,128,1,float16,float16,0,0.01173866664369901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,2,2,64,0,1,float16,float16,0,0.012362666428089142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,2,2,64,128,1,float16,fp8,0,0.012266666938861212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,2,2,64,128,1,fp8,fp8,0,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,2,2,64,0,1,float16,fp8,0,0.012213333199421564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,2,2,64,0,1,fp8,fp8,0,0.018229333062966663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,2,1,64,128,1,float16,float16,0,0.012229333321253458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,2,1,64,0,1,float16,float16,0,0.01219733307758967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,2,1,64,128,1,float16,fp8,0,0.012330666184425354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,2,1,64,128,1,fp8,fp8,0,0.01820266619324684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,2,1,64,0,1,float16,fp8,0,0.012400000045696894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,2,1,64,0,1,fp8,fp8,0,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,2,2,64,128,1,float16,float16,0,0.011312000453472137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,2,2,64,0,1,float16,float16,0,0.012037333101034164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,2,2,64,128,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,2,2,64,128,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,2,2,64,0,1,float16,fp8,0,0.01201066623131434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,2,1,64,0,1,float16,fp8,0,0.012272000312805176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,2,2,64,0,1,fp8,fp8,0,0.017957333475351334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,2,1,64,128,1,float16,float16,0,0.012309333930412928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,2,1,64,0,1,float16,float16,0,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,2,1,64,128,1,float16,fp8,0,0.011701333026091257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,2,1,64,128,1,fp8,fp8,0,0.017786666750907898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,2,1,64,0,1,fp8,fp8,0,0.017423999806245167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,2,2,64,128,1,float16,float16,0,0.011301333705584208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,2,2,64,0,1,float16,float16,0,0.011685332904259363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,2,2,64,128,1,float16,fp8,0,0.01210133358836174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,2,2,64,128,1,fp8,fp8,0,0.017914666483799618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,2,2,64,0,1,float16,fp8,0,0.012085333466529846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,2,2,64,0,1,fp8,fp8,0,0.016714667280515034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,2,1,64,128,1,float16,float16,0,0.012069333344697952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,2,1,64,0,1,float16,float16,0,0.012117333710193634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,2,1,64,128,1,float16,fp8,0,0.011616000284751257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,2,1,64,128,1,fp8,fp8,0,0.01770666614174843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,2,1,64,0,1,float16,fp8,0,0.012693333129088083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,2,1,64,0,1,fp8,fp8,0,0.018207999567190807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,2,2,64,128,1,float16,float16,0,0.0116799995303154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,2,2,64,0,1,float16,float16,0,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,2,2,64,128,1,float16,fp8,0,0.012005332857370377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,2,2,64,128,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,2,2,64,0,1,float16,fp8,0,0.011301333705584208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,2,2,64,0,1,fp8,fp8,0,0.017504000415404636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,2,1,64,128,1,float16,float16,0,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,2,1,64,0,1,float16,float16,0,0.012159999459981918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,2,1,64,128,1,float16,fp8,0,0.011968000481526056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,2,1,64,128,1,fp8,fp8,0,0.01659199967980385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,2,1,64,0,1,float16,fp8,0,0.01227733368674914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,2,1,64,0,1,fp8,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,2,2,64,128,1,float16,float16,0,0.011594666788975397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,2,2,64,0,1,float16,float16,0,0.011594666788975397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,2,2,64,128,1,float16,fp8,0,0.012037333101034164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,2,2,64,128,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,2,2,64,0,1,float16,fp8,0,0.01157333329319954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,2,2,64,0,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,2,1,64,128,1,float16,float16,0,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,2,1,64,0,1,float16,float16,0,0.011855999628702799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,2,1,64,128,1,float16,fp8,0,0.012266666938861212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,2,1,64,128,1,fp8,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,2,1,64,0,1,float16,fp8,0,0.012026666353146235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,2,1,64,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,2,1,64,128,1,float16,float16,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,2,1,64,0,1,float16,float16,0,0.016415999581416447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,2,1,64,128,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,2,1,64,128,1,fp8,fp8,0,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,2,2,64,128,1,float16,fp8,0,0.013978666315476099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,2,1,64,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,2,2,64,128,1,float16,float16,0,0.013381333400805792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,2,1,64,0,1,fp8,fp8,0,0.03136000037193298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,2,2,64,0,1,float16,float16,0,0.014080000420411428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,2,2,64,128,1,fp8,fp8,0,0.023168000082174938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,2,2,64,0,1,float16,fp8,0,0.01422400027513504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,2,1,64,128,1,float16,float16,0,0.013610667238632837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,2,2,64,0,1,fp8,fp8,0,0.02295999974012375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,2,1,64,0,1,float16,float16,0,0.013973332941532135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,2,1,64,128,1,float16,fp8,0,0.013738666971524557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,2,2,64,128,1,float16,fp8,0,0.012416000167528788
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,2,1,64,128,1,fp8,fp8,0,0.022944000860055287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,2,1,64,0,1,float16,fp8,0,0.014277332772811254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,2,1,64,0,1,fp8,fp8,0,0.02274133265018463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,2,2,64,128,1,float16,float16,0,0.012479999413092932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,2,1,64,0,1,float16,float16,0,0.01219733307758967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,2,2,64,0,1,float16,float16,0,0.011952000359694162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,2,2,64,128,1,fp8,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,2,2,64,0,1,float16,fp8,0,0.012362666428089142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,2,2,64,0,1,fp8,fp8,0,0.0183999997874101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,2,1,64,128,1,float16,float16,0,0.012442667037248611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,2,1,64,128,1,float16,fp8,0,0.012629333883523941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,2,1,64,128,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,2,1,64,0,1,float16,fp8,0,0.012666666259368261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,2,1,64,0,1,fp8,fp8,0,0.017973333597183228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,2,2,64,128,1,float16,float16,0,0.01192533348997434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,2,2,64,0,1,float16,float16,0,0.011434666812419891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,2,2,64,128,1,float16,fp8,0,0.011450666934251785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,2,2,64,128,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,2,2,64,0,1,float16,fp8,0,0.012085333466529846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,2,2,64,0,1,fp8,fp8,0,0.018618666877349217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,2,1,64,0,1,fp8,fp8,0,0.01777600000301997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,2,1,64,128,1,float16,float16,0,0.011839999506870905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,2,1,64,0,1,float16,float16,0,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,2,1,64,128,1,float16,fp8,0,0.012362666428089142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,2,1,64,128,1,fp8,fp8,0,0.017680000513792038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,2,1,64,0,1,float16,fp8,0,0.011786667009194693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,2,2,64,128,1,float16,float16,0,0.011658667276302973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,2,2,64,0,1,float16,float16,0,0.011802667131026586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,2,2,64,128,1,float16,fp8,0,0.011557333171367645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,2,2,64,128,1,fp8,fp8,0,0.018277333428462345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,2,2,64,0,1,float16,fp8,0,0.011237333218256632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,2,2,64,0,1,fp8,fp8,0,0.018005333840847015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,2,1,64,128,1,float16,float16,0,0.011402666568756104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,2,1,64,0,1,float16,float16,0,0.011381333072980246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,2,1,64,128,1,float16,fp8,0,0.01198400060335795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,2,1,64,128,1,fp8,fp8,0,0.01838933303952217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,2,1,64,0,1,float16,fp8,0,0.011749333391586939
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,2,1,64,0,1,fp8,fp8,0,0.018272000054518383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,2,2,64,128,1,float16,float16,0,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,2,2,64,0,1,float16,float16,0,0.011312000453472137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,2,2,64,128,1,float16,fp8,0,0.01121066634853681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,2,2,64,128,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,2,2,64,0,1,float16,fp8,0,0.011498666057984034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,2,2,64,0,1,fp8,fp8,0,0.017680000513792038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,2,1,64,128,1,float16,float16,0,0.01121066634853681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,2,1,64,0,1,float16,float16,0,0.011578666667143503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,2,1,64,128,1,float16,fp8,0,0.011407999942700068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,2,1,64,128,1,fp8,fp8,0,0.017935999979575474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,2,1,64,0,1,float16,fp8,0,0.011205332974592844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,2,1,64,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,2,2,64,128,1,float16,float16,0,0.011237333218256632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,2,2,64,0,1,float16,float16,0,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,2,2,64,128,1,float16,fp8,0,0.011472000430027643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,2,2,64,128,1,fp8,fp8,0,0.017514667163292568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,2,2,64,0,1,float16,fp8,0,0.011258666714032492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,2,2,64,0,1,fp8,fp8,0,0.018005333840847015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,2,1,64,128,1,float16,float16,0,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,2,1,64,0,1,float16,float16,0,0.010543999572594961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,2,1,64,128,1,float16,fp8,0,0.011989332735538483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,2,1,64,128,1,fp8,fp8,0,0.017498667041460674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,2,1,64,0,1,float16,fp8,0,0.011829332758982977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,2,1,64,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,2,2,64,128,1,float16,float16,0,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,2,2,64,0,1,float16,float16,0,0.011402666568756104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,2,2,64,128,1,float16,fp8,0,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,2,2,64,128,1,fp8,fp8,0,0.016271999726692837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,2,2,64,0,1,float16,fp8,0,0.011626667032639185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,2,2,64,0,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,2,1,64,128,1,float16,float16,0,0.011306667079528173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,2,1,64,0,1,float16,float16,0,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,2,1,64,128,1,float16,fp8,0,0.011365332951148352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,2,1,64,128,1,fp8,fp8,0,0.01670933390657107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,2,1,64,0,1,float16,fp8,0,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,2,1,64,0,1,fp8,fp8,0,0.01757866640885671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,2,2,64,128,1,float16,float16,0,0.011301333705584208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,2,2,64,0,1,float16,float16,0,0.011215999722480774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,2,2,64,128,1,float16,fp8,0,0.01129066695769628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,2,2,64,128,1,fp8,fp8,0,0.01670933390657107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,2,2,64,0,1,float16,fp8,0,0.011221333096424738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,2,2,64,0,1,fp8,fp8,0,0.016757333030303318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,2,1,64,128,1,float16,float16,0,0.011205332974592844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,2,1,64,0,1,float16,float16,0,0.011760000139474869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,2,1,64,128,1,float16,fp8,0,0.011541333049535751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,2,1,64,128,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,2,1,64,0,1,float16,fp8,0,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,2,1,64,0,1,fp8,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,2,1,64,128,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,2,1,64,0,1,float16,float16,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,2,1,64,128,1,float16,fp8,0,0.015637333194414776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,2,1,64,128,1,fp8,fp8,0,0.029487999776999157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,2,1,64,0,1,float16,fp8,0,0.015397333850463232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,2,1,64,0,1,fp8,fp8,0,0.028751999139785767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,2,2,64,128,1,float16,float16,0,0.012426666915416718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,2,2,64,0,1,float16,float16,0,0.01249066616098086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,2,2,64,128,1,float16,fp8,0,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,2,2,64,128,1,fp8,fp8,0,0.02242133269707362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,2,2,64,0,1,float16,fp8,0,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,2,2,64,0,1,fp8,fp8,0,0.02236266682545344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,2,1,64,128,1,float16,float16,0,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,2,1,64,0,1,float16,float16,0,0.012597333639860153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,2,1,64,128,1,float16,fp8,0,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,2,1,64,128,1,fp8,fp8,0,0.0220266655087471
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,2,1,64,0,1,float16,fp8,0,0.01358933374285698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,2,1,64,0,1,fp8,fp8,0,0.02214933435122172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,2,2,64,128,1,float16,float16,0,0.011930666863918304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,2,2,64,0,1,float16,float16,0,0.011957333733638128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,2,2,64,128,1,float16,fp8,0,0.011519999553759893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,2,2,64,128,1,fp8,fp8,0,0.01788266624013583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,2,2,64,0,1,float16,fp8,0,0.01181866725285848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,2,2,64,0,1,fp8,fp8,0,0.018266666680574417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,2,1,64,128,1,float16,float16,0,0.012080000092585882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,2,1,64,0,1,float16,float16,0,0.012250666817029318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,2,1,64,128,1,float16,fp8,0,0.012272000312805176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,2,1,64,128,1,fp8,fp8,0,0.01842133328318596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,2,1,64,0,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,2,1,64,0,1,fp8,fp8,0,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,2,2,64,128,1,float16,float16,0,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,2,2,64,0,1,float16,float16,0,0.011600000162919363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,2,2,64,128,1,float16,fp8,0,0.011685332904259363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,2,2,64,128,1,fp8,fp8,0,0.0183999997874101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,2,2,64,0,1,float16,fp8,0,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,2,2,64,0,1,fp8,fp8,0,0.018042666216691334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,2,1,64,128,1,float16,float16,0,0.01139733319481214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,2,1,64,0,1,float16,float16,0,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,2,1,64,128,1,float16,fp8,0,0.011584000041087469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,2,1,64,128,1,fp8,fp8,0,0.017866666118303936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,2,1,64,0,1,float16,fp8,0,0.01190399999419848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,2,1,64,0,1,fp8,fp8,0,0.01800000046690305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,2,2,64,128,1,float16,float16,0,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,2,2,64,0,1,float16,float16,0,0.011370666325092316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,2,1,64,0,1,float16,float16,0,0.01192533348997434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,2,2,64,128,1,float16,fp8,0,0.011616000284751257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,2,2,64,128,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,2,2,64,0,1,float16,fp8,0,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,2,1,64,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,2,2,64,0,1,fp8,fp8,0,0.017914666483799618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,2,1,64,128,1,float16,float16,0,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,2,1,64,128,1,float16,fp8,0,0.011706666400035223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,2,1,64,128,1,fp8,fp8,0,0.018021332720915478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,2,1,64,0,1,float16,fp8,0,0.012085333466529846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,2,2,64,128,1,float16,float16,0,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,2,2,64,0,1,float16,float16,0,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,2,2,64,128,1,float16,fp8,0,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,2,2,64,128,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,2,2,64,0,1,float16,fp8,0,0.011802667131026586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,2,2,64,0,1,fp8,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,2,1,64,128,1,float16,float16,0,0.011589333415031433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,2,1,64,0,1,float16,float16,0,0.011450666934251785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,2,1,64,128,1,float16,fp8,0,0.011407999942700068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,2,1,64,128,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,2,1,64,0,1,float16,fp8,0,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,2,1,64,0,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,2,2,64,128,1,float16,float16,0,0.01179733375708262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,2,2,64,0,1,float16,float16,0,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,2,2,64,128,1,float16,fp8,0,0.011760000139474869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,2,2,64,128,1,fp8,fp8,0,0.01754666616519292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,2,2,64,0,1,float16,fp8,0,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,2,2,64,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,2,1,64,128,1,float16,float16,0,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,2,1,64,0,1,float16,float16,0,0.011312000453472137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,2,1,64,128,1,float16,fp8,0,0.01231466606259346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,2,1,64,128,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,2,1,64,0,1,float16,fp8,0,0.01192533348997434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,2,1,64,0,1,fp8,fp8,0,0.01794133335351944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,2,2,64,128,1,float16,float16,0,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,2,2,64,0,1,float16,float16,0,0.01055466632048289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,2,2,64,128,1,float16,fp8,0,0.011365332951148352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,2,2,64,128,1,fp8,fp8,0,0.017525333911180496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,2,2,64,0,1,float16,fp8,0,0.011802667131026586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,2,2,64,0,1,fp8,fp8,0,0.017466666797796886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,2,1,64,128,1,float16,float16,0,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,2,1,64,0,1,float16,float16,0,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,2,1,64,128,1,float16,fp8,0,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,2,1,64,128,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,2,1,64,0,1,float16,fp8,0,0.01181866725285848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,2,1,64,0,1,fp8,fp8,0,0.017781333376963932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,2,2,64,128,1,float16,float16,0,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,2,2,64,0,1,float16,float16,0,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,2,2,64,128,1,float16,fp8,0,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,2,2,64,128,1,fp8,fp8,0,0.01670933390657107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,2,2,64,0,1,float16,fp8,0,0.011584000041087469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,2,2,64,0,1,fp8,fp8,0,0.017557332913080852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,2,1,64,128,1,float16,float16,0,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,2,1,64,0,1,float16,float16,0,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,2,1,64,128,1,float16,fp8,0,0.011514666179815928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,2,1,64,128,1,fp8,fp8,0,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,2,1,64,0,1,float16,fp8,0,0.01128000020980835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,2,1,64,0,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,1,1,64,128,1,float16,float16,0,0.07234666744867961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,1,1,64,0,1,float16,float16,0,0.3238346576690674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,1,1,64,128,1,float16,fp8,0,0.07347733279069264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,1,1,64,128,1,fp8,fp8,0,0.10019200046857198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,1,1,64,0,1,float16,fp8,0,0.3222399950027466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,1,1,64,0,1,fp8,fp8,0,0.3022186756134033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,1,1,64,128,1,float16,float16,0,0.047151997685432434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,1,1,64,0,1,float16,float16,0,0.17125866810480753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,1,1,64,128,1,float16,fp8,0,0.04780800143877665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,1,1,64,128,1,fp8,fp8,0,0.05889600018660227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,1,1,64,0,1,float16,fp8,0,0.17072000106175741
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,1,1,64,0,1,fp8,fp8,0,0.15970666209856668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,1,1,64,128,1,float16,float16,0,0.03619733452796936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,1,1,64,0,1,float16,float16,0,0.16640533010164896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,1,1,64,128,1,float16,fp8,0,0.0360959991812706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,1,1,64,128,1,fp8,fp8,0,0.042463997999827065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,1,1,64,0,1,fp8,fp8,0,0.15758933623631796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,1,1,64,0,1,float16,fp8,0,0.16673600673675537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,1,1,64,128,1,float16,float16,0,0.05840000013510386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,1,1,64,0,1,float16,float16,0,0.20713067054748535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,1,1,64,128,1,float16,fp8,0,0.05905066430568695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,1,1,64,128,1,fp8,fp8,0,0.08030933141708374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,1,1,64,0,1,float16,fp8,0,0.20801599820454916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,1,1,64,0,1,fp8,fp8,0,0.19474667310714722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,1,1,64,128,1,float16,float16,0,0.04013866682847341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,1,1,64,128,1,float16,float16,0,0.03283733377854029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,1,1,64,0,1,float16,float16,0,0.1307253340880076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,1,1,64,128,1,float16,fp8,0,0.04324266811211904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,1,1,64,128,1,fp8,fp8,0,0.04557866851488749
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,1,1,64,0,1,float16,fp8,0,0.13129599889119467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,1,1,64,0,1,fp8,fp8,0,0.12344533205032349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,1,1,64,0,1,float16,float16,0,0.13061867157618204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,1,1,64,128,1,float16,fp8,0,0.03342399994532267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,1,1,64,128,1,fp8,fp8,0,0.039077334105968475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,1,1,64,0,1,float16,fp8,0,0.12867200374603271
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,1,1,64,0,1,fp8,fp8,0,0.12397332986195882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,1,1,64,128,1,float16,float16,0,0.05023466547330221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,1,1,64,0,1,float16,float16,0,0.15121600031852722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,1,1,64,128,1,float16,fp8,0,0.052154665191968284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,1,1,64,128,1,fp8,fp8,0,0.06452266871929169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,1,1,64,0,1,float16,fp8,0,0.15366933743158975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,1,1,64,0,1,fp8,fp8,0,0.14336533347765604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,1,1,64,128,1,float16,float16,0,0.040933333337306976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,1,1,64,0,1,float16,float16,0,0.11191466450691223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,1,1,64,128,1,float16,fp8,0,0.03756800045569738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,1,1,64,128,1,fp8,fp8,0,0.04365866879622141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,1,1,64,0,1,float16,fp8,0,0.11217066645622253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,1,1,64,0,1,fp8,fp8,0,0.10713066657384236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,1,1,64,128,1,float16,float16,0,0.03165333221356074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,1,1,64,0,1,float16,float16,0,0.1106719970703125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,1,1,64,128,1,float16,fp8,0,0.03161066770553589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,1,1,64,128,1,fp8,fp8,0,0.037818667789300285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,1,1,64,0,1,float16,fp8,0,0.11002133289972942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,1,1,64,0,1,fp8,fp8,0,0.10654933253924052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,1,1,64,128,1,float16,float16,0,0.07170133292675018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,1,1,64,0,1,float16,float16,0,0.18786134322484335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,1,1,64,128,1,float16,fp8,0,0.07259200016657512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,1,1,64,128,1,fp8,fp8,0,0.09930666287740071
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,1,1,64,0,1,float16,fp8,0,0.18740799029668173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,1,1,64,128,1,fp8,fp8,0,0.05756799876689911
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,1,1,64,0,1,fp8,fp8,0,0.17740267515182495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,1,1,64,128,1,float16,float16,0,0.04422933359940847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,1,1,64,0,1,float16,float16,0,0.10003200173377991
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,1,1,64,128,1,float16,fp8,0,0.04539733131726583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,1,1,64,0,1,float16,fp8,0,0.1016426682472229
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,1,1,64,0,1,fp8,fp8,0,0.09709333380063374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,1,1,64,128,1,float16,float16,0,0.03070399910211563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,1,1,64,0,1,float16,float16,0,0.09353599945704143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,1,1,64,128,1,float16,fp8,0,0.0315786674618721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,1,1,64,128,1,fp8,fp8,0,0.03660800059636434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,1,1,64,0,1,float16,fp8,0,0.09332799911499023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,1,1,64,0,1,fp8,fp8,0,0.08919466535250346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,1,1,64,128,1,float16,float16,0,0.02977066735426585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,1,1,64,0,1,float16,float16,0,0.12221866846084595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,1,1,64,0,1,float16,float16,0,0.09176533420880635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,1,1,64,128,1,float16,fp8,0,0.029765332738558452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,1,1,64,128,1,fp8,fp8,0,0.03629333277543386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,1,1,64,0,1,float16,fp8,0,0.09150399764378865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,1,1,64,0,1,fp8,fp8,0,0.08800533413887024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,1,1,64,128,1,float16,float16,0,0.055919999877611794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,1,1,64,128,1,float16,fp8,0,0.05593599875768026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,1,1,64,128,1,fp8,fp8,0,0.0792906681696574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,1,1,64,0,1,float16,fp8,0,0.12463999787966411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,1,1,64,0,1,fp8,fp8,0,0.11941867073376973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,1,1,64,128,1,float16,float16,0,0.03774933268626531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,1,1,64,0,1,float16,float16,0,0.07689066727956136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,1,1,64,128,1,float16,fp8,0,0.038848000268141426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,1,1,64,128,1,fp8,fp8,0,0.045168002446492515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,1,1,64,0,1,float16,fp8,0,0.07746666669845581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,1,1,64,0,1,fp8,fp8,0,0.07658133407433827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,1,1,64,128,1,float16,float16,0,0.02941333254178365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,1,1,64,0,1,float16,float16,0,0.07439466814200084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,1,1,64,128,1,float16,fp8,0,0.02902399996916453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,1,1,64,128,1,fp8,fp8,0,0.03585600107908249
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,1,1,64,0,1,float16,fp8,0,0.07462933162848155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,1,1,64,0,1,fp8,fp8,0,0.07218666871388753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,1,1,64,128,1,float16,float16,0,0.028277332584063213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,1,1,64,0,1,float16,float16,0,0.07333866755167644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,1,1,64,128,1,float16,fp8,0,0.028757333755493164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,1,1,64,128,1,float16,float16,0,0.07115733126799266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,1,1,64,128,1,fp8,fp8,0,0.03429333368937174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,1,1,64,0,1,float16,fp8,0,0.07373333474000295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,1,1,64,0,1,fp8,fp8,0,0.0718453327814738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,1,1,64,0,1,float16,float16,0,0.11893332997957866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,1,1,64,128,1,float16,fp8,0,0.07175466914971669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,1,1,64,128,1,fp8,fp8,0,0.09177600344022115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,1,1,64,0,1,float16,fp8,0,0.11967466274897258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,1,1,64,0,1,fp8,fp8,0,0.11798933148384094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,1,1,64,128,1,float16,float16,0,0.04422399898370107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,1,1,64,0,1,float16,float16,0,0.06751999755700429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,1,1,64,128,1,float16,fp8,0,0.0450186679760615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,1,1,64,0,1,float16,float16,0,0.05704533557097117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,1,1,64,128,1,fp8,fp8,0,0.05715733269850413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,1,1,64,0,1,float16,fp8,0,0.0683840016523997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,1,1,64,0,1,fp8,fp8,0,0.06533866624037425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,1,1,64,128,1,float16,float16,0,0.02957333376010259
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,1,1,64,128,1,float16,fp8,0,0.029968000948429108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,1,1,64,128,1,fp8,fp8,0,0.03646933287382126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,1,1,64,0,1,float16,fp8,0,0.057018667459487915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,1,1,64,0,1,fp8,fp8,0,0.056847999493281044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,1,1,64,128,1,float16,float16,0,0.02796799937884013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,1,1,64,0,1,float16,float16,0,0.055674667159716286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,1,1,64,128,1,float16,fp8,0,0.027503999571005504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,1,1,64,128,1,fp8,fp8,0,0.03402133285999298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,1,1,64,0,1,float16,fp8,0,0.0552106648683548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,1,1,64,0,1,fp8,fp8,0,0.05442666510740916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,1,1,64,128,1,float16,float16,0,0.026709333062171936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,1,1,64,0,1,float16,float16,0,0.05554133156935374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,1,1,64,128,1,float16,fp8,0,0.027232001225153606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,1,1,64,128,1,fp8,fp8,0,0.03321066747109095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,1,1,64,0,1,float16,fp8,0,0.05490666627883911
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,1,1,64,0,1,fp8,fp8,0,0.05292266607284546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,1,1,64,128,1,float16,float16,0,0.055439998706181846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,1,1,64,0,1,float16,float16,0,0.08221333225568135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,1,1,64,128,1,float16,fp8,0,0.05611200133959452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,1,1,64,128,1,fp8,fp8,0,0.07269333302974701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,1,1,64,0,1,float16,fp8,0,0.08125333487987518
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,1,1,64,128,1,float16,float16,0,0.03758399933576584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,1,1,64,0,1,fp8,fp8,0,0.07962666451931
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,1,1,64,0,1,float16,float16,0,0.05148266752560934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,1,1,64,128,1,float16,fp8,0,0.03921599934498469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,1,1,64,128,1,fp8,fp8,0,0.044400001565615334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,1,1,64,0,1,float16,fp8,0,0.052501335740089417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,1,1,64,0,1,fp8,fp8,0,0.0510506679614385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,1,1,64,128,1,float16,float16,0,0.02886933336655299
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,1,1,64,0,1,float16,float16,0,0.0476693312327067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,1,1,64,128,1,float16,fp8,0,0.028031999866167705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,1,1,64,128,1,fp8,fp8,0,0.03517866631348928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,1,1,64,128,1,fp8,fp8,0,0.03305066625277201
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,1,1,64,0,1,float16,fp8,0,0.04731733103593191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,1,1,64,0,1,fp8,fp8,0,0.04900266726811727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,1,1,64,128,1,float16,float16,0,0.026543999711672466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,1,1,64,0,1,float16,float16,0,0.045738667249679565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,1,1,64,128,1,float16,fp8,0,0.027029333015282948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,1,1,64,0,1,float16,fp8,0,0.04543466866016388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,1,1,64,0,1,fp8,fp8,0,0.0452106644709905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,1,1,64,128,1,float16,float16,0,0.026159999271233875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,1,1,64,0,1,float16,float16,0,0.045237332582473755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,1,1,64,128,1,float16,fp8,0,0.026127999027570088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,1,1,64,128,1,fp8,fp8,0,0.03219199925661087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,1,1,64,0,1,float16,fp8,0,0.04539200166861216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,1,1,64,0,1,fp8,fp8,0,0.04414933423201243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,1,1,64,128,1,float16,float16,0,0.0703359991312027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,1,1,64,0,1,float16,float16,0,0.08560533324877422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,1,1,64,128,1,float16,fp8,0,0.0709386666615804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,1,1,64,128,1,fp8,fp8,0,0.09764800469080608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,1,1,64,0,1,float16,fp8,0,0.08668266733487447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,1,1,64,0,1,fp8,fp8,0,0.08513066172599792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,1,1,64,128,1,float16,float16,0,0.0447573314110438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,1,1,64,0,1,float16,float16,0,0.05106133222579956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,1,1,64,128,1,float16,fp8,0,0.04472533365090688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,1,1,64,128,1,fp8,fp8,0,0.05685333410898844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,1,1,64,0,1,float16,fp8,0,0.05141866703828176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,1,1,64,0,1,fp8,fp8,0,0.04981866478919983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,1,1,64,128,1,float16,float16,0,0.029653333127498627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,1,1,64,0,1,float16,float16,0,0.04009599983692169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,1,1,64,128,1,float16,fp8,0,0.029882666965325672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,1,1,64,128,1,fp8,fp8,0,0.03626666714747747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,1,1,64,0,1,float16,fp8,0,0.039605334401130676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,1,1,64,0,1,fp8,fp8,0,0.03972800076007843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,1,1,64,128,1,float16,float16,0,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,1,1,64,0,1,float16,float16,0,0.037802666425704956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,1,1,64,128,1,float16,fp8,0,0.0283146674434344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,1,1,64,128,1,fp8,fp8,0,0.033728001018365227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,1,1,64,0,1,float16,fp8,0,0.03770666569471359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,1,1,64,0,1,fp8,fp8,0,0.03697066754102707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,1,1,64,128,1,float16,float16,0,0.025781333446502686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,1,1,64,0,1,float16,float16,0,0.03667200108369192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,1,1,64,128,1,float16,fp8,0,0.026288000245889027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,1,1,64,128,1,fp8,fp8,0,0.032298666735490165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,1,1,64,0,1,float16,fp8,0,0.03669333209594091
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,1,1,64,0,1,fp8,fp8,0,0.03522133330504099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,1,1,64,128,1,float16,float16,0,0.025221332907676697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,1,1,64,0,1,float16,float16,0,0.03643733263015747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,1,1,64,128,1,float16,fp8,0,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,1,1,64,128,1,fp8,fp8,0,0.03186133255561193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,1,1,64,0,1,float16,fp8,0,0.035786665976047516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,1,1,64,0,1,fp8,fp8,0,0.03457066665093104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,1,1,64,128,1,float16,float16,0,0.056101332108179726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,1,1,64,0,1,fp8,fp8,0,0.0631573349237442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,1,1,64,0,1,float16,float16,0,0.06301333506902058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,1,1,64,128,1,float16,fp8,0,0.057461331288019814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,1,1,64,128,1,fp8,fp8,0,0.07323200007279713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,1,1,64,0,1,float16,fp8,0,0.06391466657320659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,1,1,64,128,1,float16,float16,0,0.03812800099452337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,1,1,64,0,1,float16,float16,0,0.0402399996916453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,1,1,64,128,1,float16,fp8,0,0.03882133215665817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,1,1,64,128,1,fp8,fp8,0,0.045082668463389076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,1,1,64,0,1,float16,fp8,0,0.041536000867684685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,1,1,64,0,1,fp8,fp8,0,0.039247999588648476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,1,1,64,128,1,float16,float16,0,0.02842666705449422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,1,1,64,0,1,float16,float16,0,0.034815999368826546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,1,1,64,128,1,float16,fp8,0,0.028880000114440918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,1,1,64,128,1,fp8,fp8,0,0.03438399980465571
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,1,1,64,0,1,float16,fp8,0,0.03437866767247518
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,1,1,64,0,1,fp8,fp8,0,0.03375466664632162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,1,1,64,128,1,float16,float16,0,0.02661866694688797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,1,1,64,0,1,float16,float16,0,0.033376000821590424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,1,1,64,128,1,float16,fp8,0,0.0264533335963885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,1,1,64,128,1,fp8,fp8,0,0.03319466610749563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,1,1,64,0,1,float16,fp8,0,0.033146666983763375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,1,1,64,0,1,fp8,fp8,0,0.03214933226505915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,1,1,64,128,1,float16,float16,0,0.025114665428797405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,1,1,64,0,1,fp8,fp8,0,0.030821333328882854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,1,1,64,0,1,float16,float16,0,0.03201066702604294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,1,1,64,128,1,float16,fp8,0,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,1,1,64,128,1,fp8,fp8,0,0.03169066707293192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,1,1,64,0,1,float16,fp8,0,0.03219199925661087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,1,1,64,128,1,float16,float16,0,0.024613333245118458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,1,1,64,0,1,float16,float16,0,0.031471999982992806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,1,1,64,128,1,float16,fp8,0,0.024933333198229473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,1,1,64,128,1,fp8,fp8,0,0.031146667897701263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,1,1,64,0,1,float16,fp8,0,0.031167998909950256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,1,1,64,128,1,fp8,fp8,0,0.0860053300857544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,1,1,64,0,1,fp8,fp8,0,0.030016000072161358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,1,1,64,128,1,float16,float16,0,0.06277333199977875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,1,1,64,0,1,float16,float16,0,0.06445866823196411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,1,1,64,128,1,float16,fp8,0,0.06274666885534923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,1,1,64,0,1,float16,fp8,0,0.06338666876157124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,1,1,64,0,1,fp8,fp8,0,0.06146133442719778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,1,1,64,128,1,float16,float16,0,0.04163199911514918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,1,1,64,0,1,float16,float16,0,0.040634666879971824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,1,1,64,128,1,float16,fp8,0,0.04205333193143209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,1,1,64,128,1,fp8,fp8,0,0.05226666728655497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,1,1,64,0,1,float16,fp8,0,0.04035733391841253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,1,1,64,0,1,fp8,fp8,0,0.03748800108830134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,1,1,64,128,1,float16,float16,0,0.02810666710138321
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,1,1,64,0,1,float16,float16,0,0.03013866643110911
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,1,1,64,128,1,float16,fp8,0,0.028330666323502857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,1,1,64,128,1,fp8,fp8,0,0.03485333422819773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,1,1,64,0,1,float16,fp8,0,0.030623999734719593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,1,1,64,0,1,fp8,fp8,0,0.029701332251230877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,1,1,64,128,1,float16,float16,0,0.02492800106604894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,1,1,64,0,1,float16,float16,0,0.027893332143624622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,1,1,64,128,1,float16,fp8,0,0.02605866640806198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,1,1,64,128,1,fp8,fp8,0,0.03172266731659571
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,1,1,64,0,1,float16,fp8,0,0.028378665447235107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,1,1,64,0,1,fp8,fp8,0,0.027232001225153606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,1,1,64,128,1,float16,float16,0,0.024442667762438457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,1,1,64,0,1,float16,float16,0,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,1,1,64,128,1,float16,fp8,0,0.02458133300145467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,1,1,64,128,1,fp8,fp8,0,0.030810666580994923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,1,1,64,0,1,float16,fp8,0,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,1,1,64,0,1,fp8,fp8,0,0.025722667574882507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,1,1,64,128,1,float16,float16,0,0.024010665714740753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,1,1,64,0,1,float16,float16,0,0.02613866577545802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,1,1,64,128,1,float16,fp8,0,0.024218666056791942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,1,1,64,128,1,fp8,fp8,0,0.03070933371782303
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,1,1,64,0,1,float16,fp8,0,0.026533332963784535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,1,1,64,0,1,fp8,fp8,0,0.02526933451493581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,1,1,64,128,1,float16,float16,0,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,1,1,64,0,1,float16,float16,0,0.026159999271233875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,1,1,64,128,1,float16,fp8,0,0.024373332659403484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,1,1,64,128,1,fp8,fp8,0,0.030479999879995983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,1,1,64,0,1,float16,fp8,0,0.02659733345111211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,1,1,64,0,1,fp8,fp8,0,0.025290665527184803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,1,1,64,128,1,float16,float16,0,0.05993066728115082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,1,1,64,0,1,float16,float16,0,0.056554665168126426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,1,1,64,128,1,float16,fp8,0,0.06057600180308024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,1,1,64,128,1,fp8,fp8,0,0.08210666477680206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,1,1,64,0,1,float16,fp8,0,0.05630399783452352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,1,1,64,0,1,fp8,fp8,0,0.052602668603261314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,1,1,64,128,1,float16,float16,0,0.04121066629886627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,1,1,64,0,1,float16,float16,0,0.03585066646337509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,1,1,64,128,1,float16,fp8,0,0.04171200096607208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,1,1,64,128,1,fp8,fp8,0,0.05170666674772898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,1,1,64,128,1,float16,fp8,0,0.028698667883872986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,1,1,64,0,1,float16,fp8,0,0.036144000788529716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,1,1,64,0,1,fp8,fp8,0,0.03276800115903219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,1,1,64,128,1,float16,float16,0,0.028149334092934925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,1,1,64,0,1,float16,float16,0,0.0264533335963885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,1,1,64,0,1,float16,float16,0,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,1,1,64,128,1,float16,fp8,0,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,1,1,64,128,1,fp8,fp8,0,0.033999999364217125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,1,1,64,0,1,float16,fp8,0,0.026816000541051228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,1,1,64,0,1,fp8,fp8,0,0.02516799916823705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,1,1,64,128,1,float16,float16,0,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,1,1,64,128,1,fp8,fp8,0,0.03236799935499827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,1,1,64,0,1,float16,fp8,0,0.0236160010099411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,1,1,64,0,1,fp8,fp8,0,0.022554665803909302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,1,1,64,128,1,float16,float16,0,0.0239680012067159
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,1,1,64,0,1,float16,float16,0,0.022474666436513264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,1,1,64,128,1,float16,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,1,1,64,128,1,fp8,fp8,0,0.030634666482607525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,1,1,64,0,1,float16,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,1,1,64,0,1,fp8,fp8,0,0.021525333325068157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,1,1,64,128,1,float16,float16,0,0.023813332120577495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,1,1,64,0,1,float16,float16,0,0.0220320001244545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,1,1,64,128,1,float16,fp8,0,0.02420266717672348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,1,1,64,128,1,fp8,fp8,0,0.03038399914900462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,1,1,64,0,1,float16,fp8,0,0.021727999051411945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,1,1,64,0,1,fp8,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,1,1,64,128,1,float16,float16,0,0.023445333043734234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,1,1,64,0,1,float16,float16,0,0.021781332790851593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,1,1,64,128,1,float16,fp8,0,0.02585600068171819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,1,1,64,128,1,fp8,fp8,0,0.029658667743206024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,1,1,64,0,1,float16,fp8,0,0.021941334009170532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,1,1,64,0,1,fp8,fp8,0,0.0206133338312308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,1,1,64,128,1,float16,float16,0,0.023775999744733173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,1,1,64,0,1,float16,float16,0,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,1,1,64,128,1,float16,fp8,0,0.023823998868465424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,1,1,64,128,1,fp8,fp8,0,0.030074665943781536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,1,1,64,0,1,float16,fp8,0,0.02161066730817159
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,1,1,64,0,1,fp8,fp8,0,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,1,1,64,128,1,float16,float16,0,0.03381866713364919
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,1,1,64,0,1,float16,float16,0,0.0339626669883728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,1,1,64,128,1,float16,fp8,0,0.034346667428811394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,1,1,64,128,1,fp8,fp8,0,0.036117332677046456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,1,1,64,0,1,float16,fp8,0,0.03387733300526937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,1,1,64,0,1,fp8,fp8,0,0.030250666042168934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,1,1,64,128,1,float16,float16,0,0.022298666338125866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,1,1,64,0,1,float16,float16,0,0.02180800090233485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,1,1,64,128,1,float16,fp8,0,0.022543999056021374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,1,1,64,128,1,fp8,fp8,0,0.02590399980545044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,1,1,64,0,1,float16,fp8,0,0.02201066662867864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,1,1,64,0,1,fp8,fp8,0,0.02260799954334895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,1,1,64,128,1,float16,float16,0,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,1,1,64,0,1,float16,float16,0,0.01803733284274737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,1,1,64,128,1,float16,fp8,0,0.018272000054518383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,1,1,64,128,1,fp8,fp8,0,0.023546665906906128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,1,1,64,0,1,float16,fp8,0,0.01844266677896182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,1,1,64,0,1,fp8,fp8,0,0.02029866725206375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,1,1,64,128,1,float16,float16,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,1,1,64,0,1,float16,float16,0,0.016261332978804905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,1,1,64,0,1,float16,float16,0,0.015978666643301647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,1,1,64,128,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,1,1,64,128,1,fp8,fp8,0,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,1,1,64,0,1,float16,fp8,0,0.016688000410795212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,1,1,64,0,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,1,1,64,128,1,float16,float16,0,0.015754666179418564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,1,1,64,128,1,float16,fp8,0,0.016517333686351776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,1,1,64,128,1,fp8,fp8,0,0.022111999491850536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,1,1,64,0,1,float16,fp8,0,0.01659199967980385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,1,1,64,0,1,fp8,fp8,0,0.0185759998857975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,1,1,64,128,1,float16,float16,0,0.015589332828919092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,1,1,64,0,1,float16,float16,0,0.015322666615247726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,1,1,64,128,1,float16,fp8,0,0.015935999651749928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,1,1,64,128,1,fp8,fp8,0,0.022618666291236877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,1,1,64,0,1,float16,fp8,0,0.015546667079130808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,1,1,64,0,1,fp8,fp8,0,0.018272000054518383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,1,1,64,128,1,float16,float16,0,0.01540800059835116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,1,1,64,128,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,1,1,64,0,1,float16,float16,0,0.015530666957298914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,1,1,64,128,1,float16,fp8,0,0.01570133368174235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,1,1,64,128,1,fp8,fp8,0,0.021738665799299877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,1,1,64,0,1,float16,fp8,0,0.015562667200962702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,1,1,64,0,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,1,1,64,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,1,1,64,128,1,float16,fp8,0,0.015829333414634068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,1,1,64,128,1,fp8,fp8,0,0.021850667893886566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,1,1,64,128,1,fp8,fp8,0,0.02237333357334137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,1,1,64,0,1,float16,fp8,0,0.01569066693385442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,1,1,64,0,1,fp8,fp8,0,0.01828266680240631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,1,1,64,128,1,float16,float16,0,0.019498666127522785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,1,1,64,0,1,float16,float16,0,0.019674666225910187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,1,1,64,128,1,float16,fp8,0,0.01956266661485036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,1,1,64,0,1,float16,fp8,0,0.01956266661485036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,1,1,64,0,1,fp8,fp8,0,0.02257599929968516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,1,1,64,128,1,float16,float16,0,0.016058667252461117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,1,1,64,0,1,float16,float16,0,0.015557333827018738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,1,1,64,128,1,float16,fp8,0,0.015722667177518208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,1,1,64,128,1,fp8,fp8,0,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,1,1,64,0,1,float16,fp8,0,0.015696000307798386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,1,1,64,0,1,fp8,fp8,0,0.02025066688656807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,1,1,64,128,1,float16,float16,0,0.014602666099866232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,1,1,64,0,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,1,1,64,0,1,float16,float16,0,0.014592000593741735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,1,1,64,128,1,float16,fp8,0,0.014463999619086584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,1,1,64,128,1,fp8,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,1,1,64,0,1,float16,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,1,1,64,128,1,float16,float16,0,0.013669333110253016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,1,1,64,0,1,fp8,fp8,0,0.01838933303952217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,1,1,64,0,1,float16,float16,0,0.01392000044385592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,1,1,64,128,1,float16,fp8,0,0.014117332796255747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,1,1,64,128,1,fp8,fp8,0,0.01838933303952217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,1,1,64,0,1,float16,fp8,0,0.014117332796255747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,1,1,64,128,1,float16,float16,0,0.013605333864688873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,1,1,64,0,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,1,1,64,128,1,float16,fp8,0,0.01360000049074491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,1,1,64,128,1,fp8,fp8,0,0.018533332894245785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,1,1,64,128,1,float16,fp8,0,0.013861333330472311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,1,1,64,0,1,float16,fp8,0,0.01394133393963178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,1,1,64,0,1,fp8,fp8,0,0.018437333405017853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,1,1,64,128,1,float16,float16,0,0.013487999637921652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,1,1,64,0,1,float16,float16,0,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,1,1,64,128,1,fp8,fp8,0,0.018309333672126133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,1,1,64,0,1,float16,fp8,0,0.014170666535695394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,1,1,64,128,1,fp8,fp8,0,0.0182239996890227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,1,1,64,0,1,fp8,fp8,0,0.0185759998857975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,1,1,64,128,1,float16,float16,0,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,1,1,64,0,1,float16,float16,0,0.013295999417702356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,1,1,64,128,1,float16,fp8,0,0.01393066719174385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,1,1,64,0,1,float16,fp8,0,0.013877333452304205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,1,1,64,0,1,fp8,fp8,0,0.01860800012946129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,1,1,64,128,1,float16,float16,0,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,1,1,64,0,1,float16,float16,0,0.013210666676362356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,1,1,64,128,1,float16,fp8,0,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,1,1,64,128,1,fp8,fp8,0,0.018191999445358913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,1,1,64,0,1,float16,fp8,0,0.013610667238632837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,1,1,64,0,1,fp8,fp8,0,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,1,1,64,128,1,float16,float16,0,0.014394666999578476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,1,1,64,0,1,float16,float16,0,0.014469332993030548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,1,1,64,128,1,float16,fp8,0,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,1,1,64,128,1,fp8,fp8,0,0.02059200033545494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,1,1,64,0,1,float16,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,1,1,64,0,1,fp8,fp8,0,0.02077866718173027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,1,1,64,128,1,float16,float16,0,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,1,1,64,0,1,float16,float16,0,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,1,1,64,128,1,float16,fp8,0,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,1,1,64,128,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,1,1,64,0,1,float16,fp8,0,0.01322666679819425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,1,1,64,0,1,fp8,fp8,0,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,1,1,64,0,1,fp8,fp8,0,0.018810667097568512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,1,1,64,128,1,float16,float16,0,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,1,1,64,0,1,float16,float16,0,0.01227733368674914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,1,1,64,128,1,float16,fp8,0,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,1,1,64,128,1,fp8,fp8,0,0.018837332725524902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,1,1,64,0,1,float16,fp8,0,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,1,1,64,128,1,float16,float16,0,0.012229333321253458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,1,1,64,0,1,float16,float16,0,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,1,1,64,128,1,float16,fp8,0,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,1,1,64,128,1,fp8,fp8,0,0.018165333817402523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,1,1,64,0,1,float16,fp8,0,0.012682666381200155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,1,1,64,0,1,fp8,fp8,0,0.018426666657129925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,1,1,64,128,1,float16,float16,0,0.012416000167528788
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,1,1,64,0,1,float16,float16,0,0.011968000481526056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,1,1,64,128,1,float16,fp8,0,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,1,1,64,128,1,fp8,fp8,0,0.018021332720915478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,1,1,64,0,1,float16,fp8,0,0.012709333250919977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,1,1,64,0,1,fp8,fp8,0,0.01793066660563151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,1,1,64,128,1,float16,float16,0,0.012560000022252401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,1,1,64,0,1,float16,float16,0,0.012058666596810022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,1,1,64,128,1,float16,fp8,0,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,1,1,64,128,1,fp8,fp8,0,0.0179626668492953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,1,1,64,0,1,float16,fp8,0,0.01250133290886879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,1,1,64,0,1,fp8,fp8,0,0.018085333208243053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,1,1,64,128,1,float16,float16,0,0.012106666962305704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,1,1,64,0,1,float16,float16,0,0.011600000162919363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,1,1,64,128,1,float16,fp8,0,0.012565333396196365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,1,1,64,128,1,fp8,fp8,0,0.01821333294113477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,1,1,64,0,1,float16,fp8,0,0.012144000579913458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,1,1,64,0,1,float16,fp8,0,0.012383999923865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,1,1,64,0,1,fp8,fp8,0,0.018005333840847015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,1,1,64,128,1,float16,float16,0,0.012063999970753988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,1,1,64,0,1,float16,float16,0,0.011898666620254517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,1,1,64,128,1,float16,fp8,0,0.012261333564917246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,1,1,64,128,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,1,1,64,0,1,fp8,fp8,0,0.018138666947682697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,1,1,64,128,1,float16,float16,0,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,1,1,64,0,1,float16,float16,0,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,1,1,64,128,1,float16,fp8,0,0.013237333546082178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,1,1,64,128,1,fp8,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,1,1,64,0,1,float16,fp8,0,0.013013333082199097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,1,1,64,0,1,fp8,fp8,0,0.0195573332409064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,1,1,64,128,1,float16,float16,0,0.011962667107582092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,1,1,64,0,1,float16,float16,0,0.01219733307758967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,1,1,64,128,1,float16,fp8,0,0.012250666817029318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,1,1,64,128,1,float16,fp8,0,0.012400000045696894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,1,1,64,128,1,fp8,fp8,0,0.01876266673207283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,1,1,64,0,1,float16,fp8,0,0.012554666648308435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,1,1,64,0,1,fp8,fp8,0,0.018709332992633183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,1,1,64,128,1,float16,float16,0,0.011861333002646765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,1,1,64,0,1,float16,float16,0,0.011936000237862269
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,1,1,64,128,1,fp8,fp8,0,0.018613333503405254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,1,1,64,0,1,float16,fp8,0,0.012442667037248611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,1,1,64,0,1,fp8,fp8,0,0.018533332894245785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,1,1,64,128,1,float16,float16,0,0.011802667131026586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,1,1,64,0,1,float16,float16,0,0.011258666714032492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,1,1,64,128,1,float16,fp8,0,0.012117333710193634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,1,1,64,128,1,fp8,fp8,0,0.018165333817402523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,1,1,64,0,1,float16,fp8,0,0.012357333054145178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,1,1,64,0,1,fp8,fp8,0,0.017994667092959087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,1,1,64,128,1,float16,float16,0,0.011695999652147293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,1,1,64,0,1,fp8,fp8,0,0.01836799954374631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,1,1,64,0,1,float16,float16,0,0.011322667201360067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,1,1,64,128,1,float16,fp8,0,0.011674666156371435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,1,1,64,128,1,fp8,fp8,0,0.018320000420014065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,1,1,64,0,1,float16,fp8,0,0.011658667276302973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,1,1,64,128,1,float16,float16,0,0.011642667154471079
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,1,1,64,0,1,float16,float16,0,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,1,1,64,128,1,float16,fp8,0,0.011994666109482447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,1,1,64,128,1,fp8,fp8,0,0.017488000293572743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,1,1,64,0,1,float16,fp8,0,0.011541333049535751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,1,1,64,0,1,fp8,fp8,0,0.018165333817402523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,1,1,64,128,1,float16,float16,0,0.01163200040658315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,1,1,64,0,1,float16,float16,0,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,1,1,64,128,1,float16,fp8,0,0.012015999605258306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,1,1,64,128,1,fp8,fp8,0,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,1,1,64,0,1,float16,fp8,0,0.011605333536863327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,1,1,64,0,1,fp8,fp8,0,0.018191999445358913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,1,1,64,128,1,float16,float16,0,0.011626667032639185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,1,1,64,0,1,float16,float16,0,0.011503999431928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,1,1,64,128,1,float16,fp8,0,0.011525332927703857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,1,1,64,128,1,fp8,fp8,0,0.018250666558742523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,1,1,64,0,1,float16,fp8,0,0.011722666521867117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,1,1,64,0,1,fp8,fp8,0,0.01762666677435239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,1,1,64,128,1,float16,float16,0,0.012565333396196365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,1,1,64,0,1,float16,float16,0,0.012149333953857422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,1,1,64,128,1,float16,fp8,0,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,1,1,64,128,1,fp8,fp8,0,0.018837332725524902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,1,1,64,0,1,float16,fp8,0,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,1,1,64,0,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,1,1,64,128,1,float16,float16,0,0.01184533288081487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,1,1,64,0,1,float16,float16,0,0.012154666086037954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,1,1,64,128,1,float16,fp8,0,0.012240000069141388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,1,1,64,128,1,fp8,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,1,1,64,0,1,float16,fp8,0,0.012149333953857422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,1,1,64,0,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,1,1,64,128,1,float16,float16,0,0.011600000162919363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,1,1,64,0,1,float16,float16,0,0.011887999872366587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,1,1,64,128,1,float16,fp8,0,0.012245333443085352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,1,1,64,128,1,fp8,fp8,0,0.018415999909241993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,1,1,64,0,1,float16,fp8,0,0.0120319997270902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,1,1,64,0,1,fp8,fp8,0,0.01846933364868164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,1,1,64,128,1,float16,float16,0,0.011722666521867117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,1,1,64,0,1,float16,float16,0,0.011365332951148352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,1,1,64,128,1,float16,fp8,0,0.01191466674208641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,1,1,64,128,1,fp8,fp8,0,0.01838933303952217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,1,1,64,0,1,float16,fp8,0,0.01192533348997434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,1,1,64,0,1,fp8,fp8,0,0.018415999909241993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,1,1,64,128,1,float16,float16,0,0.011221333096424738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,1,1,64,0,1,float16,float16,0,0.011653333902359009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,1,1,64,128,1,float16,fp8,0,0.012106666962305704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,1,1,64,128,1,fp8,fp8,0,0.017765333255132038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,1,1,64,0,1,float16,fp8,0,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,1,1,64,0,1,fp8,fp8,0,0.017658667018016178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,1,1,64,128,1,float16,float16,0,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,1,1,64,0,1,float16,float16,0,0.011498666057984034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,1,1,64,128,1,float16,fp8,0,0.01184533288081487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,1,1,64,128,1,fp8,fp8,0,0.017802666872739792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,1,1,64,0,1,float16,fp8,0,0.011866666376590729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,1,1,64,0,1,fp8,fp8,0,0.01806933308641116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,1,1,64,128,1,float16,float16,0,0.011525332927703857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,1,1,64,128,1,float16,float16,0,0.011354666203260422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,1,1,64,0,1,float16,float16,0,0.011301333705584208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,1,1,64,128,1,float16,fp8,0,0.011503999431928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,1,1,64,128,1,fp8,fp8,0,0.017802666872739792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,1,1,64,0,1,float16,fp8,0,0.011936000237862269
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,1,1,64,0,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,1,1,64,0,1,float16,float16,0,0.01121066634853681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,1,1,64,128,1,float16,fp8,0,0.01139733319481214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,1,1,64,128,1,fp8,fp8,0,0.017701332767804463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,1,1,64,0,1,float16,fp8,0,0.011813333878914515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,1,1,64,0,1,fp8,fp8,0,0.01754666616519292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,1,128,0,1,fp8,fp8,0,70.00069173177083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,2,128,0,1,fp8,fp8,0,69.11908467610677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,4,128,0,1,fp8,fp8,0,70.43555196126302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,96,128,0,1,float16,float16,0,51.126749674479164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,96,128,0,1,float16,fp8,0,51.140045166015625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,1,128,0,1,fp8,fp8,0,33.20952606201172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,96,8,128,0,1,fp8,fp8,0,66.77718607584636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,1,128,0,1,float16,float16,0,49.947591145833336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,1,128,0,1,float16,fp8,0,50.98997497558594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,2,128,0,1,fp8,fp8,0,34.62107849121094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,4,128,0,1,fp8,fp8,0,35.07253774007162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,2,128,0,1,float16,float16,0,50.1712900797526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,2,128,0,1,float16,fp8,0,50.51457214355469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,4,128,0,1,float16,float16,0,50.61558532714844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,4,128,0,1,float16,fp8,0,50.25962829589844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,8,128,0,1,float16,float16,0,50.670186360677086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,96,128,0,1,float16,float16,0,26.405776977539062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,96,128,0,1,fp8,fp8,0,17.418251037597656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,96,128,0,1,float16,fp8,0,26.446571350097656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,8,128,0,1,fp8,fp8,0,35.20508321126302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,1,128,0,1,float16,float16,0,26.09722646077474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,1,128,0,1,fp8,fp8,0,16.839786529541016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,96,8,128,0,1,float16,fp8,0,50.642191569010414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,1,128,0,1,float16,fp8,0,25.331776936848957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,2,128,0,1,fp8,fp8,0,17.115557352701824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,2,128,0,1,float16,float16,0,25.436927795410156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,2,128,0,1,float16,fp8,0,25.02349344889323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,4,128,0,1,float16,float16,0,25.520543416341145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,4,128,0,1,fp8,fp8,0,17.08405303955078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,4,128,0,1,float16,fp8,0,25.831631978352863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,96,128,0,1,float16,float16,0,13.252677917480469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,8,128,0,1,fp8,fp8,0,17.14402135213216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,96,128,0,1,fp8,fp8,0,9.227962493896484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,8,128,0,1,float16,float16,0,25.370038350423176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,96,8,128,0,1,float16,fp8,0,26.103919982910156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,96,128,0,1,float16,fp8,0,13.352101643880209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,1,128,0,1,float16,float16,0,12.990229288736979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,1,128,0,1,fp8,fp8,0,8.958357493082682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,1,128,0,1,float16,fp8,0,12.83126449584961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,2,128,0,1,fp8,fp8,0,8.834224065144857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,2,128,0,1,float16,float16,0,13.018330891927084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,2,128,0,1,float16,fp8,0,13.062068939208984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,4,128,0,1,float16,float16,0,12.939818064371744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,4,128,0,1,fp8,fp8,0,8.733354568481445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,4,128,0,1,float16,fp8,0,13.430912017822266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,8,128,0,1,fp8,fp8,0,8.752981185913086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,8,128,0,1,float16,float16,0,12.988085428873697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,96,8,128,0,1,float16,fp8,0,13.197792053222656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,1,128,0,1,fp8,fp8,0,39.78128560384115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,2,128,0,1,fp8,fp8,0,39.19904581705729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,1,128,0,1,float16,float16,0,59.104451497395836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,1,128,0,1,float16,fp8,0,58.65923055013021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,2,128,0,1,float16,float16,0,57.5555419921875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,2,128,0,1,float16,fp8,0,58.227335611979164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,4,128,0,1,float16,float16,0,58.57141621907552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,4,128,0,1,fp8,fp8,0,39.55606333414713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,96,128,0,1,float16,float16,0,29.8822504679362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,4,128,0,1,float16,fp8,0,58.8304189046224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,96,128,0,1,float16,fp8,0,30.226048787434895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,8,128,0,1,fp8,fp8,0,39.397168477376304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,96,128,0,1,fp8,fp8,0,20.333365122477215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,8,128,0,1,float16,fp8,0,58.35553995768229
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,96,8,128,0,1,float16,float16,0,59.15935770670573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,1,128,0,1,fp8,fp8,0,19.90797297159831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,1,128,0,1,float16,float16,0,29.0719731648763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,1,128,0,1,float16,fp8,0,29.244415283203125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,2,128,0,1,float16,float16,0,29.282852172851562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,2,128,0,1,fp8,fp8,0,19.704090118408203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,2,128,0,1,float16,fp8,0,28.881423950195312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,4,128,0,1,fp8,fp8,0,19.725413004557293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,4,128,0,1,float16,float16,0,29.771519978841145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,4,128,0,1,float16,fp8,0,29.452372233072918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,96,128,0,1,float16,float16,0,15.353535970052084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,8,128,0,1,fp8,fp8,0,19.943402608235676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,8,128,0,1,float16,float16,0,29.2125981648763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,96,128,0,1,fp8,fp8,0,10.39677874247233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,96,128,0,1,float16,fp8,0,15.697530110677084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,96,8,128,0,1,float16,fp8,0,29.2568842569987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,1,128,0,1,float16,float16,0,14.877061208089193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,1,128,0,1,fp8,fp8,0,9.90883763631185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,1,128,0,1,float16,fp8,0,14.599098205566406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,2,128,0,1,fp8,fp8,0,9.884906768798828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,2,128,0,1,float16,float16,0,14.850964864095053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,2,128,0,1,float16,fp8,0,14.797594706217447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,4,128,0,1,float16,float16,0,14.759109497070312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,4,128,0,1,fp8,fp8,0,9.964495976765951
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,4,128,0,1,float16,fp8,0,15.01416015625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,8,128,0,1,float16,float16,0,14.824474334716797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,96,128,0,1,float16,float16,0,8.802576065063477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,96,128,0,1,fp8,fp8,0,5.331797281901042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,8,128,0,1,fp8,fp8,0,10.014575958251953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,96,8,128,0,1,float16,fp8,0,14.693125406901041
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,96,128,0,1,float16,fp8,0,7.838629404703776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,1,128,0,1,fp8,fp8,0,5.173647880554199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,1,128,0,1,float16,float16,0,7.706165313720703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,1,128,0,1,float16,fp8,0,7.629983901977539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,2,128,0,1,fp8,fp8,0,5.091973304748535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,2,128,0,1,float16,fp8,0,7.868858973185222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,2,128,0,1,float16,float16,0,7.518725077311198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,4,128,0,1,float16,float16,0,7.508186976114909
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,4,128,0,1,fp8,fp8,0,5.1953919728597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,4,128,0,1,float16,fp8,0,7.6373545328776045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,8,128,0,1,fp8,fp8,0,5.1599626541137695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,8,128,0,1,float16,float16,0,7.535109202067058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,96,8,128,0,1,float16,fp8,0,7.459413528442383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,1,128,0,1,fp8,fp8,0,28.65277862548828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,2,128,0,1,fp8,fp8,0,28.669413248697918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,1,128,0,1,float16,float16,0,41.44802602132162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,1,128,0,1,float16,fp8,0,41.113563537597656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,2,128,0,1,float16,float16,0,41.64879353841146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,2,128,0,1,float16,fp8,0,41.44177500406901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,4,128,0,1,float16,float16,0,44.43635559082031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,4,128,0,1,fp8,fp8,0,27.808609008789062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,96,128,0,1,float16,float16,0,21.708740234375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,4,128,0,1,float16,fp8,0,42.443145751953125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,8,128,0,1,fp8,fp8,0,28.650784810384113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,96,128,0,1,fp8,fp8,0,14.570842742919922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,8,128,0,1,float16,float16,0,41.78194681803385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,96,128,0,1,float16,fp8,0,21.273675282796223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,1,128,0,1,float16,float16,0,20.65550994873047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,96,8,128,0,1,float16,fp8,0,41.56342315673828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,1,128,0,1,fp8,fp8,0,14.06222407023112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,1,128,0,1,float16,fp8,0,20.987669626871746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,2,128,0,1,float16,float16,0,20.629348754882812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,2,128,0,1,fp8,fp8,0,13.94589869181315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,2,128,0,1,float16,fp8,0,22.08196258544922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,4,128,0,1,fp8,fp8,0,14.05086898803711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,4,128,0,1,float16,fp8,0,21.176794687906902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,4,128,0,1,float16,float16,0,20.418421427408855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,96,128,0,1,fp8,fp8,0,7.453178405761719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,96,128,0,1,float16,float16,0,11.08133316040039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,8,128,0,1,float16,float16,0,21.795461018880207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,96,128,0,1,float16,fp8,0,10.69042714436849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,8,128,0,1,fp8,fp8,0,14.360623677571615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,96,8,128,0,1,float16,fp8,0,20.98514684041341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,1,128,0,1,fp8,fp8,0,7.147658665974935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,1,128,0,1,float16,float16,0,10.507578531901041
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,1,128,0,1,float16,fp8,0,10.440101623535156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,2,128,0,1,fp8,fp8,0,7.07969601949056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,2,128,0,1,float16,float16,0,10.602618535359701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,2,128,0,1,float16,fp8,0,10.402565638224283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,4,128,0,1,float16,float16,0,10.708287556966146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,4,128,0,1,float16,fp8,0,10.404783884684244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,4,128,0,1,fp8,fp8,0,7.177344004313151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,8,128,0,1,fp8,fp8,0,7.075621287027995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,96,128,0,1,float16,float16,0,5.515013376871745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,96,128,0,1,float16,fp8,0,5.62226676940918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,96,128,0,1,fp8,fp8,0,3.7959518432617188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,8,128,0,1,float16,float16,0,10.389109293619791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,96,8,128,0,1,float16,fp8,0,10.479578653971354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,1,128,0,1,float16,float16,0,5.346581141153972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,1,128,0,1,fp8,fp8,0,3.760965347290039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,1,128,0,1,float16,fp8,0,5.330191930135091
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,2,128,0,1,fp8,fp8,0,3.754938761393229
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,2,128,0,1,float16,float16,0,5.376869201660156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,2,128,0,1,float16,fp8,0,5.429141362508138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,4,128,0,1,float16,float16,0,5.3318131764729815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,4,128,0,1,fp8,fp8,0,3.6701440811157227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,4,128,0,1,float16,fp8,0,5.424383799235026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,8,128,0,1,float16,float16,0,5.427712122599284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,8,128,0,1,fp8,fp8,0,3.7649545669555664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,96,8,128,0,1,float16,fp8,0,5.520506540934245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,1,128,0,1,fp8,fp8,0,36.336219787597656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,2,128,0,1,fp8,fp8,0,36.30334981282552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,4,128,0,1,fp8,fp8,0,36.78692372639974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,96,8,128,0,1,fp8,fp8,0,39.959302266438804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,96,128,0,1,float16,fp8,0,27.710906982421875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,96,128,0,1,float16,float16,0,27.877296447753906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,1,128,0,1,float16,float16,0,26.832987467447918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,1,128,0,1,fp8,fp8,0,18.31818135579427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,2,128,0,1,fp8,fp8,0,18.25979741414388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,1,128,0,1,float16,fp8,0,26.8385009765625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,2,128,0,1,float16,float16,0,26.599764506022137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,2,128,0,1,float16,fp8,0,26.564356486002605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,4,128,0,1,fp8,fp8,0,18.97306187947591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,4,128,0,1,float16,fp8,0,26.635833740234375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,4,128,0,1,float16,float16,0,26.854873657226562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,96,128,0,1,float16,float16,0,13.95139185587565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,8,128,0,1,fp8,fp8,0,18.220784505208332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,8,128,0,1,float16,float16,0,26.9724858601888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,96,8,128,0,1,float16,fp8,0,27.639732360839844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,96,128,0,1,float16,fp8,0,14.672805786132812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,96,128,0,1,fp8,fp8,0,9.972997029622396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,1,128,0,1,float16,float16,0,13.55575434366862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,1,128,0,1,fp8,fp8,0,9.166224161783854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,2,128,0,1,fp8,fp8,0,9.287151972452799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,1,128,0,1,float16,fp8,0,13.39850107828776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,2,128,0,1,float16,float16,0,13.481498718261719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,2,128,0,1,float16,fp8,0,13.799247741699219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,4,128,0,1,fp8,fp8,0,9.394720077514648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,4,128,0,1,float16,float16,0,13.295786539713541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,4,128,0,1,float16,fp8,0,13.514911651611328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,8,128,0,1,fp8,fp8,0,9.364714940388998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,96,128,0,1,float16,float16,0,7.10427729288737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,96,128,0,1,fp8,fp8,0,4.911856015523274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,8,128,0,1,float16,float16,0,13.812751770019531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,96,128,0,1,float16,fp8,0,6.997914632161458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,96,8,128,0,1,float16,fp8,0,13.701466878255209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,1,128,0,1,fp8,fp8,0,4.722458521525065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,1,128,0,1,float16,float16,0,6.917392094930013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,1,128,0,1,float16,fp8,0,6.869338353474935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,2,128,0,1,fp8,fp8,0,4.673338572184245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,2,128,0,1,float16,float16,0,6.8875306447347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,2,128,0,1,float16,fp8,0,6.798970540364583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,4,128,0,1,fp8,fp8,0,4.73470401763916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,4,128,0,1,float16,float16,0,6.867509206136067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,4,128,0,1,float16,fp8,0,6.937653223673503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,96,128,0,1,float16,float16,0,3.6472959518432617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,8,128,0,1,fp8,fp8,0,4.6705067952473955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,8,128,0,1,float16,float16,0,6.907738367716472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,96,8,128,0,1,float16,fp8,0,6.886698404947917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,96,128,0,1,fp8,fp8,0,2.572314739227295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,96,128,0,1,float16,fp8,0,3.6326878865559897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,1,128,0,1,float16,float16,0,3.842426617940267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,1,128,0,1,fp8,fp8,0,2.4259680112202964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,1,128,0,1,float16,fp8,0,3.3584105173746743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,2,128,0,1,float16,float16,0,3.429343859354655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,2,128,0,1,fp8,fp8,0,2.4737280209859214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,2,128,0,1,float16,fp8,0,3.690575917561849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,4,128,0,1,fp8,fp8,0,2.4655253092447915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,4,128,0,1,float16,float16,0,3.53658135732015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,4,128,0,1,float16,fp8,0,3.565194765726725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,8,128,0,1,float16,float16,0,3.5452426274617515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,8,128,0,1,fp8,fp8,0,2.458693345387777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,96,8,128,0,1,float16,fp8,0,3.585850715637207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,1,128,0,1,fp8,fp8,0,21.610822041829426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,2,128,0,1,fp8,fp8,0,21.77868906656901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,1,128,0,1,float16,float16,0,32.26773325602213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,1,128,0,1,float16,fp8,0,32.13784535725912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,2,128,0,1,float16,fp8,0,32.7413075764974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,4,128,0,1,fp8,fp8,0,22.198209126790363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,96,8,128,0,1,fp8,fp8,0,21.854395548502605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,96,128,0,1,float16,float16,0,16.32803217569987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,96,128,0,1,float16,fp8,0,16.497567494710285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,96,128,0,1,fp8,fp8,0,11.816516876220703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,1,128,0,1,float16,float16,0,17.226688385009766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,1,128,0,1,float16,fp8,0,16.066543579101562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,1,128,0,1,fp8,fp8,0,10.894789377848307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,2,128,0,1,float16,float16,0,15.815306345621744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,2,128,0,1,fp8,fp8,0,11.059749603271484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,2,128,0,1,float16,fp8,0,15.753611246744791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,4,128,0,1,float16,float16,0,15.953567504882812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,4,128,0,1,float16,fp8,0,16.588821411132812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,4,128,0,1,fp8,fp8,0,11.135035196940104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,8,128,0,1,float16,float16,0,15.926063537597656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,8,128,0,1,float16,fp8,0,16.088799794514973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,96,8,128,0,1,fp8,fp8,0,10.945264180501303
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,96,128,0,1,float16,float16,0,8.291007995605469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,96,128,0,1,float16,fp8,0,8.571317036946615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,96,128,0,1,fp8,fp8,0,5.934437433878581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,1,128,0,1,float16,float16,0,7.927290598551433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,1,128,0,1,float16,fp8,0,7.947104136149089
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,1,128,0,1,fp8,fp8,0,5.495818456013997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,2,128,0,1,float16,float16,0,7.937365214029948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,2,128,0,1,float16,fp8,0,7.923205057779948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,2,128,0,1,fp8,fp8,0,5.7678985595703125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,4,128,0,1,float16,float16,0,8.102821350097656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,4,128,0,1,float16,fp8,0,8.139535903930664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,4,128,0,1,fp8,fp8,0,5.531936009724935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,8,128,0,1,float16,float16,0,7.983840306599935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,8,128,0,1,float16,fp8,0,8.146053314208984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,96,8,128,0,1,fp8,fp8,0,5.52016003926595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,96,128,0,1,float16,float16,0,4.140490531921387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,96,128,0,1,float16,fp8,0,4.31275749206543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,96,128,0,1,fp8,fp8,0,3.012842814127604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,1,128,0,1,float16,float16,0,3.922021230061849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,1,128,0,1,float16,fp8,0,4.01796277364095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,1,128,0,1,fp8,fp8,0,2.779189427693685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,2,128,0,1,float16,float16,0,4.056480089823405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,2,128,0,1,float16,fp8,0,3.770026524861654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,2,128,0,1,fp8,fp8,0,2.7574453353881836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,4,128,0,1,float16,float16,0,3.729466756184896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,4,128,0,1,float16,fp8,0,4.068746566772461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,4,128,0,1,fp8,fp8,0,2.751983960469564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,8,128,0,1,float16,float16,0,3.9444640477498374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,8,128,0,1,float16,fp8,0,3.7409919102986655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,96,128,0,1,float16,float16,0,2.1292479832967124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,96,8,128,0,1,fp8,fp8,0,2.814154624938965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,96,128,0,1,fp8,fp8,0,1.5901974042256672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,96,128,0,1,float16,fp8,0,2.196613311767578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,1,128,0,1,float16,float16,0,2.053621292114258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,1,128,0,1,float16,fp8,0,2.0689279238382974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,1,128,0,1,fp8,fp8,0,1.5064694086710613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,2,128,0,1,float16,float16,0,2.060650666554769
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,2,128,0,1,float16,fp8,0,2.0757919947306314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,2,128,0,1,fp8,fp8,0,1.5429013570149739
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,4,128,0,1,float16,float16,0,2.03984006245931
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,4,128,0,1,float16,fp8,0,2.0158027013142905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,4,128,0,1,fp8,fp8,0,1.5116000175476074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,8,128,0,1,float16,float16,0,2.049546718597412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,8,128,0,1,float16,fp8,0,2.1387252807617188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,96,8,128,0,1,fp8,fp8,0,1.5313706398010254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,1,128,0,1,fp8,fp8,0,21.4540532430013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,2,128,0,1,fp8,fp8,0,21.38715108235677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,4,128,0,1,fp8,fp8,0,21.55053456624349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,96,8,128,0,1,fp8,fp8,0,21.621109008789062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,96,128,0,1,float16,float16,0,15.788186391194662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,96,128,0,1,float16,fp8,0,15.896607716878256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,1,128,0,1,float16,float16,0,15.040650685628256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,1,128,0,1,fp8,fp8,0,10.691519419352213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,1,128,0,1,float16,fp8,0,15.210138956705729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,2,128,0,1,float16,float16,0,15.40506108601888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,2,128,0,1,float16,fp8,0,15.518367767333984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,2,128,0,1,fp8,fp8,0,10.985946655273438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,4,128,0,1,float16,float16,0,15.473674774169922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,4,128,0,1,float16,fp8,0,15.086692810058594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,4,128,0,1,fp8,fp8,0,10.89748764038086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,8,128,0,1,float16,float16,0,15.621760050455729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,8,128,0,1,fp8,fp8,0,11.114810943603516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,96,128,0,1,float16,float16,0,8.174922943115234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,96,8,128,0,1,float16,fp8,0,15.079973856608072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,96,128,0,1,float16,fp8,0,8.212437311808268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,96,128,0,1,fp8,fp8,0,5.963461558024089
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,1,128,0,1,fp8,fp8,0,5.349418640136719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,1,128,0,1,float16,float16,0,7.466634750366211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,1,128,0,1,float16,fp8,0,7.604464213053386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,2,128,0,1,fp8,fp8,0,5.392026901245117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,2,128,0,1,float16,float16,0,7.545077641805013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,2,128,0,1,float16,fp8,0,7.4446665445963545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,4,128,0,1,fp8,fp8,0,5.351541519165039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,4,128,0,1,float16,float16,0,7.675973256429036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,4,128,0,1,float16,fp8,0,7.812309265136719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,8,128,0,1,fp8,fp8,0,5.589770634969075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,8,128,0,1,float16,float16,0,7.497418721516927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,96,8,128,0,1,float16,fp8,0,7.6626402537028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,96,128,0,1,float16,float16,0,4.1866881052653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,96,128,0,1,fp8,fp8,0,3.015338579813639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,96,128,0,1,float16,fp8,0,4.079290707906087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,1,128,0,1,float16,float16,0,3.803546587626139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,1,128,0,1,float16,fp8,0,3.900735855102539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,1,128,0,1,fp8,fp8,0,2.6623360315958657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,2,128,0,1,float16,float16,0,3.7956692377726235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,2,128,0,1,fp8,fp8,0,2.6576693852742515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,2,128,0,1,float16,fp8,0,3.7486772537231445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,4,128,0,1,float16,float16,0,3.712538719177246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,4,128,0,1,float16,fp8,0,3.772197405497233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,4,128,0,1,fp8,fp8,0,2.6896533966064453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,8,128,0,1,float16,float16,0,3.7140159606933594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,8,128,0,1,fp8,fp8,0,2.681232134501139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,96,8,128,0,1,float16,fp8,0,3.678229331970215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,96,128,0,1,float16,float16,0,1.9787947336832683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,96,128,0,1,float16,fp8,0,2.0111093521118164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,96,128,0,1,fp8,fp8,0,1.546346664428711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,1,128,0,1,float16,float16,0,1.8631200790405273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,1,128,0,1,fp8,fp8,0,1.414357344309489
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,1,128,0,1,float16,fp8,0,1.8893973032633464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,2,128,0,1,fp8,fp8,0,1.431370735168457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,2,128,0,1,float16,float16,0,1.9381814002990723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,2,128,0,1,float16,fp8,0,1.9266719818115234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,4,128,0,1,float16,float16,0,1.8531039555867512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,4,128,0,1,fp8,fp8,0,1.4178400039672852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,4,128,0,1,float16,fp8,0,1.8723519643147786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,8,128,0,1,float16,float16,0,1.9317919413248699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,8,128,0,1,fp8,fp8,0,1.4463094075520833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,96,8,128,0,1,float16,fp8,0,1.9012319246927898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,96,128,0,1,float16,float16,0,1.0558773676554363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,96,128,0,1,float16,fp8,0,1.0697973569234211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,96,128,0,1,fp8,fp8,0,0.8588159879048666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,1,128,0,1,float16,float16,0,1.0405279795328777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,1,128,0,1,float16,fp8,0,1.0401493708292644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,1,128,0,1,fp8,fp8,0,0.8005332946777344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,2,128,0,1,float16,float16,0,1.0271573066711426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,2,128,0,1,float16,fp8,0,1.0325013001759846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,2,128,0,1,fp8,fp8,0,0.8007307052612305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,4,128,0,1,float16,float16,0,1.02674134572347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,4,128,0,1,float16,fp8,0,1.0329439640045166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,4,128,0,1,fp8,fp8,0,0.803114652633667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,8,128,0,1,float16,float16,0,1.028538703918457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,8,128,0,1,float16,fp8,0,1.0352319876352947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,96,8,128,0,1,fp8,fp8,0,0.8043413162231445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,1,128,0,1,fp8,fp8,0,13.200677235921225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,2,128,0,1,fp8,fp8,0,13.113547007242838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,4,128,0,1,fp8,fp8,0,13.242154439290365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,96,8,128,0,1,fp8,fp8,0,13.382245381673178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,96,128,0,1,float16,float16,0,9.65557861328125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,96,128,0,1,float16,fp8,0,9.672917048136393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,96,128,0,1,fp8,fp8,0,7.543274561564128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,1,128,0,1,fp8,fp8,0,6.612389246622722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,1,128,0,1,float16,fp8,0,9.066352208455404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,1,128,0,1,float16,float16,0,8.978821436564127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,2,128,0,1,fp8,fp8,0,6.524698893229167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,2,128,0,1,float16,fp8,0,9.13697624206543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,4,128,0,1,float16,float16,0,9.019872029622396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,2,128,0,1,float16,float16,0,8.82094955444336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,4,128,0,1,fp8,fp8,0,6.680229187011719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,4,128,0,1,float16,fp8,0,9.012608210245768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,8,128,0,1,float16,float16,0,9.013466517130533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,8,128,0,1,fp8,fp8,0,6.600400288899739
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,96,128,0,1,float16,float16,0,4.866949399312337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,96,128,0,1,float16,fp8,0,4.8684641520182295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,96,128,0,1,fp8,fp8,0,3.6723626454671225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,1,128,0,1,float16,float16,0,4.372010548909505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,96,8,128,0,1,float16,fp8,0,9.080127716064453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,1,128,0,1,float16,fp8,0,4.634463946024577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,1,128,0,1,fp8,fp8,0,3.2380266189575195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,2,128,0,1,float16,float16,0,4.423525174458821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,2,128,0,1,float16,fp8,0,4.431599934895833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,2,128,0,1,fp8,fp8,0,3.314319928487142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,4,128,0,1,float16,float16,0,4.446149190266927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,4,128,0,1,fp8,fp8,0,3.2344373067220054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,4,128,0,1,float16,fp8,0,4.497674624125163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,8,128,0,1,float16,float16,0,4.3988908131917315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,8,128,0,1,float16,fp8,0,4.4961652755737305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,96,128,0,1,float16,float16,0,2.408751964569092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,96,8,128,0,1,fp8,fp8,0,3.3076960245768228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,96,128,0,1,float16,fp8,0,2.3994506200154624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,96,128,0,1,fp8,fp8,0,1.8676106135050456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,1,128,0,1,float16,float16,0,2.2536800702412925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,1,128,0,1,fp8,fp8,0,1.6901440620422363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,1,128,0,1,float16,fp8,0,2.2285119692484536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,2,128,0,1,fp8,fp8,0,1.7013866106669109
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,2,128,0,1,float16,fp8,0,2.1754345893859863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,4,128,0,1,float16,float16,0,2.241578737894694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,2,128,0,1,float16,float16,0,2.1624107360839844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,4,128,0,1,float16,fp8,0,2.172111988067627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,4,128,0,1,fp8,fp8,0,1.6894240379333496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,8,128,0,1,float16,float16,0,2.2087999979654946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,96,128,0,1,float16,float16,0,1.2101279894510906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,8,128,0,1,float16,fp8,0,2.2106879552205405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,96,8,128,0,1,fp8,fp8,0,1.7052639325459797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,96,128,0,1,float16,fp8,0,1.2392319838205974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,96,128,0,1,fp8,fp8,0,0.9960373242696127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,1,128,0,1,float16,float16,0,1.1523626645406086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,1,128,0,1,float16,fp8,0,1.1694293022155762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,1,128,0,1,fp8,fp8,0,0.9147466818491617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,2,128,0,1,float16,float16,0,1.158351977666219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,2,128,0,1,float16,fp8,0,1.1636853218078613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,2,128,0,1,fp8,fp8,0,0.9084373315175375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,4,128,0,1,float16,float16,0,1.160266637802124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,4,128,0,1,fp8,fp8,0,0.9112906455993652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,4,128,0,1,float16,fp8,0,1.1621599992116292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,8,128,0,1,float16,float16,0,1.1553973356882732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,8,128,0,1,float16,fp8,0,1.162549336751302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,96,128,0,1,float16,float16,0,0.6736266613006592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,96,8,128,0,1,fp8,fp8,0,0.9189386367797852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,96,128,0,1,float16,fp8,0,0.6892000039418539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,96,128,0,1,fp8,fp8,0,0.5622613430023193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,1,128,0,1,float16,float16,0,0.6583573420842489
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,1,128,0,1,float16,fp8,0,0.6575040022532145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,1,128,0,1,fp8,fp8,0,0.5291733344395956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,2,128,0,1,float16,float16,0,0.6574506759643555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,2,128,0,1,float16,fp8,0,0.6571306784947714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,2,128,0,1,fp8,fp8,0,0.5267253319422404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,4,128,0,1,float16,float16,0,0.6559360027313232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,4,128,0,1,float16,fp8,0,0.6605493227640787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,4,128,0,1,fp8,fp8,0,0.5287946859995524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,8,128,0,1,float16,float16,0,0.6566720008850098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,8,128,0,1,float16,fp8,0,0.6594933271408081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,96,8,128,0,1,fp8,fp8,0,0.5289226770401001
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,1,128,0,1,fp8,fp8,0,13.85592524210612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,2,128,0,1,fp8,fp8,0,13.880512237548828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,4,128,0,1,fp8,fp8,0,14.197893778483072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,96,8,128,0,1,fp8,fp8,0,14.012069702148438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,96,128,0,1,float16,float16,0,9.972533543904623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,96,128,0,1,float16,fp8,0,10.102703730265299
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,1,128,0,1,float16,float16,0,9.108783721923828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,1,128,0,1,float16,fp8,0,9.203503926595053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,1,128,0,1,fp8,fp8,0,6.976287841796875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,2,128,0,1,float16,float16,0,9.137088139851889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,2,128,0,1,float16,fp8,0,9.336346944173178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,4,128,0,1,float16,fp8,0,9.197173436482748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,4,128,0,1,float16,float16,0,9.098693211873373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,2,128,0,1,fp8,fp8,0,6.909311930338542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,4,128,0,1,fp8,fp8,0,7.091381072998047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,8,128,0,1,float16,float16,0,9.317802429199219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,8,128,0,1,float16,fp8,0,9.23365338643392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,96,8,128,0,1,fp8,fp8,0,7.10969607035319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,96,128,0,1,float16,fp8,0,5.151962598164876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,96,128,0,1,fp8,fp8,0,3.962554613749186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,96,128,0,1,float16,float16,0,4.867520014444987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,1,128,0,1,float16,fp8,0,4.525818824768066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,1,128,0,1,fp8,fp8,0,3.4445279439290366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,1,128,0,1,float16,float16,0,4.502826690673828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,2,128,0,1,fp8,fp8,0,3.4914185206095376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,4,128,0,1,float16,float16,0,4.571184158325195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,2,128,0,1,float16,fp8,0,4.365866661071777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,4,128,0,1,float16,fp8,0,4.571322758992513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,2,128,0,1,float16,float16,0,4.407727877298991
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,4,128,0,1,fp8,fp8,0,3.4907519022623696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,8,128,0,1,float16,float16,0,4.535807927449544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,96,128,0,1,float16,float16,0,2.4028372764587402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,96,128,0,1,fp8,fp8,0,2.0336586634318032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,8,128,0,1,fp8,fp8,0,3.4587198893229165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,96,8,128,0,1,float16,fp8,0,4.538607915242513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,1,128,0,1,float16,float16,0,2.194533348083496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,1,128,0,1,fp8,fp8,0,1.763759930928548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,96,128,0,1,float16,fp8,0,2.417535940806071
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,1,128,0,1,float16,fp8,0,2.233386675516764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,2,128,0,1,float16,float16,0,2.149951934814453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,2,128,0,1,float16,fp8,0,2.229472001393636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,2,128,0,1,fp8,fp8,0,1.7858826319376628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,4,128,0,1,fp8,fp8,0,1.7652959823608398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,4,128,0,1,float16,fp8,0,2.2304213841756186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,8,128,0,1,float16,float16,0,2.1943306922912598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,8,128,0,1,fp8,fp8,0,1.7904480298360188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,8,128,0,1,float16,fp8,0,2.258336067199707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,96,4,128,0,1,float16,float16,0,2.2104533513387046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,96,128,0,1,float16,float16,0,1.2187626361846924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,96,128,0,1,float16,fp8,0,1.2383413314819336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,96,128,0,1,fp8,fp8,0,1.0566720167795818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,1,128,0,1,float16,float16,0,1.135434627532959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,1,128,0,1,float16,fp8,0,1.1461439927419026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,1,128,0,1,fp8,fp8,0,0.9507946968078613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,2,128,0,1,float16,float16,0,1.130021333694458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,2,128,0,1,float16,fp8,0,1.1385280291239421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,2,128,0,1,fp8,fp8,0,0.9293599923451742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,4,128,0,1,float16,float16,0,1.1351786454518635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,4,128,0,1,float16,fp8,0,1.1412800153096516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,4,128,0,1,fp8,fp8,0,0.9341066678365072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,8,128,0,1,float16,float16,0,1.1342240174611409
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,8,128,0,1,float16,fp8,0,1.1442399819691975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,96,8,128,0,1,fp8,fp8,0,0.9369280338287354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,96,128,0,1,fp8,fp8,0,0.5691733360290527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,96,128,0,1,float16,fp8,0,0.6619199911753336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,96,128,0,1,float16,float16,0,0.6488853295644125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,1,128,0,1,float16,float16,0,0.6165493329366049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,1,128,0,1,fp8,fp8,0,0.5104426542917887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,1,128,0,1,float16,fp8,0,0.6201813220977783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,2,128,0,1,float16,float16,0,0.6175520022710165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,2,128,0,1,float16,fp8,0,0.6209706862767538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,2,128,0,1,fp8,fp8,0,0.5166293382644653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,4,128,0,1,float16,float16,0,0.6179306507110596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,4,128,0,1,float16,fp8,0,0.6207893292109171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,4,128,0,1,fp8,fp8,0,0.5142666498819987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,8,128,0,1,float16,float16,0,0.6200266679128011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,8,128,0,1,float16,fp8,0,0.6246773401896158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,96,8,128,0,1,fp8,fp8,0,0.5205493370691935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,96,128,0,1,float16,float16,0,0.3741226593653361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,96,128,0,1,float16,fp8,0,0.38233598073323566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,96,128,0,1,fp8,fp8,0,0.3233013351758321
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,1,128,0,1,float16,float16,0,0.3633973201115926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,1,128,0,1,float16,fp8,0,0.3667680025100708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,2,128,0,1,float16,fp8,0,0.3638026714324951
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,1,128,0,1,fp8,fp8,0,0.29263466596603394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,2,128,0,1,float16,float16,0,0.36340800921122235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,2,128,0,1,fp8,fp8,0,0.2934719920158386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,4,128,0,1,float16,float16,0,0.3681066830952962
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,4,128,0,1,float16,fp8,0,0.36771734555562335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,4,128,0,1,fp8,fp8,0,0.29171733061472577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,8,128,0,1,float16,float16,0,0.3664693435033162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,8,128,0,1,float16,fp8,0,0.36323734124501544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,96,8,128,0,1,fp8,fp8,0,0.2966559926668803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,2,128,0,1,fp8,fp8,0,9.20195198059082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,1,128,0,1,fp8,fp8,0,9.139226913452148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,1,128,0,1,float16,fp8,0,11.533562978108725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,4,128,0,1,fp8,fp8,0,9.221061070760092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,8,128,0,1,float16,float16,0,11.63653310139974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,96,8,128,0,1,fp8,fp8,0,9.321642557779947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,96,128,0,1,float16,float16,0,6.445295969645183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,96,128,0,1,float16,fp8,0,6.510847727457683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,96,128,0,1,fp8,fp8,0,5.2881011962890625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,1,128,0,1,float16,float16,0,5.682938893636067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,1,128,0,1,float16,fp8,0,5.56446901957194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,2,128,0,1,float16,float16,0,5.738565444946289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,1,128,0,1,fp8,fp8,0,4.558474540710449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,2,128,0,1,float16,fp8,0,5.763760248819987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,2,128,0,1,fp8,fp8,0,4.561973253885905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,4,128,0,1,float16,float16,0,5.741141637166341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,4,128,0,1,float16,fp8,0,5.739109039306641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,4,128,0,1,fp8,fp8,0,4.586549441019694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,8,128,0,1,float16,float16,0,5.756144205729167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,8,128,0,1,float16,fp8,0,5.730266571044922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,96,128,0,1,float16,float16,0,3.223445256551107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,96,8,128,0,1,fp8,fp8,0,4.635077476501465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,96,128,0,1,float16,fp8,0,3.1949758529663086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,96,128,0,1,fp8,fp8,0,2.6636479695638022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,1,128,0,1,float16,float16,0,2.7342828114827475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,1,128,0,1,float16,fp8,0,2.7364800771077475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,1,128,0,1,fp8,fp8,0,2.313802719116211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,2,128,0,1,float16,float16,0,2.7827625274658203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,2,128,0,1,float16,fp8,0,2.7391039530436196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,2,128,0,1,fp8,fp8,0,2.3197439511617026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,4,128,0,1,float16,float16,0,2.76416015625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,4,128,0,1,float16,fp8,0,2.7823146184285483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,4,128,0,1,fp8,fp8,0,2.3267146746317544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,8,128,0,1,float16,float16,0,2.795498530069987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,8,128,0,1,float16,fp8,0,2.78438409169515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,96,8,128,0,1,fp8,fp8,0,2.34226131439209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,96,128,0,1,float16,float16,0,1.5446613629659016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,96,128,0,1,float16,fp8,0,1.5611467361450195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,96,128,0,1,fp8,fp8,0,1.3715146382649739
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,1,128,0,1,float16,float16,0,1.3898293177286785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,1,128,0,1,float16,fp8,0,1.4078292846679688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,1,128,0,1,fp8,fp8,0,1.1907467047373455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,2,128,0,1,float16,float16,0,1.3973706563313801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,2,128,0,1,float16,fp8,0,1.3999999364217122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,2,128,0,1,fp8,fp8,0,1.1977759997049968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,4,128,0,1,float16,float16,0,1.3917066256205242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,4,128,0,1,float16,fp8,0,1.4117066065470378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,4,128,0,1,fp8,fp8,0,1.1940106550852458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,8,128,0,1,float16,float16,0,1.403509298960368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,8,128,0,1,float16,fp8,0,1.4060160319010417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,96,8,128,0,1,fp8,fp8,0,1.208448012669881
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,96,128,0,1,float16,float16,0,0.7946613629659017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,96,128,0,1,float16,fp8,0,0.8107893466949463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,96,128,0,1,fp8,fp8,0,0.721232016881307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,1,128,0,1,float16,float16,0,0.7289066314697266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,1,128,0,1,float16,fp8,0,0.7390240033467611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,1,128,0,1,fp8,fp8,0,0.630506674448649
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,2,128,0,1,float16,float16,0,0.7327413558959961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,2,128,0,1,float16,fp8,0,0.7373440265655518
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,2,128,0,1,fp8,fp8,0,0.632917324701945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,4,128,0,1,float16,float16,0,0.732367992401123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,4,128,0,1,float16,fp8,0,0.7420427004496256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,4,128,0,1,fp8,fp8,0,0.6335786581039429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,8,128,0,1,float16,float16,0,0.7372480233510336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,8,128,0,1,float16,fp8,0,0.7392000357309977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,96,128,0,1,float16,float16,0,0.4309600194295247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,96,8,128,0,1,fp8,fp8,0,0.6391893227895101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,96,128,0,1,float16,fp8,0,0.44261332352956134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,96,128,0,1,fp8,fp8,0,0.3950506846110026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,1,128,0,1,float16,float16,0,0.40777067343393963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,1,128,0,1,float16,fp8,0,0.4109813372294108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,1,128,0,1,fp8,fp8,0,0.35499731699625653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,2,128,0,1,float16,float16,0,0.4085386594136556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,2,128,0,1,float16,fp8,0,0.4135626554489136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,2,128,0,1,fp8,fp8,0,0.3540639877319336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,4,128,0,1,float16,float16,0,0.4096693197886149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,4,128,0,1,float16,fp8,0,0.4110613266626994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,4,128,0,1,fp8,fp8,0,0.3556266625722249
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,8,128,0,1,float16,float16,0,0.40958933035532635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,8,128,0,1,float16,fp8,0,0.4150773286819458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,96,8,128,0,1,fp8,fp8,0,0.35655999183654785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,96,128,0,1,float16,float16,0,0.25572800636291504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,96,128,0,1,float16,fp8,0,0.26156800985336304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,96,128,0,1,fp8,fp8,0,0.22799466053644815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,1,128,0,1,float16,float16,0,0.24716800451278687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,1,128,0,1,float16,fp8,0,0.24715733528137207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,1,128,0,1,fp8,fp8,0,0.20437333981196085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,2,128,0,1,float16,float16,0,0.24643733104070029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,2,128,0,1,fp8,fp8,0,0.20403200387954712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,2,128,0,1,float16,fp8,0,0.2475200096766154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,4,128,0,1,float16,float16,0,0.2471733291943868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,4,128,0,1,float16,fp8,0,0.2479520042737325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,4,128,0,1,fp8,fp8,0,0.20339200894037882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,8,128,0,1,float16,float16,0,0.24766933917999268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,8,128,0,1,fp8,fp8,0,0.20427733659744263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,96,8,128,0,1,float16,fp8,0,0.2507200042406718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,1,128,0,1,fp8,fp8,0,10.143418629964193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,2,128,0,1,fp8,fp8,0,10.136240005493164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,4,128,0,1,fp8,fp8,0,10.255877176920572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,96,128,0,1,float16,fp8,0,7.014346440633138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,96,128,0,1,float16,float16,0,7.117562611897786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,96,8,128,0,1,fp8,fp8,0,10.769930521647135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,1,128,0,1,float16,fp8,0,5.800741195678711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,1,128,0,1,float16,float16,0,5.82426643371582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,1,128,0,1,fp8,fp8,0,5.111669222513835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,2,128,0,1,float16,float16,0,5.956229527791341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,2,128,0,1,float16,fp8,0,5.882485071818034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,2,128,0,1,fp8,fp8,0,5.139706611633301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,4,128,0,1,float16,float16,0,6.075461069742839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,4,128,0,1,float16,fp8,0,5.961722691853841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,4,128,0,1,fp8,fp8,0,5.21558411916097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,8,128,0,1,float16,float16,0,6.127264022827148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,8,128,0,1,float16,fp8,0,6.11677360534668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,96,8,128,0,1,fp8,fp8,0,5.381066640218099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,96,128,0,1,float16,float16,0,3.4545440673828125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,96,128,0,1,float16,fp8,0,3.3604745864868164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,96,128,0,1,fp8,fp8,0,2.940922737121582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,1,128,0,1,float16,float16,0,2.838442802429199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,1,128,0,1,fp8,fp8,0,2.5338826179504395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,1,128,0,1,float16,fp8,0,2.874858538309733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,2,128,0,1,float16,float16,0,2.8339627583821616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,2,128,0,1,fp8,fp8,0,2.550384044647217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,2,128,0,1,float16,fp8,0,2.8201281229654946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,4,128,0,1,float16,float16,0,2.8386561075846353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,4,128,0,1,fp8,fp8,0,2.591925303141276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,4,128,0,1,float16,fp8,0,2.8394721349080405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,8,128,0,1,float16,float16,0,2.982442537943522
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,8,128,0,1,fp8,fp8,0,2.6814985275268555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,96,8,128,0,1,float16,fp8,0,2.9760001500447593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,96,128,0,1,float16,float16,0,1.7161067326863606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,1,128,0,1,float16,fp8,0,1.4201760292053223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,1,128,0,1,fp8,fp8,0,1.2721866766611736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,2,128,0,1,float16,float16,0,1.4307146072387695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,1,128,0,1,float16,float16,0,1.4175465901692708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,96,128,0,1,float16,fp8,0,1.662266731262207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,96,128,0,1,fp8,fp8,0,1.4596373240152996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,2,128,0,1,fp8,fp8,0,1.281274636586507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,2,128,0,1,float16,fp8,0,1.4296479225158691
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,4,128,0,1,float16,float16,0,1.4316372871398926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,4,128,0,1,fp8,fp8,0,1.2814826965332031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,8,128,0,1,float16,fp8,0,1.4402027130126953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,96,128,0,1,float16,float16,0,0.8584266503651937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,8,128,0,1,fp8,fp8,0,1.3512639999389648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,96,128,0,1,fp8,fp8,0,0.7359946568806967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,4,128,0,1,float16,fp8,0,1.425370693206787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,1,128,0,1,float16,float16,0,0.7241706848144531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,96,8,128,0,1,float16,float16,0,1.446346600850423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,1,128,0,1,float16,fp8,0,0.721343994140625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,1,128,0,1,fp8,fp8,0,0.6395946741104126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,2,128,0,1,float16,fp8,0,0.7261813481648763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,2,128,0,1,fp8,fp8,0,0.647429347038269
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,4,128,0,1,float16,float16,0,0.7286986509958903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,4,128,0,1,float16,fp8,0,0.7235840161641439
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,4,128,0,1,fp8,fp8,0,0.6522239844004313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,8,128,0,1,float16,float16,0,0.7318720022837321
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,96,128,0,1,float16,fp8,0,0.8413013617197672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,2,128,0,1,float16,float16,0,0.7225333054860433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,8,128,0,1,float16,fp8,0,0.7321279843648275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,96,8,128,0,1,fp8,fp8,0,0.6768426895141602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,96,128,0,1,float16,float16,0,0.4461386601130168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,96,128,0,1,float16,fp8,0,0.437333345413208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,96,128,0,1,fp8,fp8,0,0.37811732292175293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,1,128,0,1,float16,fp8,0,0.3773760000864665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,2,128,0,1,float16,float16,0,0.3782613277435303
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,1,128,0,1,fp8,fp8,0,0.3290613293647766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,2,128,0,1,float16,fp8,0,0.3784213463465373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,2,128,0,1,fp8,fp8,0,0.3317333261171977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,4,128,0,1,float16,float16,0,0.37673600514729816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,4,128,0,1,float16,fp8,0,0.3792639970779419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,4,128,0,1,fp8,fp8,0,0.33823998769124347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,8,128,0,1,fp8,fp8,0,0.34493335088094074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,8,128,0,1,float16,float16,0,0.3795253435770671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,96,128,0,1,float16,fp8,0,0.23029333353042603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,1,128,0,1,float16,float16,0,0.37381335099538165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,96,8,128,0,1,float16,fp8,0,0.3791999816894531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,96,128,0,1,float16,float16,0,0.23492799202601114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,96,128,0,1,fp8,fp8,0,0.18929066260655722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,1,128,0,1,float16,float16,0,0.20069332917531332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,1,128,0,1,float16,fp8,0,0.2012853423754374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,1,128,0,1,fp8,fp8,0,0.15730133652687073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,2,128,0,1,float16,float16,0,0.2013333241144816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,2,128,0,1,float16,fp8,0,0.20096532503763834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,2,128,0,1,fp8,fp8,0,0.16048533717791238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,4,128,0,1,float16,float16,0,0.2012373407681783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,4,128,0,1,float16,fp8,0,0.20333333810170492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,4,128,0,1,fp8,fp8,0,0.16120533148447672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,8,128,0,1,float16,float16,0,0.2029013236363729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,96,128,0,1,fp8,fp8,0,0.10357866684595744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,8,128,0,1,float16,fp8,0,0.2028800050417582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,96,8,128,0,1,fp8,fp8,0,0.16241066654523215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,96,128,0,1,float16,float16,0,0.12706666191418967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,96,128,0,1,float16,fp8,0,0.12553067008654276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,1,128,0,1,float16,float16,0,0.10548266768455505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,1,128,0,1,float16,fp8,0,0.10525332887967427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,4,128,0,1,float16,float16,0,0.10630933443705241
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,1,128,0,1,fp8,fp8,0,0.08771733442942302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,2,128,0,1,float16,float16,0,0.10570666193962097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,2,128,0,1,float16,fp8,0,0.10572800040245056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,2,128,0,1,fp8,fp8,0,0.08807999889055888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,4,128,0,1,float16,fp8,0,0.10634133219718933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,4,128,0,1,fp8,fp8,0,0.08777599533398946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,8,128,0,1,float16,float16,0,0.10630399982134502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,8,128,0,1,float16,fp8,0,0.10777599612871806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,96,8,128,0,1,fp8,fp8,0,0.0904373327891032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,1,128,0,1,fp8,fp8,0,8.241093317667643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,2,128,0,1,fp8,fp8,0,8.31061871846517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,4,128,0,1,fp8,fp8,0,8.368672053019205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,96,128,0,1,float16,float16,0,5.575808207194011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,96,8,128,0,1,fp8,fp8,0,8.750650405883789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,96,128,0,1,float16,fp8,0,5.435525258382161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,2,128,0,1,float16,float16,0,4.401610692342122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,1,128,0,1,float16,float16,0,4.3344160715738935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,1,128,0,1,fp8,fp8,0,4.098042805989583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,1,128,0,1,float16,fp8,0,4.341861406962077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,2,128,0,1,float16,fp8,0,4.425306638081868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,4,128,0,1,fp8,fp8,0,4.197877248128255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,4,128,0,1,float16,float16,0,4.586031913757324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,4,128,0,1,float16,fp8,0,4.582432111104329
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,8,128,0,1,float16,float16,0,4.675093332926433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,8,128,0,1,float16,fp8,0,4.7087148030598955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,2,128,0,1,fp8,fp8,0,4.103978792826335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,96,128,0,1,float16,float16,0,2.785162607828776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,1,128,0,1,float16,fp8,0,2.177797317504883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,1,128,0,1,float16,float16,0,2.1733333269755044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,96,8,128,0,1,fp8,fp8,0,4.331226666768392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,96,128,0,1,fp8,fp8,0,2.451002597808838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,96,128,0,1,float16,fp8,0,2.6946452458699546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,1,128,0,1,fp8,fp8,0,2.0427573521931968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,2,128,0,1,float16,float16,0,2.194021383921305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,2,128,0,1,float16,fp8,0,2.185152053833008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,2,128,0,1,fp8,fp8,0,2.0535680452982583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,4,128,0,1,float16,fp8,0,2.215221405029297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,4,128,0,1,fp8,fp8,0,2.091551939646403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,8,128,0,1,float16,float16,0,2.342186609903971
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,8,128,0,1,float16,fp8,0,2.325647989908854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,8,128,0,1,fp8,fp8,0,2.1810506184895835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,96,128,0,1,float16,float16,0,1.401626745859782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,96,128,0,1,float16,fp8,0,1.3389867146809895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,96,4,128,0,1,float16,float16,0,2.2127040227254233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,96,128,0,1,fp8,fp8,0,1.216048002243042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,1,128,0,1,float16,float16,0,1.0972853501637776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,1,128,0,1,float16,fp8,0,1.0954773426055908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,1,128,0,1,fp8,fp8,0,1.0209120114644368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,2,128,0,1,float16,float16,0,1.1038560072580974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,2,128,0,1,fp8,fp8,0,1.0367039839426677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,2,128,0,1,float16,fp8,0,1.1058666706085205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,4,128,0,1,float16,float16,0,1.1046773592631023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,4,128,0,1,float16,fp8,0,1.1070666313171387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,4,128,0,1,fp8,fp8,0,1.0428746541341145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,8,128,0,1,float16,float16,0,1.1301493644714355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,8,128,0,1,float16,fp8,0,1.1180213292439778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,96,8,128,0,1,fp8,fp8,0,1.0951893329620361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,96,128,0,1,fp8,fp8,0,0.6136480172475179
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,1,128,0,1,float16,float16,0,0.5593546628952026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,96,128,0,1,float16,float16,0,0.7007199923197428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,1,128,0,1,float16,fp8,0,0.5618933439254761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,96,128,0,1,float16,fp8,0,0.6816106637318929
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,1,128,0,1,fp8,fp8,0,0.5124053160349528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,2,128,0,1,float16,float16,0,0.5644586483637491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,2,128,0,1,float16,fp8,0,0.5624693234761556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,2,128,0,1,fp8,fp8,0,0.5251253445943197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,4,128,0,1,float16,float16,0,0.5646933317184448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,4,128,0,1,float16,fp8,0,0.5628906488418579
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,4,128,0,1,fp8,fp8,0,0.5244799852371216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,8,128,0,1,float16,float16,0,0.5722346703211466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,8,128,0,1,float16,fp8,0,0.5685919920603434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,96,8,128,0,1,fp8,fp8,0,0.5554506778717041
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,96,128,0,1,fp8,fp8,0,0.31411733229955036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,1,128,0,1,float16,fp8,0,0.2928853432337443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,96,128,0,1,float16,float16,0,0.36264534791310626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,96,128,0,1,float16,fp8,0,0.35262401898701984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,1,128,0,1,float16,float16,0,0.2927199999491374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,1,128,0,1,fp8,fp8,0,0.2669493357340495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,2,128,0,1,float16,float16,0,0.2923733393351237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,2,128,0,1,float16,fp8,0,0.2929439942042033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,2,128,0,1,fp8,fp8,0,0.26972800493240356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,4,128,0,1,float16,fp8,0,0.29367466767628986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,4,128,0,1,fp8,fp8,0,0.27134400606155396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,8,128,0,1,fp8,fp8,0,0.2799946665763855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,8,128,0,1,float16,float16,0,0.29897065957387287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,8,128,0,1,float16,fp8,0,0.2935306628545125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,96,4,128,0,1,float16,float16,0,0.29183467229207355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,96,128,0,1,float16,float16,0,0.1916853388150533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,96,128,0,1,float16,fp8,0,0.1888586680094401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,96,128,0,1,fp8,fp8,0,0.15987733006477356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,1,128,0,1,float16,float16,0,0.15607999761899313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,1,128,0,1,float16,fp8,0,0.1552906632423401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,1,128,0,1,fp8,fp8,0,0.1301706631978353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,2,128,0,1,float16,float16,0,0.1581013302008311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,2,128,0,1,float16,fp8,0,0.1578933298587799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,2,128,0,1,fp8,fp8,0,0.13248533010482788
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,4,128,0,1,float16,float16,0,0.15781333049138388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,4,128,0,1,float16,fp8,0,0.15942399700482687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,4,128,0,1,fp8,fp8,0,0.1336373289426168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,8,128,0,1,float16,float16,0,0.1583093305428823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,8,128,0,1,float16,fp8,0,0.158351997534434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,96,8,128,0,1,fp8,fp8,0,0.1366933286190033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,96,128,0,1,float16,float16,0,0.11189333597819011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,96,128,0,1,float16,fp8,0,0.10795733332633972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,96,128,0,1,fp8,fp8,0,0.08820266524950664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,1,128,0,1,float16,float16,0,0.08444799979527791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,1,128,0,1,float16,fp8,0,0.08570667107899983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,1,128,0,1,fp8,fp8,0,0.07156266768773396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,2,128,0,1,float16,float16,0,0.08601066470146179
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,2,128,0,1,fp8,fp8,0,0.07336533566315968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,2,128,0,1,float16,fp8,0,0.08542399605115254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,4,128,0,1,float16,fp8,0,0.08520533641179402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,4,128,0,1,float16,float16,0,0.08601066470146179
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,4,128,0,1,fp8,fp8,0,0.07397333284219106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,8,128,0,1,float16,float16,0,0.0870293378829956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,8,128,0,1,float16,fp8,0,0.08674666285514832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,96,8,128,0,1,fp8,fp8,0,0.07519466678301494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,96,128,0,1,float16,float16,0,0.05917333563168844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,96,128,0,1,float16,fp8,0,0.05787200232346853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,96,128,0,1,fp8,fp8,0,0.05241066714127859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,1,128,0,1,float16,float16,0,0.048112000028292336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,1,128,0,1,float16,fp8,0,0.04940799872080485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,1,128,0,1,fp8,fp8,0,0.04303466777006785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,2,128,0,1,float16,float16,0,0.04894933104515076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,2,128,0,1,float16,fp8,0,0.04846400022506714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,2,128,0,1,fp8,fp8,0,0.04211199780305227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,4,128,0,1,fp8,fp8,0,0.04314666489760081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,4,128,0,1,float16,float16,0,0.049413333336512245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,8,128,0,1,float16,fp8,0,0.049440001447995506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,4,128,0,1,float16,fp8,0,0.04882133503754934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,8,128,0,1,float16,float16,0,0.04913066824277242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,96,8,128,0,1,fp8,fp8,0,0.043568000197410583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,1,128,0,1,float16,float16,0,3.7372105916341147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,1,128,0,1,float16,fp8,0,3.6753600438435874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,1,128,0,1,fp8,fp8,0,3.263279914855957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,2,128,0,1,float16,float16,0,3.774282773335775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,2,128,0,1,float16,fp8,0,3.774341265360514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,2,128,0,1,fp8,fp8,0,3.2760534286499023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,4,128,0,1,fp8,fp8,0,3.3415091832478843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,4,128,0,1,float16,float16,0,3.9372533162434897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,4,128,0,1,float16,fp8,0,3.90008544921875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,8,128,0,1,float16,float16,0,4.073909441630046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,8,128,0,1,float16,fp8,0,4.067141215006511
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,96,8,128,0,1,fp8,fp8,0,3.5349601109822593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,96,128,0,1,float16,float16,0,2.464885393778483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,1,128,0,1,float16,float16,0,1.8534666697184246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,96,128,0,1,float16,fp8,0,2.390442689259847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,96,128,0,1,fp8,fp8,0,2.067509333292643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,1,128,0,1,float16,fp8,0,1.8564693133036296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,1,128,0,1,fp8,fp8,0,1.631050745646159
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,2,128,0,1,float16,float16,0,1.8626559575398762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,2,128,0,1,fp8,fp8,0,1.6293652852376301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,2,128,0,1,float16,fp8,0,1.8814560572306316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,4,128,0,1,float16,float16,0,1.9218026796976726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,4,128,0,1,float16,fp8,0,1.8909279505411785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,4,128,0,1,fp8,fp8,0,1.6640960375467937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,96,128,0,1,float16,float16,0,1.2334240277608235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,96,128,0,1,fp8,fp8,0,1.0365920066833496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,8,128,0,1,float16,float16,0,2.0238399505615234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,96,128,0,1,float16,fp8,0,1.1925546328226726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,8,128,0,1,float16,fp8,0,2.013882637023926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,96,8,128,0,1,fp8,fp8,0,1.762623945871989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,1,128,0,1,float16,float16,0,0.9335573514302572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,1,128,0,1,fp8,fp8,0,0.8097600142161051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,1,128,0,1,float16,fp8,0,0.9374773502349854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,2,128,0,1,float16,float16,0,0.9437546730041504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,2,128,0,1,float16,fp8,0,0.9467999935150146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,2,128,0,1,fp8,fp8,0,0.8277866840362549
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,4,128,0,1,float16,float16,0,0.9481546878814697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,4,128,0,1,float16,fp8,0,0.9465760389963785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,4,128,0,1,fp8,fp8,0,0.82968537012736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,8,128,0,1,float16,float16,0,0.9896213213602701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,8,128,0,1,float16,fp8,0,0.9646986325581869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,96,128,0,1,float16,float16,0,0.618944009145101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,96,8,128,0,1,fp8,fp8,0,0.888533353805542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,96,128,0,1,float16,fp8,0,0.6013226509094238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,96,128,0,1,fp8,fp8,0,0.5169546604156494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,1,128,0,1,float16,float16,0,0.47811734676361084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,1,128,0,1,float16,fp8,0,0.475381334622701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,1,128,0,1,fp8,fp8,0,0.4079786539077759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,2,128,0,1,fp8,fp8,0,0.4127946694691976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,2,128,0,1,float16,float16,0,0.4798986514409383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,2,128,0,1,float16,fp8,0,0.48050133387247723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,4,128,0,1,float16,float16,0,0.483514666557312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,4,128,0,1,float16,fp8,0,0.4819360176722209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,4,128,0,1,fp8,fp8,0,0.42083199818929035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,8,128,0,1,float16,float16,0,0.48815464973449707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,8,128,0,1,float16,fp8,0,0.48757867018381756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,96,8,128,0,1,fp8,fp8,0,0.44704532623291016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,96,128,0,1,float16,float16,0,0.3223573366800944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,96,128,0,1,float16,fp8,0,0.3111199935277303
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,96,128,0,1,fp8,fp8,0,0.2688213388125102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,1,128,0,1,float16,float16,0,0.24877333641052246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,1,128,0,1,float16,fp8,0,0.2500320076942444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,1,128,0,1,fp8,fp8,0,0.21250667174657187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,2,128,0,1,float16,float16,0,0.25101866324742633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,2,128,0,1,float16,fp8,0,0.2490666707356771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,2,128,0,1,fp8,fp8,0,0.21618133783340454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,4,128,0,1,float16,float16,0,0.25094399849573773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,4,128,0,1,float16,fp8,0,0.2513599991798401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,4,128,0,1,fp8,fp8,0,0.21770666042963663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,8,128,0,1,float16,float16,0,0.25474133094151813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,8,128,0,1,float16,fp8,0,0.2528853416442871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,96,8,128,0,1,fp8,fp8,0,0.2298133373260498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,96,128,0,1,float16,float16,0,0.17160000403722128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,96,128,0,1,float16,fp8,0,0.1665066679318746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,96,128,0,1,fp8,fp8,0,0.14502933621406555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,1,128,0,1,float16,float16,0,0.13451199730237326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,1,128,0,1,float16,fp8,0,0.13296533624331155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,1,128,0,1,fp8,fp8,0,0.11645866433779399
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,2,128,0,1,float16,float16,0,0.1344053347905477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,2,128,0,1,float16,fp8,0,0.13507733742396036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,2,128,0,1,fp8,fp8,0,0.11838400363922119
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,4,128,0,1,float16,float16,0,0.13471466302871704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,4,128,0,1,float16,fp8,0,0.1354986627896627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,4,128,0,1,fp8,fp8,0,0.11802666385968526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,8,128,0,1,fp8,fp8,0,0.12122666835784912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,8,128,0,1,float16,float16,0,0.13685866196950278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,96,8,128,0,1,float16,fp8,0,0.13743467132250467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,96,128,0,1,float16,float16,0,0.10147200028101604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,96,128,0,1,float16,fp8,0,0.09803733229637146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,96,128,0,1,fp8,fp8,0,0.08305599788824718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,1,128,0,1,float16,float16,0,0.07602133353551228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,1,128,0,1,float16,fp8,0,0.07645333309968312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,1,128,0,1,fp8,fp8,0,0.0645653357108434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,2,128,0,1,float16,float16,0,0.07617599765459697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,2,128,0,1,float16,fp8,0,0.07577066620190938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,2,128,0,1,fp8,fp8,0,0.06542400022347768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,4,128,0,1,float16,float16,0,0.07654933134714763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,8,128,0,1,float16,float16,0,0.0765119989713033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,4,128,0,1,float16,fp8,0,0.07644266883532207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,4,128,0,1,fp8,fp8,0,0.0661653329928716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,8,128,0,1,float16,fp8,0,0.07736533383528392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,96,8,128,0,1,fp8,fp8,0,0.06804800033569336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,96,128,0,1,float16,float16,0,0.05363733569780985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,96,128,0,1,float16,fp8,0,0.05243733525276184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,96,128,0,1,fp8,fp8,0,0.048298666874567665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,1,128,0,1,fp8,fp8,0,0.03782399992148081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,1,128,0,1,float16,float16,0,0.04282666742801666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,1,128,0,1,float16,fp8,0,0.043920000394185386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,2,128,0,1,float16,float16,0,0.04355733096599579
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,2,128,0,1,float16,fp8,0,0.043007999658584595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,2,128,0,1,fp8,fp8,0,0.037946666280428566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,4,128,0,1,fp8,fp8,0,0.03817066550254822
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,4,128,0,1,float16,float16,0,0.04329599936803182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,4,128,0,1,float16,fp8,0,0.04376000165939331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,8,128,0,1,float16,float16,0,0.043280000487963356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,8,128,0,1,float16,fp8,0,0.04342400034268697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,96,8,128,0,1,fp8,fp8,0,0.039146666725476585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,96,128,0,1,float16,float16,0,0.033728001018365227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,1,128,0,1,float16,float16,0,0.030794667700926464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,96,128,0,1,float16,fp8,0,0.03357866654793421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,96,128,0,1,fp8,fp8,0,0.032127998769283295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,1,128,0,1,float16,fp8,0,0.030847998956839245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,2,128,0,1,fp8,fp8,0,0.028394666810830433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,1,128,0,1,fp8,fp8,0,0.02829333394765854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,2,128,0,1,float16,fp8,0,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,2,128,0,1,float16,float16,0,0.03130666663249334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,4,128,0,1,float16,float16,0,0.030847998956839245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,4,128,0,1,float16,fp8,0,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,4,128,0,1,fp8,fp8,0,0.028399998943010967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,8,128,0,1,float16,float16,0,0.03195200115442276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,8,128,0,1,float16,fp8,0,0.03162666658560435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,96,8,128,0,1,fp8,fp8,0,0.02864533414443334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,1,128,0,1,float16,float16,0,1.6702772776285808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,1,128,0,1,float16,fp8,0,1.6661920547485352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,1,128,0,1,fp8,fp8,0,1.600661277770996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,2,128,0,1,float16,float16,0,1.694101333618164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,2,128,0,1,float16,fp8,0,1.7057226498921711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,2,128,0,1,fp8,fp8,0,1.6393225987752278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,4,128,0,1,float16,float16,0,1.7881813049316406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,4,128,0,1,float16,fp8,0,1.7253012657165527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,4,128,0,1,fp8,fp8,0,1.678010622660319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,8,128,0,1,float16,float16,0,1.826533317565918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,8,128,0,1,float16,fp8,0,1.8051466941833496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,96,8,128,0,1,fp8,fp8,0,1.7666826248168945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,96,128,0,1,float16,float16,0,1.1458186308542888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,96,128,0,1,float16,fp8,0,1.1201386451721191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,1,128,0,1,float16,float16,0,0.8416053454081217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,96,128,0,1,fp8,fp8,0,1.0376319885253906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,1,128,0,1,float16,fp8,0,0.8467893600463867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,1,128,0,1,fp8,fp8,0,0.8022507031758627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,2,128,0,1,float16,float16,0,0.8532160123189291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,2,128,0,1,float16,fp8,0,0.8534399668375651
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,2,128,0,1,fp8,fp8,0,0.8247199853261312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,4,128,0,1,float16,float16,0,0.8603306611378988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,4,128,0,1,float16,fp8,0,0.8636853694915771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,4,128,0,1,fp8,fp8,0,0.8346347014109293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,8,128,0,1,float16,float16,0,0.898202657699585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,8,128,0,1,float16,fp8,0,0.8802133401234945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,96,128,0,1,float16,float16,0,0.5850613514582316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,96,128,0,1,float16,fp8,0,0.5642346541086832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,96,8,128,0,1,fp8,fp8,0,0.8826560179392496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,96,128,0,1,fp8,fp8,0,0.5153599977493286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,1,128,0,1,float16,float16,0,0.43192001183827716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,1,128,0,1,float16,fp8,0,0.4325439929962158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,1,128,0,1,fp8,fp8,0,0.4034880002339681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,2,128,0,1,float16,float16,0,0.43643732865651447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,2,128,0,1,float16,fp8,0,0.4354986747105916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,2,128,0,1,fp8,fp8,0,0.4150720040003459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,4,128,0,1,float16,float16,0,0.43938132127126056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,4,128,0,1,float16,fp8,0,0.4379093249638875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,8,128,0,1,float16,float16,0,0.4461919864018758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,4,128,0,1,fp8,fp8,0,0.4196639855702718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,96,128,0,1,float16,float16,0,0.3057760000228882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,8,128,0,1,float16,fp8,0,0.44521065553029376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,96,8,128,0,1,fp8,fp8,0,0.44540266195933026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,96,128,0,1,float16,fp8,0,0.29586132367451984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,96,128,0,1,fp8,fp8,0,0.264682670434316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,1,128,0,1,float16,float16,0,0.22778666019439697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,1,128,0,1,float16,fp8,0,0.2266026735305786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,1,128,0,1,fp8,fp8,0,0.21179733673731485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,2,128,0,1,float16,float16,0,0.22748800118764242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,2,128,0,1,float16,fp8,0,0.22817067305246988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,2,128,0,1,fp8,fp8,0,0.21491734186808267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,4,128,0,1,float16,float16,0,0.23005332549413046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,4,128,0,1,float16,fp8,0,0.23056532939275107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,4,128,0,1,fp8,fp8,0,0.21619733174641928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,8,128,0,1,float16,float16,0,0.2343733310699463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,8,128,0,1,float16,fp8,0,0.23407467206319174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,96,8,128,0,1,fp8,fp8,0,0.23005332549413046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,96,128,0,1,float16,float16,0,0.1660426656405131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,96,128,0,1,float16,fp8,0,0.16220800081888834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,96,128,0,1,fp8,fp8,0,0.14129599928855896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,1,128,0,1,float16,float16,0,0.1227946678797404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,1,128,0,1,float16,fp8,0,0.12329600254694621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,1,128,0,1,fp8,fp8,0,0.1151093343893687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,2,128,0,1,float16,float16,0,0.12342933813730876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,2,128,0,1,float16,fp8,0,0.1250986655553182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,2,128,0,1,fp8,fp8,0,0.11583466331164043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,4,128,0,1,float16,float16,0,0.12545599540074667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,4,128,0,1,float16,fp8,0,0.12562132875124613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,4,128,0,1,fp8,fp8,0,0.11762133240699768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,8,128,0,1,float16,float16,0,0.1269973317782084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,8,128,0,1,float16,fp8,0,0.12782933314641318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,96,128,0,1,float16,fp8,0,0.09474133451779683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,96,8,128,0,1,fp8,fp8,0,0.11980799833933513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,96,128,0,1,float16,float16,0,0.0953493316968282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,96,128,0,1,fp8,fp8,0,0.08053866525491078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,1,128,0,1,float16,float16,0,0.07106133302052815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,1,128,0,1,float16,fp8,0,0.07133866846561432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,1,128,0,1,fp8,fp8,0,0.06406933565934499
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,2,128,0,1,float16,float16,0,0.07124799986680348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,2,128,0,1,float16,fp8,0,0.07125333448251088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,2,128,0,1,fp8,fp8,0,0.06427200138568878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,4,128,0,1,float16,float16,0,0.07231999933719635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,4,128,0,1,float16,fp8,0,0.07157333195209503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,4,128,0,1,fp8,fp8,0,0.06527466575304668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,8,128,0,1,float16,float16,0,0.07247466842333476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,8,128,0,1,float16,fp8,0,0.07214933137098949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,96,8,128,0,1,fp8,fp8,0,0.06705600023269653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,96,128,0,1,float16,float16,0,0.05294933418432871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,96,128,0,1,float16,fp8,0,0.05091733237107595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,96,128,0,1,fp8,fp8,0,0.04800533254941305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,1,128,0,1,float16,float16,0,0.04064533362785975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,1,128,0,1,float16,fp8,0,0.04205333193143209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,1,128,0,1,fp8,fp8,0,0.03710933278004328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,2,128,0,1,float16,float16,0,0.04141866664091746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,2,128,0,1,float16,fp8,0,0.041050667564074196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,2,128,0,1,fp8,fp8,0,0.037621334195137024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,4,128,0,1,float16,float16,0,0.04120533416668574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,4,128,0,1,float16,fp8,0,0.04194133480389913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,4,128,0,1,fp8,fp8,0,0.037647999823093414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,8,128,0,1,float16,float16,0,0.04186133543650309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,8,128,0,1,float16,fp8,0,0.041802664597829185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,96,8,128,0,1,fp8,fp8,0,0.03830400109291077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,96,128,0,1,float16,float16,0,0.03252266595760981
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,96,128,0,1,float16,fp8,0,0.03313600023587545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,96,128,0,1,fp8,fp8,0,0.03223466624816259
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,1,128,0,1,float16,float16,0,0.03071466585000356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,1,128,0,1,float16,fp8,0,0.03073066721359889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,1,128,0,1,fp8,fp8,0,0.028170667588710785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,2,128,0,1,float16,float16,0,0.030671998858451843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,2,128,0,1,float16,fp8,0,0.03084266682465871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,2,128,0,1,fp8,fp8,0,0.02787200113137563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,4,128,0,1,float16,float16,0,0.030613332986831665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,4,128,0,1,float16,fp8,0,0.030997333427270252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,4,128,0,1,fp8,fp8,0,0.028351999819278717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,8,128,0,1,float16,float16,0,0.03107200066248576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,8,128,0,1,float16,fp8,0,0.031066666046778362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,96,8,128,0,1,fp8,fp8,0,0.028431999186674755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,96,128,0,1,float16,float16,0,0.022384000321229298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,96,128,0,1,float16,fp8,0,0.022469334304332733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,96,128,0,1,fp8,fp8,0,0.022437334060668945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,1,128,0,1,float16,float16,0,0.02045866722861926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,1,128,0,1,float16,fp8,0,0.020527999848127365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,1,128,0,1,fp8,fp8,0,0.020400000115235645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,2,128,0,1,float16,float16,0,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,2,128,0,1,float16,fp8,0,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,2,128,0,1,fp8,fp8,0,0.020303999384244282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,4,128,0,1,float16,float16,0,0.02060266708334287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,4,128,0,1,float16,fp8,0,0.02065066620707512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,4,128,0,1,fp8,fp8,0,0.020784000555674236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,8,128,0,1,float16,float16,0,0.021429332594076794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,8,128,0,1,float16,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,96,8,128,0,1,fp8,fp8,0,0.02045866722861926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,1,128,0,1,float16,float16,0,0.9981280167897543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,1,128,0,1,float16,fp8,0,1.0004053115844727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,1,128,0,1,fp8,fp8,0,1.1340693632761638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,2,128,0,1,float16,float16,0,1.0075039863586426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,2,128,0,1,float16,fp8,0,1.0079786777496338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,2,128,0,1,fp8,fp8,0,1.1531413396199544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,4,128,0,1,float16,float16,0,1.0066400369008381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,4,128,0,1,float16,fp8,0,1.0183520317077637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,4,128,0,1,fp8,fp8,0,1.1660106976826985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,8,128,0,1,float16,fp8,0,1.031882683436076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,8,128,0,1,float16,float16,0,1.0635680357615154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,96,128,0,1,float16,float16,0,0.657045324643453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,96,128,0,1,float16,fp8,0,0.638703982035319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,96,8,128,0,1,fp8,fp8,0,1.2274239857991536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,96,128,0,1,fp8,fp8,0,0.6799360116322836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,1,128,0,1,float16,float16,0,0.5073599815368652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,1,128,0,1,float16,fp8,0,0.5069013436635336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,1,128,0,1,fp8,fp8,0,0.5740586519241333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,2,128,0,1,float16,float16,0,0.512234648068746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,2,128,0,1,float16,fp8,0,0.5118666489919027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,2,128,0,1,fp8,fp8,0,0.5773119926452637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,4,128,0,1,float16,float16,0,0.5101226568222046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,4,128,0,1,float16,fp8,0,0.5160373449325562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,4,128,0,1,fp8,fp8,0,0.5855626662572225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,8,128,0,1,float16,float16,0,0.5233759880065918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,8,128,0,1,float16,fp8,0,0.5189013481140137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,96,128,0,1,float16,float16,0,0.3397333224614461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,96,8,128,0,1,fp8,fp8,0,0.6151946783065796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,96,128,0,1,float16,fp8,0,0.331712007522583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,96,128,0,1,fp8,fp8,0,0.3484586477279663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,1,128,0,1,float16,float16,0,0.2617280085881551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,1,128,0,1,fp8,fp8,0,0.2948639988899231
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,1,128,0,1,float16,fp8,0,0.2619253396987915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,2,128,0,1,float16,float16,0,0.2630079984664917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,2,128,0,1,float16,fp8,0,0.26410667101542157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,2,128,0,1,fp8,fp8,0,0.29818133513132733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,4,128,0,1,float16,float16,0,0.26393600304921466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,4,128,0,1,float16,fp8,0,0.266154666741689
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,4,128,0,1,fp8,fp8,0,0.2993226647377014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,8,128,0,1,float16,float16,0,0.27036799987157184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,8,128,0,1,float16,fp8,0,0.2693866689999898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,96,8,128,0,1,fp8,fp8,0,0.3093493382136027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,96,128,0,1,float16,float16,0,0.1802133321762085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,96,128,0,1,float16,fp8,0,0.17617066701253256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,96,128,0,1,fp8,fp8,0,0.18409599860509238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,1,128,0,1,float16,float16,0,0.13778666655222574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,1,128,0,1,float16,fp8,0,0.13806399703025818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,1,128,0,1,fp8,fp8,0,0.1591200033823649
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,2,128,0,1,float16,float16,0,0.13961600263913473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,2,128,0,1,float16,fp8,0,0.1394773324330648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,2,128,0,1,fp8,fp8,0,0.15825600425402322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,4,128,0,1,float16,float16,0,0.14004799723625183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,4,128,0,1,float16,fp8,0,0.14138133327166238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,4,128,0,1,fp8,fp8,0,0.15973866979281107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,8,128,0,1,float16,float16,0,0.1415786643822988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,8,128,0,1,float16,fp8,0,0.1418346663316091
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,96,8,128,0,1,fp8,fp8,0,0.16225600242614746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,96,128,0,1,float16,float16,0,0.09733333190282185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,96,128,0,1,float16,fp8,0,0.09733333190282185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,96,128,0,1,fp8,fp8,0,0.10077333450317383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,1,128,0,1,float16,float16,0,0.07604800164699554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,2,128,0,1,float16,fp8,0,0.07737066845099132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,1,128,0,1,float16,fp8,0,0.0764160007238388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,1,128,0,1,fp8,fp8,0,0.08616532882054646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,2,128,0,1,float16,float16,0,0.07693333427111308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,2,128,0,1,fp8,fp8,0,0.08647466699282329
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,4,128,0,1,float16,float16,0,0.07710933188597362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,4,128,0,1,float16,fp8,0,0.07745600243409474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,4,128,0,1,fp8,fp8,0,0.08747200171152751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,8,128,0,1,float16,float16,0,0.07795733213424683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,8,128,0,1,float16,fp8,0,0.07881600161393483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,96,8,128,0,1,fp8,fp8,0,0.08963732918103536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,96,128,0,1,float16,float16,0,0.05463466544946035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,96,128,0,1,float16,fp8,0,0.05385600030422211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,96,128,0,1,fp8,fp8,0,0.05893866717815399
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,1,128,0,1,float16,float16,0,0.04433600107828776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,1,128,0,1,float16,fp8,0,0.04420266548792521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,1,128,0,1,fp8,fp8,0,0.04879466692606608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,2,128,0,1,float16,float16,0,0.043765331308046974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,2,128,0,1,float16,fp8,0,0.044394666949907936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,2,128,0,1,fp8,fp8,0,0.048783997694651283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,4,128,0,1,float16,float16,0,0.04436799883842468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,4,128,0,1,float16,fp8,0,0.0450133333603541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,4,128,0,1,fp8,fp8,0,0.04916266600290934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,8,128,0,1,float16,float16,0,0.04427733520666758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,8,128,0,1,float16,fp8,0,0.045370668172836304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,96,8,128,0,1,fp8,fp8,0,0.04975466430187225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,96,128,0,1,float16,float16,0,0.03283733377854029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,96,128,0,1,float16,fp8,0,0.03215999901294708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,96,128,0,1,fp8,fp8,0,0.0354666660229365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,1,128,0,1,float16,float16,0,0.029605334003766377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,1,128,0,1,float16,fp8,0,0.030000001192092896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,1,128,0,1,fp8,fp8,0,0.03192000091075897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,2,128,0,1,float16,float16,0,0.029648000995318096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,2,128,0,1,float16,fp8,0,0.030266667405764263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,2,128,0,1,fp8,fp8,0,0.031557333966096245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,4,128,0,1,float16,float16,0,0.029850666721661884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,4,128,0,1,float16,fp8,0,0.030207999050617218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,4,128,0,1,fp8,fp8,0,0.031898667414983116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,8,128,0,1,float16,float16,0,0.029701332251230877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,8,128,0,1,float16,fp8,0,0.030271999537944794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,96,8,128,0,1,fp8,fp8,0,0.032485333581765495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,96,128,0,1,float16,float16,0,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,96,128,0,1,float16,fp8,0,0.024570666253566742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,96,128,0,1,fp8,fp8,0,0.02641066660483678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,1,128,0,1,float16,float16,0,0.022282667458057404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,1,128,0,1,float16,fp8,0,0.022976001103719074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,1,128,0,1,fp8,fp8,0,0.023914667467276256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,2,128,0,1,float16,float16,0,0.022469334304332733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,2,128,0,1,float16,fp8,0,0.02294933299223582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,2,128,0,1,fp8,fp8,0,0.02420799930890401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,4,128,0,1,float16,float16,0,0.02229333420594533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,4,128,0,1,float16,fp8,0,0.02292266736427943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,4,128,0,1,fp8,fp8,0,0.024069334069887798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,8,128,0,1,float16,float16,0,0.022304000953833263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,8,128,0,1,float16,fp8,0,0.022986667851607006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,96,8,128,0,1,fp8,fp8,0,0.02437866727511088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,96,128,0,1,float16,float16,0,0.017562666287024815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,96,128,0,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,96,128,0,1,fp8,fp8,0,0.018186666071414948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,1,128,0,1,float16,float16,0,0.016250666230916977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,1,128,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,1,128,0,1,fp8,fp8,0,0.017685333887736004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,2,128,0,1,float16,float16,0,0.016261332978804905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,4,128,0,1,float16,float16,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,2,128,0,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,2,128,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,4,128,0,1,float16,fp8,0,0.01747200017174085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,4,128,0,1,fp8,fp8,0,0.017557332913080852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,8,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,8,128,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,96,8,128,0,1,fp8,fp8,0,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,1,128,0,1,float16,float16,0,0.694976011912028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,1,128,0,1,float16,fp8,0,0.6994506518046061
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,1,128,0,1,fp8,fp8,0,0.9017333189646403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,2,128,0,1,float16,float16,0,0.699018637339274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,2,128,0,1,float16,fp8,0,0.7055359681447347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,2,128,0,1,fp8,fp8,0,0.9092746575673422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,4,128,0,1,float16,float16,0,0.7038559913635254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,4,128,0,1,float16,fp8,0,0.699023962020874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,4,128,0,1,fp8,fp8,0,0.9081546465555826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,8,128,0,1,float16,float16,0,0.7129813035329183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,8,128,0,1,float16,fp8,0,0.7107626597086588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,96,128,0,1,float16,float16,0,0.43930665651957196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,96,128,0,1,float16,fp8,0,0.4283466736475627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,96,8,128,0,1,fp8,fp8,0,0.9523893197377523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,96,128,0,1,fp8,fp8,0,0.5119626522064209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,1,128,0,1,float16,float16,0,0.3559733231862386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,1,128,0,1,float16,fp8,0,0.3542506694793701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,1,128,0,1,fp8,fp8,0,0.45848000049591064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,2,128,0,1,float16,float16,0,0.35651199022928876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,2,128,0,1,float16,fp8,0,0.3617813189824422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,2,128,0,1,fp8,fp8,0,0.46485332647959393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,4,128,0,1,fp8,fp8,0,0.4642239809036255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,4,128,0,1,float16,float16,0,0.3595786492029826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,4,128,0,1,float16,fp8,0,0.35920532544453937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,8,128,0,1,float16,float16,0,0.36157333850860596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,8,128,0,1,float16,fp8,0,0.3654133478800456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,96,128,0,1,float16,float16,0,0.22895467281341553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,96,8,128,0,1,fp8,fp8,0,0.478490670522054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,96,128,0,1,float16,fp8,0,0.2249600092569987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,1,128,0,1,float16,fp8,0,0.18465065956115723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,96,128,0,1,fp8,fp8,0,0.2645973364512126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,1,128,0,1,float16,float16,0,0.18443200985590616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,1,128,0,1,fp8,fp8,0,0.23906133572260538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,2,128,0,1,float16,float16,0,0.18711467583974203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,2,128,0,1,float16,fp8,0,0.1864853302637736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,2,128,0,1,fp8,fp8,0,0.24333866437276205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,4,128,0,1,float16,float16,0,0.1872373421986898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,4,128,0,1,float16,fp8,0,0.18846933046976724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,4,128,0,1,fp8,fp8,0,0.24078933397928873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,8,128,0,1,float16,fp8,0,0.18954666455586752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,8,128,0,1,fp8,fp8,0,0.2460106611251831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,96,8,128,0,1,float16,float16,0,0.1885333259900411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,96,128,0,1,float16,float16,0,0.12050132950146993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,96,128,0,1,float16,fp8,0,0.11845333377520244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,96,128,0,1,fp8,fp8,0,0.14149333039919534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,1,128,0,1,float16,fp8,0,0.10065066814422607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,1,128,0,1,float16,float16,0,0.10015466809272766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,1,128,0,1,fp8,fp8,0,0.128330667813619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,2,128,0,1,float16,float16,0,0.09934399525324504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,2,128,0,1,float16,fp8,0,0.09942932923634847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,2,128,0,1,fp8,fp8,0,0.1283253331979116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,4,128,0,1,float16,float16,0,0.1020906666914622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,4,128,0,1,float16,fp8,0,0.10079466303189595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,4,128,0,1,fp8,fp8,0,0.12967999776204428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,8,128,0,1,float16,float16,0,0.10090133547782898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,8,128,0,1,float16,fp8,0,0.1016319990158081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,96,8,128,0,1,fp8,fp8,0,0.13115732868512472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,96,128,0,1,float16,float16,0,0.06796800096829732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,96,128,0,1,float16,fp8,0,0.06576000154018402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,1,128,0,1,fp8,fp8,0,0.0699786643187205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,2,128,0,1,float16,float16,0,0.055888002117474876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,96,128,0,1,fp8,fp8,0,0.07989866534868877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,1,128,0,1,float16,float16,0,0.055359999338785805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,1,128,0,1,float16,fp8,0,0.05517866710821787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,2,128,0,1,float16,fp8,0,0.055813332398732506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,2,128,0,1,fp8,fp8,0,0.06999999781449635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,4,128,0,1,float16,float16,0,0.05596800148487091
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,4,128,0,1,float16,fp8,0,0.05551999807357788
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,4,128,0,1,fp8,fp8,0,0.07064533233642578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,8,128,0,1,float16,float16,0,0.055455997586250305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,8,128,0,1,float16,fp8,0,0.05578133463859558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,96,128,0,1,float16,fp8,0,0.037605332831541695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,96,128,0,1,float16,float16,0,0.03938133269548416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,96,128,0,1,fp8,fp8,0,0.04772266745567322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,96,8,128,0,1,fp8,fp8,0,0.07149866720040639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,1,128,0,1,float16,float16,0,0.03506666670242945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,1,128,0,1,float16,fp8,0,0.034901333351929985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,1,128,0,1,fp8,fp8,0,0.04292800029118856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,2,128,0,1,float16,float16,0,0.03473600000143051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,2,128,0,1,float16,fp8,0,0.03484266748030981
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,2,128,0,1,fp8,fp8,0,0.04249600072701772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,4,128,0,1,float16,float16,0,0.03478399912516276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,4,128,0,1,float16,fp8,0,0.03528533379236857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,4,128,0,1,fp8,fp8,0,0.04345599810282389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,8,128,0,1,float16,float16,0,0.0344106654326121
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,8,128,0,1,float16,fp8,0,0.034714666505654655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,96,8,128,0,1,fp8,fp8,0,0.04363733530044556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,96,128,0,1,float16,float16,0,0.025055999557177227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,96,128,0,1,float16,fp8,0,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,96,128,0,1,fp8,fp8,0,0.030074665943781536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,1,128,0,1,float16,float16,0,0.02367999901374181
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,1,128,0,1,float16,fp8,0,0.02369066576162974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,1,128,0,1,fp8,fp8,0,0.028005334238211315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,2,128,0,1,float16,float16,0,0.024112001061439514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,2,128,0,1,float16,fp8,0,0.024133334557215374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,2,128,0,1,fp8,fp8,0,0.028213332096735638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,4,128,0,1,float16,float16,0,0.023887999355793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,4,128,0,1,float16,fp8,0,0.024117333193620045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,4,128,0,1,fp8,fp8,0,0.02880000074704488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,8,128,0,1,float16,float16,0,0.024319998919963837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,8,128,0,1,float16,fp8,0,0.024325333535671234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,96,8,128,0,1,fp8,fp8,0,0.02779199928045273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,96,128,0,1,float16,float16,0,0.01941866676012675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,96,128,0,1,float16,fp8,0,0.01970133309563001
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,96,128,0,1,fp8,fp8,0,0.022570667167504627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,1,128,0,1,float16,float16,0,0.018378666291634243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,1,128,0,1,float16,fp8,0,0.018426666657129925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,1,128,0,1,fp8,fp8,0,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,2,128,0,1,float16,float16,0,0.018618666877349217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,2,128,0,1,float16,fp8,0,0.019941333681344986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,2,128,0,1,fp8,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,4,128,0,1,float16,float16,0,0.01869333287080129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,4,128,0,1,float16,fp8,0,0.018373332917690277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,4,128,0,1,fp8,fp8,0,0.02165866643190384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,8,128,0,1,float16,float16,0,0.01798933371901512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,8,128,0,1,float16,fp8,0,0.019754666835069656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,96,8,128,0,1,fp8,fp8,0,0.021733333667119343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,96,128,0,1,float16,float16,0,0.01553600033124288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,96,128,0,1,float16,fp8,0,0.015957333147525787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,96,128,0,1,fp8,fp8,0,0.018122666825850803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,1,128,0,1,float16,float16,0,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,1,128,0,1,float16,fp8,0,0.015658666690190632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,1,128,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,2,128,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,2,128,0,1,float16,fp8,0,0.015594666202863058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,2,128,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,4,128,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,4,128,0,1,float16,fp8,0,0.01591466615597407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,4,128,0,1,fp8,fp8,0,0.017594666530688603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,8,128,0,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,8,128,0,1,float16,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,96,8,128,0,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,1,128,0,1,float16,float16,0,0.5701653162638346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,1,128,0,1,float16,fp8,0,0.5744053522745768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,2,128,0,1,float16,float16,0,0.5713333288828532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,1,128,0,1,fp8,fp8,0,0.8012586434682211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,2,128,0,1,float16,fp8,0,0.5732746521631876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,2,128,0,1,fp8,fp8,0,0.806266705195109
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,4,128,0,1,float16,float16,0,0.5740906794865926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,4,128,0,1,float16,fp8,0,0.5791306495666504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,4,128,0,1,fp8,fp8,0,0.7968693574269613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,8,128,0,1,float16,float16,0,0.5837386846542358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,8,128,0,1,float16,fp8,0,0.5767253239949545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,96,128,0,1,float16,float16,0,0.34809064865112305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,96,128,0,1,float16,fp8,0,0.34751466910044354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,96,128,0,1,fp8,fp8,0,0.43086934089660645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,96,8,128,0,1,fp8,fp8,0,0.8205173015594482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,1,128,0,1,float16,float16,0,0.2908426721890767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,1,128,0,1,float16,fp8,0,0.2930453419685364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,1,128,0,1,fp8,fp8,0,0.4071679910024007
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,2,128,0,1,float16,float16,0,0.2983520030975342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,2,128,0,1,float16,fp8,0,0.29605333010355633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,2,128,0,1,fp8,fp8,0,0.41068800290425617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,4,128,0,1,float16,float16,0,0.29309866825739544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,4,128,0,1,float16,fp8,0,0.29677865902582806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,4,128,0,1,fp8,fp8,0,0.407477339108785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,8,128,0,1,float16,float16,0,0.30018667380015057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,8,128,0,1,float16,fp8,0,0.2969920039176941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,96,128,0,1,float16,float16,0,0.17637866735458374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,96,8,128,0,1,fp8,fp8,0,0.41475733121236164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,96,128,0,1,float16,fp8,0,0.17440533638000488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,96,128,0,1,fp8,fp8,0,0.22427733739217123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,1,128,0,1,float16,float16,0,0.1521013379096985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,1,128,0,1,float16,fp8,0,0.15203199783960977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,1,128,0,1,fp8,fp8,0,0.2119093338648478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,2,128,0,1,float16,float16,0,0.15310399731000265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,2,128,0,1,float16,fp8,0,0.15235199530919394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,2,128,0,1,fp8,fp8,0,0.21287999550501505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,4,128,0,1,float16,float16,0,0.1530933380126953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,4,128,0,1,float16,fp8,0,0.15276267131169638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,4,128,0,1,fp8,fp8,0,0.2118826707204183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,8,128,0,1,float16,float16,0,0.15405333042144775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,8,128,0,1,float16,fp8,0,0.1543359955151876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,96,8,128,0,1,fp8,fp8,0,0.21520533164342245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,96,128,0,1,float16,float16,0,0.09501333038012187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,96,128,0,1,float16,fp8,0,0.09175466497739156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,96,128,0,1,fp8,fp8,0,0.12186666329701741
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,1,128,0,1,float16,float16,0,0.08242133259773254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,2,128,0,1,float16,fp8,0,0.08182933429876964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,1,128,0,1,float16,fp8,0,0.08154666423797607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,1,128,0,1,fp8,fp8,0,0.1133013367652893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,2,128,0,1,float16,float16,0,0.08232533435026805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,2,128,0,1,fp8,fp8,0,0.11229866743087769
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,4,128,0,1,float16,float16,0,0.08220799763997395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,4,128,0,1,float16,fp8,0,0.08158400158087413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,4,128,0,1,fp8,fp8,0,0.11285866300264995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,8,128,0,1,float16,float16,0,0.08257600168387096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,8,128,0,1,float16,fp8,0,0.08266133566697438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,96,8,128,0,1,fp8,fp8,0,0.11539733409881592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,96,128,0,1,float16,float16,0,0.05093333125114441
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,96,128,0,1,float16,fp8,0,0.05040533343950907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,96,128,0,1,fp8,fp8,0,0.06851199766000111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,1,128,0,1,fp8,fp8,0,0.06428266565004985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,1,128,0,1,float16,float16,0,0.04877333343029022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,1,128,0,1,float16,fp8,0,0.048698668678601585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,2,128,0,1,float16,float16,0,0.048250665267308555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,2,128,0,1,float16,fp8,0,0.048357332746187844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,2,128,0,1,fp8,fp8,0,0.06445866823196411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,4,128,0,1,float16,float16,0,0.049029335379600525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,4,128,0,1,float16,fp8,0,0.04863999783992767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,4,128,0,1,fp8,fp8,0,0.0644053320089976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,8,128,0,1,float16,float16,0,0.048469334840774536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,8,128,0,1,float16,fp8,0,0.05003733436266581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,96,128,0,1,float16,float16,0,0.03215466688076655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,96,8,128,0,1,fp8,fp8,0,0.06461866696675618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,96,128,0,1,float16,fp8,0,0.03200000027815501
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,96,128,0,1,fp8,fp8,0,0.041349334021409355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,1,128,0,1,float16,float16,0,0.03075733284155528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,1,128,0,1,float16,fp8,0,0.031040000418821972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,1,128,0,1,fp8,fp8,0,0.03952533255020777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,2,128,0,1,float16,float16,0,0.031914666295051575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,2,128,0,1,float16,fp8,0,0.03178666780392329
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,2,128,0,1,fp8,fp8,0,0.040133332212766014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,4,128,0,1,float16,float16,0,0.03145066648721695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,4,128,0,1,float16,fp8,0,0.031514666974544525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,4,128,0,1,fp8,fp8,0,0.038890667259693146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,8,128,0,1,float16,float16,0,0.030943999687830608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,8,128,0,1,float16,fp8,0,0.03194133440653483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,96,8,128,0,1,fp8,fp8,0,0.03979733337958654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,96,128,0,1,float16,float16,0,0.02272533377011617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,96,128,0,1,float16,fp8,0,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,96,128,0,1,fp8,fp8,0,0.026789332429567974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,1,128,0,1,float16,float16,0,0.022282667458057404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,1,128,0,1,float16,fp8,0,0.02184533327817917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,1,128,0,1,fp8,fp8,0,0.025493333737055462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,2,128,0,1,float16,float16,0,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,2,128,0,1,float16,fp8,0,0.0229120006163915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,2,128,0,1,fp8,fp8,0,0.025685332715511322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,4,128,0,1,float16,float16,0,0.02214933435122172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,8,128,0,1,fp8,fp8,0,0.02587733417749405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,4,128,0,1,float16,fp8,0,0.021664001047611237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,4,128,0,1,fp8,fp8,0,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,8,128,0,1,float16,float16,0,0.021349333226680756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,96,8,128,0,1,float16,fp8,0,0.022272000710169475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,96,128,0,1,float16,float16,0,0.018453333526849747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,96,128,0,1,float16,fp8,0,0.01870399961868922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,96,128,0,1,fp8,fp8,0,0.02170666555563609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,1,128,0,1,float16,float16,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,1,128,0,1,float16,fp8,0,0.017984000345071156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,1,128,0,1,fp8,fp8,0,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,2,128,0,1,float16,float16,0,0.01783466711640358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,2,128,0,1,float16,fp8,0,0.018122666825850803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,2,128,0,1,fp8,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,4,128,0,1,float16,float16,0,0.018031999468803406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,4,128,0,1,float16,fp8,0,0.018170667191346485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,4,128,0,1,fp8,fp8,0,0.021583999196688335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,8,128,0,1,float16,float16,0,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,8,128,0,1,float16,fp8,0,0.017765333255132038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,96,8,128,0,1,fp8,fp8,0,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,96,128,0,1,float16,float16,0,0.014314666390419006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,96,128,0,1,float16,fp8,0,0.015802666544914246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,96,128,0,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,1,128,0,1,float16,float16,0,0.014101333916187286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,1,128,0,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,1,128,0,1,fp8,fp8,0,0.016415999581416447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,2,128,0,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,2,128,0,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,2,128,0,1,fp8,fp8,0,0.016677333662907284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,4,128,0,1,float16,float16,0,0.014426667243242264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,4,128,0,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,4,128,0,1,fp8,fp8,0,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,8,128,0,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,8,128,0,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,96,8,128,0,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,1,128,0,1,fp8,fp8,0,45.69640604654948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,2,128,0,1,fp8,fp8,0,45.68038431803385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,1,128,0,1,float16,float16,0,66.41831970214844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,2,128,0,1,float16,float16,0,66.17681376139323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,1,128,0,1,float16,fp8,0,67.32813517252605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,2,128,0,1,float16,fp8,0,67.24923197428386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,4,128,0,1,fp8,fp8,0,44.80531311035156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,64,128,0,1,float16,float16,0,35.141273498535156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,4,128,0,1,float16,fp8,0,68.68415832519531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,8,128,0,1,fp8,fp8,0,46.24352010091146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,4,128,0,1,float16,float16,0,67.87549845377605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,64,128,0,1,float16,fp8,0,35.02465565999349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,64,128,0,1,fp8,fp8,0,23.211334228515625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,8,128,0,1,float16,float16,0,66.99485778808594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,64,8,128,0,1,float16,fp8,0,66.40407816569011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,1,128,0,1,fp8,fp8,0,22.457669576009113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,1,128,0,1,float16,float16,0,34.809766133626304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,1,128,0,1,float16,fp8,0,35.026512145996094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,2,128,0,1,fp8,fp8,0,22.60533396402995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,2,128,0,1,float16,float16,0,34.26465098063151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,2,128,0,1,float16,fp8,0,34.48388926188151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,4,128,0,1,fp8,fp8,0,22.608245849609375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,4,128,0,1,float16,float16,0,34.14617156982422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,64,128,0,1,float16,float16,0,17.662804921468098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,4,128,0,1,float16,fp8,0,34.776896158854164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,8,128,0,1,fp8,fp8,0,22.124954223632812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,64,128,0,1,float16,fp8,0,17.61029815673828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,64,128,0,1,fp8,fp8,0,11.853995005289713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,8,128,0,1,float16,float16,0,34.273546854654946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,64,8,128,0,1,float16,fp8,0,34.99837748209635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,1,128,0,1,fp8,fp8,0,12.456923166910807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,1,128,0,1,float16,float16,0,17.667322794596355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,1,128,0,1,float16,fp8,0,16.860479990641277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,2,128,0,1,fp8,fp8,0,11.39731216430664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,2,128,0,1,float16,fp8,0,17.19755681355794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,4,128,0,1,fp8,fp8,0,11.227301279703775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,4,128,0,1,float16,float16,0,17.039525349934895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,2,128,0,1,float16,float16,0,16.730010986328125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,4,128,0,1,float16,fp8,0,16.972816467285156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,8,128,0,1,fp8,fp8,0,12.252267201741537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,8,128,0,1,float16,float16,0,17.84547170003255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,64,128,0,1,float16,float16,0,9.006778717041016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,64,128,0,1,fp8,fp8,0,6.063205083211263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,64,8,128,0,1,float16,fp8,0,18.183226267496746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,64,128,0,1,float16,fp8,0,9.133258819580078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,1,128,0,1,float16,float16,0,8.579290390014648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,1,128,0,1,fp8,fp8,0,5.865530649820964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,2,128,0,1,fp8,fp8,0,5.880832036336263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,1,128,0,1,float16,fp8,0,8.798181533813477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,2,128,0,1,float16,float16,0,8.671493530273438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,2,128,0,1,float16,fp8,0,8.92793083190918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,4,128,0,1,float16,float16,0,8.598106384277344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,4,128,0,1,float16,fp8,0,8.64079475402832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,4,128,0,1,fp8,fp8,0,5.833898544311523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,8,128,0,1,fp8,fp8,0,6.245424270629883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,8,128,0,1,float16,float16,0,8.676485061645508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,64,8,128,0,1,float16,fp8,0,8.700799942016602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,1,128,0,1,fp8,fp8,0,27.046986897786457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,2,128,0,1,fp8,fp8,0,26.325899759928387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,1,128,0,1,float16,float16,0,39.073140462239586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,1,128,0,1,float16,fp8,0,39.606127421061196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,2,128,0,1,float16,float16,0,39.37728627522787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,2,128,0,1,float16,fp8,0,39.399515787760414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,4,128,0,1,float16,float16,0,39.62555185953776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,4,128,0,1,float16,fp8,0,39.393689473470054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,4,128,0,1,fp8,fp8,0,25.652249654134113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,64,128,0,1,fp8,fp8,0,14.055739084879557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,64,128,0,1,float16,float16,0,20.20919418334961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,64,128,0,1,float16,fp8,0,19.928260803222656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,8,128,0,1,fp8,fp8,0,26.605926513671875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,1,128,0,1,float16,float16,0,19.679808298746746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,8,128,0,1,float16,float16,0,39.93738555908203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,1,128,0,1,fp8,fp8,0,13.408245086669922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,1,128,0,1,float16,fp8,0,19.703941345214844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,64,8,128,0,1,float16,fp8,0,39.45952606201172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,2,128,0,1,fp8,fp8,0,13.138277689615885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,2,128,0,1,float16,float16,0,19.397076924641926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,2,128,0,1,float16,fp8,0,19.326148986816406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,4,128,0,1,float16,float16,0,19.673924763997395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,4,128,0,1,fp8,fp8,0,13.184847513834635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,4,128,0,1,float16,fp8,0,19.682479858398438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,64,128,0,1,float16,float16,0,10.016410827636719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,8,128,0,1,float16,float16,0,19.45968500773112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,64,128,0,1,float16,fp8,0,9.747999827067057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,8,128,0,1,fp8,fp8,0,13.090442657470703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,64,128,0,1,fp8,fp8,0,6.916869481404622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,1,128,0,1,float16,float16,0,10.050399780273438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,64,8,128,0,1,float16,fp8,0,19.735076904296875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,1,128,0,1,fp8,fp8,0,6.658816019694011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,2,128,0,1,fp8,fp8,0,6.880853017171224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,1,128,0,1,float16,fp8,0,10.074357350667318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,2,128,0,1,float16,float16,0,9.79757308959961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,2,128,0,1,float16,fp8,0,9.999312082926432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,4,128,0,1,float16,float16,0,10.082938512166342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,4,128,0,1,fp8,fp8,0,6.761002858479817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,4,128,0,1,float16,fp8,0,10.234805425008139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,64,128,0,1,fp8,fp8,0,3.576533317565918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,8,128,0,1,fp8,fp8,0,6.811637242635091
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,64,128,0,1,float16,float16,0,5.172687848409017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,8,128,0,1,float16,float16,0,9.730794906616211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,64,128,0,1,float16,fp8,0,5.3536427815755205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,1,128,0,1,float16,float16,0,5.180794715881348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,64,8,128,0,1,float16,fp8,0,10.069093068440756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,1,128,0,1,fp8,fp8,0,3.445082664489746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,1,128,0,1,float16,fp8,0,5.124213218688965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,2,128,0,1,fp8,fp8,0,3.3102614084879556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,2,128,0,1,float16,float16,0,4.945306777954102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,2,128,0,1,float16,fp8,0,4.933845202128093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,4,128,0,1,fp8,fp8,0,3.403183937072754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,4,128,0,1,float16,float16,0,4.984821319580078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,4,128,0,1,float16,fp8,0,5.077210744222005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,8,128,0,1,fp8,fp8,0,3.407221476236979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,8,128,0,1,float16,float16,0,5.094079971313477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,64,8,128,0,1,float16,fp8,0,5.090970675150554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,1,128,0,1,fp8,fp8,0,18.772069295247395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,2,128,0,1,fp8,fp8,0,18.320091247558594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,1,128,0,1,float16,float16,0,28.25062306722005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,1,128,0,1,float16,fp8,0,28.188618977864582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,2,128,0,1,float16,float16,0,27.57701365152995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,2,128,0,1,float16,fp8,0,28.0797602335612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,4,128,0,1,float16,float16,0,28.206507364908855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,4,128,0,1,float16,fp8,0,28.37517801920573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,4,128,0,1,fp8,fp8,0,18.972373962402344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,64,128,0,1,fp8,fp8,0,9.749381383260092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,64,128,0,1,float16,float16,0,14.104379018147787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,64,128,0,1,float16,fp8,0,14.145418802897135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,1,128,0,1,float16,float16,0,13.765499114990234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,8,128,0,1,fp8,fp8,0,18.39841079711914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,1,128,0,1,fp8,fp8,0,9.321327845255533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,1,128,0,1,float16,fp8,0,13.74554697672526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,8,128,0,1,float16,fp8,0,27.525680541992188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,64,8,128,0,1,float16,float16,0,27.86516825358073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,2,128,0,1,fp8,fp8,0,9.491162618001303
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,2,128,0,1,float16,float16,0,14.037615458170572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,2,128,0,1,float16,fp8,0,13.6527468363444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,4,128,0,1,float16,float16,0,14.042917887369791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,4,128,0,1,fp8,fp8,0,9.187871932983398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,4,128,0,1,float16,fp8,0,14.481204986572266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,64,128,0,1,float16,float16,0,7.460400263468425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,8,128,0,1,fp8,fp8,0,9.937472025553385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,64,128,0,1,float16,fp8,0,7.486805597941081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,64,128,0,1,fp8,fp8,0,4.901610692342122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,8,128,0,1,float16,float16,0,13.8810183207194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,64,8,128,0,1,float16,fp8,0,14.13205337524414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,1,128,0,1,fp8,fp8,0,4.691439946492513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,1,128,0,1,float16,float16,0,7.073850631713867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,2,128,0,1,fp8,fp8,0,4.619215965270996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,1,128,0,1,float16,fp8,0,7.035439809163411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,2,128,0,1,float16,float16,0,7.0606028238932295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,2,128,0,1,float16,fp8,0,6.927093505859375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,4,128,0,1,fp8,fp8,0,4.726693471272786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,4,128,0,1,float16,float16,0,6.854586919148763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,4,128,0,1,float16,fp8,0,7.047322591145833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,64,128,0,1,float16,float16,0,3.6164639790852866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,64,128,0,1,float16,fp8,0,3.6847254435221353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,8,128,0,1,fp8,fp8,0,4.795237223307292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,8,128,0,1,float16,float16,0,6.91917355855306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,64,8,128,0,1,float16,fp8,0,6.970389048258464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,64,128,0,1,fp8,fp8,0,2.5128639539082847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,1,128,0,1,fp8,fp8,0,2.472186724344889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,1,128,0,1,float16,float16,0,3.488389333089193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,1,128,0,1,float16,fp8,0,3.6199893951416016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,2,128,0,1,fp8,fp8,0,2.435317357381185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,2,128,0,1,float16,float16,0,3.5713974634806314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,2,128,0,1,float16,fp8,0,3.66156800587972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,4,128,0,1,float16,float16,0,3.4294665654500327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,4,128,0,1,float16,fp8,0,3.6155573527018228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,4,128,0,1,fp8,fp8,0,2.5048160552978516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,8,128,0,1,fp8,fp8,0,2.479856014251709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,8,128,0,1,float16,float16,0,3.5792694091796875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,64,8,128,0,1,float16,fp8,0,3.6190614700317383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,1,128,0,1,fp8,fp8,0,24.70824432373047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,2,128,0,1,fp8,fp8,0,24.9180425008138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,1,128,0,1,float16,float16,0,37.17201487223307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,1,128,0,1,float16,fp8,0,36.70443216959635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,2,128,0,1,float16,float16,0,37.252479553222656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,2,128,0,1,float16,fp8,0,37.38412221272787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,4,128,0,1,float16,float16,0,36.18864440917969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,4,128,0,1,float16,fp8,0,36.859937032063804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,4,128,0,1,fp8,fp8,0,24.61414337158203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,64,128,0,1,fp8,fp8,0,12.87563705444336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,64,128,0,1,float16,float16,0,18.728682200113933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,64,128,0,1,float16,fp8,0,18.771546681722004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,1,128,0,1,float16,float16,0,18.377936045328777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,8,128,0,1,fp8,fp8,0,24.900985717773438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,8,128,0,1,float16,float16,0,36.49398295084635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,1,128,0,1,fp8,fp8,0,12.462239583333334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,1,128,0,1,float16,fp8,0,18.09765370686849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,2,128,0,1,fp8,fp8,0,12.707290649414062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,64,8,128,0,1,float16,fp8,0,37.22482045491537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,2,128,0,1,float16,float16,0,18.292282104492188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,2,128,0,1,float16,fp8,0,18.61907704671224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,4,128,0,1,fp8,fp8,0,12.38092295328776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,4,128,0,1,float16,float16,0,17.95685323079427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,4,128,0,1,float16,fp8,0,17.806325276692707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,64,128,0,1,fp8,fp8,0,6.5794932047526045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,64,128,0,1,float16,float16,0,9.39467748006185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,8,128,0,1,fp8,fp8,0,12.291520436604818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,64,128,0,1,float16,fp8,0,9.50333340962728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,8,128,0,1,float16,float16,0,18.513594309488933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,1,128,0,1,float16,float16,0,8.999872207641602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,64,8,128,0,1,float16,fp8,0,18.607194264729817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,1,128,0,1,float16,fp8,0,8.851989110310873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,1,128,0,1,fp8,fp8,0,6.138965606689453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,2,128,0,1,fp8,fp8,0,6.174928029378255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,2,128,0,1,float16,float16,0,8.990383783976236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,2,128,0,1,float16,fp8,0,9.036378860473633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,4,128,0,1,float16,float16,0,9.143914540608725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,4,128,0,1,fp8,fp8,0,6.355434417724609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,4,128,0,1,float16,fp8,0,9.072261174519857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,8,128,0,1,fp8,fp8,0,6.315173467000325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,64,128,0,1,float16,float16,0,4.819813410441081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,64,128,0,1,float16,fp8,0,4.849295934041341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,64,128,0,1,fp8,fp8,0,3.3057387669881186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,8,128,0,1,float16,float16,0,9.175760269165039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,64,8,128,0,1,float16,fp8,0,9.214309056599935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,1,128,0,1,float16,float16,0,4.647365252176921
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,1,128,0,1,fp8,fp8,0,3.1812852223714194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,1,128,0,1,float16,fp8,0,4.727711995442708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,2,128,0,1,fp8,fp8,0,3.114543914794922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,2,128,0,1,float16,float16,0,4.802165349324544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,2,128,0,1,float16,fp8,0,4.914415995279948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,4,128,0,1,float16,float16,0,4.596357345581055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,4,128,0,1,fp8,fp8,0,3.222954750061035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,4,128,0,1,float16,fp8,0,4.6832319895426435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,64,128,0,1,float16,float16,0,2.432250658671061
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,8,128,0,1,float16,float16,0,4.6723785400390625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,8,128,0,1,fp8,fp8,0,3.239898681640625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,64,128,0,1,fp8,fp8,0,1.7380906740824382
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,64,128,0,1,float16,fp8,0,2.5138452847798667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,64,8,128,0,1,float16,fp8,0,4.543738683064778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,1,128,0,1,float16,float16,0,2.3668266932169595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,1,128,0,1,fp8,fp8,0,1.662010669708252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,1,128,0,1,float16,fp8,0,2.3628053665161133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,2,128,0,1,float16,float16,0,2.3194079399108887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,2,128,0,1,fp8,fp8,0,1.6872426668802898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,2,128,0,1,float16,fp8,0,2.340933322906494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,4,128,0,1,float16,float16,0,2.307370662689209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,4,128,0,1,float16,fp8,0,2.338714599609375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,4,128,0,1,fp8,fp8,0,1.6520479520161946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,8,128,0,1,float16,float16,0,2.3539253870646157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,8,128,0,1,fp8,fp8,0,1.644144058227539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,64,8,128,0,1,float16,fp8,0,2.2960000038146973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,1,128,0,1,fp8,fp8,0,14.34656016031901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,2,128,0,1,fp8,fp8,0,14.58355712890625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,1,128,0,1,float16,fp8,0,20.83450190226237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,1,128,0,1,float16,float16,0,21.452420552571613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,2,128,0,1,float16,fp8,0,21.170176188151043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,2,128,0,1,float16,float16,0,21.779579162597656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,4,128,0,1,float16,float16,0,20.75508753458659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,4,128,0,1,float16,fp8,0,21.160437266031902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,4,128,0,1,fp8,fp8,0,14.98525873819987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,64,128,0,1,fp8,fp8,0,8.237589518229166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,64,128,0,1,float16,float16,0,11.791370391845703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,64,128,0,1,float16,fp8,0,11.696725209554037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,1,128,0,1,float16,float16,0,11.02606455485026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,8,128,0,1,fp8,fp8,0,15.093690236409506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,8,128,0,1,float16,float16,0,22.125595092773438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,1,128,0,1,fp8,fp8,0,7.31660270690918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,1,128,0,1,float16,fp8,0,10.548720041910807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,2,128,0,1,fp8,fp8,0,7.350287755330403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,64,8,128,0,1,float16,fp8,0,21.00398890177409
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,2,128,0,1,float16,float16,0,10.314410527547201
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,2,128,0,1,float16,fp8,0,10.57532819112142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,4,128,0,1,fp8,fp8,0,7.28276252746582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,4,128,0,1,float16,float16,0,10.579488118489584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,4,128,0,1,float16,fp8,0,10.435279846191406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,64,128,0,1,float16,float16,0,5.500165303548177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,8,128,0,1,fp8,fp8,0,7.437866846720378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,64,128,0,1,float16,fp8,0,5.646544138590495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,64,128,0,1,fp8,fp8,0,3.952272097269694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,8,128,0,1,float16,float16,0,10.403658548990885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,64,8,128,0,1,float16,fp8,0,10.540410359700521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,1,128,0,1,float16,float16,0,5.33575439453125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,1,128,0,1,fp8,fp8,0,3.6166346867879233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,1,128,0,1,float16,fp8,0,5.295957247416179
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,2,128,0,1,fp8,fp8,0,3.623978614807129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,2,128,0,1,float16,float16,0,5.227007865905762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,2,128,0,1,float16,fp8,0,5.621472040812175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,4,128,0,1,fp8,fp8,0,3.6700267791748047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,4,128,0,1,float16,float16,0,5.195162773132324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,4,128,0,1,float16,fp8,0,5.293216069539388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,64,128,0,1,float16,float16,0,2.7652158737182617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,8,128,0,1,fp8,fp8,0,3.6705760955810547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,64,128,0,1,fp8,fp8,0,2.0180160204569497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,8,128,0,1,float16,float16,0,5.245461463928223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,64,128,0,1,float16,fp8,0,2.74397881825765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,1,128,0,1,float16,float16,0,2.602165381113688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,64,8,128,0,1,float16,fp8,0,5.495466868082683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,1,128,0,1,float16,fp8,0,2.730661392211914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,1,128,0,1,fp8,fp8,0,1.867402712504069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,2,128,0,1,float16,float16,0,2.7183574040730796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,2,128,0,1,fp8,fp8,0,1.876805305480957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,2,128,0,1,float16,fp8,0,2.63209597269694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,4,128,0,1,fp8,fp8,0,1.8925760587056477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,4,128,0,1,float16,float16,0,2.6947574615478516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,4,128,0,1,float16,fp8,0,2.636240005493164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,8,128,0,1,float16,float16,0,2.7043094635009766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,8,128,0,1,float16,fp8,0,2.6739253997802734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,64,128,0,1,float16,float16,0,1.3838507334391277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,64,8,128,0,1,fp8,fp8,0,1.8902452786763508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,64,128,0,1,fp8,fp8,0,1.0804906686147053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,64,128,0,1,float16,fp8,0,1.4363466898600261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,1,128,0,1,float16,float16,0,1.373146692911784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,1,128,0,1,float16,fp8,0,1.3695306777954102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,1,128,0,1,fp8,fp8,0,1.0537866751352947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,2,128,0,1,float16,float16,0,1.37772798538208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,2,128,0,1,fp8,fp8,0,1.0250186920166016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,2,128,0,1,float16,fp8,0,1.3760959307352703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,4,128,0,1,float16,float16,0,1.3611092567443848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,4,128,0,1,fp8,fp8,0,1.0523573557535808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,4,128,0,1,float16,fp8,0,1.3634079297383626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,8,128,0,1,float16,float16,0,1.3598933219909668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,8,128,0,1,fp8,fp8,0,1.0274399916330974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,64,8,128,0,1,float16,fp8,0,1.3700213432312012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,1,128,0,1,fp8,fp8,0,14.324347178141275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,2,128,0,1,fp8,fp8,0,14.20373280843099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,1,128,0,1,float16,float16,0,20.03603744506836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,1,128,0,1,float16,fp8,0,19.843482971191406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,2,128,0,1,float16,float16,0,20.29812240600586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,2,128,0,1,float16,fp8,0,20.12508265177409
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,4,128,0,1,float16,float16,0,19.907525380452473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,4,128,0,1,fp8,fp8,0,14.328404744466146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,64,128,0,1,fp8,fp8,0,8.005247751871744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,64,128,0,1,float16,float16,0,11.698416392008463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,64,128,0,1,float16,fp8,0,12.107461293538412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,4,128,0,1,float16,fp8,0,20.17580795288086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,8,128,0,1,fp8,fp8,0,15.663776397705078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,8,128,0,1,float16,float16,0,21.975087483723957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,1,128,0,1,fp8,fp8,0,7.358837127685547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,1,128,0,1,float16,float16,0,10.346922556559244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,64,8,128,0,1,float16,fp8,0,20.239173889160156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,1,128,0,1,float16,fp8,0,10.00156275431315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,2,128,0,1,float16,fp8,0,10.305178960164389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,2,128,0,1,fp8,fp8,0,7.12666130065918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,2,128,0,1,float16,float16,0,10.16921615600586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,4,128,0,1,float16,float16,0,9.937503814697266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,4,128,0,1,fp8,fp8,0,7.198234558105469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,64,128,0,1,float16,float16,0,5.283674558003743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,64,128,0,1,float16,fp8,0,5.442527770996094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,4,128,0,1,float16,fp8,0,10.203397115071615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,8,128,0,1,float16,float16,0,9.981621424357096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,64,128,0,1,fp8,fp8,0,3.890399932861328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,8,128,0,1,float16,fp8,0,10.063685099283854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,64,8,128,0,1,fp8,fp8,0,7.442117055257161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,1,128,0,1,float16,float16,0,5.013968149820964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,1,128,0,1,fp8,fp8,0,3.476421356201172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,1,128,0,1,float16,fp8,0,5.071200052897136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,2,128,0,1,fp8,fp8,0,3.5529438654581704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,2,128,0,1,float16,float16,0,5.034901301066081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,2,128,0,1,float16,fp8,0,5.007760047912598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,4,128,0,1,float16,float16,0,4.984469413757324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,4,128,0,1,float16,fp8,0,4.893631935119629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,4,128,0,1,fp8,fp8,0,3.59114138285319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,8,128,0,1,float16,float16,0,5.081391970316569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,64,128,0,1,float16,float16,0,2.683285395304362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,8,128,0,1,fp8,fp8,0,3.524442672729492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,64,8,128,0,1,float16,fp8,0,5.026639938354492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,64,128,0,1,float16,fp8,0,2.591440041859945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,64,128,0,1,fp8,fp8,0,1.9621920585632324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,1,128,0,1,float16,float16,0,2.496895949045817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,1,128,0,1,float16,fp8,0,2.4745119412740073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,1,128,0,1,fp8,fp8,0,1.8402187029520671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,2,128,0,1,float16,float16,0,2.5189332962036133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,2,128,0,1,fp8,fp8,0,1.7980000178019206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,2,128,0,1,float16,fp8,0,2.444373289744059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,4,128,0,1,float16,float16,0,2.382592042287191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,4,128,0,1,fp8,fp8,0,1.8087199529012044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,4,128,0,1,float16,fp8,0,2.4791040420532227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,8,128,0,1,float16,float16,0,2.384005387624105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,64,128,0,1,float16,float16,0,1.3291146755218506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,8,128,0,1,float16,fp8,0,2.4618345896402993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,64,8,128,0,1,fp8,fp8,0,1.8149120012919109
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,64,128,0,1,fp8,fp8,0,1.046890656153361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,64,128,0,1,float16,fp8,0,1.3408800760904949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,1,128,0,1,float16,float16,0,1.2466986974080403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,1,128,0,1,fp8,fp8,0,0.974565347035726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,1,128,0,1,float16,fp8,0,1.2633386452992756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,2,128,0,1,float16,float16,0,1.2669013341267903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,2,128,0,1,fp8,fp8,0,0.9648693402608236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,2,128,0,1,float16,fp8,0,1.292101303736369
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,4,128,0,1,float16,float16,0,1.246608018875122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,4,128,0,1,float16,fp8,0,1.2546239693959553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,4,128,0,1,fp8,fp8,0,0.9612267017364502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,8,128,0,1,float16,float16,0,1.2547840277353923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,8,128,0,1,fp8,fp8,0,0.9760639667510986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,64,8,128,0,1,float16,fp8,0,1.254479964574178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,64,128,0,1,float16,float16,0,0.7191572984059652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,64,128,0,1,float16,fp8,0,0.7297866344451904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,64,128,0,1,fp8,fp8,0,0.5794879992802938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,1,128,0,1,float16,float16,0,0.6993920008341471
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,1,128,0,1,float16,fp8,0,0.6989440123240153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,1,128,0,1,fp8,fp8,0,0.5478346745173136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,2,128,0,1,float16,float16,0,0.6972693602244059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,2,128,0,1,float16,fp8,0,0.6991199652353922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,2,128,0,1,fp8,fp8,0,0.5465866724650065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,4,128,0,1,float16,float16,0,0.699664036432902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,4,128,0,1,float16,fp8,0,0.6999946435292562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,4,128,0,1,fp8,fp8,0,0.5509013334910074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,8,128,0,1,float16,float16,0,0.7003893057505289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,8,128,0,1,float16,fp8,0,0.7064212958017985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,64,8,128,0,1,fp8,fp8,0,0.5542826652526855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,1,128,0,1,fp8,fp8,0,8.915066401163736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,2,128,0,1,fp8,fp8,0,8.843114852905273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,1,128,0,1,float16,float16,0,11.951546986897787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,1,128,0,1,float16,fp8,0,12.277056376139322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,2,128,0,1,float16,float16,0,12.040271759033203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,2,128,0,1,float16,fp8,0,12.033067067464193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,4,128,0,1,float16,float16,0,12.139450073242188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,4,128,0,1,float16,fp8,0,12.25930658976237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,4,128,0,1,fp8,fp8,0,8.996442794799805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,64,128,0,1,fp8,fp8,0,4.944383939107259
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,64,128,0,1,float16,float16,0,6.489509582519531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,64,128,0,1,float16,fp8,0,6.408693313598633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,1,128,0,1,float16,float16,0,6.035296122233073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,8,128,0,1,fp8,fp8,0,9.09940274556478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,8,128,0,1,float16,float16,0,12.19589869181315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,64,8,128,0,1,float16,fp8,0,12.124591827392578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,1,128,0,1,fp8,fp8,0,4.405818621317546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,1,128,0,1,float16,fp8,0,6.083679835001628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,2,128,0,1,float16,float16,0,5.9938398996988935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,2,128,0,1,float16,fp8,0,5.928474426269531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,2,128,0,1,fp8,fp8,0,4.50218137105306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,4,128,0,1,float16,float16,0,6.050298690795898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,4,128,0,1,fp8,fp8,0,4.420810699462891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,64,128,0,1,float16,float16,0,3.4234774907430015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,4,128,0,1,float16,fp8,0,6.600170771280925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,8,128,0,1,fp8,fp8,0,4.448207855224609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,8,128,0,1,float16,float16,0,6.465818405151367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,64,128,0,1,fp8,fp8,0,2.4747519493103027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,64,8,128,0,1,float16,fp8,0,6.8219254811604815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,64,128,0,1,float16,fp8,0,3.670405387878418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,1,128,0,1,float16,float16,0,3.142746607462565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,1,128,0,1,float16,fp8,0,3.014122645060221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,1,128,0,1,fp8,fp8,0,2.197098731994629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,2,128,0,1,float16,float16,0,3.0652478535970054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,2,128,0,1,fp8,fp8,0,2.2084800402323403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,2,128,0,1,float16,fp8,0,2.980549176534017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,4,128,0,1,float16,float16,0,3.0678561528523765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,4,128,0,1,fp8,fp8,0,2.1931840578715005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,4,128,0,1,float16,fp8,0,2.988448143005371
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,64,128,0,1,float16,float16,0,1.5704320271809895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,8,128,0,1,float16,float16,0,2.9695307413736978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,64,128,0,1,float16,fp8,0,1.5833652814229329
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,64,128,0,1,fp8,fp8,0,1.2838239669799805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,8,128,0,1,fp8,fp8,0,2.177903970082601
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,64,8,128,0,1,float16,fp8,0,3.0143038431803384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,1,128,0,1,float16,float16,0,1.451269308725993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,1,128,0,1,fp8,fp8,0,1.142416000366211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,1,128,0,1,float16,fp8,0,1.5033653577168782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,2,128,0,1,float16,float16,0,1.4856905937194824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,2,128,0,1,fp8,fp8,0,1.1378560066223145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,2,128,0,1,float16,fp8,0,1.4610026677449544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,4,128,0,1,float16,float16,0,1.4499999682108562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,4,128,0,1,fp8,fp8,0,1.1420746644337971
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,4,128,0,1,float16,fp8,0,1.4875839551289876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,8,128,0,1,float16,float16,0,1.4520907402038574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,64,128,0,1,float16,float16,0,0.8186026414235433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,8,128,0,1,fp8,fp8,0,1.165829340616862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,64,8,128,0,1,float16,fp8,0,1.4780799547831218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,64,128,0,1,fp8,fp8,0,0.6901386578877767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,64,128,0,1,float16,fp8,0,0.8328213691711426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,1,128,0,1,float16,float16,0,0.7791199684143066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,1,128,0,1,float16,fp8,0,0.8017066319783529
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,1,128,0,1,fp8,fp8,0,0.631114681561788
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,2,128,0,1,float16,float16,0,0.7859573364257812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,2,128,0,1,float16,fp8,0,0.7856480280558268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,2,128,0,1,fp8,fp8,0,0.623253345489502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,8,128,0,1,float16,float16,0,0.7865866820017496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,4,128,0,1,float16,float16,0,0.790287971496582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,4,128,0,1,float16,fp8,0,0.7890933354695638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,4,128,0,1,fp8,fp8,0,0.6202346483866373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,8,128,0,1,float16,fp8,0,0.789306640625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,64,8,128,0,1,fp8,fp8,0,0.6240053176879883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,64,128,0,1,float16,float16,0,0.46069331963857013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,64,128,0,1,float16,fp8,0,0.4703520139058431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,64,128,0,1,fp8,fp8,0,0.37885868549346924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,1,128,0,1,float16,float16,0,0.45129064718882245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,1,128,0,1,float16,fp8,0,0.45459731419881183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,1,128,0,1,fp8,fp8,0,0.34645867347717285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,2,128,0,1,float16,float16,0,0.4493653376897176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,2,128,0,1,float16,fp8,0,0.45418667793273926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,2,128,0,1,fp8,fp8,0,0.34829334417978924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,4,128,0,1,float16,float16,0,0.4503626823425293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,4,128,0,1,float16,fp8,0,0.45362667242685956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,4,128,0,1,fp8,fp8,0,0.34834134578704834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,8,128,0,1,float16,float16,0,0.45135998725891113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,8,128,0,1,float16,fp8,0,0.45604801177978516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,64,8,128,0,1,fp8,fp8,0,0.34879998366038006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,1,128,0,1,fp8,fp8,0,9.606559753417969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,2,128,0,1,fp8,fp8,0,9.414458592732748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,1,128,0,1,float16,float16,0,12.196715037027994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,1,128,0,1,float16,fp8,0,12.122570037841797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,2,128,0,1,float16,float16,0,12.349051157633463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,2,128,0,1,float16,fp8,0,12.280831654866537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,4,128,0,1,float16,float16,0,12.234858194986979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,4,128,0,1,float16,fp8,0,12.3144162495931
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,64,128,0,1,fp8,fp8,0,5.268063863118489
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,4,128,0,1,fp8,fp8,0,9.653050740559896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,64,128,0,1,float16,float16,0,6.673791885375977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,1,128,0,1,float16,float16,0,6.011583964029948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,64,128,0,1,float16,fp8,0,6.782874425252278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,8,128,0,1,fp8,fp8,0,9.496405283610025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,8,128,0,1,float16,float16,0,12.344474792480469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,64,8,128,0,1,float16,fp8,0,12.39563242594401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,1,128,0,1,fp8,fp8,0,4.727482795715332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,1,128,0,1,float16,fp8,0,6.04472541809082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,2,128,0,1,fp8,fp8,0,4.723343849182129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,2,128,0,1,float16,float16,0,5.96230951944987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,2,128,0,1,float16,fp8,0,6.050367991129558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,4,128,0,1,float16,float16,0,6.052010854085286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,4,128,0,1,fp8,fp8,0,4.814693450927734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,4,128,0,1,float16,fp8,0,6.152181625366211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,64,128,0,1,float16,float16,0,3.357664108276367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,64,128,0,1,float16,fp8,0,3.377322514851888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,8,128,0,1,float16,float16,0,6.9511464436848955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,8,128,0,1,fp8,fp8,0,4.790128072102864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,64,128,0,1,fp8,fp8,0,2.7771145502726235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,64,8,128,0,1,float16,fp8,0,6.131104151407878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,1,128,0,1,float16,float16,0,3.2137120564778647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,1,128,0,1,fp8,fp8,0,2.522058645884196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,1,128,0,1,float16,fp8,0,3.3036905924479165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,2,128,0,1,fp8,fp8,0,2.329733371734619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,2,128,0,1,float16,float16,0,3.1968959172566733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,2,128,0,1,float16,fp8,0,2.973557472229004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,4,128,0,1,float16,float16,0,3.1390612920125327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,4,128,0,1,fp8,fp8,0,2.406597296396891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,4,128,0,1,float16,fp8,0,2.9704319636027017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,8,128,0,1,float16,float16,0,3.037173271179199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,64,128,0,1,float16,float16,0,1.5897547403971355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,8,128,0,1,fp8,fp8,0,2.3722826639811196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,64,128,0,1,fp8,fp8,0,1.3416159947713215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,64,128,0,1,float16,fp8,0,1.6826400756835938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,64,8,128,0,1,float16,fp8,0,2.958810806274414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,1,128,0,1,float16,float16,0,1.469205379486084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,1,128,0,1,float16,fp8,0,1.4926560719807942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,1,128,0,1,fp8,fp8,0,1.2076746622721355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,2,128,0,1,float16,float16,0,1.4481652577718098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,2,128,0,1,float16,fp8,0,1.4893760681152344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,2,128,0,1,fp8,fp8,0,1.194655974706014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,4,128,0,1,float16,float16,0,1.4441226323445637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,4,128,0,1,float16,fp8,0,1.4623252550760906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,4,128,0,1,fp8,fp8,0,1.2015946706136067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,64,128,0,1,float16,float16,0,0.813653310139974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,8,128,0,1,float16,float16,0,1.470037301381429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,8,128,0,1,fp8,fp8,0,1.2066559791564941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,64,8,128,0,1,float16,fp8,0,1.4974239667256672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,64,128,0,1,fp8,fp8,0,0.7099839846293131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,1,128,0,1,float16,float16,0,0.7652746836344401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,64,128,0,1,float16,fp8,0,0.8281653722127279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,1,128,0,1,fp8,fp8,0,0.6319040060043335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,1,128,0,1,float16,fp8,0,0.7669599850972494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,2,128,0,1,float16,float16,0,0.7716693083445231
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,2,128,0,1,float16,fp8,0,0.7674880027770996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,2,128,0,1,fp8,fp8,0,0.6315199931462606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,4,128,0,1,float16,float16,0,0.7642239729563395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,4,128,0,1,float16,fp8,0,0.7681066989898682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,4,128,0,1,fp8,fp8,0,0.6287786563237509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,8,128,0,1,float16,float16,0,0.7655680179595947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,8,128,0,1,float16,fp8,0,0.7734986941019694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,1,128,0,1,float16,float16,0,0.42347200711568195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,64,8,128,0,1,fp8,fp8,0,0.6428266763687134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,64,128,0,1,float16,float16,0,0.44179201126098633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,64,128,0,1,float16,fp8,0,0.4520639975865682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,64,128,0,1,fp8,fp8,0,0.39129066467285156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,1,128,0,1,float16,fp8,0,0.42212267716725665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,1,128,0,1,fp8,fp8,0,0.3516106605529785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,2,128,0,1,float16,float16,0,0.4235200087229411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,2,128,0,1,float16,fp8,0,0.42660268147786456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,2,128,0,1,fp8,fp8,0,0.3558559815088908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,4,128,0,1,float16,float16,0,0.42159998416900635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,4,128,0,1,float16,fp8,0,0.4256693522135417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,4,128,0,1,fp8,fp8,0,0.3538293441136678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,8,128,0,1,float16,float16,0,0.4202880064646403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,8,128,0,1,float16,fp8,0,0.4262559811274211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,64,8,128,0,1,fp8,fp8,0,0.355840007464091
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,64,128,0,1,float16,float16,0,0.2610986630121867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,64,128,0,1,float16,fp8,0,0.26773866017659503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,64,128,0,1,fp8,fp8,0,0.22083733479181925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,1,128,0,1,float16,float16,0,0.25091733535130817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,1,128,0,1,float16,fp8,0,0.2542293270428975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,1,128,0,1,fp8,fp8,0,0.19885333379109701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,2,128,0,1,float16,float16,0,0.2520693341890971
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,2,128,0,1,float16,fp8,0,0.2527573307355245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,2,128,0,1,fp8,fp8,0,0.20130133628845215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,4,128,0,1,float16,float16,0,0.2534186641375224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,4,128,0,1,float16,fp8,0,0.2536533276240031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,4,128,0,1,fp8,fp8,0,0.20006400346755981
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,8,128,0,1,float16,float16,0,0.25276799996693927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,8,128,0,1,float16,fp8,0,0.2541653315226237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,64,8,128,0,1,fp8,fp8,0,0.20093866189320883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,1,128,0,1,fp8,fp8,0,6.201440175374349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,2,128,0,1,fp8,fp8,0,6.159599939982097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,1,128,0,1,float16,float16,0,7.70309321085612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,1,128,0,1,float16,fp8,0,7.692218780517578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,2,128,0,1,float16,float16,0,7.749978383382161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,2,128,0,1,float16,fp8,0,7.788565317789714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,4,128,0,1,float16,float16,0,7.688138961791992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,4,128,0,1,float16,fp8,0,7.719455718994141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,64,128,0,1,float16,float16,0,4.260047912597656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,64,128,0,1,fp8,fp8,0,3.5100533167521157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,4,128,0,1,fp8,fp8,0,6.299610773722331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,64,128,0,1,float16,fp8,0,4.322944005330403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,1,128,0,1,float16,float16,0,3.7485278447469077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,8,128,0,1,fp8,fp8,0,6.277445475260417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,8,128,0,1,float16,float16,0,7.725061416625977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,64,8,128,0,1,float16,fp8,0,7.7685120900472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,1,128,0,1,fp8,fp8,0,3.0616321563720703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,1,128,0,1,float16,fp8,0,3.8062238693237305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,2,128,0,1,float16,float16,0,3.774847984313965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,2,128,0,1,fp8,fp8,0,3.0639626185099282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,2,128,0,1,float16,fp8,0,3.8139146169026694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,4,128,0,1,float16,float16,0,3.773791948954264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,4,128,0,1,fp8,fp8,0,3.0873387654622397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,4,128,0,1,float16,fp8,0,3.7473599116007485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,64,128,0,1,float16,float16,0,2.071674664815267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,64,128,0,1,float16,fp8,0,2.111834685007731
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,64,128,0,1,fp8,fp8,0,1.7997546195983887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,8,128,0,1,float16,float16,0,3.795232137044271
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,8,128,0,1,fp8,fp8,0,3.142928123474121
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,64,8,128,0,1,float16,fp8,0,3.9388745625813804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,1,128,0,1,float16,float16,0,1.7908693949381511
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,1,128,0,1,float16,fp8,0,1.835840066274007
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,1,128,0,1,fp8,fp8,0,1.5705653826395671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,2,128,0,1,float16,float16,0,1.7918507258097331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,2,128,0,1,float16,fp8,0,1.8246666590372722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,2,128,0,1,fp8,fp8,0,1.5706346829732258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,4,128,0,1,float16,float16,0,1.8282772699991863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,4,128,0,1,float16,fp8,0,1.81605863571167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,4,128,0,1,fp8,fp8,0,1.5617440541585286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,8,128,0,1,float16,float16,0,1.856160004933675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,64,128,0,1,float16,float16,0,1.0278346538543701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,8,128,0,1,float16,fp8,0,1.857642650604248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,64,8,128,0,1,fp8,fp8,0,1.5741599400838215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,64,128,0,1,float16,fp8,0,1.0468640327453613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,64,128,0,1,fp8,fp8,0,0.9276213645935059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,1,128,0,1,float16,float16,0,0.9286293188730875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,1,128,0,1,float16,fp8,0,0.9413493474324545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,1,128,0,1,fp8,fp8,0,0.8121279875437418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,2,128,0,1,float16,float16,0,0.9285600185394287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,2,128,0,1,float16,fp8,0,0.9376800060272217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,2,128,0,1,fp8,fp8,0,0.8084320227305094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,4,128,0,1,float16,float16,0,0.9309066931406657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,4,128,0,1,float16,fp8,0,0.9408426284790039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,8,128,0,1,float16,float16,0,0.933135986328125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,4,128,0,1,fp8,fp8,0,0.8080906867980957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,8,128,0,1,float16,fp8,0,0.9462986787160238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,64,8,128,0,1,fp8,fp8,0,0.8186666965484619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,64,128,0,1,float16,float16,0,0.5375359853108724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,64,128,0,1,float16,fp8,0,0.5487039883931478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,64,128,0,1,fp8,fp8,0,0.49107201894124347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,1,128,0,1,float16,float16,0,0.4936693509419759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,2,128,0,1,float16,float16,0,0.4989226659138997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,4,128,0,1,float16,float16,0,0.5002453327178955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,2,128,0,1,fp8,fp8,0,0.43175466855367023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,1,128,0,1,float16,fp8,0,0.4996266762415568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,1,128,0,1,fp8,fp8,0,0.431877334912618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,2,128,0,1,float16,fp8,0,0.5032266775767008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,4,128,0,1,float16,fp8,0,0.5009973446528116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,4,128,0,1,fp8,fp8,0,0.43669335047403973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,8,128,0,1,float16,float16,0,0.4997066656748454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,8,128,0,1,float16,fp8,0,0.5070026715596517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,64,8,128,0,1,fp8,fp8,0,0.43506133556365967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,64,128,0,1,float16,float16,0,0.2966453234354655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,64,128,0,1,float16,fp8,0,0.3036959966023763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,64,128,0,1,fp8,fp8,0,0.26822400093078613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,1,128,0,1,float16,float16,0,0.2786293427149455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,1,128,0,1,float16,fp8,0,0.2813599904378255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,1,128,0,1,fp8,fp8,0,0.22927467028299967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,2,128,0,1,float16,float16,0,0.28096532821655273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,2,128,0,1,float16,fp8,0,0.282640000184377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,2,128,0,1,fp8,fp8,0,0.231605331103007
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,4,128,0,1,float16,float16,0,0.2828426758448283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,4,128,0,1,float16,fp8,0,0.2825813293457031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,4,128,0,1,fp8,fp8,0,0.2311306595802307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,64,128,0,1,fp8,fp8,0,0.15408000349998474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,8,128,0,1,float16,float16,0,0.2825760046641032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,8,128,0,1,float16,fp8,0,0.283461332321167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,64,8,128,0,1,fp8,fp8,0,0.23291732867558798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,64,128,0,1,float16,float16,0,0.1685333251953125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,64,128,0,1,float16,fp8,0,0.17473600308100382
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,1,128,0,1,float16,float16,0,0.1593226691087087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,1,128,0,1,float16,fp8,0,0.15958933035532633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,1,128,0,1,fp8,fp8,0,0.14058132966359457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,2,128,0,1,float16,float16,0,0.15894933541615805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,8,128,0,1,float16,float16,0,0.15997866789499918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,2,128,0,1,float16,fp8,0,0.16005866726239523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,2,128,0,1,fp8,fp8,0,0.14190933108329773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,4,128,0,1,float16,float16,0,0.15833066900571188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,4,128,0,1,float16,fp8,0,0.1602079967657725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,4,128,0,1,fp8,fp8,0,0.14160533746083578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,8,128,0,1,float16,fp8,0,0.16106133659680685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,64,8,128,0,1,fp8,fp8,0,0.1418826679388682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,1,128,0,1,fp8,fp8,0,6.767466862996419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,1,128,0,1,float16,float16,0,7.787589391072591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,1,128,0,1,float16,fp8,0,7.872378667195638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,2,128,0,1,fp8,fp8,0,6.795759836832683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,2,128,0,1,float16,float16,0,7.717215855916341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,2,128,0,1,float16,fp8,0,7.967039744059245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,4,128,0,1,float16,float16,0,7.977439880371094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,4,128,0,1,float16,fp8,0,8.054416020711264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,64,128,0,1,float16,float16,0,4.644015947977702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,1,128,0,1,float16,float16,0,3.82257080078125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,64,128,0,1,fp8,fp8,0,3.934266726175944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,64,128,0,1,float16,fp8,0,4.640000025431315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,4,128,0,1,fp8,fp8,0,6.865407943725586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,8,128,0,1,fp8,fp8,0,6.965525309244792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,8,128,0,1,float16,float16,0,8.104954401652018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,64,8,128,0,1,float16,fp8,0,8.031189600626627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,1,128,0,1,float16,fp8,0,3.727866808573405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,1,128,0,1,fp8,fp8,0,3.4315627415974936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,2,128,0,1,float16,float16,0,3.8757654825846353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,2,128,0,1,float16,fp8,0,3.898655891418457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,2,128,0,1,fp8,fp8,0,3.370512008666992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,4,128,0,1,fp8,fp8,0,3.424367904663086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,4,128,0,1,float16,float16,0,3.9686508178710938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,4,128,0,1,float16,fp8,0,3.9581066767374673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,64,128,0,1,float16,float16,0,2.3165599505106607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,64,128,0,1,float16,fp8,0,2.22435728708903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,8,128,0,1,float16,float16,0,4.033413251241048
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,8,128,0,1,fp8,fp8,0,3.4792372385660806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,64,8,128,0,1,float16,fp8,0,4.072618802388509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,1,128,0,1,float16,float16,0,1.865957260131836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,64,128,0,1,fp8,fp8,0,1.9963679313659668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,1,128,0,1,float16,fp8,0,1.87390931447347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,1,128,0,1,fp8,fp8,0,1.7070879936218262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,2,128,0,1,float16,float16,0,1.8790186246236165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,2,128,0,1,float16,fp8,0,1.866970698038737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,2,128,0,1,fp8,fp8,0,1.7099253336588542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,4,128,0,1,float16,float16,0,1.8942559560139973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,4,128,0,1,float16,fp8,0,1.901840051015218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,4,128,0,1,fp8,fp8,0,1.7347839673360188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,8,128,0,1,float16,float16,0,1.9197279612223308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,64,128,0,1,float16,float16,0,1.1316800117492676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,8,128,0,1,float16,fp8,0,1.8910293579101562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,64,128,0,1,float16,fp8,0,1.106384038925171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,64,8,128,0,1,fp8,fp8,0,1.7383839289347331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,64,128,0,1,fp8,fp8,0,0.9973493417104086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,1,128,0,1,float16,float16,0,0.9466773668924967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,1,128,0,1,float16,fp8,0,0.9483733177185059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,1,128,0,1,fp8,fp8,0,0.8494079907735189
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,2,128,0,1,float16,float16,0,0.9735199610392252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,2,128,0,1,float16,fp8,0,0.9488906860351562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,2,128,0,1,fp8,fp8,0,0.8391520182291666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,4,128,0,1,float16,float16,0,0.9548746744791666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,4,128,0,1,float16,fp8,0,0.9505120118459066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,4,128,0,1,fp8,fp8,0,0.8609546820322672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,64,128,0,1,float16,float16,0,0.5767893393834432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,8,128,0,1,float16,fp8,0,0.9581920305887858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,8,128,0,1,float16,float16,0,0.9509867032368978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,64,8,128,0,1,fp8,fp8,0,0.8751946290334066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,64,128,0,1,float16,fp8,0,0.5662293434143066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,64,128,0,1,fp8,fp8,0,0.5008533398310343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,1,128,0,1,float16,float16,0,0.4880266586939494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,1,128,0,1,float16,fp8,0,0.48635733127593994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,1,128,0,1,fp8,fp8,0,0.4299946626027425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,2,128,0,1,float16,float16,0,0.4874933163324992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,2,128,0,1,float16,fp8,0,0.48178664843241376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,2,128,0,1,fp8,fp8,0,0.4307680130004883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,4,128,0,1,float16,float16,0,0.4851359923680623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,4,128,0,1,float16,fp8,0,0.48968533674875897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,4,128,0,1,fp8,fp8,0,0.43329068024953205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,8,128,0,1,float16,float16,0,0.4949013392130534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,8,128,0,1,float16,fp8,0,0.492789347966512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,64,8,128,0,1,fp8,fp8,0,0.44418132305145264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,64,128,0,1,float16,float16,0,0.3016853332519531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,64,128,0,1,float16,fp8,0,0.2976213296254476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,64,128,0,1,fp8,fp8,0,0.2580373287200928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,1,128,0,1,float16,float16,0,0.2577280004819234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,1,128,0,1,float16,fp8,0,0.25594133138656616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,1,128,0,1,fp8,fp8,0,0.22945600748062134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,2,128,0,1,float16,float16,0,0.25543999671936035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,2,128,0,1,float16,fp8,0,0.2569013237953186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,2,128,0,1,fp8,fp8,0,0.2249493400255839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,4,128,0,1,float16,float16,0,0.25805866718292236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,4,128,0,1,float16,fp8,0,0.25853333870569867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,4,128,0,1,fp8,fp8,0,0.22976533571879068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,8,128,0,1,float16,float16,0,0.2601333260536194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,8,128,0,1,float16,fp8,0,0.26260266701380414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,64,8,128,0,1,fp8,fp8,0,0.23192532857259116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,64,128,0,1,float16,float16,0,0.1644319991270701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,64,128,0,1,float16,fp8,0,0.16179733475049338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,64,128,0,1,fp8,fp8,0,0.13132799665133157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,1,128,0,1,float16,float16,0,0.13928533593813577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,1,128,0,1,float16,fp8,0,0.13967999815940857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,1,128,0,1,fp8,fp8,0,0.11271466811498006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,2,128,0,1,float16,float16,0,0.14064000050226846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,8,128,0,1,float16,float16,0,0.14272000392278036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,2,128,0,1,float16,fp8,0,0.1404800017674764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,2,128,0,1,fp8,fp8,0,0.11182933052380879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,4,128,0,1,float16,float16,0,0.14015466968218485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,4,128,0,1,float16,fp8,0,0.14038933316866556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,4,128,0,1,fp8,fp8,0,0.11185066898663838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,1,128,0,1,float16,float16,0,0.07234666744867961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,8,128,0,1,float16,fp8,0,0.14295466740926108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,64,8,128,0,1,fp8,fp8,0,0.11404800415039062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,64,128,0,1,float16,float16,0,0.08646933237711589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,64,128,0,1,float16,fp8,0,0.08473599950472514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,64,128,0,1,fp8,fp8,0,0.07379733522733052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,1,128,0,1,float16,fp8,0,0.07196799914042155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,1,128,0,1,fp8,fp8,0,0.06076799829800924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,2,128,0,1,float16,float16,0,0.07196799914042155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,2,128,0,1,float16,fp8,0,0.07163199782371521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,2,128,0,1,fp8,fp8,0,0.06140799820423126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,4,128,0,1,float16,float16,0,0.07266133526961009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,4,128,0,1,float16,fp8,0,0.07236266632874806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,4,128,0,1,fp8,fp8,0,0.062128002444903054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,8,128,0,1,float16,float16,0,0.07386666536331177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,8,128,0,1,float16,fp8,0,0.07346133391062419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,64,8,128,0,1,fp8,fp8,0,0.06297066807746887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,1,128,0,1,float16,float16,0,5.92196782430013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,1,128,0,1,fp8,fp8,0,5.497130711873372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,1,128,0,1,float16,fp8,0,5.9104054768880205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,2,128,0,1,float16,float16,0,5.927530924479167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,2,128,0,1,fp8,fp8,0,5.476287841796875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,2,128,0,1,float16,fp8,0,5.921264012654622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,4,128,0,1,float16,float16,0,5.99234135945638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,4,128,0,1,float16,fp8,0,6.0041758219401045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,4,128,0,1,fp8,fp8,0,5.556528091430664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,64,128,0,1,float16,float16,0,3.7012106577555337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,64,128,0,1,float16,fp8,0,3.6159092585245767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,64,128,0,1,fp8,fp8,0,3.2921600341796875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,8,128,0,1,float16,float16,0,6.1321760813395185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,8,128,0,1,float16,fp8,0,6.152704238891602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,64,8,128,0,1,fp8,fp8,0,5.666271845499675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,1,128,0,1,float16,float16,0,2.872938791910807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,1,128,0,1,float16,fp8,0,2.87227725982666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,1,128,0,1,fp8,fp8,0,2.742277463277181
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,2,128,0,1,float16,float16,0,2.8566773732503257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,2,128,0,1,float16,fp8,0,2.8713067372639975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,2,128,0,1,fp8,fp8,0,2.734623908996582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,4,128,0,1,float16,float16,0,2.944202740987142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,4,128,0,1,float16,fp8,0,2.957184155782064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,4,128,0,1,fp8,fp8,0,2.788463910420736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,64,128,0,1,float16,float16,0,1.8470239639282227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,8,128,0,1,float16,float16,0,2.9773972829182944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,64,128,0,1,float16,fp8,0,1.785696029663086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,8,128,0,1,fp8,fp8,0,2.8332961400349936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,64,128,0,1,fp8,fp8,0,1.6458080609639485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,64,8,128,0,1,float16,fp8,0,2.9842987060546875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,1,128,0,1,float16,float16,0,1.4407199223836262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,1,128,0,1,float16,fp8,0,1.4452853202819824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,1,128,0,1,fp8,fp8,0,1.3692320187886555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,2,128,0,1,float16,float16,0,1.4429492950439453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,2,128,0,1,float16,fp8,0,1.4431626001993816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,2,128,0,1,fp8,fp8,0,1.3610399564107258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,4,128,0,1,float16,float16,0,1.4398560523986816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,4,128,0,1,float16,fp8,0,1.4486826260884602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,4,128,0,1,fp8,fp8,0,1.3799573580423992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,8,128,0,1,float16,float16,0,1.4699467023213704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,64,128,0,1,float16,float16,0,0.9200053215026855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,8,128,0,1,float16,fp8,0,1.464431921641032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,64,8,128,0,1,fp8,fp8,0,1.4062879880269368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,64,128,0,1,float16,fp8,0,0.892848014831543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,64,128,0,1,fp8,fp8,0,0.8237280050913492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,1,128,0,1,float16,float16,0,0.7317547003428141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,1,128,0,1,fp8,fp8,0,0.6800373395284017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,1,128,0,1,float16,fp8,0,0.732917308807373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,2,128,0,1,float16,float16,0,0.7306773662567139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,2,128,0,1,float16,fp8,0,0.7338079611460367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,2,128,0,1,fp8,fp8,0,0.6747999986012777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,4,128,0,1,float16,float16,0,0.7342720031738281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,4,128,0,1,fp8,fp8,0,0.7000799973805746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,4,128,0,1,float16,fp8,0,0.7316266695658366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,8,128,0,1,float16,float16,0,0.7413866519927979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,8,128,0,1,float16,fp8,0,0.7443892955780029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,64,8,128,0,1,fp8,fp8,0,0.7100160121917725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,64,128,0,1,float16,float16,0,0.4708373149236043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,64,128,0,1,float16,fp8,0,0.45901866753896076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,64,128,0,1,fp8,fp8,0,0.4148586591084798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,2,128,0,1,float16,fp8,0,0.37674132982889813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,1,128,0,1,float16,float16,0,0.3744266827901204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,1,128,0,1,float16,fp8,0,0.3725546598434448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,1,128,0,1,fp8,fp8,0,0.3471466700236003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,2,128,0,1,float16,float16,0,0.3762720028559367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,2,128,0,1,fp8,fp8,0,0.34705066680908203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,4,128,0,1,float16,float16,0,0.3752426703770955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,4,128,0,1,float16,fp8,0,0.37603731950124103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,4,128,0,1,fp8,fp8,0,0.3492853244145711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,64,128,0,1,fp8,fp8,0,0.21521600087483725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,8,128,0,1,float16,float16,0,0.38042132059733075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,8,128,0,1,float16,fp8,0,0.3803946574529012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,64,8,128,0,1,fp8,fp8,0,0.3561866680781047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,64,128,0,1,float16,float16,0,0.24618667364120483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,64,128,0,1,float16,fp8,0,0.24273600180943808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,4,128,0,1,float16,float16,0,0.1987733244895935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,1,128,0,1,float16,float16,0,0.19801066319147745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,1,128,0,1,float16,fp8,0,0.19897067546844482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,1,128,0,1,fp8,fp8,0,0.1818186640739441
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,2,128,0,1,float16,float16,0,0.19767467180887857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,2,128,0,1,float16,fp8,0,0.19830399751663208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,2,128,0,1,fp8,fp8,0,0.18425067265828451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,4,128,0,1,float16,fp8,0,0.1998400092124939
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,4,128,0,1,fp8,fp8,0,0.1834133267402649
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,8,128,0,1,float16,float16,0,0.2011680006980896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,8,128,0,1,float16,fp8,0,0.20143999656041464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,64,8,128,0,1,fp8,fp8,0,0.1858666737874349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,64,128,0,1,float16,float16,0,0.13322133819262186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,64,128,0,1,float16,fp8,0,0.13265066345532736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,2,128,0,1,float16,fp8,0,0.10794666409492493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,64,128,0,1,fp8,fp8,0,0.11255466938018799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,1,128,0,1,float16,float16,0,0.1085653305053711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,1,128,0,1,float16,fp8,0,0.10813333590825398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,1,128,0,1,fp8,fp8,0,0.09249066313107808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,2,128,0,1,float16,float16,0,0.1088693340619405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,2,128,0,1,fp8,fp8,0,0.09346133470535278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,4,128,0,1,float16,float16,0,0.11098133524258931
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,4,128,0,1,float16,fp8,0,0.10900266965230306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,4,128,0,1,fp8,fp8,0,0.09357333183288574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,64,128,0,1,fp8,fp8,0,0.06519466638565063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,8,128,0,1,float16,float16,0,0.1106666624546051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,8,128,0,1,float16,fp8,0,0.1111199955145518
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,64,8,128,0,1,fp8,fp8,0,0.09493866562843323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,64,128,0,1,float16,float16,0,0.07346666852633159
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,64,128,0,1,float16,fp8,0,0.07229333122571309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,1,128,0,1,float16,float16,0,0.058389330903689064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,1,128,0,1,float16,fp8,0,0.05867200096448263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,1,128,0,1,fp8,fp8,0,0.051514665285746254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,2,128,0,1,float16,float16,0,0.05865600208441416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,2,128,0,1,float16,fp8,0,0.05891199906667074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,8,128,0,1,float16,float16,0,0.06035199761390686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,2,128,0,1,fp8,fp8,0,0.05096533397833506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,4,128,0,1,float16,float16,0,0.06073066592216492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,4,128,0,1,float16,fp8,0,0.060309335589408875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,4,128,0,1,fp8,fp8,0,0.052000001072883606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,8,128,0,1,float16,fp8,0,0.05969599882761637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,64,8,128,0,1,fp8,fp8,0,0.05358933409055074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,64,128,0,1,float16,float16,0,0.04074666649103165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,64,128,0,1,float16,fp8,0,0.03934400031963984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,64,128,0,1,fp8,fp8,0,0.038575999438762665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,1,128,0,1,float16,float16,0,0.03507733345031738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,1,128,0,1,float16,fp8,0,0.03589333345492681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,1,128,0,1,fp8,fp8,0,0.03143466760714849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,4,128,0,1,float16,fp8,0,0.036506667733192444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,2,128,0,1,float16,float16,0,0.035589332381884255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,2,128,0,1,float16,fp8,0,0.03611200054486593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,2,128,0,1,fp8,fp8,0,0.03092266619205475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,4,128,0,1,float16,float16,0,0.03606933355331421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,4,128,0,1,fp8,fp8,0,0.03310399999221166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,8,128,0,1,float16,float16,0,0.03619199991226196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,8,128,0,1,float16,fp8,0,0.0364479993780454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,64,8,128,0,1,fp8,fp8,0,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,1,128,0,1,float16,float16,0,2.403461297353109
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,1,128,0,1,float16,fp8,0,2.4135306676228843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,1,128,0,1,fp8,fp8,0,2.1547999382019043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,2,128,0,1,float16,float16,0,2.4196267127990723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,2,128,0,1,fp8,fp8,0,2.1611785888671875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,2,128,0,1,float16,fp8,0,2.4217119216918945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,4,128,0,1,float16,float16,0,2.523983955383301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,4,128,0,1,float16,fp8,0,2.4740799268086753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,4,128,0,1,fp8,fp8,0,2.1751786867777505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,64,128,0,1,float16,float16,0,1.6258613268534343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,8,128,0,1,float16,float16,0,2.5536319414774575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,8,128,0,1,float16,fp8,0,2.5901172955830893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,64,128,0,1,float16,fp8,0,1.5631252924601238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,64,8,128,0,1,fp8,fp8,0,2.263696034749349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,1,128,0,1,float16,float16,0,1.2162506580352783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,64,128,0,1,fp8,fp8,0,1.3851572672526042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,1,128,0,1,float16,fp8,0,1.2132960160573323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,1,128,0,1,fp8,fp8,0,1.0806186993916829
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,2,128,0,1,float16,float16,0,1.217850685119629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,2,128,0,1,float16,fp8,0,1.2205333709716797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,2,128,0,1,fp8,fp8,0,1.0755306879679363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,4,128,0,1,float16,float16,0,1.232319990793864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,4,128,0,1,float16,fp8,0,1.2275786399841309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,4,128,0,1,fp8,fp8,0,1.1055413087209065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,8,128,0,1,float16,float16,0,1.2376320362091064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,8,128,0,1,float16,fp8,0,1.2524320284525554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,64,8,128,0,1,fp8,fp8,0,1.1265119711558025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,64,128,0,1,float16,float16,0,0.8098986943562826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,64,128,0,1,float16,fp8,0,0.7847627003987631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,64,128,0,1,fp8,fp8,0,0.6973333358764648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,1,128,0,1,float16,float16,0,0.618506669998169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,1,128,0,1,float16,fp8,0,0.6223359902699789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,1,128,0,1,fp8,fp8,0,0.533461332321167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,2,128,0,1,float16,float16,0,0.6204266548156738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,2,128,0,1,float16,fp8,0,0.6203039884567261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,2,128,0,1,fp8,fp8,0,0.5357919931411743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,4,128,0,1,float16,float16,0,0.6227413415908813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,4,128,0,1,float16,fp8,0,0.6239680051803589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,4,128,0,1,fp8,fp8,0,0.5520266691843668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,8,128,0,1,float16,float16,0,0.6352159976959229
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,8,128,0,1,float16,fp8,0,0.6303040186564127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,64,8,128,0,1,fp8,fp8,0,0.5688533385594686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,64,128,0,1,float16,float16,0,0.4145919879277547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,64,128,0,1,float16,fp8,0,0.40434134006500244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,64,128,0,1,fp8,fp8,0,0.35259731610616046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,1,128,0,1,float16,float16,0,0.3177173336346944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,1,128,0,1,float16,fp8,0,0.3173333406448364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,1,128,0,1,fp8,fp8,0,0.2765600085258484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,2,128,0,1,float16,float16,0,0.3176213304201762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,2,128,0,1,float16,fp8,0,0.31813865900039673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,2,128,0,1,fp8,fp8,0,0.276037335395813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,4,128,0,1,float16,float16,0,0.3213119904200236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,4,128,0,1,float16,fp8,0,0.32068800926208496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,4,128,0,1,fp8,fp8,0,0.277946670850118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,8,128,0,1,float16,float16,0,0.32497600714365643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,8,128,0,1,float16,fp8,0,0.32678399483362836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,64,8,128,0,1,fp8,fp8,0,0.28683199485143024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,64,128,0,1,float16,float16,0,0.21946134169896445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,64,128,0,1,float16,fp8,0,0.21287467082341513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,64,128,0,1,fp8,fp8,0,0.18718934059143066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,1,128,0,1,float16,float16,0,0.16799465815226236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,1,128,0,1,float16,fp8,0,0.16829333702723184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,1,128,0,1,fp8,fp8,0,0.1479146679242452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,2,128,0,1,float16,float16,0,0.17049066225687662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,2,128,0,1,float16,fp8,0,0.16938134034474692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,2,128,0,1,fp8,fp8,0,0.14797866344451904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,4,128,0,1,float16,float16,0,0.16925332943598428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,4,128,0,1,float16,fp8,0,0.17139732837677002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,64,128,0,1,float16,fp8,0,0.11658666531244914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,4,128,0,1,fp8,fp8,0,0.14850133657455444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,8,128,0,1,float16,float16,0,0.1725226640701294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,8,128,0,1,float16,fp8,0,0.17282666762669882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,1,128,0,1,fp8,fp8,0,0.08171199758847554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,64,8,128,0,1,fp8,fp8,0,0.15288533767064413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,64,128,0,1,float16,float16,0,0.11930132905642192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,64,128,0,1,fp8,fp8,0,0.10373866558074951
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,1,128,0,1,float16,float16,0,0.09408000111579895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,4,128,0,1,float16,float16,0,0.09409600496292114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,4,128,0,1,float16,fp8,0,0.09384000301361084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,1,128,0,1,float16,fp8,0,0.0935040016969045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,2,128,0,1,float16,float16,0,0.09362666805585225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,2,128,0,1,float16,fp8,0,0.09326933821042378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,2,128,0,1,fp8,fp8,0,0.08276799817879994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,4,128,0,1,fp8,fp8,0,0.08482133348782857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,64,128,0,1,float16,fp8,0,0.06879466772079468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,8,128,0,1,float16,float16,0,0.0946613351504008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,8,128,0,1,float16,fp8,0,0.09501333038012187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,64,8,128,0,1,fp8,fp8,0,0.08543999989827473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,64,128,0,1,float16,float16,0,0.07239999870459239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,64,128,0,1,fp8,fp8,0,0.06005333364009857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,2,128,0,1,float16,fp8,0,0.05347733199596405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,1,128,0,1,float16,float16,0,0.05436266462008158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,1,128,0,1,float16,fp8,0,0.05378133555253347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,1,128,0,1,fp8,fp8,0,0.04699199895064036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,2,128,0,1,float16,float16,0,0.054618666569391884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,2,128,0,1,fp8,fp8,0,0.046522667010625206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,4,128,0,1,float16,float16,0,0.05448000133037567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,4,128,0,1,float16,fp8,0,0.05453866720199585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,4,128,0,1,fp8,fp8,0,0.04840533435344696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,8,128,0,1,float16,float16,0,0.05436799923578898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,1,128,0,1,float16,float16,0,0.032272001107533775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,8,128,0,1,float16,fp8,0,0.0566293348868688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,64,8,128,0,1,fp8,fp8,0,0.048623998959859215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,64,128,0,1,float16,float16,0,0.036757332583268486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,64,128,0,1,float16,fp8,0,0.03659733384847641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,64,128,0,1,fp8,fp8,0,0.03505066782236099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,1,128,0,1,float16,fp8,0,0.03274133304754893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,1,128,0,1,fp8,fp8,0,0.02880000074704488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,2,128,0,1,float16,float16,0,0.03263466556866964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,2,128,0,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,2,128,0,1,fp8,fp8,0,0.02902399996916453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,4,128,0,1,float16,float16,0,0.03295466552178065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,4,128,0,1,float16,fp8,0,0.03382399926582972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,4,128,0,1,fp8,fp8,0,0.03075733284155528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,8,128,0,1,float16,float16,0,0.033002667129039764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,8,128,0,1,float16,fp8,0,0.033200000723203026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,64,8,128,0,1,fp8,fp8,0,0.031141333281993866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,64,128,0,1,float16,float16,0,0.02499199906984965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,2,128,0,1,float16,float16,0,0.02288000037272771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,64,128,0,1,float16,fp8,0,0.024197332561016083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,64,128,0,1,fp8,fp8,0,0.02253866692384084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,1,128,0,1,float16,float16,0,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,1,128,0,1,float16,fp8,0,0.022991999983787537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,1,128,0,1,fp8,fp8,0,0.02085866779088974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,2,128,0,1,float16,fp8,0,0.022783999641736347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,2,128,0,1,fp8,fp8,0,0.021664001047611237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,4,128,0,1,float16,float16,0,0.022976001103719074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,4,128,0,1,float16,fp8,0,0.02292799949645996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,4,128,0,1,fp8,fp8,0,0.02180800090233485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,8,128,0,1,float16,float16,0,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,8,128,0,1,float16,fp8,0,0.0234400009115537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,64,8,128,0,1,fp8,fp8,0,0.02164799968401591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,1,128,0,1,float16,float16,0,1.1127573649088542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,1,128,0,1,float16,fp8,0,1.1125226815541585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,1,128,0,1,fp8,fp8,0,1.0654133160909016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,2,128,0,1,float16,float16,0,1.1191733678181965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,2,128,0,1,float16,fp8,0,1.1203946272532146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,2,128,0,1,fp8,fp8,0,1.0714826583862305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,4,128,0,1,float16,float16,0,1.124245325724284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,4,128,0,1,float16,fp8,0,1.12336532274882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,4,128,0,1,fp8,fp8,0,1.119930664698283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,8,128,0,1,float16,float16,0,1.1384639739990234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,8,128,0,1,float16,fp8,0,1.1385173002878826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,64,128,0,1,float16,float16,0,0.7633492946624756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,64,8,128,0,1,fp8,fp8,0,1.123477300008138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,64,128,0,1,float16,fp8,0,0.739962657292684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,64,128,0,1,fp8,fp8,0,0.7009173234303793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,1,128,0,1,float16,float16,0,0.5664746761322021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,1,128,0,1,float16,fp8,0,0.5684693257013956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,1,128,0,1,fp8,fp8,0,0.5300159851710001
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,2,128,0,1,float16,float16,0,0.5679999987284342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,2,128,0,1,float16,fp8,0,0.5706293185551962
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,2,128,0,1,fp8,fp8,0,0.5301973422368368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,4,128,0,1,float16,float16,0,0.5737386544545492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,8,128,0,1,fp8,fp8,0,0.5659573475519816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,4,128,0,1,float16,fp8,0,0.5718880097071329
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,4,128,0,1,fp8,fp8,0,0.554746667544047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,8,128,0,1,float16,float16,0,0.5809386571248373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,64,8,128,0,1,float16,fp8,0,0.580186684926351
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,64,128,0,1,float16,float16,0,0.3959999879201253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,64,128,0,1,float16,fp8,0,0.3834559917449951
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,64,128,0,1,fp8,fp8,0,0.3485333522160848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,1,128,0,1,float16,float16,0,0.29073599974314374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,1,128,0,1,float16,fp8,0,0.2911253372828166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,1,128,0,1,fp8,fp8,0,0.27314666906992596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,2,128,0,1,float16,float16,0,0.29174933830897015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,2,128,0,1,float16,fp8,0,0.2913600007692973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,8,128,0,1,float16,float16,0,0.3007040023803711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,2,128,0,1,fp8,fp8,0,0.2740373412768046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,4,128,0,1,float16,float16,0,0.29505600531895954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,4,128,0,1,float16,fp8,0,0.2959253390630086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,4,128,0,1,fp8,fp8,0,0.2785653273264567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,8,128,0,1,float16,fp8,0,0.2994239926338196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,64,8,128,0,1,fp8,fp8,0,0.28515734275182086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,64,128,0,1,float16,float16,0,0.21068267027537027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,64,128,0,1,float16,fp8,0,0.20354666312535605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,64,128,0,1,fp8,fp8,0,0.18253866831461588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,1,128,0,1,float16,float16,0,0.1546026666959127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,1,128,0,1,float16,fp8,0,0.15528000394503275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,1,128,0,1,fp8,fp8,0,0.14570132891337076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,2,128,0,1,float16,float16,0,0.1550879975159963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,2,128,0,1,float16,fp8,0,0.15587199727694193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,8,128,0,1,float16,float16,0,0.15983999768892923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,2,128,0,1,fp8,fp8,0,0.1460479994614919
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,4,128,0,1,float16,float16,0,0.15754133462905884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,4,128,0,1,float16,fp8,0,0.15680000185966492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,4,128,0,1,fp8,fp8,0,0.14824533462524414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,8,128,0,1,float16,fp8,0,0.16014933586120605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,64,8,128,0,1,fp8,fp8,0,0.15212266643842062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,64,128,0,1,float16,float16,0,0.11680000027020772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,64,128,0,1,float16,fp8,0,0.11375466982523601
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,64,128,0,1,fp8,fp8,0,0.10207466284434001
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,1,128,0,1,float16,float16,0,0.0870293378829956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,1,128,0,1,float16,fp8,0,0.08802133798599243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,1,128,0,1,fp8,fp8,0,0.08098666866620381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,2,128,0,1,float16,float16,0,0.08770133058230083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,2,128,0,1,float16,fp8,0,0.08703466256459554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,2,128,0,1,fp8,fp8,0,0.0817440003156662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,4,128,0,1,float16,float16,0,0.08789867162704468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,4,128,0,1,float16,fp8,0,0.08784000078837077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,4,128,0,1,fp8,fp8,0,0.08250133196512859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,8,128,0,1,float16,float16,0,0.08799466490745544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,8,128,0,1,float16,fp8,0,0.09053867061932881
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,64,8,128,0,1,fp8,fp8,0,0.08451199531555176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,64,128,0,1,float16,float16,0,0.0688213308652242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,64,128,0,1,float16,fp8,0,0.06889066596825917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,64,128,0,1,fp8,fp8,0,0.05994133154551188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,1,128,0,1,float16,float16,0,0.05062933266162872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,1,128,0,1,float16,fp8,0,0.049695998430252075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,1,128,0,1,fp8,fp8,0,0.04674133161703745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,2,128,0,1,float16,float16,0,0.05108266572157542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,8,128,0,1,float16,float16,0,0.05134933193524679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,2,128,0,1,float16,fp8,0,0.05089066425959269
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,2,128,0,1,fp8,fp8,0,0.04580800235271454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,4,128,0,1,float16,float16,0,0.05119466781616211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,4,128,0,1,fp8,fp8,0,0.04755199948946635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,4,128,0,1,float16,fp8,0,0.052058666944503784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,8,128,0,1,float16,fp8,0,0.05194133520126343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,64,8,128,0,1,fp8,fp8,0,0.048911998669306435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,64,128,0,1,float16,float16,0,0.03541333228349686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,64,128,0,1,float16,fp8,0,0.035386666655540466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,64,128,0,1,fp8,fp8,0,0.0340639998515447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,1,128,0,1,float16,float16,0,0.031119999786218006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,1,128,0,1,float16,fp8,0,0.03146133323510488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,1,128,0,1,fp8,fp8,0,0.029653333127498627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,2,128,0,1,float16,float16,0,0.031658666829268135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,8,128,0,1,float16,float16,0,0.0322773332397143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,2,128,0,1,float16,fp8,0,0.03166399896144867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,2,128,0,1,fp8,fp8,0,0.028783999383449554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,4,128,0,1,float16,float16,0,0.03192000091075897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,4,128,0,1,float16,fp8,0,0.03217600037654241
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,4,128,0,1,fp8,fp8,0,0.03038399914900462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,8,128,0,1,float16,fp8,0,0.0330079992612203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,64,8,128,0,1,fp8,fp8,0,0.0315733328461647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,64,128,0,1,float16,float16,0,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,64,128,0,1,float16,fp8,0,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,64,128,0,1,fp8,fp8,0,0.022533332308133442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,1,128,0,1,float16,float16,0,0.021402666966120403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,1,128,0,1,float16,fp8,0,0.022277332842350006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,1,128,0,1,fp8,fp8,0,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,2,128,0,1,float16,float16,0,0.021840001145998638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,2,128,0,1,float16,fp8,0,0.022410665949185688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,2,128,0,1,fp8,fp8,0,0.020581333587567013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,8,128,0,1,fp8,fp8,0,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,4,128,0,1,float16,float16,0,0.021642667551835377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,4,128,0,1,float16,fp8,0,0.022277332842350006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,4,128,0,1,fp8,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,8,128,0,1,float16,float16,0,0.02274133265018463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,64,8,128,0,1,float16,fp8,0,0.022709332406520844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,64,128,0,1,float16,float16,0,0.018746666610240936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,64,128,0,1,float16,fp8,0,0.018464000274737675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,64,128,0,1,fp8,fp8,0,0.019578666736682255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,1,128,0,1,float16,float16,0,0.017690667261679966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,1,128,0,1,float16,fp8,0,0.017792000124851864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,1,128,0,1,fp8,fp8,0,0.017738666385412216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,2,128,0,1,float16,float16,0,0.017509333789348602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,2,128,0,1,float16,fp8,0,0.017786666750907898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,2,128,0,1,fp8,fp8,0,0.018672000616788864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,4,128,0,1,float16,float16,0,0.017818666994571686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,4,128,0,1,float16,fp8,0,0.01855466639002164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,4,128,0,1,fp8,fp8,0,0.018719999740521114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,8,128,0,1,float16,float16,0,0.01794133335351944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,8,128,0,1,float16,fp8,0,0.018309333672126133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,1,128,0,1,float16,fp8,0,0.6695840358734131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,64,8,128,0,1,fp8,fp8,0,0.018496000518401463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,1,128,0,1,float16,float16,0,0.6677760283152262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,1,128,0,1,fp8,fp8,0,0.7553066412607828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,2,128,0,1,float16,float16,0,0.6722880204518636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,2,128,0,1,float16,fp8,0,0.6689706643422445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,2,128,0,1,fp8,fp8,0,0.7576373418172201
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,4,128,0,1,float16,float16,0,0.6736640135447184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,4,128,0,1,float16,fp8,0,0.6754026412963867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,4,128,0,1,fp8,fp8,0,0.7767093181610107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,8,128,0,1,float16,float16,0,0.6835839748382568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,8,128,0,1,float16,fp8,0,0.6831200122833252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,64,128,0,1,float16,float16,0,0.4427040020624797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,64,8,128,0,1,fp8,fp8,0,0.7998666763305664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,64,128,0,1,float16,fp8,0,0.4301439921061198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,64,128,0,1,fp8,fp8,0,0.4567840099334717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,1,128,0,1,float16,float16,0,0.3400160074234009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,1,128,0,1,float16,fp8,0,0.3373066584269206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,1,128,0,1,fp8,fp8,0,0.3874986569086711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,2,128,0,1,float16,float16,0,0.33788267771402997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,2,128,0,1,float16,fp8,0,0.3410880168279012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,2,128,0,1,fp8,fp8,0,0.38921598593393963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,4,128,0,1,float16,float16,0,0.34381866455078125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,4,128,0,1,float16,fp8,0,0.3445173501968384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,4,128,0,1,fp8,fp8,0,0.3925173282623291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,8,128,0,1,float16,float16,0,0.3492799997329712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,8,128,0,1,float16,fp8,0,0.3476053476333618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,64,128,0,1,float16,float16,0,0.23178666830062866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,64,8,128,0,1,fp8,fp8,0,0.39744532108306885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,64,128,0,1,float16,fp8,0,0.22662933667500815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,64,128,0,1,fp8,fp8,0,0.23966399828592935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,1,128,0,1,float16,float16,0,0.17614932854970297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,1,128,0,1,float16,fp8,0,0.1764799952507019
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,1,128,0,1,fp8,fp8,0,0.20333866278330484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,2,128,0,1,float16,float16,0,0.1762133240699768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,2,128,0,1,float16,fp8,0,0.177946666876475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,2,128,0,1,fp8,fp8,0,0.2038080096244812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,4,128,0,1,float16,float16,0,0.1794933279355367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,4,128,0,1,float16,fp8,0,0.17851734161376953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,4,128,0,1,fp8,fp8,0,0.2044586737950643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,8,128,0,1,float16,float16,0,0.1817013422648112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,8,128,0,1,float16,fp8,0,0.18213866154352823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,64,8,128,0,1,fp8,fp8,0,0.20798399051030478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,64,128,0,1,float16,float16,0,0.12471466263135274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,64,128,0,1,float16,fp8,0,0.12190933028856914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,64,128,0,1,fp8,fp8,0,0.1304159959157308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,1,128,0,1,float16,float16,0,0.0962666670481364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,1,128,0,1,float16,fp8,0,0.09676266709963481
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,1,128,0,1,fp8,fp8,0,0.11129066348075867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,4,128,0,1,fp8,fp8,0,0.11262399951616923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,2,128,0,1,float16,float16,0,0.09578133622805278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,2,128,0,1,float16,fp8,0,0.09667733311653137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,2,128,0,1,fp8,fp8,0,0.11152533690134685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,4,128,0,1,float16,float16,0,0.09770666559537251
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,4,128,0,1,float16,fp8,0,0.0965173343817393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,8,128,0,1,float16,float16,0,0.0988106628259023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,8,128,0,1,float16,fp8,0,0.09835199515024821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,64,8,128,0,1,fp8,fp8,0,0.11331199606259663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,64,128,0,1,float16,float16,0,0.06976533432801564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,64,128,0,1,float16,fp8,0,0.07032533486684163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,64,128,0,1,fp8,fp8,0,0.07338133454322815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,1,128,0,1,float16,float16,0,0.05455466608206431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,1,128,0,1,float16,fp8,0,0.05425066749254862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,1,128,0,1,fp8,fp8,0,0.06149866680304209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,2,128,0,1,float16,float16,0,0.05385600030422211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,2,128,0,1,float16,fp8,0,0.054117331902186074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,2,128,0,1,fp8,fp8,0,0.06159466505050659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,4,128,0,1,float16,float16,0,0.05541333556175232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,4,128,0,1,float16,fp8,0,0.05449066559473673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,4,128,0,1,fp8,fp8,0,0.0631466656923294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,8,128,0,1,float16,float16,0,0.05539733171463013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,8,128,0,1,float16,fp8,0,0.05542399982611338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,1,128,0,1,float16,fp8,0,0.03374933451414108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,64,8,128,0,1,fp8,fp8,0,0.06311999758084615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,64,128,0,1,float16,float16,0,0.03722133239110311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,64,128,0,1,float16,fp8,0,0.03611200054486593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,64,128,0,1,fp8,fp8,0,0.04233600199222565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,1,128,0,1,float16,float16,0,0.03245333333810171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,1,128,0,1,fp8,fp8,0,0.03643733263015747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,2,128,0,1,float16,float16,0,0.03276266654332479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,2,128,0,1,float16,fp8,0,0.03349333256483078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,2,128,0,1,fp8,fp8,0,0.036389333506425224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,4,128,0,1,float16,float16,0,0.03338133295377096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,4,128,0,1,float16,fp8,0,0.03286399940649668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,4,128,0,1,fp8,fp8,0,0.03856533269087473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,64,128,0,1,fp8,fp8,0,0.026672000686327618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,1,128,0,1,float16,float16,0,0.022240000466505688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,8,128,0,1,float16,float16,0,0.033301333586374916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,8,128,0,1,float16,fp8,0,0.033904001116752625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,64,8,128,0,1,fp8,fp8,0,0.038362666964530945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,64,128,0,1,float16,float16,0,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,64,128,0,1,float16,fp8,0,0.0240639994541804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,1,128,0,1,float16,fp8,0,0.023120000958442688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,1,128,0,1,fp8,fp8,0,0.02459733436505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,2,128,0,1,float16,float16,0,0.022490667800108593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,2,128,0,1,float16,fp8,0,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,2,128,0,1,fp8,fp8,0,0.02437866727511088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,4,128,0,1,float16,float16,0,0.02349333216746648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,4,128,0,1,float16,fp8,0,0.024085332949956257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,4,128,0,1,fp8,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,8,128,0,1,float16,float16,0,0.023845332364241283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,8,128,0,1,float16,fp8,0,0.023541333774725597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,64,8,128,0,1,fp8,fp8,0,0.02492266645034154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,64,128,0,1,float16,float16,0,0.018794666975736618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,2,128,0,1,float16,float16,0,0.017530667285124462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,64,128,0,1,float16,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,64,128,0,1,fp8,fp8,0,0.019850666324297588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,1,128,0,1,float16,float16,0,0.01806933308641116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,4,128,0,1,fp8,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,1,128,0,1,float16,fp8,0,0.017711999515692394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,1,128,0,1,fp8,fp8,0,0.018437333405017853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,2,128,0,1,float16,fp8,0,0.017674667139848072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,2,128,0,1,fp8,fp8,0,0.0183999997874101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,4,128,0,1,float16,float16,0,0.017573333034912746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,4,128,0,1,float16,fp8,0,0.017898666361967724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,8,128,0,1,float16,float16,0,0.01842133328318596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,8,128,0,1,float16,fp8,0,0.01807466646035512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,64,8,128,0,1,fp8,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,64,128,0,1,float16,float16,0,0.01618133361140887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,64,128,0,1,float16,fp8,0,0.016373333831628162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,64,128,0,1,fp8,fp8,0,0.017509333789348602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,1,128,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,1,128,0,1,float16,fp8,0,0.01658133293191592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,1,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,2,128,0,1,float16,float16,0,0.015989333391189575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,4,128,0,1,fp8,fp8,0,0.01823466643691063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,2,128,0,1,float16,fp8,0,0.01603200038274129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,2,128,0,1,fp8,fp8,0,0.016677333662907284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,4,128,0,1,float16,float16,0,0.01580799991885821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,4,128,0,1,float16,fp8,0,0.015637333194414776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,8,128,0,1,float16,float16,0,0.01562133307258288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,8,128,0,1,float16,fp8,0,0.016165333489576977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,64,8,128,0,1,fp8,fp8,0,0.018021332720915478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,1,128,0,1,float16,float16,0,0.4609280029932658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,1,128,0,1,float16,fp8,0,0.4605706532796224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,1,128,0,1,fp8,fp8,0,0.6072853406270345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,2,128,0,1,float16,float16,0,0.46350399653116864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,2,128,0,1,float16,fp8,0,0.46270934740702313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,2,128,0,1,fp8,fp8,0,0.6122026840845743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,4,128,0,1,float16,float16,0,0.46902934710184735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,4,128,0,1,float16,fp8,0,0.4668746789296468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,4,128,0,1,fp8,fp8,0,0.6120533148447672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,8,128,0,1,float16,float16,0,0.4766933520634969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,8,128,0,1,float16,fp8,0,0.47468264897664386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,64,8,128,0,1,fp8,fp8,0,0.6168906688690186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,64,128,0,1,float16,float16,0,0.29872532685597736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,64,128,0,1,float16,fp8,0,0.2935093243916829
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,1,128,0,1,fp8,fp8,0,0.31458133459091187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,64,128,0,1,fp8,fp8,0,0.3460693359375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,1,128,0,1,float16,float16,0,0.23881065845489502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,1,128,0,1,float16,fp8,0,0.23772267500559488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,2,128,0,1,float16,float16,0,0.2392853299776713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,2,128,0,1,float16,fp8,0,0.23876800139745077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,2,128,0,1,fp8,fp8,0,0.3150346676508586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,4,128,0,1,float16,float16,0,0.24105066061019897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,4,128,0,1,float16,fp8,0,0.24108266830444336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,4,128,0,1,fp8,fp8,0,0.31337066491444904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,8,128,0,1,float16,float16,0,0.24517333507537842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,8,128,0,1,float16,fp8,0,0.2451146642367045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,64,128,0,1,float16,float16,0,0.15593600273132324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,64,8,128,0,1,fp8,fp8,0,0.3176800012588501
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,64,128,0,1,float16,fp8,0,0.15553067127863565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,64,128,0,1,fp8,fp8,0,0.18492267529169717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,1,128,0,1,float16,float16,0,0.12495999534924825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,1,128,0,1,float16,fp8,0,0.1258240044116974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,1,128,0,1,fp8,fp8,0,0.16580266753832498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,2,128,0,1,float16,float16,0,0.12683733304341635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,2,128,0,1,float16,fp8,0,0.1269866625467936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,2,128,0,1,fp8,fp8,0,0.1658560037612915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,4,128,0,1,float16,float16,0,0.12925333778063455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,4,128,0,1,float16,fp8,0,0.12867200374603271
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,4,128,0,1,fp8,fp8,0,0.16643733779589334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,8,128,0,1,float16,float16,0,0.12919466694196066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,8,128,0,1,float16,fp8,0,0.12986666957537332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,64,8,128,0,1,fp8,fp8,0,0.16911466916402182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,64,128,0,1,float16,float16,0,0.08669867118199666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,64,128,0,1,float16,fp8,0,0.08646933237711589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,64,128,0,1,fp8,fp8,0,0.10145599643389384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,1,128,0,1,float16,float16,0,0.07012266914049785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,1,128,0,1,float16,fp8,0,0.06868800024191539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,1,128,0,1,fp8,fp8,0,0.0890826682249705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,2,128,0,1,float16,float16,0,0.06933866441249847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,2,128,0,1,float16,fp8,0,0.06923733154932658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,2,128,0,1,fp8,fp8,0,0.08955199519793193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,4,128,0,1,float16,float16,0,0.0697920024394989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,4,128,0,1,float16,fp8,0,0.07055466870466869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,4,128,0,1,fp8,fp8,0,0.09171199798583984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,8,128,0,1,float16,float16,0,0.07038400073846181
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,8,128,0,1,float16,fp8,0,0.07029866675535838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,64,8,128,0,1,fp8,fp8,0,0.09156800309816997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,64,128,0,1,float16,float16,0,0.045226668318112694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,64,128,0,1,float16,fp8,0,0.04448533554871877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,2,128,0,1,float16,fp8,0,0.04001066585381826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,64,128,0,1,fp8,fp8,0,0.05685866872469584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,1,128,0,1,float16,float16,0,0.04054400076468786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,1,128,0,1,float16,fp8,0,0.04035733391841253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,1,128,0,1,fp8,fp8,0,0.0510453333457311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,2,128,0,1,float16,float16,0,0.03938133269548416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,2,128,0,1,fp8,fp8,0,0.051488002141316734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,4,128,0,1,float16,float16,0,0.04014399896065394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,4,128,0,1,float16,fp8,0,0.04081599911053976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,4,128,0,1,fp8,fp8,0,0.052933335304260254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,8,128,0,1,float16,float16,0,0.04101333270470301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,8,128,0,1,float16,fp8,0,0.04121066629886627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,64,8,128,0,1,fp8,fp8,0,0.05271466573079427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,64,128,0,1,float16,float16,0,0.028255999088287354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,64,128,0,1,float16,fp8,0,0.027818667391935985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,64,128,0,1,fp8,fp8,0,0.03366400053103765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,1,128,0,1,float16,float16,0,0.025792000194390614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,1,128,0,1,float16,fp8,0,0.026144000391165417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,1,128,0,1,fp8,fp8,0,0.03252800057331721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,2,128,0,1,float16,float16,0,0.02678400029738744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,2,128,0,1,float16,fp8,0,0.026047999660174053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,8,128,0,1,float16,float16,0,0.026917333404223125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,2,128,0,1,fp8,fp8,0,0.031712000568707786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,4,128,0,1,float16,float16,0,0.02610666553179423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,4,128,0,1,float16,fp8,0,0.0262773334980011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,4,128,0,1,fp8,fp8,0,0.0325546662012736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,8,128,0,1,float16,fp8,0,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,64,8,128,0,1,fp8,fp8,0,0.03268799930810928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,64,128,0,1,float16,float16,0,0.02012266715367635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,64,128,0,1,float16,fp8,0,0.020074666788180668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,64,128,0,1,fp8,fp8,0,0.023605334262053173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,1,128,0,1,float16,float16,0,0.018373332917690277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,1,128,0,1,float16,fp8,0,0.018725333114465077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,1,128,0,1,fp8,fp8,0,0.022730665902296703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,2,128,0,1,float16,float16,0,0.0184906671444575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,2,128,0,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,2,128,0,1,fp8,fp8,0,0.022304000953833263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,4,128,0,1,float16,float16,0,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,4,128,0,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,4,128,0,1,fp8,fp8,0,0.022266666094462078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,8,128,0,1,float16,float16,0,0.019754666835069656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,8,128,0,1,float16,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,64,8,128,0,1,fp8,fp8,0,0.02274133265018463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,64,128,0,1,float16,float16,0,0.01581866666674614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,64,128,0,1,float16,fp8,0,0.01648533344268799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,64,128,0,1,fp8,fp8,0,0.018266666680574417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,1,128,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,1,128,0,1,float16,fp8,0,0.016469333320856094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,1,128,0,1,fp8,fp8,0,0.017450666675964992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,2,128,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,2,128,0,1,float16,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,2,128,0,1,fp8,fp8,0,0.017509333789348602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,4,128,0,1,float16,float16,0,0.015392000476519266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,4,128,0,1,float16,fp8,0,0.01602666700879733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,4,128,0,1,fp8,fp8,0,0.018005333840847015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,8,128,0,1,float16,float16,0,0.01618133361140887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,8,128,0,1,float16,fp8,0,0.015824000040690105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,64,8,128,0,1,fp8,fp8,0,0.01800000046690305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,64,128,0,1,float16,float16,0,0.014618666221698126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,64,128,0,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,64,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,1,128,0,1,float16,float16,0,0.014352000008026758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,1,128,0,1,float16,fp8,0,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,1,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,2,128,0,1,float16,float16,0,0.01458666721979777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,2,128,0,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,2,128,0,1,fp8,fp8,0,0.01628799984852473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,4,128,0,1,float16,float16,0,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,4,128,0,1,float16,fp8,0,0.014522666732470194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,4,128,0,1,fp8,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,8,128,0,1,float16,float16,0,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,8,128,0,1,float16,fp8,0,0.015477333217859268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,64,8,128,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,1,128,0,1,float16,float16,0,0.38604267438252765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,1,128,0,1,float16,fp8,0,0.38224534193674725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,1,128,0,1,fp8,fp8,0,0.5361013412475586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,2,128,0,1,float16,float16,0,0.38236268361409503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,2,128,0,1,float16,fp8,0,0.3833386500676473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,2,128,0,1,fp8,fp8,0,0.5362559954325358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,4,128,0,1,float16,float16,0,0.38839999834696454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,4,128,0,1,float16,fp8,0,0.3856319983800252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,64,128,0,1,float16,float16,0,0.2389919956525167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,8,128,0,1,fp8,fp8,0,0.538810650507609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,8,128,0,1,float16,float16,0,0.3901706536610921
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,4,128,0,1,fp8,fp8,0,0.5395146608352661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,64,8,128,0,1,float16,fp8,0,0.3932853142420451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,64,128,0,1,float16,fp8,0,0.23149333397547403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,64,128,0,1,fp8,fp8,0,0.2975626587867737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,2,128,0,1,float16,fp8,0,0.20245865980784097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,1,128,0,1,float16,float16,0,0.1974560022354126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,1,128,0,1,float16,fp8,0,0.19920533895492554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,1,128,0,1,fp8,fp8,0,0.2768159906069438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,2,128,0,1,float16,float16,0,0.20101332664489746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,2,128,0,1,fp8,fp8,0,0.27778132756551105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,4,128,0,1,float16,float16,0,0.20014933745066324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,4,128,0,1,float16,fp8,0,0.20307199160257974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,4,128,0,1,fp8,fp8,0,0.279039998849233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,8,128,0,1,float16,float16,0,0.20299200216929117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,8,128,0,1,float16,fp8,0,0.20387732982635498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,64,8,128,0,1,fp8,fp8,0,0.2801706592241923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,64,128,0,1,float16,float16,0,0.12329066793123881
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,64,128,0,1,float16,fp8,0,0.12176533540089925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,64,128,0,1,fp8,fp8,0,0.15639999508857727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,1,128,0,1,float16,float16,0,0.10619200269381206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,4,128,0,1,float16,float16,0,0.1051680048306783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,1,128,0,1,float16,fp8,0,0.10494400064150493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,1,128,0,1,fp8,fp8,0,0.14477866888046265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,2,128,0,1,float16,float16,0,0.10431466499964397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,2,128,0,1,float16,fp8,0,0.10545066992441814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,2,128,0,1,fp8,fp8,0,0.14477333426475525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,4,128,0,1,float16,fp8,0,0.10539199908574422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,4,128,0,1,fp8,fp8,0,0.1479200025399526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,8,128,0,1,float16,float16,0,0.10608533024787903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,8,128,0,1,float16,fp8,0,0.10689600308736165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,64,8,128,0,1,fp8,fp8,0,0.14709333578745523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,64,128,0,1,float16,float16,0,0.06346666812896729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,64,128,0,1,float16,fp8,0,0.062128002444903054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,64,128,0,1,fp8,fp8,0,0.08398399750391643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,1,128,0,1,float16,float16,0,0.05875200033187866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,1,128,0,1,float16,fp8,0,0.06049066781997681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,1,128,0,1,fp8,fp8,0,0.08063999811808269
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,2,128,0,1,float16,float16,0,0.05974400043487549
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,2,128,0,1,float16,fp8,0,0.059605335195859276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,2,128,0,1,fp8,fp8,0,0.07965866724650066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,4,128,0,1,float16,float16,0,0.059578667084376015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,4,128,0,1,float16,fp8,0,0.05992533266544342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,4,128,0,1,fp8,fp8,0,0.08183999856313069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,8,128,0,1,float16,float16,0,0.05922133227189382
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,8,128,0,1,float16,fp8,0,0.0606826643149058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,64,8,128,0,1,fp8,fp8,0,0.08152000109354655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,64,128,0,1,float16,float16,0,0.03742400060097376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,64,128,0,1,float16,fp8,0,0.036720000207424164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,64,128,0,1,fp8,fp8,0,0.04845866560935974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,1,128,0,1,float16,float16,0,0.03643200049797694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,1,128,0,1,float16,fp8,0,0.03586666782697042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,1,128,0,1,fp8,fp8,0,0.04821866750717163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,2,128,0,1,float16,float16,0,0.03616533428430557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,2,128,0,1,float16,fp8,0,0.035973332822322845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,2,128,0,1,fp8,fp8,0,0.04656533400217692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,4,128,0,1,float16,float16,0,0.03585600107908249
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,4,128,0,1,float16,fp8,0,0.037290667494138084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,4,128,0,1,fp8,fp8,0,0.047839999198913574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,8,128,0,1,float16,float16,0,0.036714665591716766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,8,128,0,1,float16,fp8,0,0.035877334574858345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,64,8,128,0,1,fp8,fp8,0,0.04808533191680908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,64,128,0,1,float16,float16,0,0.02492800106604894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,64,128,0,1,float16,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,64,128,0,1,fp8,fp8,0,0.032858667274316154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,1,128,0,1,float16,float16,0,0.024832000335057575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,4,128,0,1,float16,float16,0,0.02425066630045573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,1,128,0,1,float16,fp8,0,0.024495999018351238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,1,128,0,1,fp8,fp8,0,0.029882666965325672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,2,128,0,1,float16,float16,0,0.02476799984773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,2,128,0,1,float16,fp8,0,0.024634666740894318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,2,128,0,1,fp8,fp8,0,0.029365333418051403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,4,128,0,1,float16,fp8,0,0.02555199960867564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,4,128,0,1,fp8,fp8,0,0.03027733415365219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,8,128,0,1,float16,float16,0,0.02446399877468745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,8,128,0,1,float16,fp8,0,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,64,8,128,0,1,fp8,fp8,0,0.030458666384220123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,64,128,0,1,float16,float16,0,0.01844800015290578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,64,128,0,1,float16,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,64,128,0,1,fp8,fp8,0,0.02276800076166789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,1,128,0,1,float16,float16,0,0.018181333939234417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,1,128,0,1,float16,fp8,0,0.018298666924238205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,1,128,0,1,fp8,fp8,0,0.020794666061798733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,2,128,0,1,float16,float16,0,0.018058666338523228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,2,128,0,1,float16,fp8,0,0.018432000031073887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,2,128,0,1,fp8,fp8,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,4,128,0,1,float16,float16,0,0.01764800027012825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,4,128,0,1,float16,fp8,0,0.018709332992633183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,4,128,0,1,fp8,fp8,0,0.022858666876951855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,8,128,0,1,float16,float16,0,0.017759999881188076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,8,128,0,1,float16,fp8,0,0.0186666672428449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,64,8,128,0,1,fp8,fp8,0,0.021509334444999695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,64,128,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,64,128,0,1,float16,fp8,0,0.015610666324694952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,64,128,0,1,fp8,fp8,0,0.017466666797796886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,1,128,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,1,128,0,1,float16,fp8,0,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,1,128,0,1,fp8,fp8,0,0.016762666404247284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,2,128,0,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,8,128,0,1,float16,float16,0,0.01586666703224182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,2,128,0,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,2,128,0,1,fp8,fp8,0,0.017509333789348602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,4,128,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,4,128,0,1,float16,fp8,0,0.015482666591803232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,4,128,0,1,fp8,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,8,128,0,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,64,8,128,0,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,64,128,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,64,128,0,1,float16,fp8,0,0.014432000617186228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,64,128,0,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,1,128,0,1,float16,float16,0,0.01441066712141037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,1,128,0,1,float16,fp8,0,0.014560000350077948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,1,128,0,1,fp8,fp8,0,0.015989333391189575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,2,128,0,1,float16,float16,0,0.01451733335852623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,2,128,0,1,float16,fp8,0,0.014384000251690546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,2,128,0,1,fp8,fp8,0,0.017429333180189133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,4,128,0,1,float16,float16,0,0.01421333352724711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,4,128,0,1,float16,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,4,128,0,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,8,128,0,1,float16,float16,0,0.014549333602190018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,8,128,0,1,float16,fp8,0,0.014266667266686758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,64,8,128,0,1,fp8,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,1,128,0,1,fp8,fp8,0,34.0554453531901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,2,128,0,1,fp8,fp8,0,33.80221811930338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,1,128,0,1,float16,fp8,0,49.93279520670573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,1,128,0,1,float16,float16,0,50.89512634277344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,2,128,0,1,float16,float16,0,50.61854553222656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,2,128,0,1,float16,fp8,0,50.272613525390625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,4,128,0,1,float16,fp8,0,50.676839192708336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,4,128,0,1,float16,float16,0,50.810516357421875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,4,128,0,1,fp8,fp8,0,33.97591908772787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,48,128,0,1,fp8,fp8,0,18.329556783040363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,48,128,0,1,float16,float16,0,25.983072916666668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,48,128,0,1,float16,fp8,0,26.16308339436849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,1,128,0,1,float16,float16,0,26.128799438476562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,8,128,0,1,float16,float16,0,51.11102294921875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,8,128,0,1,fp8,fp8,0,35.43451182047526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,1,128,0,1,fp8,fp8,0,17.466917673746746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,1,128,0,1,float16,fp8,0,25.139633178710938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,48,8,128,0,1,float16,fp8,0,50.2379150390625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,2,128,0,1,fp8,fp8,0,16.979440053304035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,2,128,0,1,float16,fp8,0,25.245450337727863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,2,128,0,1,float16,float16,0,25.314730326334637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,4,128,0,1,fp8,fp8,0,16.87051773071289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,4,128,0,1,float16,float16,0,25.77068328857422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,4,128,0,1,float16,fp8,0,25.89466603597005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,48,128,0,1,fp8,fp8,0,8.8198610941569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,48,128,0,1,float16,float16,0,13.22231419881185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,48,128,0,1,float16,fp8,0,13.192586263020834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,8,128,0,1,float16,float16,0,25.54021962483724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,8,128,0,1,fp8,fp8,0,17.696085611979168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,1,128,0,1,fp8,fp8,0,9.203786849975586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,1,128,0,1,float16,float16,0,13.17187754313151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,1,128,0,1,float16,fp8,0,12.952597300211588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,48,8,128,0,1,float16,fp8,0,25.578501383463543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,2,128,0,1,fp8,fp8,0,8.592341105143229
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,2,128,0,1,float16,float16,0,12.757867177327475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,2,128,0,1,float16,fp8,0,12.741668701171875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,4,128,0,1,float16,float16,0,12.761104583740234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,4,128,0,1,fp8,fp8,0,8.529424031575521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,8,128,0,1,fp8,fp8,0,8.506634394327799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,4,128,0,1,float16,fp8,0,12.817818959554037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,48,128,0,1,float16,float16,0,6.67251714070638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,48,128,0,1,fp8,fp8,0,4.500405311584473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,48,128,0,1,float16,fp8,0,6.937583923339844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,8,128,0,1,float16,float16,0,12.993701934814453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,48,8,128,0,1,float16,fp8,0,12.933391571044922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,1,128,0,1,float16,float16,0,6.596741358439128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,1,128,0,1,fp8,fp8,0,4.4179948170979815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,1,128,0,1,float16,fp8,0,6.632469177246094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,2,128,0,1,fp8,fp8,0,4.38917859395345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,2,128,0,1,float16,float16,0,6.4947357177734375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,2,128,0,1,float16,fp8,0,6.526053110758464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,4,128,0,1,fp8,fp8,0,4.354645411173503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,4,128,0,1,float16,float16,0,6.857135772705078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,4,128,0,1,float16,fp8,0,6.5194136301676435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,8,128,0,1,fp8,fp8,0,4.41427739461263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,8,128,0,1,float16,float16,0,6.639008204142253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,48,8,128,0,1,float16,fp8,0,6.654186884562175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,1,128,0,1,fp8,fp8,0,19.796080271402996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,2,128,0,1,fp8,fp8,0,19.34659703572591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,1,128,0,1,float16,float16,0,29.696367899576824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,1,128,0,1,float16,fp8,0,29.52027638753255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,2,128,0,1,float16,fp8,0,30.263402303059895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,2,128,0,1,float16,float16,0,29.940528869628906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,4,128,0,1,float16,float16,0,29.96503448486328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,4,128,0,1,float16,fp8,0,29.55310312906901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,4,128,0,1,fp8,fp8,0,19.84010187784831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,48,128,0,1,fp8,fp8,0,10.410170873006185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,48,128,0,1,float16,float16,0,14.932795206705729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,48,128,0,1,float16,fp8,0,15.289044698079428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,8,128,0,1,fp8,fp8,0,19.840298970540363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,1,128,0,1,float16,float16,0,14.703355153401693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,8,128,0,1,float16,float16,0,28.753028869628906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,1,128,0,1,fp8,fp8,0,9.986640294392904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,1,128,0,1,float16,fp8,0,15.351812998453775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,48,8,128,0,1,float16,fp8,0,29.541056315104168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,2,128,0,1,fp8,fp8,0,9.852767944335938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,2,128,0,1,float16,float16,0,14.957771301269531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,2,128,0,1,float16,fp8,0,15.073562622070312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,4,128,0,1,float16,float16,0,14.761311848958334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,4,128,0,1,fp8,fp8,0,10.175402959187826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,4,128,0,1,float16,fp8,0,15.100906372070312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,8,128,0,1,fp8,fp8,0,10.038143793741861
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,48,128,0,1,float16,float16,0,7.6038023630778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,48,128,0,1,fp8,fp8,0,5.289279937744141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,48,128,0,1,float16,fp8,0,7.642192204793294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,8,128,0,1,float16,float16,0,14.882303873697916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,1,128,0,1,float16,float16,0,7.511407852172852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,48,8,128,0,1,float16,fp8,0,14.673540751139322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,1,128,0,1,fp8,fp8,0,4.909040133158366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,1,128,0,1,float16,fp8,0,7.532063802083333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,2,128,0,1,fp8,fp8,0,4.99513053894043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,2,128,0,1,float16,float16,0,7.307685216267903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,2,128,0,1,float16,fp8,0,7.390431722005208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,4,128,0,1,fp8,fp8,0,4.978341420491536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,4,128,0,1,float16,float16,0,7.4554398854573565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,4,128,0,1,float16,fp8,0,7.531738917032878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,8,128,0,1,fp8,fp8,0,5.0221865971883135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,48,128,0,1,float16,float16,0,3.8679253260294595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,48,128,0,1,fp8,fp8,0,2.667109489440918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,48,128,0,1,float16,fp8,0,3.885792096455892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,8,128,0,1,float16,float16,0,7.529104232788086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,48,8,128,0,1,float16,fp8,0,7.350037256876628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,1,128,0,1,float16,float16,0,3.8105599085489907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,1,128,0,1,fp8,fp8,0,2.566160043080648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,1,128,0,1,float16,fp8,0,3.8171733220418296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,2,128,0,1,fp8,fp8,0,2.5785120328267417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,2,128,0,1,float16,float16,0,3.840378761291504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,2,128,0,1,float16,fp8,0,3.7591145833333335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,4,128,0,1,float16,float16,0,3.7685279846191406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,4,128,0,1,fp8,fp8,0,2.606368064880371
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,4,128,0,1,float16,fp8,0,3.7557814915974936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,8,128,0,1,float16,float16,0,3.846757253011068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,8,128,0,1,fp8,fp8,0,2.6234453519185386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,48,8,128,0,1,float16,fp8,0,3.8509225845336914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,1,128,0,1,fp8,fp8,0,14.083503723144531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,2,128,0,1,fp8,fp8,0,14.05947240193685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,1,128,0,1,float16,float16,0,20.697376251220703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,1,128,0,1,float16,fp8,0,21.07326380411784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,2,128,0,1,float16,fp8,0,20.93177541097005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,2,128,0,1,float16,float16,0,21.53990936279297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,4,128,0,1,float16,float16,0,21.073418935139973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,4,128,0,1,fp8,fp8,0,14.238789876302084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,48,128,0,1,fp8,fp8,0,7.699055989583333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,48,128,0,1,float16,float16,0,10.637594858805338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,48,128,0,1,float16,fp8,0,11.340010325113932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,4,128,0,1,float16,fp8,0,20.358907063802082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,8,128,0,1,fp8,fp8,0,14.097087860107422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,8,128,0,1,float16,float16,0,21.194048563639324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,1,128,0,1,float16,float16,0,10.240496317545572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,1,128,0,1,fp8,fp8,0,6.927706400553386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,48,8,128,0,1,float16,fp8,0,20.748079935709637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,2,128,0,1,fp8,fp8,0,6.979125340779622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,1,128,0,1,float16,fp8,0,10.440469106038412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,2,128,0,1,float16,float16,0,10.494373321533203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,2,128,0,1,float16,fp8,0,10.221136093139648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,4,128,0,1,fp8,fp8,0,6.924160003662109
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,4,128,0,1,float16,float16,0,10.26303482055664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,48,128,0,1,float16,float16,0,5.436389287312825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,8,128,0,1,fp8,fp8,0,7.088842391967773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,4,128,0,1,float16,fp8,0,10.562101364135742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,48,128,0,1,float16,fp8,0,5.431360244750977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,48,128,0,1,fp8,fp8,0,3.754591941833496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,8,128,0,1,float16,float16,0,10.496298472086588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,48,8,128,0,1,float16,fp8,0,10.739962259928385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,1,128,0,1,float16,float16,0,5.40451176961263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,1,128,0,1,fp8,fp8,0,3.616208076477051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,1,128,0,1,float16,fp8,0,5.6898988087972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,2,128,0,1,fp8,fp8,0,3.5119733810424805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,2,128,0,1,float16,float16,0,5.19869327545166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,2,128,0,1,float16,fp8,0,5.225712140401204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,4,128,0,1,float16,float16,0,5.224677403767903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,4,128,0,1,fp8,fp8,0,3.5288960138956704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,4,128,0,1,float16,fp8,0,5.359477361043294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,48,128,0,1,float16,float16,0,2.747034708658854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,8,128,0,1,fp8,fp8,0,3.5468854904174805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,48,128,0,1,float16,fp8,0,2.7674080530802407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,8,128,0,1,float16,float16,0,5.470517476399739
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,48,128,0,1,fp8,fp8,0,1.9182559649149578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,48,8,128,0,1,float16,fp8,0,5.336741129557292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,1,128,0,1,float16,float16,0,2.679141362508138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,1,128,0,1,float16,fp8,0,2.6803998947143555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,1,128,0,1,fp8,fp8,0,1.8614452679951985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,2,128,0,1,float16,float16,0,2.6792640686035156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,2,128,0,1,fp8,fp8,0,1.8537119229634602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,2,128,0,1,float16,fp8,0,2.6288960774739585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,4,128,0,1,fp8,fp8,0,1.8382933934529622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,4,128,0,1,float16,float16,0,2.666698773701986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,4,128,0,1,float16,fp8,0,2.6849759419759116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,8,128,0,1,float16,float16,0,2.6695305506388345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,8,128,0,1,float16,fp8,0,2.6794347763061523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,48,8,128,0,1,fp8,fp8,0,1.8425547281901042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,1,128,0,1,fp8,fp8,0,18.600666046142578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,2,128,0,1,fp8,fp8,0,18.285616556803387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,1,128,0,1,float16,float16,0,27.254987080891926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,1,128,0,1,float16,fp8,0,27.14269765218099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,2,128,0,1,float16,float16,0,27.65099843343099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,2,128,0,1,float16,fp8,0,28.20282236735026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,4,128,0,1,float16,float16,0,27.591087341308594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,4,128,0,1,float16,fp8,0,27.690511067708332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,4,128,0,1,fp8,fp8,0,19.10540262858073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,48,128,0,1,fp8,fp8,0,9.62709871927897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,48,128,0,1,float16,float16,0,13.803301493326822
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,1,128,0,1,float16,float16,0,13.388506571451822
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,48,128,0,1,float16,fp8,0,14.201157887776693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,8,128,0,1,fp8,fp8,0,18.327611287434895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,8,128,0,1,float16,float16,0,27.14966328938802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,1,128,0,1,fp8,fp8,0,9.159088134765625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,1,128,0,1,float16,fp8,0,14.020821889241537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,2,128,0,1,fp8,fp8,0,9.31820297241211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,48,8,128,0,1,float16,fp8,0,27.36505126953125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,2,128,0,1,float16,float16,0,13.315391540527344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,2,128,0,1,float16,fp8,0,13.6692746480306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,4,128,0,1,fp8,fp8,0,9.260143915812174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,4,128,0,1,float16,float16,0,13.746495564778646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,4,128,0,1,float16,fp8,0,13.926639556884766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,48,128,0,1,float16,float16,0,7.729855855305989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,48,128,0,1,fp8,fp8,0,5.052026748657227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,48,128,0,1,float16,fp8,0,7.162186940511067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,8,128,0,1,fp8,fp8,0,9.744650522867838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,8,128,0,1,float16,float16,0,14.017045338948568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,1,128,0,1,float16,float16,0,6.939903895060222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,48,8,128,0,1,float16,fp8,0,14.108992258707682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,1,128,0,1,float16,fp8,0,6.978085199991862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,1,128,0,1,fp8,fp8,0,4.583919843037923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,2,128,0,1,fp8,fp8,0,4.833754539489746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,2,128,0,1,float16,float16,0,6.7736161549886065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,2,128,0,1,float16,fp8,0,6.738746643066406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,4,128,0,1,fp8,fp8,0,4.669594764709473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,4,128,0,1,float16,float16,0,6.915274937947591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,4,128,0,1,float16,fp8,0,6.922261555989583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,8,128,0,1,fp8,fp8,0,4.890560150146484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,48,128,0,1,float16,float16,0,3.6071573893229165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,48,128,0,1,fp8,fp8,0,2.502682685852051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,48,128,0,1,float16,fp8,0,3.6096906661987305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,8,128,0,1,float16,float16,0,6.85043207804362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,48,8,128,0,1,float16,fp8,0,6.773589452107747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,1,128,0,1,float16,float16,0,3.3593174616495767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,1,128,0,1,fp8,fp8,0,2.3934666315714517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,1,128,0,1,float16,fp8,0,3.4510294596354165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,2,128,0,1,fp8,fp8,0,2.3147145907084146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,2,128,0,1,float16,float16,0,3.4008426666259766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,2,128,0,1,float16,fp8,0,3.45415465037028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,4,128,0,1,float16,float16,0,3.3042774200439453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,4,128,0,1,fp8,fp8,0,2.3931360244750977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,4,128,0,1,float16,fp8,0,3.4991305669148765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,48,128,0,1,float16,float16,0,1.777546723683675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,8,128,0,1,float16,float16,0,3.412517229715983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,8,128,0,1,fp8,fp8,0,2.3485706647237143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,48,128,0,1,fp8,fp8,0,1.3013226985931396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,48,128,0,1,float16,fp8,0,1.7806399663289387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,48,8,128,0,1,float16,fp8,0,3.4847466150919595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,1,128,0,1,float16,float16,0,1.71506134668986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,1,128,0,1,float16,fp8,0,1.7118933995564778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,1,128,0,1,fp8,fp8,0,1.2974133491516113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,2,128,0,1,float16,float16,0,1.7249706586201985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,2,128,0,1,fp8,fp8,0,1.2838293711344402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,2,128,0,1,float16,fp8,0,1.7884480158487956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,4,128,0,1,float16,float16,0,1.717840035756429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,4,128,0,1,float16,fp8,0,1.724837303161621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,4,128,0,1,fp8,fp8,0,1.2597546577453613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,8,128,0,1,float16,float16,0,1.7736105918884277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,8,128,0,1,fp8,fp8,0,1.2512106895446777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,48,8,128,0,1,float16,fp8,0,1.7608906428019206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,1,128,0,1,fp8,fp8,0,10.981221516927084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,2,128,0,1,fp8,fp8,0,10.977668762207031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,1,128,0,1,float16,fp8,0,15.811519622802734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,1,128,0,1,float16,float16,0,15.959968566894531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,2,128,0,1,float16,float16,0,15.630821228027344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,2,128,0,1,float16,fp8,0,15.721370697021484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,4,128,0,1,float16,float16,0,15.697808583577475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,4,128,0,1,float16,fp8,0,15.814197540283203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,4,128,0,1,fp8,fp8,0,11.373797098795572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,48,128,0,1,fp8,fp8,0,5.852319717407227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,48,128,0,1,float16,float16,0,8.09780248006185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,48,128,0,1,float16,fp8,0,8.583173116048178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,1,128,0,1,float16,float16,0,7.944010416666667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,8,128,0,1,fp8,fp8,0,11.274895985921225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,8,128,0,1,float16,float16,0,16.027477264404297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,1,128,0,1,fp8,fp8,0,5.414000193277995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,1,128,0,1,float16,fp8,0,8.310159683227539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,2,128,0,1,fp8,fp8,0,5.635264078776042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,48,8,128,0,1,float16,fp8,0,16.71251169840495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,2,128,0,1,float16,float16,0,8.54751968383789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,2,128,0,1,float16,fp8,0,8.440282821655273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,4,128,0,1,fp8,fp8,0,6.026426951090495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,4,128,0,1,float16,float16,0,8.039898554484049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,4,128,0,1,float16,fp8,0,8.9442507425944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,48,128,0,1,float16,float16,0,4.163466771443685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,8,128,0,1,fp8,fp8,0,5.5259145100911455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,48,128,0,1,float16,fp8,0,4.08411184946696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,8,128,0,1,float16,float16,0,8.060933430989584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,48,128,0,1,fp8,fp8,0,2.894709269205729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,48,8,128,0,1,float16,fp8,0,8.025738398234049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,1,128,0,1,float16,float16,0,3.9517653783162436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,1,128,0,1,fp8,fp8,0,2.7197014490763345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,1,128,0,1,float16,fp8,0,3.967712084452311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,2,128,0,1,fp8,fp8,0,2.7350667317708335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,2,128,0,1,float16,float16,0,3.892709414164225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,2,128,0,1,float16,fp8,0,3.820458730061849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,4,128,0,1,fp8,fp8,0,2.8043734232584634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,4,128,0,1,float16,float16,0,3.7690292994181314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,4,128,0,1,float16,fp8,0,4.008629480997722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,48,128,0,1,float16,float16,0,2.026858647664388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,8,128,0,1,fp8,fp8,0,2.784735997517904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,48,128,0,1,fp8,fp8,0,1.4932053883870442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,8,128,0,1,float16,fp8,0,4.0912478764851885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,48,8,128,0,1,float16,float16,0,4.040479977925618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,1,128,0,1,float16,float16,0,1.9423519770304363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,48,128,0,1,float16,fp8,0,2.098128000895182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,1,128,0,1,float16,fp8,0,1.9667466481526692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,1,128,0,1,fp8,fp8,0,1.4332906405131023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,2,128,0,1,float16,float16,0,1.9660693804423015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,2,128,0,1,fp8,fp8,0,1.4448693593343098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,2,128,0,1,float16,fp8,0,2.00652805964152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,4,128,0,1,fp8,fp8,0,1.414591948191325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,4,128,0,1,float16,float16,0,1.9366772969563801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,4,128,0,1,float16,fp8,0,1.9501546223958333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,8,128,0,1,float16,float16,0,1.9126027425130208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,48,128,0,1,float16,float16,0,1.0663999716440837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,8,128,0,1,float16,fp8,0,2.019791920979818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,48,8,128,0,1,fp8,fp8,0,1.4326666196187336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,48,128,0,1,float16,fp8,0,1.0923199653625488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,48,128,0,1,fp8,fp8,0,0.8159786860148112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,1,128,0,1,float16,float16,0,1.0820106665293376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,1,128,0,1,float16,fp8,0,1.0389066537221272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,1,128,0,1,fp8,fp8,0,0.7880586783091227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,2,128,0,1,float16,float16,0,1.0442986488342285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,4,128,0,1,float16,float16,0,1.0449706713358562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,2,128,0,1,fp8,fp8,0,0.7813706398010254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,2,128,0,1,float16,fp8,0,1.0867040157318115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,4,128,0,1,fp8,fp8,0,0.7834346294403076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,4,128,0,1,float16,fp8,0,1.0432746410369873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,8,128,0,1,float16,float16,0,1.0389546553293865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,8,128,0,1,float16,fp8,0,1.0476799805959065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,48,8,128,0,1,fp8,fp8,0,0.787834644317627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,1,128,0,1,fp8,fp8,0,10.736400604248047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,2,128,0,1,fp8,fp8,0,10.830122629801432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,1,128,0,1,float16,float16,0,15.282703399658203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,1,128,0,1,float16,fp8,0,15.218395233154297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,2,128,0,1,float16,float16,0,15.0533816019694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,2,128,0,1,float16,fp8,0,15.037413279215494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,4,128,0,1,float16,float16,0,15.314570109049479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,48,128,0,1,fp8,fp8,0,5.9167734781901045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,4,128,0,1,fp8,fp8,0,10.69937006632487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,48,128,0,1,float16,float16,0,7.925498962402344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,48,128,0,1,float16,fp8,0,8.067802429199219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,8,128,0,1,fp8,fp8,0,10.875199635823568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,4,128,0,1,float16,fp8,0,15.1540158589681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,1,128,0,1,fp8,fp8,0,5.345034917195638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,8,128,0,1,float16,float16,0,17.03583526611328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,48,8,128,0,1,float16,fp8,0,16.34389877319336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,1,128,0,1,float16,float16,0,7.705237070719401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,1,128,0,1,float16,fp8,0,7.5376536051432295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,2,128,0,1,float16,float16,0,7.787786483764648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,2,128,0,1,fp8,fp8,0,6.157402674357097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,2,128,0,1,float16,fp8,0,7.616010665893555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,4,128,0,1,fp8,fp8,0,5.508026758829753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,48,128,0,1,float16,float16,0,4.040170669555664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,4,128,0,1,float16,float16,0,7.590218861897786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,8,128,0,1,fp8,fp8,0,5.597616195678711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,48,128,0,1,float16,fp8,0,3.9931519826253257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,4,128,0,1,float16,fp8,0,7.863087972005208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,8,128,0,1,float16,fp8,0,7.7801971435546875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,48,8,128,0,1,float16,float16,0,7.821744283040364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,48,128,0,1,fp8,fp8,0,2.951007843017578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,1,128,0,1,float16,float16,0,3.726655960083008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,1,128,0,1,fp8,fp8,0,2.6348959604899087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,1,128,0,1,float16,fp8,0,3.690138816833496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,2,128,0,1,float16,float16,0,3.7424052556355796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,2,128,0,1,fp8,fp8,0,2.6366666158040366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,2,128,0,1,float16,fp8,0,3.8534507751464844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,4,128,0,1,fp8,fp8,0,2.6456106503804526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,4,128,0,1,float16,float16,0,3.6987358729044595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,4,128,0,1,float16,fp8,0,3.8234453201293945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,48,128,0,1,float16,float16,0,1.978277365366618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,8,128,0,1,float16,float16,0,3.8178399403889975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,48,128,0,1,float16,fp8,0,1.9556907018025715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,8,128,0,1,fp8,fp8,0,2.7074454625447593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,48,8,128,0,1,float16,fp8,0,3.7230774561564126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,48,128,0,1,fp8,fp8,0,1.4655146598815918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,1,128,0,1,float16,float16,0,1.8702346483866374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,1,128,0,1,fp8,fp8,0,1.382080078125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,1,128,0,1,float16,fp8,0,1.8231946627298992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,2,128,0,1,float16,float16,0,1.8619839350382488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,2,128,0,1,fp8,fp8,0,1.3878614107767742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,2,128,0,1,float16,fp8,0,1.8478506406148274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,4,128,0,1,float16,float16,0,1.8118613560994465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,4,128,0,1,fp8,fp8,0,1.365978717803955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,4,128,0,1,float16,fp8,0,1.867813269297282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,8,128,0,1,float16,float16,0,1.8794132868448894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,48,128,0,1,float16,float16,0,0.9849332968393961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,8,128,0,1,fp8,fp8,0,1.3875840504964192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,48,8,128,0,1,float16,fp8,0,1.8367946942647297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,48,128,0,1,float16,fp8,0,1.0135040283203125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,48,128,0,1,fp8,fp8,0,0.805344025293986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,1,128,0,1,float16,float16,0,0.9542880058288574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,1,128,0,1,fp8,fp8,0,0.7586186726888021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,1,128,0,1,float16,fp8,0,0.9581387042999268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,2,128,0,1,float16,float16,0,0.9589653015136719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,2,128,0,1,float16,fp8,0,0.9586186408996582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,2,128,0,1,fp8,fp8,0,0.7580266793568929
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,4,128,0,1,float16,float16,0,0.9593066374460856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,4,128,0,1,float16,fp8,0,0.964021364847819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,4,128,0,1,fp8,fp8,0,0.7339519659678141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,8,128,0,1,float16,float16,0,0.9561279614766439
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,8,128,0,1,float16,fp8,0,0.9616906642913818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,48,8,128,0,1,fp8,fp8,0,0.7375679810841879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,48,128,0,1,float16,float16,0,0.5521546602249146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,48,128,0,1,float16,fp8,0,0.5605013370513916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,48,128,0,1,fp8,fp8,0,0.42960532506306964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,1,128,0,1,float16,float16,0,0.5415199995040894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,1,128,0,1,float16,fp8,0,0.5431413253148397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,1,128,0,1,fp8,fp8,0,0.39959466457366943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,2,128,0,1,float16,float16,0,0.5382986863454183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,2,128,0,1,float16,fp8,0,0.5406879981358846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,2,128,0,1,fp8,fp8,0,0.40386664867401123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,4,128,0,1,float16,float16,0,0.5416053136189779
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,4,128,0,1,float16,fp8,0,0.5425333182017008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,4,128,0,1,fp8,fp8,0,0.4026453495025635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,8,128,0,1,float16,float16,0,0.544426679611206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,8,128,0,1,fp8,fp8,0,0.40326400597890216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,48,8,128,0,1,float16,fp8,0,0.5468159914016724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,1,128,0,1,fp8,fp8,0,6.648186365763347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,2,128,0,1,fp8,fp8,0,6.7514292399088545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,1,128,0,1,float16,float16,0,9.08679453531901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,1,128,0,1,float16,fp8,0,9.079776128133139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,2,128,0,1,float16,float16,0,9.329850514729818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,2,128,0,1,float16,fp8,0,9.166138966878256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,4,128,0,1,float16,float16,0,9.162319819132486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,4,128,0,1,float16,fp8,0,9.273328145345053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,4,128,0,1,fp8,fp8,0,6.750170389811198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,48,128,0,1,fp8,fp8,0,3.6675678888956704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,48,128,0,1,float16,float16,0,4.970261255900065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,48,128,0,1,float16,fp8,0,4.904458681742351
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,1,128,0,1,float16,float16,0,4.51686414082845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,8,128,0,1,fp8,fp8,0,6.767546971638997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,8,128,0,1,float16,float16,0,9.36518923441569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,48,8,128,0,1,float16,fp8,0,9.170192082722982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,1,128,0,1,fp8,fp8,0,3.30679988861084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,1,128,0,1,float16,fp8,0,4.5669600168863935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,2,128,0,1,fp8,fp8,0,3.2924267450968423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,2,128,0,1,float16,float16,0,4.574917475382487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,2,128,0,1,float16,fp8,0,4.5635732014973955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,4,128,0,1,float16,float16,0,4.591285387674968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,4,128,0,1,fp8,fp8,0,3.3523680369059243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,4,128,0,1,float16,fp8,0,4.391205469767253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,48,128,0,1,float16,float16,0,2.370053291320801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,8,128,0,1,fp8,fp8,0,3.3471412658691406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,8,128,0,1,float16,float16,0,4.600005467732747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,48,128,0,1,fp8,fp8,0,1.8287839889526367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,48,8,128,0,1,float16,fp8,0,4.45195738474528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,48,128,0,1,float16,fp8,0,2.416565259297689
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,1,128,0,1,float16,float16,0,2.172218640645345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,1,128,0,1,float16,fp8,0,2.1908586819966636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,1,128,0,1,fp8,fp8,0,1.6415680249532063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,2,128,0,1,float16,float16,0,2.197370688120524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,2,128,0,1,fp8,fp8,0,1.6587786674499512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,2,128,0,1,float16,fp8,0,2.2106773058573403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,4,128,0,1,fp8,fp8,0,1.672346591949463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,4,128,0,1,float16,float16,0,2.274672031402588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,4,128,0,1,float16,fp8,0,2.1825119654337564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,8,128,0,1,float16,float16,0,2.1647146542867026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,8,128,0,1,float16,fp8,0,2.2705119450887046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,48,128,0,1,float16,fp8,0,1.196773370107015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,48,128,0,1,float16,float16,0,1.175935983657837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,48,8,128,0,1,fp8,fp8,0,1.6778559684753418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,48,128,0,1,fp8,fp8,0,0.9513599872589111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,1,128,0,1,float16,float16,0,1.123306671778361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,1,128,0,1,fp8,fp8,0,0.880021333694458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,1,128,0,1,float16,fp8,0,1.141434669494629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,2,128,0,1,float16,float16,0,1.2040800253550212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,2,128,0,1,float16,fp8,0,1.1287039915720622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,2,128,0,1,fp8,fp8,0,0.8680693308512369
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,4,128,0,1,float16,float16,0,1.110869328180949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,4,128,0,1,float16,fp8,0,1.1148426532745361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,4,128,0,1,fp8,fp8,0,0.8652160167694092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,8,128,0,1,float16,fp8,0,1.1207253138224285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,8,128,0,1,float16,float16,0,1.1165813604990642
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,48,8,128,0,1,fp8,fp8,0,0.8800053596496582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,1,128,0,1,float16,float16,0,0.5997866789499918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,48,128,0,1,float16,float16,0,0.6280266841252645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,48,128,0,1,float16,fp8,0,0.6358986695607504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,48,128,0,1,fp8,fp8,0,0.5134826501210531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,1,128,0,1,fp8,fp8,0,0.4761813481648763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,1,128,0,1,float16,fp8,0,0.6038613319396973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,2,128,0,1,float16,float16,0,0.601301352183024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,2,128,0,1,fp8,fp8,0,0.4750773509343465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,2,128,0,1,float16,fp8,0,0.6038399934768677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,4,128,0,1,float16,float16,0,0.6045173406600952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,4,128,0,1,float16,fp8,0,0.6068640152613322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,4,128,0,1,fp8,fp8,0,0.475322683652242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,8,128,0,1,float16,float16,0,0.603440006573995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,8,128,0,1,float16,fp8,0,0.6098933219909668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,48,8,128,0,1,fp8,fp8,0,0.4793920119603475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,48,128,0,1,float16,float16,0,0.35949865976969403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,1,128,0,1,fp8,fp8,0,0.26340266068776447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,48,128,0,1,float16,fp8,0,0.3675520022710164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,48,128,0,1,fp8,fp8,0,0.287173330783844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,1,128,0,1,float16,float16,0,0.34996267159779865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,1,128,0,1,float16,fp8,0,0.3511253197987874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,2,128,0,1,float16,float16,0,0.3496053218841553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,2,128,0,1,float16,fp8,0,0.35156798362731934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,2,128,0,1,fp8,fp8,0,0.26690133412679035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,4,128,0,1,float16,float16,0,0.3517599900563558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,4,128,0,1,float16,fp8,0,0.3563573360443115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,4,128,0,1,fp8,fp8,0,0.2638933261235555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,8,128,0,1,float16,float16,0,0.3543999989827474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,8,128,0,1,float16,fp8,0,0.355840007464091
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,48,8,128,0,1,fp8,fp8,0,0.2663466731707255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,1,128,0,1,fp8,fp8,0,7.033472061157227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,2,128,0,1,fp8,fp8,0,7.0681813557942705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,1,128,0,1,float16,float16,0,9.34286371866862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,1,128,0,1,float16,fp8,0,9.233376185099283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,2,128,0,1,float16,float16,0,9.3177490234375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,2,128,0,1,float16,fp8,0,9.262816111246744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,4,128,0,1,float16,float16,0,9.387695948282877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,4,128,0,1,float16,fp8,0,9.417093276977539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,4,128,0,1,fp8,fp8,0,7.131706873575847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,48,128,0,1,float16,float16,0,4.971503893534343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,48,128,0,1,fp8,fp8,0,3.915408134460449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,48,128,0,1,float16,fp8,0,5.1129865646362305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,1,128,0,1,float16,float16,0,4.615903854370117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,8,128,0,1,fp8,fp8,0,7.185354868570964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,8,128,0,1,float16,float16,0,9.382816314697266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,48,8,128,0,1,float16,fp8,0,9.46835199991862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,1,128,0,1,fp8,fp8,0,3.497189203898112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,1,128,0,1,float16,fp8,0,4.6089121500651045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,2,128,0,1,fp8,fp8,0,3.550304094950358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,2,128,0,1,float16,float16,0,4.55955187479655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,2,128,0,1,float16,fp8,0,4.594095865885417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,4,128,0,1,float16,float16,0,4.604373296101888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,4,128,0,1,fp8,fp8,0,3.5560906728108725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,4,128,0,1,float16,fp8,0,4.646426518758138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,48,128,0,1,float16,float16,0,2.483194669087728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,48,128,0,1,float16,fp8,0,2.4250879287719727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,48,128,0,1,fp8,fp8,0,2.013312021891276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,8,128,0,1,fp8,fp8,0,3.6414880752563477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,8,128,0,1,float16,float16,0,4.626250584920247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,48,8,128,0,1,float16,fp8,0,4.645589192708333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,1,128,0,1,float16,float16,0,2.203114668528239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,1,128,0,1,float16,fp8,0,2.171738624572754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,1,128,0,1,fp8,fp8,0,1.7483946482340496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,2,128,0,1,float16,float16,0,2.142394701639811
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,2,128,0,1,fp8,fp8,0,1.7393706639607747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,2,128,0,1,float16,fp8,0,2.1706506411234536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,4,128,0,1,float16,float16,0,2.191274642944336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,4,128,0,1,float16,fp8,0,2.2103306452433267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,4,128,0,1,fp8,fp8,0,1.758581320444743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,8,128,0,1,float16,float16,0,2.247706731160482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,48,128,0,1,float16,float16,0,1.2190453211466472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,8,128,0,1,fp8,fp8,0,1.7715039253234863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,48,8,128,0,1,float16,fp8,0,2.2261172930399575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,48,128,0,1,fp8,fp8,0,1.0279306570688884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,48,128,0,1,float16,fp8,0,1.2226026852925618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,1,128,0,1,float16,float16,0,1.1012053489685059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,1,128,0,1,float16,fp8,0,1.1285226345062256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,1,128,0,1,fp8,fp8,0,0.9063359896341959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,2,128,0,1,float16,float16,0,1.1056266625722249
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,2,128,0,1,float16,fp8,0,1.1772106488545735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,2,128,0,1,fp8,fp8,0,0.898080031077067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,4,128,0,1,float16,float16,0,1.113104025522868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,4,128,0,1,float16,fp8,0,1.1156693299611409
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,48,128,0,1,float16,float16,0,0.6284106572469076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,8,128,0,1,float16,fp8,0,1.1237013339996338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,4,128,0,1,fp8,fp8,0,0.9047786394755045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,8,128,0,1,float16,float16,0,1.1094506581624348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,48,8,128,0,1,fp8,fp8,0,0.9250293572743734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,48,128,0,1,float16,fp8,0,0.6362560192743937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,1,128,0,1,float16,float16,0,0.5844159921010336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,48,128,0,1,fp8,fp8,0,0.5407040119171143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,1,128,0,1,float16,fp8,0,0.5888586839040121
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,1,128,0,1,fp8,fp8,0,0.48211201032002765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,4,128,0,1,float16,float16,0,0.5881173213322958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,2,128,0,1,float16,float16,0,0.585749348004659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,2,128,0,1,float16,fp8,0,0.5909173488616943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,2,128,0,1,fp8,fp8,0,0.48790931701660156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,4,128,0,1,float16,fp8,0,0.5929386615753174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,4,128,0,1,fp8,fp8,0,0.48094932238260907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,8,128,0,1,float16,float16,0,0.589631994565328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,8,128,0,1,float16,fp8,0,0.5928159952163696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,48,8,128,0,1,fp8,fp8,0,0.4907253185908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,48,128,0,1,float16,float16,0,0.3431573311487834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,48,128,0,1,float16,fp8,0,0.35148266951243085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,2,128,0,1,float16,float16,0,0.325653334458669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,48,128,0,1,fp8,fp8,0,0.2938506603240967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,1,128,0,1,float16,float16,0,0.32727466026941937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,1,128,0,1,float16,fp8,0,0.3269706765810649
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,4,128,0,1,float16,fp8,0,0.3287786642710368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,1,128,0,1,fp8,fp8,0,0.25668267409006756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,2,128,0,1,float16,fp8,0,0.3279946645100911
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,2,128,0,1,fp8,fp8,0,0.2579573392868042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,4,128,0,1,float16,float16,0,0.3280373414357503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,4,128,0,1,fp8,fp8,0,0.25913600126902264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,8,128,0,1,float16,float16,0,0.33028266827265423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,8,128,0,1,float16,fp8,0,0.33058132727940875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,48,8,128,0,1,fp8,fp8,0,0.25915199518203735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,48,128,0,1,float16,float16,0,0.19076800346374512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,48,128,0,1,float16,fp8,0,0.19538132349650064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,48,128,0,1,fp8,fp8,0,0.16831467549006143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,1,128,0,1,float16,float16,0,0.179967999458313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,1,128,0,1,float16,fp8,0,0.1809013287226359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,1,128,0,1,fp8,fp8,0,0.15336533387502035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,2,128,0,1,float16,float16,0,0.18152532974878946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,2,128,0,1,float16,fp8,0,0.18336532513300577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,2,128,0,1,fp8,fp8,0,0.1527839998404185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,4,128,0,1,float16,float16,0,0.18290666739145914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,4,128,0,1,float16,fp8,0,0.18230400482813516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,4,128,0,1,fp8,fp8,0,0.155157337586085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,8,128,0,1,float16,fp8,0,0.1839466691017151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,8,128,0,1,float16,float16,0,0.18148799737294516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,48,8,128,0,1,fp8,fp8,0,0.15571199854214987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,1,128,0,1,fp8,fp8,0,4.581957181294759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,1,128,0,1,float16,float16,0,5.85154660542806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,1,128,0,1,float16,fp8,0,5.847530364990234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,2,128,0,1,float16,float16,0,5.874485015869141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,2,128,0,1,fp8,fp8,0,4.627514521280925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,2,128,0,1,float16,fp8,0,5.881674448649089
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,4,128,0,1,float16,float16,0,5.880352020263672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,4,128,0,1,float16,fp8,0,5.80681037902832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,4,128,0,1,fp8,fp8,0,4.667850812276204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,48,128,0,1,float16,float16,0,3.2473653157552085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,48,128,0,1,fp8,fp8,0,2.6754026412963867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,1,128,0,1,float16,float16,0,2.7786026000976562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,48,128,0,1,float16,fp8,0,3.2854080200195312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,8,128,0,1,fp8,fp8,0,4.736837387084961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,8,128,0,1,float16,float16,0,5.949471791585286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,48,8,128,0,1,float16,fp8,0,5.929098765055339
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,1,128,0,1,float16,fp8,0,2.821178754170736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,1,128,0,1,fp8,fp8,0,2.326698621114095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,2,128,0,1,float16,float16,0,2.81604798634847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,2,128,0,1,fp8,fp8,0,2.3169333140055337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,2,128,0,1,float16,fp8,0,2.8512585957845054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,4,128,0,1,float16,float16,0,2.835829416910807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,4,128,0,1,fp8,fp8,0,2.34116268157959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,4,128,0,1,float16,fp8,0,2.8084214528401694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,8,128,0,1,float16,float16,0,2.8399839401245117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,48,128,0,1,float16,float16,0,1.5426239967346191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,8,128,0,1,float16,fp8,0,2.926197369893392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,48,128,0,1,float16,fp8,0,1.5697867075602214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,48,8,128,0,1,fp8,fp8,0,2.390735944112142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,48,128,0,1,fp8,fp8,0,1.3457333246866863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,1,128,0,1,float16,float16,0,1.3783253033955891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,1,128,0,1,float16,fp8,0,1.4029547373453777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,1,128,0,1,fp8,fp8,0,1.1914880275726318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,2,128,0,1,float16,float16,0,1.3787040710449219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,2,128,0,1,float16,fp8,0,1.3815627098083496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,2,128,0,1,fp8,fp8,0,1.1917706330617268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,4,128,0,1,float16,float16,0,1.376362641652425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,4,128,0,1,float16,fp8,0,1.392367998758952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,4,128,0,1,fp8,fp8,0,1.1811947027842205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,8,128,0,1,float16,float16,0,1.384394645690918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,48,128,0,1,float16,float16,0,0.7868053118387858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,8,128,0,1,float16,fp8,0,1.4116800626118977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,48,8,128,0,1,fp8,fp8,0,1.1971999804178874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,48,128,0,1,float16,fp8,0,0.7993066310882568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,48,128,0,1,fp8,fp8,0,0.7026293277740479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,1,128,0,1,float16,float16,0,0.7118346691131592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,1,128,0,1,fp8,fp8,0,0.6204320192337036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,1,128,0,1,float16,fp8,0,0.7351306279500326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,2,128,0,1,float16,float16,0,0.7081226507822672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,2,128,0,1,float16,fp8,0,0.7192213535308838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,2,128,0,1,fp8,fp8,0,0.610864003499349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,4,128,0,1,float16,float16,0,0.7138506571451823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,4,128,0,1,float16,fp8,0,0.7240800062815348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,4,128,0,1,fp8,fp8,0,0.6237333218256632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,8,128,0,1,float16,float16,0,0.7189813454945883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,8,128,0,1,float16,fp8,0,0.7260426680246989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,48,128,0,1,float16,float16,0,0.41389334201812744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,48,8,128,0,1,fp8,fp8,0,0.6227573156356812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,48,128,0,1,float16,fp8,0,0.42293334007263184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,48,128,0,1,fp8,fp8,0,0.3757813374201457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,1,128,0,1,float16,float16,0,0.3845920165379842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,1,128,0,1,float16,fp8,0,0.38706668217976886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,1,128,0,1,fp8,fp8,0,0.3301600019137065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,2,128,0,1,float16,float16,0,0.38362665971120197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,2,128,0,1,float16,fp8,0,0.38838934898376465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,8,128,0,1,float16,float16,0,0.38549331823984784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,2,128,0,1,fp8,fp8,0,0.331493337949117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,4,128,0,1,float16,float16,0,0.38663466771443683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,4,128,0,1,float16,fp8,0,0.3893653154373169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,4,128,0,1,fp8,fp8,0,0.333840012550354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,8,128,0,1,float16,fp8,0,0.3935519854227702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,48,8,128,0,1,fp8,fp8,0,0.33553600311279297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,48,128,0,1,float16,float16,0,0.2328746716181437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,48,128,0,1,float16,fp8,0,0.23990933100382486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,48,128,0,1,fp8,fp8,0,0.20043732722600302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,1,128,0,1,float16,float16,0,0.218725323677063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,1,128,0,1,float16,fp8,0,0.21954667568206787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,1,128,0,1,fp8,fp8,0,0.17639466126759848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,2,128,0,1,float16,float16,0,0.21934932470321655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,2,128,0,1,float16,fp8,0,0.21870932976404825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,2,128,0,1,fp8,fp8,0,0.17571200927098593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,4,128,0,1,float16,float16,0,0.22288533051808676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,4,128,0,1,float16,fp8,0,0.220634659131368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,4,128,0,1,fp8,fp8,0,0.17966934045155844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,8,128,0,1,float16,float16,0,0.2204479972521464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,8,128,0,1,float16,fp8,0,0.22292266289393106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,48,8,128,0,1,fp8,fp8,0,0.17972266674041748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,48,128,0,1,float16,float16,0,0.13149333000183105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,48,128,0,1,float16,fp8,0,0.13513599832852682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,48,128,0,1,fp8,fp8,0,0.12097066640853882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,1,128,0,1,float16,float16,0,0.12474133570988973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,1,128,0,1,float16,fp8,0,0.12603200475374857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,1,128,0,1,fp8,fp8,0,0.10751466949780782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,2,128,0,1,float16,float16,0,0.12483200430870056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,2,128,0,1,float16,fp8,0,0.12544533610343933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,2,128,0,1,fp8,fp8,0,0.10761599739392598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,4,128,0,1,float16,float16,0,0.12502933541933695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,4,128,0,1,float16,fp8,0,0.12621333201726279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,4,128,0,1,fp8,fp8,0,0.11016533772150676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,8,128,0,1,float16,float16,0,0.12602133552233377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,8,128,0,1,float16,fp8,0,0.1269706686337789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,48,8,128,0,1,fp8,fp8,0,0.11225600043932597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,1,128,0,1,fp8,fp8,0,5.201530774434407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,1,128,0,1,float16,float16,0,6.081066767374675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,1,128,0,1,float16,fp8,0,5.925930658976237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,2,128,0,1,fp8,fp8,0,5.242730776468913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,2,128,0,1,float16,fp8,0,6.128746668497722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,2,128,0,1,float16,float16,0,6.21394157409668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,4,128,0,1,float16,float16,0,6.240682601928711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,4,128,0,1,float16,fp8,0,6.25492795308431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,48,128,0,1,float16,float16,0,3.5164000193277993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,48,128,0,1,float16,fp8,0,3.5172106424967446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,4,128,0,1,fp8,fp8,0,5.636175791422526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,48,128,0,1,fp8,fp8,0,2.95036252339681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,1,128,0,1,float16,float16,0,2.86028258005778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,8,128,0,1,float16,float16,0,6.386821111043294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,8,128,0,1,fp8,fp8,0,5.6912797292073565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,48,8,128,0,1,float16,fp8,0,6.381669362386067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,1,128,0,1,float16,fp8,0,2.8944374720255532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,1,128,0,1,fp8,fp8,0,2.6251039505004883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,2,128,0,1,float16,float16,0,2.926063855489095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,2,128,0,1,float16,fp8,0,2.9112106959025064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,2,128,0,1,fp8,fp8,0,2.673877398173014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,4,128,0,1,float16,float16,0,3.098997433980306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,4,128,0,1,float16,fp8,0,3.103989283243815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,4,128,0,1,fp8,fp8,0,2.839845339457194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,8,128,0,1,float16,float16,0,3.1236852010091147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,48,128,0,1,float16,float16,0,1.732650597890218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,8,128,0,1,float16,fp8,0,3.1528905232747397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,48,128,0,1,float16,fp8,0,1.7007946968078613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,48,128,0,1,fp8,fp8,0,1.4827574094136555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,1,128,0,1,float16,float16,0,1.4346027374267578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,48,8,128,0,1,fp8,fp8,0,2.8781493504842124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,1,128,0,1,float16,fp8,0,1.4364693959554036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,1,128,0,1,fp8,fp8,0,1.3298880259195964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,2,128,0,1,float16,float16,0,1.476352055867513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,2,128,0,1,float16,fp8,0,1.4493865966796875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,2,128,0,1,fp8,fp8,0,1.3214826583862305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,4,128,0,1,float16,float16,0,1.5132320721944172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,4,128,0,1,float16,fp8,0,1.5152959823608398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,4,128,0,1,fp8,fp8,0,1.4328586260477703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,8,128,0,1,float16,float16,0,1.5186667442321777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,48,128,0,1,float16,float16,0,0.8675626913706461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,8,128,0,1,float16,fp8,0,1.5185119311014812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,48,128,0,1,float16,fp8,0,0.8513759771982828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,48,8,128,0,1,fp8,fp8,0,1.4387359619140625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,48,128,0,1,fp8,fp8,0,0.7442560195922852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,1,128,0,1,float16,fp8,0,0.7326506773630778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,1,128,0,1,float16,float16,0,0.7343093554178873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,1,128,0,1,fp8,fp8,0,0.6611093282699585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,2,128,0,1,float16,float16,0,0.740768035252889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,2,128,0,1,float16,fp8,0,0.7349440256754557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,2,128,0,1,fp8,fp8,0,0.6644426584243774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,4,128,0,1,float16,float16,0,0.7578240235646566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,4,128,0,1,float16,fp8,0,0.7579840024312338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,8,128,0,1,float16,float16,0,0.7608053684234619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,4,128,0,1,fp8,fp8,0,0.7217386563618978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,8,128,0,1,float16,fp8,0,0.7563839753468832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,48,128,0,1,float16,float16,0,0.44338667392730713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,48,8,128,0,1,fp8,fp8,0,0.7339253425598145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,48,128,0,1,float16,fp8,0,0.435477336247762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,48,128,0,1,fp8,fp8,0,0.3831520080566406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,1,128,0,1,float16,float16,0,0.3801120122273763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,1,128,0,1,float16,fp8,0,0.381440003712972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,1,128,0,1,fp8,fp8,0,0.33511467774709064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,2,128,0,1,float16,float16,0,0.3814133405685425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,2,128,0,1,float16,fp8,0,0.37945600350697833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,2,128,0,1,fp8,fp8,0,0.34110931555430096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,4,128,0,1,float16,float16,0,0.3906826575597127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,4,128,0,1,float16,fp8,0,0.3859359820683797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,4,128,0,1,fp8,fp8,0,0.3667626778284709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,8,128,0,1,float16,float16,0,0.38929065068562824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,8,128,0,1,float16,fp8,0,0.3856853246688843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,48,8,128,0,1,fp8,fp8,0,0.3742506504058838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,48,128,0,1,float16,float16,0,0.23651200532913208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,48,128,0,1,float16,fp8,0,0.23301867643992105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,48,128,0,1,fp8,fp8,0,0.18911999464035034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,1,128,0,1,float16,float16,0,0.20164267222086588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,1,128,0,1,float16,fp8,0,0.20175466934839884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,1,128,0,1,fp8,fp8,0,0.1604586640993754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,2,128,0,1,float16,float16,0,0.20268799861272177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,2,128,0,1,float16,fp8,0,0.202890674273173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,2,128,0,1,fp8,fp8,0,0.16149866580963135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,4,128,0,1,float16,float16,0,0.20600533485412598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,4,128,0,1,float16,fp8,0,0.20715200901031494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,4,128,0,1,fp8,fp8,0,0.17570666472117105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,8,128,0,1,float16,float16,0,0.2104853391647339
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,8,128,0,1,float16,fp8,0,0.206112007300059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,1,128,0,1,float16,fp8,0,0.1069546639919281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,48,8,128,0,1,fp8,fp8,0,0.1767680048942566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,48,128,0,1,float16,float16,0,0.12691199779510498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,48,128,0,1,float16,fp8,0,0.12468266487121582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,48,128,0,1,fp8,fp8,0,0.10313600301742554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,1,128,0,1,float16,float16,0,0.10569066802660625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,1,128,0,1,fp8,fp8,0,0.08802133798599243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,2,128,0,1,float16,float16,0,0.10662933190663655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,2,128,0,1,float16,fp8,0,0.10739733775456746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,8,128,0,1,float16,fp8,0,0.1090666651725769
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,2,128,0,1,fp8,fp8,0,0.08690133690834045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,4,128,0,1,float16,float16,0,0.10785599549611409
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,4,128,0,1,float16,fp8,0,0.10786666472752889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,4,128,0,1,fp8,fp8,0,0.0937546690305074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,8,128,0,1,float16,float16,0,0.10920000076293945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,48,8,128,0,1,fp8,fp8,0,0.09531733393669128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,48,128,0,1,float16,float16,0,0.06675733129183452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,48,128,0,1,float16,fp8,0,0.06629333396752675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,48,128,0,1,fp8,fp8,0,0.059450666109720864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,1,128,0,1,float16,float16,0,0.058335999647776283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,1,128,0,1,float16,fp8,0,0.05774400134881338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,1,128,0,1,fp8,fp8,0,0.04982399940490723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,2,128,0,1,float16,float16,0,0.05874133110046387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,2,128,0,1,float16,fp8,0,0.0584799995024999
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,2,128,0,1,fp8,fp8,0,0.05007466673851013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,4,128,0,1,float16,float16,0,0.0601440022389094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,4,128,0,1,float16,fp8,0,0.05982933441797892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,4,128,0,1,fp8,fp8,0,0.053344001372655235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,8,128,0,1,float16,float16,0,0.059664001067479454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,8,128,0,1,float16,fp8,0,0.05967999994754791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,48,8,128,0,1,fp8,fp8,0,0.053557331363360085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,1,128,0,1,float16,float16,0,4.4235413869222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,1,128,0,1,fp8,fp8,0,4.264261245727539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,1,128,0,1,float16,fp8,0,4.386965433756511
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,2,128,0,1,float16,float16,0,4.697536150614421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,2,128,0,1,fp8,fp8,0,4.235461235046387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,2,128,0,1,float16,fp8,0,4.672848065694173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,4,128,0,1,float16,float16,0,4.972410519917806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,4,128,0,1,float16,fp8,0,4.957311948140462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,48,128,0,1,float16,float16,0,2.7991838455200195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,4,128,0,1,fp8,fp8,0,4.704928080240886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,48,128,0,1,float16,fp8,0,2.7405598958333335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,48,128,0,1,fp8,fp8,0,2.4940640131632485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,8,128,0,1,float16,float16,0,4.898415883382161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,8,128,0,1,float16,fp8,0,4.933343887329102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,48,8,128,0,1,fp8,fp8,0,4.722410519917806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,1,128,0,1,float16,float16,0,2.217088063557943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,1,128,0,1,float16,fp8,0,2.205519994099935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,1,128,0,1,fp8,fp8,0,2.1257813771565757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,2,128,0,1,float16,float16,0,2.3111252784729004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,2,128,0,1,float16,fp8,0,2.291680018107096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,2,128,0,1,fp8,fp8,0,2.1334826151529946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,4,128,0,1,float16,float16,0,2.4095306396484375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,4,128,0,1,float16,fp8,0,2.42905060450236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,4,128,0,1,fp8,fp8,0,2.3263947168986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,8,128,0,1,float16,float16,0,2.4906506538391113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,48,128,0,1,float16,float16,0,1.4038666089375813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,48,128,0,1,float16,fp8,0,1.3459946314493816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,8,128,0,1,float16,fp8,0,2.451258659362793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,48,128,0,1,fp8,fp8,0,1.2186720371246338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,48,8,128,0,1,fp8,fp8,0,2.392063935597738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,1,128,0,1,float16,float16,0,1.1187893549601238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,1,128,0,1,float16,fp8,0,1.104373296101888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,1,128,0,1,fp8,fp8,0,1.056997299194336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,2,128,0,1,float16,float16,0,1.131941318511963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,2,128,0,1,float16,fp8,0,1.1299200057983398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,2,128,0,1,fp8,fp8,0,1.0687466462453206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,4,128,0,1,float16,float16,0,1.1841333707173665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,4,128,0,1,float16,fp8,0,1.1980853080749512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,4,128,0,1,fp8,fp8,0,1.1778773466746013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,8,128,0,1,float16,float16,0,1.1971466541290283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,8,128,0,1,float16,fp8,0,1.2023733456929524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,48,128,0,1,float16,float16,0,0.7093066374460856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,48,8,128,0,1,fp8,fp8,0,1.1853280067443848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,1,128,0,1,float16,fp8,0,0.5636533498764038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,48,128,0,1,float16,fp8,0,0.6851360003153483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,48,128,0,1,fp8,fp8,0,0.6146399974822998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,1,128,0,1,float16,float16,0,0.5678773323694865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,1,128,0,1,fp8,fp8,0,0.5291626850763956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,2,128,0,1,float16,float16,0,0.5801920096079508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,2,128,0,1,float16,fp8,0,0.5761653184890747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,2,128,0,1,fp8,fp8,0,0.5308746496836344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,4,128,0,1,float16,float16,0,0.5930933157602946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,4,128,0,1,float16,fp8,0,0.5954666535059611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,4,128,0,1,fp8,fp8,0,0.5937066475550333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,8,128,0,1,float16,float16,0,0.5937493244806925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,8,128,0,1,float16,fp8,0,0.5944000085194906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,48,8,128,0,1,fp8,fp8,0,0.6051786740620931
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,48,128,0,1,float16,float16,0,0.36501868565877277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,48,128,0,1,float16,fp8,0,0.35171735286712646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,48,128,0,1,fp8,fp8,0,0.31470932563145954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,1,128,0,1,float16,float16,0,0.2943413257598877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,1,128,0,1,float16,fp8,0,0.29293866952260333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,1,128,0,1,fp8,fp8,0,0.2744426727294922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,2,128,0,1,float16,float16,0,0.2951253255208333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,2,128,0,1,float16,fp8,0,0.29681066672007245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,2,128,0,1,fp8,fp8,0,0.27454932530721027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,4,128,0,1,float16,float16,0,0.3053706685702006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,4,128,0,1,float16,fp8,0,0.3051626682281494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,4,128,0,1,fp8,fp8,0,0.304474671681722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,8,128,0,1,float16,float16,0,0.30903466542561847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,8,128,0,1,float16,fp8,0,0.30528533458709717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,48,8,128,0,1,fp8,fp8,0,0.30754133065541583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,48,128,0,1,float16,float16,0,0.19300800561904907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,48,128,0,1,float16,fp8,0,0.18870933850606283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,48,128,0,1,fp8,fp8,0,0.15985066692034403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,1,128,0,1,float16,float16,0,0.157642662525177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,1,128,0,1,float16,fp8,0,0.15794133146603903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,1,128,0,1,fp8,fp8,0,0.13270399967829385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,4,128,0,1,fp8,fp8,0,0.14728533228238425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,2,128,0,1,float16,float16,0,0.15847466389338175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,2,128,0,1,float16,fp8,0,0.15895467003186545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,2,128,0,1,fp8,fp8,0,0.1337440013885498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,4,128,0,1,float16,float16,0,0.162063995997111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,4,128,0,1,float16,fp8,0,0.16355733076731363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,8,128,0,1,float16,float16,0,0.16605866948763529
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,8,128,0,1,float16,fp8,0,0.16358932852745056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,48,8,128,0,1,fp8,fp8,0,0.14964266618092856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,48,128,0,1,float16,float16,0,0.10813867052396138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,48,128,0,1,float16,fp8,0,0.10373333096504211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,48,128,0,1,fp8,fp8,0,0.08785067001978557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,1,128,0,1,float16,float16,0,0.08489599823951721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,1,128,0,1,float16,fp8,0,0.08634133140246074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,4,128,0,1,float16,fp8,0,0.08790399630864461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,4,128,0,1,fp8,fp8,0,0.0788266658782959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,1,128,0,1,fp8,fp8,0,0.0728053351243337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,2,128,0,1,float16,float16,0,0.08531199892361958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,2,128,0,1,float16,fp8,0,0.08585066596666972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,2,128,0,1,fp8,fp8,0,0.07361599802970886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,4,128,0,1,float16,float16,0,0.08793600400288899
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,8,128,0,1,float16,float16,0,0.08781866232554118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,8,128,0,1,float16,fp8,0,0.08875200152397156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,48,8,128,0,1,fp8,fp8,0,0.08025600016117096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,48,128,0,1,float16,float16,0,0.05741333464781443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,48,128,0,1,float16,fp8,0,0.057029331723848976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,48,128,0,1,fp8,fp8,0,0.05282666782538096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,1,128,0,1,float16,float16,0,0.04794133206208547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,1,128,0,1,float16,fp8,0,0.04884799818197886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,1,128,0,1,fp8,fp8,0,0.042410666743914284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,2,128,0,1,float16,float16,0,0.048997332652409874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,8,128,0,1,float16,float16,0,0.05086933573087057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,2,128,0,1,float16,fp8,0,0.048954665660858154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,2,128,0,1,fp8,fp8,0,0.043493335445721946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,4,128,0,1,float16,float16,0,0.04975466430187225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,4,128,0,1,float16,fp8,0,0.049551998575528465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,4,128,0,1,fp8,fp8,0,0.0462666650613149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,8,128,0,1,float16,fp8,0,0.050106664498647056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,48,8,128,0,1,fp8,fp8,0,0.0469706654548645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,48,128,0,1,float16,float16,0,0.03382933388153712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,48,128,0,1,float16,fp8,0,0.033413333197434746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,2,128,0,1,float16,float16,0,0.03138133386770884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,48,128,0,1,fp8,fp8,0,0.03137599925200144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,1,128,0,1,float16,float16,0,0.031023999055226643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,1,128,0,1,float16,fp8,0,0.031173333525657654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,1,128,0,1,fp8,fp8,0,0.02886933336655299
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,2,128,0,1,float16,fp8,0,0.031504000226656594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,2,128,0,1,fp8,fp8,0,0.029498666524887085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,4,128,0,1,float16,float16,0,0.032074667513370514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,4,128,0,1,float16,fp8,0,0.03172266731659571
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,4,128,0,1,fp8,fp8,0,0.03035733352104823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,8,128,0,1,float16,float16,0,0.031504000226656594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,8,128,0,1,float16,fp8,0,0.0317546675602595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,48,8,128,0,1,fp8,fp8,0,0.030896000564098358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,1,128,0,1,float16,float16,0,1.9068214098612468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,1,128,0,1,float16,fp8,0,1.8778187433878581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,1,128,0,1,fp8,fp8,0,1.7013600667317708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,2,128,0,1,float16,float16,0,1.996880054473877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,2,128,0,1,fp8,fp8,0,1.7121119499206543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,2,128,0,1,float16,fp8,0,1.9612000783284504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,4,128,0,1,float16,float16,0,2.10152530670166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,4,128,0,1,float16,fp8,0,2.096709410349528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,4,128,0,1,fp8,fp8,0,1.9136959711710613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,48,128,0,1,float16,float16,0,1.2257920106252034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,48,128,0,1,float16,fp8,0,1.1888586680094402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,8,128,0,1,float16,float16,0,2.120031992594401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,48,128,0,1,fp8,fp8,0,1.0349173545837402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,8,128,0,1,float16,fp8,0,2.110933303833008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,48,8,128,0,1,fp8,fp8,0,1.9646453857421875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,1,128,0,1,float16,float16,0,0.9524373213450114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,1,128,0,1,fp8,fp8,0,0.846682627995809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,1,128,0,1,float16,fp8,0,0.9505919615427653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,2,128,0,1,float16,float16,0,0.9712959925333658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,2,128,0,1,float16,fp8,0,0.9662026564280192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,2,128,0,1,fp8,fp8,0,0.8581013679504395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,4,128,0,1,float16,float16,0,1.045039971669515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,4,128,0,1,float16,fp8,0,1.0404319763183594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,4,128,0,1,fp8,fp8,0,0.9667840003967285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,8,128,0,1,float16,float16,0,1.0400853157043457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,8,128,0,1,float16,fp8,0,1.0326773325602214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,48,128,0,1,float16,float16,0,0.6226079861323038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,48,8,128,0,1,fp8,fp8,0,0.9771200021107992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,48,128,0,1,float16,fp8,0,0.6043999989827474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,48,128,0,1,fp8,fp8,0,0.5225386619567871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,1,128,0,1,float16,float16,0,0.48649601141611737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,1,128,0,1,float16,fp8,0,0.48369598388671875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,1,128,0,1,fp8,fp8,0,0.42664531866709393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,2,128,0,1,float16,float16,0,0.4947679837544759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,2,128,0,1,float16,fp8,0,0.4950293302536011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,2,128,0,1,fp8,fp8,0,0.4291040102640788
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,4,128,0,1,float16,float16,0,0.5112586816151937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,8,128,0,1,float16,fp8,0,0.5098880132039388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,4,128,0,1,float16,fp8,0,0.5079893271128336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,4,128,0,1,fp8,fp8,0,0.4938133160273234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,8,128,0,1,float16,float16,0,0.5115040143330892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,48,8,128,0,1,fp8,fp8,0,0.5021706819534302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,48,128,0,1,float16,float16,0,0.3210453391075134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,48,128,0,1,float16,fp8,0,0.3127466638882955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,48,128,0,1,fp8,fp8,0,0.27086933453877765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,1,128,0,1,float16,float16,0,0.25179733832677204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,1,128,0,1,float16,fp8,0,0.2514773408571879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,1,128,0,1,fp8,fp8,0,0.22060799598693848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,2,128,0,1,float16,float16,0,0.2555893262227376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,2,128,0,1,float16,fp8,0,0.25383466482162476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,2,128,0,1,fp8,fp8,0,0.22317334016164145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,8,128,0,1,float16,fp8,0,0.2637493411699931
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,4,128,0,1,float16,float16,0,0.2635413408279419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,4,128,0,1,float16,fp8,0,0.26262933015823364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,4,128,0,1,fp8,fp8,0,0.25390400489171344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,8,128,0,1,float16,float16,0,0.2645439902941386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,48,128,0,1,float16,float16,0,0.1701493263244629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,48,8,128,0,1,fp8,fp8,0,0.25804799795150757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,48,128,0,1,float16,fp8,0,0.1677280068397522
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,48,128,0,1,fp8,fp8,0,0.1458399991194407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,1,128,0,1,float16,float16,0,0.1355839967727661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,1,128,0,1,float16,fp8,0,0.13518399993578592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,1,128,0,1,fp8,fp8,0,0.11867200334866841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,2,128,0,1,float16,float16,0,0.13762666781743368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,2,128,0,1,float16,fp8,0,0.13643200198809305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,2,128,0,1,fp8,fp8,0,0.11989333232243855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,4,128,0,1,float16,float16,0,0.1416373352209727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,4,128,0,1,float16,fp8,0,0.14214932918548584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,4,128,0,1,fp8,fp8,0,0.13249066472053528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,8,128,0,1,float16,float16,0,0.14356266458829245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,8,128,0,1,float16,fp8,0,0.1418773333231608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,48,8,128,0,1,fp8,fp8,0,0.13531200091044107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,48,128,0,1,float16,float16,0,0.09840533137321472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,48,128,0,1,float16,fp8,0,0.09490666786829631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,48,128,0,1,fp8,fp8,0,0.08058133224646251
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,2,128,0,1,float16,fp8,0,0.0765173335870107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,1,128,0,1,float16,float16,0,0.07645866771539052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,1,128,0,1,float16,fp8,0,0.07574933270613353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,1,128,0,1,fp8,fp8,0,0.06564266482988994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,2,128,0,1,float16,float16,0,0.07683200140794118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,8,128,0,1,float16,float16,0,0.0791786660750707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,2,128,0,1,fp8,fp8,0,0.06620266536871593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,4,128,0,1,float16,float16,0,0.07860266665617625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,4,128,0,1,float16,fp8,0,0.07874666651089986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,4,128,0,1,fp8,fp8,0,0.07088000078996022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,8,128,0,1,float16,fp8,0,0.08041066428025563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,48,8,128,0,1,fp8,fp8,0,0.0737066666285197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,48,128,0,1,float16,float16,0,0.052842666705449425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,1,128,0,1,fp8,fp8,0,0.038736000657081604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,48,128,0,1,float16,fp8,0,0.052015999952952065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,48,128,0,1,fp8,fp8,0,0.04756799836953481
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,1,128,0,1,float16,float16,0,0.04377600053946177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,1,128,0,1,float16,fp8,0,0.043050666650136314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,2,128,0,1,float16,float16,0,0.043882668018341064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,2,128,0,1,float16,fp8,0,0.04398933549722036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,2,128,0,1,fp8,fp8,0,0.03898133337497711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,8,128,0,1,fp8,fp8,0,0.04231466849644979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,4,128,0,1,float16,float16,0,0.04518933097521464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,4,128,0,1,float16,fp8,0,0.044997334480285645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,4,128,0,1,fp8,fp8,0,0.04142933338880539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,8,128,0,1,float16,float16,0,0.04515733321507772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,48,8,128,0,1,float16,fp8,0,0.045082668463389076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,48,128,0,1,float16,float16,0,0.03215466688076655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,48,128,0,1,float16,fp8,0,0.03179199993610382
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,48,128,0,1,fp8,fp8,0,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,1,128,0,1,float16,float16,0,0.030224000414212544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,1,128,0,1,float16,fp8,0,0.02979733298222224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,1,128,0,1,fp8,fp8,0,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,2,128,0,1,float16,float16,0,0.029461334149042766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,2,128,0,1,float16,fp8,0,0.02993600070476532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,2,128,0,1,fp8,fp8,0,0.027450665831565857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,4,128,0,1,float16,float16,0,0.030666666726271313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,4,128,0,1,float16,fp8,0,0.03036266565322876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,4,128,0,1,fp8,fp8,0,0.028650666276613872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,48,128,0,1,fp8,fp8,0,0.02256533255179723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,8,128,0,1,float16,float16,0,0.030671998858451843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,8,128,0,1,float16,fp8,0,0.030576000610987347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,1,128,0,1,fp8,fp8,0,0.02091199904680252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,48,8,128,0,1,fp8,fp8,0,0.028533334533373516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,48,128,0,1,float16,float16,0,0.02365333338578542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,48,128,0,1,float16,fp8,0,0.02327999969323476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,1,128,0,1,float16,float16,0,0.021877333521842957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,1,128,0,1,float16,fp8,0,0.02213866760333379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,4,128,0,1,fp8,fp8,0,0.02083733429511388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,2,128,0,1,float16,float16,0,0.022064000368118286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,2,128,0,1,float16,fp8,0,0.021749332547187805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,2,128,0,1,fp8,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,4,128,0,1,float16,float16,0,0.02223466585079829
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,4,128,0,1,float16,fp8,0,0.022384000321229298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,8,128,0,1,float16,float16,0,0.02254933367172877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,8,128,0,1,float16,fp8,0,0.02266666789849599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,48,8,128,0,1,fp8,fp8,0,0.02141333371400833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,1,128,0,1,float16,float16,0,0.8706826368967692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,1,128,0,1,float16,fp8,0,0.8587839603424072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,1,128,0,1,fp8,fp8,0,0.8402132987976074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,2,128,0,1,float16,float16,0,0.9144480228424072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,2,128,0,1,float16,fp8,0,0.8917653560638428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,2,128,0,1,fp8,fp8,0,0.8478773434956869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,4,128,0,1,float16,float16,0,0.9450293382008871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,4,128,0,1,fp8,fp8,0,0.96451735496521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,4,128,0,1,float16,fp8,0,0.9578453699747721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,8,128,0,1,float16,float16,0,0.9784853458404541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,48,128,0,1,float16,fp8,0,0.5701493422190348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,8,128,0,1,fp8,fp8,0,0.9739200274149576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,48,128,0,1,float16,float16,0,0.5855253140131632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,48,8,128,0,1,float16,fp8,0,0.9659039974212646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,48,128,0,1,fp8,fp8,0,0.5210453271865845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,1,128,0,1,float16,float16,0,0.4446186621983846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,1,128,0,1,float16,fp8,0,0.4398346741994222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,1,128,0,1,fp8,fp8,0,0.4218826691309611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,2,128,0,1,float16,float16,0,0.45820267995198566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,4,128,0,1,float16,fp8,0,0.4753760099411011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,2,128,0,1,float16,fp8,0,0.45376535256703693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,2,128,0,1,fp8,fp8,0,0.43109333515167236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,4,128,0,1,float16,float16,0,0.4767520030339559
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,4,128,0,1,fp8,fp8,0,0.49322664737701416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,8,128,0,1,float16,float16,0,0.4797706604003906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,8,128,0,1,float16,fp8,0,0.47124266624450684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,48,8,128,0,1,fp8,fp8,0,0.4976693391799927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,48,128,0,1,float16,float16,0,0.30434133609135944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,48,128,0,1,float16,fp8,0,0.29635733366012573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,48,128,0,1,fp8,fp8,0,0.2693706750869751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,1,128,0,1,float16,float16,0,0.23191465934117636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,1,128,0,1,float16,fp8,0,0.23215999205907187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,1,128,0,1,fp8,fp8,0,0.21818667650222778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,2,128,0,1,float16,float16,0,0.2382133404413859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,2,128,0,1,float16,fp8,0,0.23732799291610718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,2,128,0,1,fp8,fp8,0,0.22286399205525717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,4,128,0,1,float16,float16,0,0.24597332874933878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,4,128,0,1,float16,fp8,0,0.24601600567499796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,4,128,0,1,fp8,fp8,0,0.2542933424313863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,48,128,0,1,fp8,fp8,0,0.14273066322008768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,8,128,0,1,float16,float16,0,0.24812267223993936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,8,128,0,1,float16,fp8,0,0.24651199579238892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,48,8,128,0,1,fp8,fp8,0,0.2573920090993245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,48,128,0,1,float16,float16,0,0.16977600256601968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,48,128,0,1,float16,fp8,0,0.16249066591262817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,1,128,0,1,float16,float16,0,0.12688533465067545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,1,128,0,1,float16,fp8,0,0.1269546647866567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,1,128,0,1,fp8,fp8,0,0.1185706655184428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,2,128,0,1,float16,float16,0,0.12954666217168173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,2,128,0,1,float16,fp8,0,0.12825066844622293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,2,128,0,1,fp8,fp8,0,0.12026133139928182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,4,128,0,1,float16,float16,0,0.13335999846458435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,8,128,0,1,fp8,fp8,0,0.13594667116800943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,4,128,0,1,float16,fp8,0,0.13340800007184347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,4,128,0,1,fp8,fp8,0,0.13174933195114136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,8,128,0,1,float16,float16,0,0.13522133231163025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,48,8,128,0,1,float16,fp8,0,0.13592533270517984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,48,128,0,1,float16,float16,0,0.09449066718419392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,48,128,0,1,float16,fp8,0,0.08987200260162354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,48,128,0,1,fp8,fp8,0,0.08012266457080841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,1,128,0,1,float16,float16,0,0.07092266778151195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,2,128,0,1,fp8,fp8,0,0.06564266482988994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,1,128,0,1,float16,fp8,0,0.07050666709740956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,1,128,0,1,fp8,fp8,0,0.06514666477839152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,2,128,0,1,float16,float16,0,0.07183466851711273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,2,128,0,1,float16,fp8,0,0.07158933579921722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,8,128,0,1,float16,fp8,0,0.07505600154399872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,4,128,0,1,float16,float16,0,0.0738560010989507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,4,128,0,1,float16,fp8,0,0.0740533322095871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,4,128,0,1,fp8,fp8,0,0.07115733126799266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,8,128,0,1,float16,float16,0,0.07531733314196269
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,48,8,128,0,1,fp8,fp8,0,0.07274133463700612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,48,128,0,1,float16,float16,0,0.051541333397229515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,48,128,0,1,float16,fp8,0,0.04916800061861674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,48,128,0,1,fp8,fp8,0,0.047610665361086525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,1,128,0,1,float16,float16,0,0.04051200052102407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,1,128,0,1,float16,fp8,0,0.041759997606277466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,1,128,0,1,fp8,fp8,0,0.03782399992148081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,4,128,0,1,float16,fp8,0,0.0422986646493276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,4,128,0,1,fp8,fp8,0,0.042250668009122215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,2,128,0,1,float16,float16,0,0.041135999063650765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,2,128,0,1,float16,fp8,0,0.04241600135962168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,2,128,0,1,fp8,fp8,0,0.03844800094763438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,4,128,0,1,float16,float16,0,0.0431573341290156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,8,128,0,1,float16,float16,0,0.04327466587225596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,8,128,0,1,float16,fp8,0,0.043493335445721946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,48,8,128,0,1,fp8,fp8,0,0.04189866781234741
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,48,128,0,1,float16,float16,0,0.03211733450492223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,48,128,0,1,float16,fp8,0,0.031023999055226643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,48,128,0,1,fp8,fp8,0,0.030069333811601002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,1,128,0,1,float16,float16,0,0.029018667836983997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,1,128,0,1,float16,fp8,0,0.029114666084448498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,1,128,0,1,fp8,fp8,0,0.02683199942111969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,2,128,0,1,float16,float16,0,0.029845332105954487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,2,128,0,1,float16,fp8,0,0.02974933385848999
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,2,128,0,1,fp8,fp8,0,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,4,128,0,1,float16,float16,0,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,4,128,0,1,float16,fp8,0,0.030495998760064442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,4,128,0,1,fp8,fp8,0,0.028245332340399425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,8,128,0,1,float16,float16,0,0.031093334158261616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,8,128,0,1,float16,fp8,0,0.031871999303499855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,48,8,128,0,1,fp8,fp8,0,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,48,128,0,1,float16,float16,0,0.021701333423455555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,48,128,0,1,float16,fp8,0,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,2,128,0,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,48,128,0,1,fp8,fp8,0,0.0222080002228419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,1,128,0,1,float16,float16,0,0.020597333709398907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,1,128,0,1,float16,fp8,0,0.020442667106787365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,1,128,0,1,fp8,fp8,0,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,2,128,0,1,float16,float16,0,0.02057066683967908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,2,128,0,1,fp8,fp8,0,0.02038399999340375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,4,128,0,1,float16,float16,0,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,4,128,0,1,float16,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,4,128,0,1,fp8,fp8,0,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,8,128,0,1,float16,float16,0,0.02146133283774058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,8,128,0,1,float16,fp8,0,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,48,8,128,0,1,fp8,fp8,0,0.02178666740655899
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,48,128,0,1,float16,float16,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,48,128,0,1,float16,fp8,0,0.01828266680240631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,48,128,0,1,fp8,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,1,128,0,1,float16,float16,0,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,1,128,0,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,1,128,0,1,fp8,fp8,0,0.017429333180189133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,2,128,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,2,128,0,1,float16,fp8,0,0.018016000588734944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,2,128,0,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,4,128,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,4,128,0,1,float16,fp8,0,0.018122666825850803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,4,128,0,1,fp8,fp8,0,0.018426666657129925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,8,128,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,8,128,0,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,48,8,128,0,1,fp8,fp8,0,0.01809599995613098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,1,128,0,1,float16,float16,0,0.5119253396987915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,1,128,0,1,float16,fp8,0,0.5134613513946533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,2,128,0,1,fp8,fp8,0,0.6002773443857828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,1,128,0,1,fp8,fp8,0,0.5854933261871338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,2,128,0,1,float16,float16,0,0.5241866509119669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,2,128,0,1,float16,fp8,0,0.5167946815490723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,4,128,0,1,float16,float16,0,0.555023988087972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,4,128,0,1,float16,fp8,0,0.5445226828257242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,4,128,0,1,fp8,fp8,0,0.6558239857355753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,8,128,0,1,float16,float16,0,0.5566879908243815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,8,128,0,1,float16,fp8,0,0.5442506472269694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,48,128,0,1,float16,float16,0,0.3394560019175212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,48,8,128,0,1,fp8,fp8,0,0.6672960122426351
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,48,128,0,1,float16,fp8,0,0.3290773431460063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,48,128,0,1,fp8,fp8,0,0.35091201464335126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,1,128,0,1,float16,float16,0,0.2634506622950236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,1,128,0,1,float16,fp8,0,0.26421332359313965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,1,128,0,1,fp8,fp8,0,0.30154667297999066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,2,128,0,1,float16,float16,0,0.26971733570098877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,4,128,0,1,float16,fp8,0,0.2774453361829122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,2,128,0,1,float16,fp8,0,0.2659999926884969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,8,128,0,1,float16,float16,0,0.28381333748499554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,2,128,0,1,fp8,fp8,0,0.30663466453552246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,4,128,0,1,float16,float16,0,0.2817973295847575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,4,128,0,1,fp8,fp8,0,0.3351413408915202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,8,128,0,1,float16,fp8,0,0.27873067061106366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,48,8,128,0,1,fp8,fp8,0,0.34029332796732586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,48,128,0,1,float16,float16,0,0.17890133460362753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,48,128,0,1,float16,fp8,0,0.17357333501180014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,48,128,0,1,fp8,fp8,0,0.185263991355896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,1,128,0,1,float16,float16,0,0.14082133769989014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,1,128,0,1,float16,fp8,0,0.14113066593805948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,1,128,0,1,fp8,fp8,0,0.15980266531308493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,2,128,0,1,float16,float16,0,0.1421066621939341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,2,128,0,1,float16,fp8,0,0.14155733585357666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,2,128,0,1,fp8,fp8,0,0.1613599956035614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,4,128,0,1,float16,float16,0,0.14847999811172485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,4,128,0,1,float16,fp8,0,0.1481226682662964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,4,128,0,1,fp8,fp8,0,0.17384000619252524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,8,128,0,1,float16,float16,0,0.15128533045450845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,8,128,0,1,float16,fp8,0,0.14826132853825888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,1,128,0,1,float16,float16,0,0.07655466596285503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,48,8,128,0,1,fp8,fp8,0,0.1776319940884908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,48,128,0,1,float16,float16,0,0.09603733817736308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,48,128,0,1,float16,fp8,0,0.09366933504740398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,48,128,0,1,fp8,fp8,0,0.10080533226331075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,1,128,0,1,float16,fp8,0,0.07778133451938629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,4,128,0,1,float16,fp8,0,0.08027199904123943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,1,128,0,1,fp8,fp8,0,0.08733333150545756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,2,128,0,1,float16,float16,0,0.07758399844169617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,2,128,0,1,float16,fp8,0,0.07740800082683563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,2,128,0,1,fp8,fp8,0,0.0876693328221639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,4,128,0,1,float16,float16,0,0.0795306662718455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,4,128,0,1,fp8,fp8,0,0.09286933143933614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,8,128,0,1,float16,float16,0,0.08115200201670329
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,8,128,0,1,float16,fp8,0,0.0819893330335617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,48,8,128,0,1,fp8,fp8,0,0.09556800127029419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,48,128,0,1,float16,float16,0,0.05379733443260193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,48,128,0,1,float16,fp8,0,0.05277333160241445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,48,128,0,1,fp8,fp8,0,0.057855998476346336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,1,128,0,1,float16,float16,0,0.04378666480382284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,1,128,0,1,float16,fp8,0,0.0440586656332016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,1,128,0,1,fp8,fp8,0,0.04946133494377136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,2,128,0,1,float16,float16,0,0.04490133126576742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,2,128,0,1,float16,fp8,0,0.04494933287302653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,2,128,0,1,fp8,fp8,0,0.049141332507133484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,4,128,0,1,float16,float16,0,0.04669866462548574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,48,128,0,1,float16,float16,0,0.030666666726271313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,4,128,0,1,float16,fp8,0,0.04626133541266123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,4,128,0,1,fp8,fp8,0,0.05307200054327647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,8,128,0,1,float16,float16,0,0.04631466666857401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,8,128,0,1,float16,fp8,0,0.0461760014295578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,48,8,128,0,1,fp8,fp8,0,0.053743998209635414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,48,128,0,1,float16,fp8,0,0.029829333225886028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,2,128,0,1,float16,fp8,0,0.02881066749493281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,48,128,0,1,fp8,fp8,0,0.03370666752258936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,1,128,0,1,float16,float16,0,0.028399998943010967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,1,128,0,1,float16,fp8,0,0.028517333169778187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,1,128,0,1,fp8,fp8,0,0.030847998956839245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,2,128,0,1,float16,float16,0,0.02829866607983907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,2,128,0,1,fp8,fp8,0,0.031146667897701263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,4,128,0,1,float16,float16,0,0.02880000074704488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,4,128,0,1,float16,fp8,0,0.029824001093705494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,4,128,0,1,fp8,fp8,0,0.03194133440653483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,8,128,0,1,float16,float16,0,0.029120000700155895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,8,128,0,1,float16,fp8,0,0.028912000358104706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,1,128,0,1,float16,fp8,0,0.0229120006163915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,48,8,128,0,1,fp8,fp8,0,0.032602667808532715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,48,128,0,1,float16,float16,0,0.023706667125225067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,48,128,0,1,float16,fp8,0,0.024400000770886738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,48,128,0,1,fp8,fp8,0,0.025807999074459076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,1,128,0,1,float16,float16,0,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,1,128,0,1,fp8,fp8,0,0.02422400067249934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,2,128,0,1,float16,float16,0,0.02235200007756551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,2,128,0,1,float16,fp8,0,0.022485333184401195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,2,128,0,1,fp8,fp8,0,0.02420799930890401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,8,128,0,1,fp8,fp8,0,0.024911999702453613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,4,128,0,1,float16,float16,0,0.02293866624434789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,4,128,0,1,float16,fp8,0,0.023658665517965954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,4,128,0,1,fp8,fp8,0,0.02474133421977361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,8,128,0,1,float16,float16,0,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,48,8,128,0,1,float16,fp8,0,0.024143998821576435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,48,128,0,1,float16,float16,0,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,48,128,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,48,128,0,1,fp8,fp8,0,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,1,128,0,1,float16,float16,0,0.016341333587964375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,1,128,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,1,128,0,1,fp8,fp8,0,0.01747200017174085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,2,128,0,1,float16,float16,0,0.016719999412695568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,2,128,0,1,float16,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,2,128,0,1,fp8,fp8,0,0.01754133279124896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,4,128,0,1,float16,float16,0,0.016314666718244553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,4,128,0,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,4,128,0,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,8,128,0,1,float16,float16,0,0.016447999825080235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,8,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,48,8,128,0,1,fp8,fp8,0,0.018021332720915478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,48,128,0,1,float16,float16,0,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,48,128,0,1,float16,fp8,0,0.016186666985352833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,48,128,0,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,1,128,0,1,float16,float16,0,0.015418666104475657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,1,128,0,1,float16,fp8,0,0.015781333049138386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,1,128,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,2,128,0,1,float16,float16,0,0.01586666703224182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,2,128,0,1,float16,fp8,0,0.015552000453074774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,2,128,0,1,fp8,fp8,0,0.01669866715868314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,4,128,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,4,128,0,1,float16,fp8,0,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,4,128,0,1,fp8,fp8,0,0.016517333686351776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,8,128,0,1,float16,float16,0,0.015685333559910457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,8,128,0,1,float16,fp8,0,0.01580799991885821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,48,8,128,0,1,fp8,fp8,0,0.016565332810084026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,1,128,0,1,fp8,fp8,0,0.466325322786967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,1,128,0,1,float16,float16,0,0.352234681447347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,1,128,0,1,float16,fp8,0,0.35288000106811523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,2,128,0,1,float16,float16,0,0.35545066992441815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,4,128,0,1,float16,fp8,0,0.37088000774383545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,2,128,0,1,float16,fp8,0,0.3552693525950114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,2,128,0,1,fp8,fp8,0,0.46883734067281085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,4,128,0,1,float16,float16,0,0.36535465717315674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,4,128,0,1,fp8,fp8,0,0.499941349029541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,8,128,0,1,fp8,fp8,0,0.5074400107065836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,8,128,0,1,float16,float16,0,0.36689066886901855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,48,8,128,0,1,float16,fp8,0,0.3717333475748698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,48,128,0,1,float16,float16,0,0.2270666758219401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,48,128,0,1,float16,fp8,0,0.2225653330485026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,48,128,0,1,fp8,fp8,0,0.2656533320744832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,1,128,0,1,float16,float16,0,0.18432533740997314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,2,128,0,1,fp8,fp8,0,0.2454506754875183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,1,128,0,1,float16,fp8,0,0.1856106718381246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,1,128,0,1,fp8,fp8,0,0.24181866645812988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,2,128,0,1,float16,float16,0,0.18621333440144858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,2,128,0,1,float16,fp8,0,0.18565332889556885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,4,128,0,1,float16,float16,0,0.1909066637357076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,4,128,0,1,float16,fp8,0,0.19141866763432822
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,4,128,0,1,fp8,fp8,0,0.2567360003789266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,8,128,0,1,float16,float16,0,0.19339199860890707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,8,128,0,1,float16,fp8,0,0.19595199823379517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,48,8,128,0,1,fp8,fp8,0,0.2608426610628764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,48,128,0,1,float16,float16,0,0.1188266674677531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,48,128,0,1,float16,fp8,0,0.11680000027020772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,48,128,0,1,fp8,fp8,0,0.14089066783587137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,1,128,0,1,float16,float16,0,0.09887466828028361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,1,128,0,1,float16,fp8,0,0.0997973382472992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,2,128,0,1,float16,float16,0,0.09913600484530131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,1,128,0,1,fp8,fp8,0,0.12963733077049255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,2,128,0,1,float16,fp8,0,0.09995733698209126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,2,128,0,1,fp8,fp8,0,0.12919466694196066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,4,128,0,1,float16,float16,0,0.10146666566530864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,4,128,0,1,float16,fp8,0,0.10207466284434001
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,4,128,0,1,fp8,fp8,0,0.1344053347905477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,8,128,0,1,float16,float16,0,0.10178132851918538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,8,128,0,1,float16,fp8,0,0.10290132959683736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,48,8,128,0,1,fp8,fp8,0,0.1374613344669342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,48,128,0,1,float16,float16,0,0.06484800080458324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,48,128,0,1,float16,fp8,0,0.06389866769313812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,48,128,0,1,fp8,fp8,0,0.07844799757003784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,1,128,0,1,float16,float16,0,0.0545066644748052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,1,128,0,1,float16,fp8,0,0.05509866774082184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,1,128,0,1,fp8,fp8,0,0.07072000205516815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,2,128,0,1,float16,float16,0,0.055248002211252846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,2,128,0,1,float16,fp8,0,0.0557226687669754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,2,128,0,1,fp8,fp8,0,0.0718453327814738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,4,128,0,1,float16,float16,0,0.0572320024172465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,48,128,0,1,float16,float16,0,0.035973332822322845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,4,128,0,1,float16,fp8,0,0.05698133508364359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,48,128,0,1,fp8,fp8,0,0.047082667549451195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,4,128,0,1,fp8,fp8,0,0.07339199880758922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,8,128,0,1,float16,float16,0,0.056661332647005715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,8,128,0,1,float16,fp8,0,0.05691199998060862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,48,8,128,0,1,fp8,fp8,0,0.07432533303896587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,48,128,0,1,float16,fp8,0,0.03517866631348928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,2,128,0,1,fp8,fp8,0,0.04156800111134847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,1,128,0,1,float16,float16,0,0.03349333256483078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,1,128,0,1,float16,fp8,0,0.03316266586383184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,1,128,0,1,fp8,fp8,0,0.04257600009441376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,2,128,0,1,float16,float16,0,0.03352533280849457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,2,128,0,1,float16,fp8,0,0.03403733422358831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,4,128,0,1,float16,float16,0,0.034154665966828666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,4,128,0,1,float16,fp8,0,0.033946665624777474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,4,128,0,1,fp8,fp8,0,0.0431573341290156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,8,128,0,1,float16,float16,0,0.034485332667827606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,8,128,0,1,float16,fp8,0,0.03432533393303553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,48,8,128,0,1,fp8,fp8,0,0.04423999786376953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,48,128,0,1,float16,float16,0,0.02463999887307485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,48,128,0,1,float16,fp8,0,0.025600001215934753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,48,128,0,1,fp8,fp8,0,0.030207999050617218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,1,128,0,1,float16,float16,0,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,1,128,0,1,float16,fp8,0,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,1,128,0,1,fp8,fp8,0,0.0277813325325648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,4,128,0,1,float16,fp8,0,0.024533333877722423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,2,128,0,1,float16,float16,0,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,2,128,0,1,float16,fp8,0,0.024154665569464367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,2,128,0,1,fp8,fp8,0,0.028304000695546467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,4,128,0,1,float16,float16,0,0.023754666248957317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,4,128,0,1,fp8,fp8,0,0.028522667785485584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,8,128,0,1,float16,float16,0,0.023893333971500397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,8,128,0,1,float16,fp8,0,0.023973333338896435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,48,8,128,0,1,fp8,fp8,0,0.028490667541821797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,48,128,0,1,float16,float16,0,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,48,128,0,1,float16,fp8,0,0.019893333315849304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,48,128,0,1,fp8,fp8,0,0.022426667312781017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,1,128,0,1,float16,float16,0,0.018618666877349217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,1,128,0,1,float16,fp8,0,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,1,128,0,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,2,128,0,1,float16,float16,0,0.0183146670460701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,2,128,0,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,2,128,0,1,fp8,fp8,0,0.022287999590237934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,4,128,0,1,float16,float16,0,0.018085333208243053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,4,128,0,1,float16,fp8,0,0.018746666610240936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,4,128,0,1,fp8,fp8,0,0.022128000855445862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,8,128,0,1,float16,float16,0,0.01859733338157336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,8,128,0,1,float16,fp8,0,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,48,8,128,0,1,fp8,fp8,0,0.02183466653029124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,48,128,0,1,float16,float16,0,0.015664000064134598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,48,128,0,1,float16,fp8,0,0.015610666324694952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,48,128,0,1,fp8,fp8,0,0.017802666872739792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,1,128,0,1,float16,float16,0,0.015568000574906668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,1,128,0,1,float16,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,1,128,0,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,2,128,0,1,float16,float16,0,0.015392000476519266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,2,128,0,1,float16,fp8,0,0.015541333705186844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,2,128,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,4,128,0,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,4,128,0,1,float16,fp8,0,0.016234666109085083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,4,128,0,1,fp8,fp8,0,0.01754666616519292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,8,128,0,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,8,128,0,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,48,8,128,0,1,fp8,fp8,0,0.01844266677896182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,48,128,0,1,float16,float16,0,0.01451733335852623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,48,128,0,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,48,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,1,128,0,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,1,128,0,1,float16,fp8,0,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,1,128,0,1,fp8,fp8,0,0.016042667130629223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,2,128,0,1,float16,float16,0,0.014042666802803675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,2,128,0,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,2,128,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,4,128,0,1,float16,float16,0,0.014362666755914688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,4,128,0,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,4,128,0,1,fp8,fp8,0,0.01658133293191592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,8,128,0,1,float16,float16,0,0.014352000008026758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,8,128,0,1,float16,fp8,0,0.014639999717473984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,48,8,128,0,1,fp8,fp8,0,0.016122666498025257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,1,128,0,1,float16,float16,0,0.2934346596399943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,1,128,0,1,float16,fp8,0,0.2910719911257426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,1,128,0,1,fp8,fp8,0,0.40857064723968506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,2,128,0,1,float16,float16,0,0.29492799441019696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,2,128,0,1,float16,fp8,0,0.29527999957402545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,2,128,0,1,fp8,fp8,0,0.411135991414388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,4,128,0,1,float16,float16,0,0.2997066577275594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,4,128,0,1,float16,fp8,0,0.2997973362604777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,8,128,0,1,float16,float16,0,0.3017599980036418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,4,128,0,1,fp8,fp8,0,0.42421332995096844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,8,128,0,1,float16,fp8,0,0.3044266700744629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,48,8,128,0,1,fp8,fp8,0,0.4269440174102783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,48,128,0,1,float16,float16,0,0.175162672996521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,1,128,0,1,fp8,fp8,0,0.21409600973129272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,48,128,0,1,float16,fp8,0,0.17198399702707926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,48,128,0,1,fp8,fp8,0,0.2257759968439738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,1,128,0,1,float16,float16,0,0.153221329053243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,2,128,0,1,fp8,fp8,0,0.21292267243067423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,4,128,0,1,float16,float16,0,0.1561973293622335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,1,128,0,1,float16,fp8,0,0.15242133537928262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,2,128,0,1,float16,float16,0,0.15161066253980002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,2,128,0,1,float16,fp8,0,0.1527413328488668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,4,128,0,1,float16,fp8,0,0.1546026666959127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,48,128,0,1,float16,fp8,0,0.09002133210500081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,4,128,0,1,fp8,fp8,0,0.21830934286117554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,8,128,0,1,float16,float16,0,0.15716266632080078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,8,128,0,1,float16,fp8,0,0.15639467040697733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,48,128,0,1,float16,float16,0,0.09210667014122009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,48,8,128,0,1,fp8,fp8,0,0.22140266497929892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,48,128,0,1,fp8,fp8,0,0.12128000458081563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,1,128,0,1,float16,float16,0,0.08206399778525035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,1,128,0,1,float16,fp8,0,0.08250133196512859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,1,128,0,1,fp8,fp8,0,0.11306666334470113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,2,128,0,1,float16,float16,0,0.0825973351796468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,8,128,0,1,float16,float16,0,0.08434133728345235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,2,128,0,1,float16,fp8,0,0.08274133503437042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,2,128,0,1,fp8,fp8,0,0.1128480037053426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,4,128,0,1,float16,float16,0,0.08294400076071422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,4,128,0,1,float16,fp8,0,0.08322133123874664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,4,128,0,1,fp8,fp8,0,0.11637333035469055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,8,128,0,1,float16,fp8,0,0.08411733309427898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,48,128,0,1,float16,float16,0,0.04944533109664917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,48,8,128,0,1,fp8,fp8,0,0.11802132924397786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,48,128,0,1,float16,fp8,0,0.049327999353408813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,48,128,0,1,fp8,fp8,0,0.06683733562628429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,1,128,0,1,float16,float16,0,0.04727466901143392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,1,128,0,1,float16,fp8,0,0.047055999437967934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,1,128,0,1,fp8,fp8,0,0.06448533137639363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,2,128,0,1,float16,float16,0,0.04703466594219208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,2,128,0,1,float16,fp8,0,0.04780800143877665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,2,128,0,1,fp8,fp8,0,0.06344000001748402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,4,128,0,1,float16,float16,0,0.04807466765244802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,8,128,0,1,fp8,fp8,0,0.0650079995393753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,4,128,0,1,float16,fp8,0,0.04860266546408335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,4,128,0,1,fp8,fp8,0,0.06493866443634033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,8,128,0,1,float16,float16,0,0.048138668139775596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,48,8,128,0,1,float16,fp8,0,0.04831466575463613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,48,128,0,1,float16,float16,0,0.03263466556866964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,48,128,0,1,float16,fp8,0,0.03256533294916153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,48,128,0,1,fp8,fp8,0,0.04110399881998698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,1,128,0,1,float16,float16,0,0.0305226668715477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,1,128,0,1,float16,fp8,0,0.03062933435042699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,1,128,0,1,fp8,fp8,0,0.039290666580200195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,2,128,0,1,float16,float16,0,0.031189332405726116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,2,128,0,1,float16,fp8,0,0.03165333221356074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,2,128,0,1,fp8,fp8,0,0.03959999978542328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,4,128,0,1,float16,float16,0,0.031397332747777305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,4,128,0,1,float16,fp8,0,0.03205333401759466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,4,128,0,1,fp8,fp8,0,0.03958400090535482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,8,128,0,1,float16,float16,0,0.030933332939942677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,8,128,0,1,float16,fp8,0,0.0316746657093366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,1,128,0,1,float16,fp8,0,0.02162666618824005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,48,8,128,0,1,fp8,fp8,0,0.03942933430274328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,48,128,0,1,float16,float16,0,0.022255999346574146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,48,128,0,1,float16,fp8,0,0.022458667556444805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,48,128,0,1,fp8,fp8,0,0.026789332429567974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,1,128,0,1,float16,float16,0,0.021898667017618816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,1,128,0,1,fp8,fp8,0,0.02603733291228612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,2,128,0,1,float16,float16,0,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,2,128,0,1,float16,fp8,0,0.021429332594076794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,2,128,0,1,fp8,fp8,0,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,4,128,0,1,float16,float16,0,0.021557333568731945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,4,128,0,1,float16,fp8,0,0.021402666966120403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,4,128,0,1,fp8,fp8,0,0.026202666262785595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,8,128,0,1,float16,float16,0,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,8,128,0,1,float16,fp8,0,0.02199999988079071
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,48,8,128,0,1,fp8,fp8,0,0.02535466601451238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,48,128,0,1,float16,float16,0,0.01834133391578992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,48,128,0,1,float16,fp8,0,0.01842133328318596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,48,128,0,1,fp8,fp8,0,0.02203733225663503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,1,128,0,1,float16,float16,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,1,128,0,1,float16,fp8,0,0.017984000345071156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,1,128,0,1,fp8,fp8,0,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,2,128,0,1,float16,float16,0,0.017344000438849132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,2,128,0,1,float16,fp8,0,0.017887999614079792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,2,128,0,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,4,128,0,1,float16,float16,0,0.01773333301146825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,4,128,0,1,float16,fp8,0,0.018511999398469925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,4,128,0,1,fp8,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,8,128,0,1,float16,float16,0,0.017850667238235474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,8,128,0,1,float16,fp8,0,0.0179626668492953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,48,8,128,0,1,fp8,fp8,0,0.02163733293612798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,48,128,0,1,float16,float16,0,0.014373333503802618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,48,128,0,1,float16,fp8,0,0.01545599972208341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,48,128,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,1,128,0,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,1,128,0,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,1,128,0,1,fp8,fp8,0,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,2,128,0,1,float16,float16,0,0.01423466702302297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,2,128,0,1,float16,fp8,0,0.014639999717473984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,2,128,0,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,4,128,0,1,float16,float16,0,0.014725333700577417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,4,128,0,1,float16,fp8,0,0.014570667097965876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,4,128,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,8,128,0,1,float16,float16,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,8,128,0,1,float16,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,48,8,128,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,48,128,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,48,128,0,1,float16,fp8,0,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,48,128,0,1,fp8,fp8,0,0.016341333587964375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,1,128,0,1,float16,float16,0,0.013669333110253016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,4,128,0,1,float16,float16,0,0.014090667168299357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,1,128,0,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,1,128,0,1,fp8,fp8,0,0.016303999970356624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,2,128,0,1,float16,float16,0,0.014165333161751429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,2,128,0,1,float16,fp8,0,0.014490666488806406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,2,128,0,1,fp8,fp8,0,0.015802666544914246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,4,128,0,1,float16,fp8,0,0.01451733335852623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,4,128,0,1,fp8,fp8,0,0.01603200038274129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,8,128,0,1,float16,float16,0,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,8,128,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,48,8,128,0,1,fp8,fp8,0,0.016783999900023144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,1,128,0,1,fp8,fp8,0,28.00653839111328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,2,128,0,1,fp8,fp8,0,28.066884358723957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,1,128,0,1,float16,float16,0,43.99426778157552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,1,128,0,1,float16,fp8,0,44.8271484375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,2,128,0,1,float16,float16,0,43.91077677408854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,2,128,0,1,float16,fp8,0,45.09169006347656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,4,128,0,1,float16,float16,0,44.16753133138021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,4,128,0,1,float16,fp8,0,44.91997273763021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,4,128,0,1,fp8,fp8,0,27.978426615397137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,40,128,0,1,fp8,fp8,0,14.361562093098959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,40,128,0,1,float16,float16,0,21.34776560465495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,40,128,0,1,float16,fp8,0,21.550458272298176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,1,128,0,1,float16,float16,0,20.87161127726237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,8,128,0,1,fp8,fp8,0,29.847727457682293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,8,128,0,1,float16,float16,0,44.186065673828125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,1,128,0,1,fp8,fp8,0,14.04968007405599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,1,128,0,1,float16,fp8,0,21.48096974690755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,40,8,128,0,1,float16,fp8,0,42.359718322753906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,2,128,0,1,fp8,fp8,0,14.112495422363281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,2,128,0,1,float16,float16,0,20.99395243326823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,2,128,0,1,float16,fp8,0,21.268287658691406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,4,128,0,1,fp8,fp8,0,13.978917439778646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,4,128,0,1,float16,float16,0,21.416725158691406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,4,128,0,1,float16,fp8,0,21.926432291666668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,40,128,0,1,fp8,fp8,0,7.33946164449056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,40,128,0,1,float16,float16,0,10.82211176554362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,40,128,0,1,float16,fp8,0,10.886671702067057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,8,128,0,1,fp8,fp8,0,14.200485229492188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,8,128,0,1,float16,float16,0,21.622329711914062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,1,128,0,1,fp8,fp8,0,7.071317036946614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,1,128,0,1,float16,float16,0,11.385930379231771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,40,8,128,0,1,float16,fp8,0,21.58153025309245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,1,128,0,1,float16,fp8,0,10.663871765136719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,2,128,0,1,fp8,fp8,0,7.392458597819011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,2,128,0,1,float16,float16,0,11.14135996500651
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,2,128,0,1,float16,fp8,0,10.785162607828775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,4,128,0,1,float16,float16,0,11.00869369506836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,4,128,0,1,fp8,fp8,0,7.2294877370198565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,40,128,0,1,float16,float16,0,5.573898951212565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,40,128,0,1,float16,fp8,0,5.623274485270183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,4,128,0,1,float16,fp8,0,10.748191833496094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,8,128,0,1,fp8,fp8,0,7.24624506632487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,40,128,0,1,fp8,fp8,0,3.7142772674560547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,8,128,0,1,float16,float16,0,10.893712361653646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,40,8,128,0,1,float16,fp8,0,10.82041041056315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,1,128,0,1,float16,float16,0,5.5143890380859375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,1,128,0,1,fp8,fp8,0,3.6306613286336265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,2,128,0,1,fp8,fp8,0,3.691904067993164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,1,128,0,1,float16,fp8,0,5.585397084554036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,2,128,0,1,float16,float16,0,5.450741449991862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,2,128,0,1,float16,fp8,0,5.508501052856445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,4,128,0,1,float16,float16,0,5.551631927490234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,4,128,0,1,fp8,fp8,0,3.7602132161458335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,4,128,0,1,float16,fp8,0,5.503029505411784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,8,128,0,1,fp8,fp8,0,3.611034711201986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,8,128,0,1,float16,float16,0,5.439354578653972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,40,8,128,0,1,float16,fp8,0,5.507146835327148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,1,128,0,1,fp8,fp8,0,16.56679407755534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,2,128,0,1,fp8,fp8,0,16.283098856608074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,1,128,0,1,float16,float16,0,24.999338785807293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,1,128,0,1,float16,fp8,0,25.20282745361328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,2,128,0,1,float16,float16,0,25.26141357421875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,2,128,0,1,float16,fp8,0,24.359776814778645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,4,128,0,1,float16,float16,0,25.05003611246745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,4,128,0,1,float16,fp8,0,25.539456685384113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,4,128,0,1,fp8,fp8,0,16.55664571126302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,40,128,0,1,fp8,fp8,0,8.78397305806478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,40,128,0,1,float16,float16,0,12.533701578776041
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,40,128,0,1,float16,fp8,0,12.642597198486328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,8,128,0,1,fp8,fp8,0,16.564917246500652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,1,128,0,1,float16,float16,0,12.235440572102865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,8,128,0,1,float16,float16,0,24.503631591796875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,1,128,0,1,fp8,fp8,0,8.213946660359701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,1,128,0,1,float16,fp8,0,12.07424545288086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,40,8,128,0,1,float16,fp8,0,24.41077931722005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,2,128,0,1,fp8,fp8,0,8.405722935994467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,2,128,0,1,float16,float16,0,12.26953125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,2,128,0,1,float16,fp8,0,12.138501485188803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,4,128,0,1,fp8,fp8,0,8.102869033813477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,4,128,0,1,float16,float16,0,12.219322204589844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,4,128,0,1,float16,fp8,0,12.197226206461588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,40,128,0,1,fp8,fp8,0,4.271055857340495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,40,128,0,1,float16,fp8,0,6.399173100789388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,40,128,0,1,float16,float16,0,6.5450185139973955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,8,128,0,1,fp8,fp8,0,8.746997197469076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,8,128,0,1,float16,float16,0,12.214874267578125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,1,128,0,1,float16,float16,0,6.417834599812825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,40,8,128,0,1,float16,fp8,0,12.502911885579428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,1,128,0,1,fp8,fp8,0,4.287530581156413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,1,128,0,1,float16,fp8,0,6.288560231526692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,2,128,0,1,fp8,fp8,0,4.178714752197266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,2,128,0,1,float16,float16,0,6.223946889241536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,2,128,0,1,float16,fp8,0,6.358042399088542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,4,128,0,1,fp8,fp8,0,4.14518388112386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,4,128,0,1,float16,float16,0,6.0960744222005205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,4,128,0,1,float16,fp8,0,6.4224802652994795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,40,128,0,1,fp8,fp8,0,2.219871997833252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,8,128,0,1,fp8,fp8,0,4.240410804748535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,40,128,0,1,float16,float16,0,3.253573417663574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,40,128,0,1,float16,fp8,0,3.274085362752279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,1,128,0,1,float16,float16,0,3.127706527709961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,8,128,0,1,float16,float16,0,6.311370849609375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,40,8,128,0,1,float16,fp8,0,6.369861602783203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,1,128,0,1,float16,fp8,0,3.147455851236979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,1,128,0,1,fp8,fp8,0,2.1112426122029624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,2,128,0,1,fp8,fp8,0,2.168922742207845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,2,128,0,1,float16,float16,0,3.224405288696289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,2,128,0,1,float16,fp8,0,3.188591957092285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,4,128,0,1,fp8,fp8,0,2.10972261428833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,4,128,0,1,float16,float16,0,3.1663519541422525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,4,128,0,1,float16,fp8,0,3.1538187662760415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,8,128,0,1,float16,float16,0,3.2313919067382812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,8,128,0,1,fp8,fp8,0,2.153557300567627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,40,8,128,0,1,float16,fp8,0,3.1794986724853516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,1,128,0,1,fp8,fp8,0,11.558607737223307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,2,128,0,1,fp8,fp8,0,11.799215952555338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,1,128,0,1,float16,float16,0,17.49182891845703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,1,128,0,1,float16,fp8,0,17.164042154947918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,2,128,0,1,float16,float16,0,17.08668263753255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,2,128,0,1,float16,fp8,0,17.226821899414062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,4,128,0,1,float16,float16,0,17.095199584960938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,4,128,0,1,fp8,fp8,0,11.999098459879557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,40,128,0,1,fp8,fp8,0,6.295237223307292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,40,128,0,1,float16,float16,0,9.261098861694336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,4,128,0,1,float16,fp8,0,16.99182891845703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,40,128,0,1,float16,fp8,0,9.499071756998697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,8,128,0,1,fp8,fp8,0,12.170997619628906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,8,128,0,1,float16,float16,0,18.318213144938152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,1,128,0,1,float16,float16,0,9.132826487223307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,1,128,0,1,fp8,fp8,0,6.055306752522786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,40,8,128,0,1,float16,fp8,0,17.688138326009113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,1,128,0,1,float16,fp8,0,9.120496114095053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,2,128,0,1,fp8,fp8,0,5.934549331665039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,2,128,0,1,float16,float16,0,8.55186653137207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,2,128,0,1,float16,fp8,0,8.8252321879069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,4,128,0,1,fp8,fp8,0,5.834357579549153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,4,128,0,1,float16,float16,0,8.89138158162435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,8,128,0,1,fp8,fp8,0,5.861295700073242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,4,128,0,1,float16,fp8,0,8.596746444702148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,40,128,0,1,float16,float16,0,4.6362559000651045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,40,128,0,1,float16,fp8,0,4.560442606608073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,40,128,0,1,fp8,fp8,0,3.1331841150919595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,8,128,0,1,float16,float16,0,8.744474411010742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,40,8,128,0,1,float16,fp8,0,8.677562713623047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,1,128,0,1,fp8,fp8,0,2.9414774576822915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,1,128,0,1,float16,float16,0,4.442768096923828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,1,128,0,1,float16,fp8,0,4.388954798380534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,2,128,0,1,fp8,fp8,0,2.948666572570801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,2,128,0,1,float16,fp8,0,4.306741396586101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,2,128,0,1,float16,float16,0,4.463290532430013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,4,128,0,1,float16,float16,0,4.386746724446614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,4,128,0,1,float16,fp8,0,4.35372257232666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,4,128,0,1,fp8,fp8,0,2.9510720570882163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,40,128,0,1,float16,float16,0,2.2866506576538086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,8,128,0,1,fp8,fp8,0,2.985504150390625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,40,128,0,1,float16,fp8,0,2.258058706919352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,40,128,0,1,fp8,fp8,0,1.6351839701334636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,8,128,0,1,float16,float16,0,4.454538663228353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,40,8,128,0,1,float16,fp8,0,4.425696055094401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,1,128,0,1,float16,float16,0,2.194218635559082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,1,128,0,1,float16,fp8,0,2.125978628794352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,1,128,0,1,fp8,fp8,0,1.5622506141662598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,2,128,0,1,fp8,fp8,0,1.5396159489949544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,2,128,0,1,float16,float16,0,2.1937813758850098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,2,128,0,1,float16,fp8,0,2.190608024597168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,4,128,0,1,float16,float16,0,2.27511994043986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,4,128,0,1,fp8,fp8,0,1.538570721944173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,4,128,0,1,float16,fp8,0,2.177840073903402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,8,128,0,1,fp8,fp8,0,1.5384052594502766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,8,128,0,1,float16,float16,0,2.1688586870829263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,40,8,128,0,1,float16,fp8,0,2.197498639424642
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,1,128,0,1,fp8,fp8,0,15.43393580118815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,2,128,0,1,fp8,fp8,0,15.475770314534506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,1,128,0,1,float16,float16,0,22.38269805908203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,1,128,0,1,float16,fp8,0,22.11608123779297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,2,128,0,1,float16,float16,0,22.523518880208332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,2,128,0,1,float16,fp8,0,22.872586568196613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,4,128,0,1,float16,float16,0,22.79863993326823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,4,128,0,1,float16,fp8,0,22.78135935465495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,4,128,0,1,fp8,fp8,0,16.65285364786784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,40,128,0,1,fp8,fp8,0,8.68286387125651
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,40,128,0,1,float16,float16,0,12.602250417073568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,40,128,0,1,float16,fp8,0,12.53768539428711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,1,128,0,1,float16,float16,0,12.203866322835287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,8,128,0,1,fp8,fp8,0,15.437739054361979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,8,128,0,1,float16,float16,0,23.29229227701823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,1,128,0,1,fp8,fp8,0,7.73197873433431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,1,128,0,1,float16,fp8,0,11.495290120442709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,40,8,128,0,1,float16,fp8,0,22.461903889973957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,2,128,0,1,fp8,fp8,0,7.738901138305664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,2,128,0,1,float16,float16,0,11.23751449584961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,2,128,0,1,float16,fp8,0,11.263652801513672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,4,128,0,1,fp8,fp8,0,7.744981129964192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,4,128,0,1,float16,float16,0,11.190906524658203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,4,128,0,1,float16,fp8,0,11.357402801513672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,8,128,0,1,fp8,fp8,0,7.783589045206706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,40,128,0,1,float16,float16,0,5.837429046630859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,40,128,0,1,fp8,fp8,0,4.0335038503011065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,40,128,0,1,float16,fp8,0,5.888800303141276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,1,128,0,1,float16,float16,0,5.633391698201497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,8,128,0,1,float16,float16,0,11.595381418863932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,40,8,128,0,1,float16,fp8,0,11.4082400004069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,1,128,0,1,fp8,fp8,0,3.774794578552246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,1,128,0,1,float16,fp8,0,5.726624170939128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,2,128,0,1,fp8,fp8,0,3.8501014709472656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,2,128,0,1,float16,float16,0,5.600144068400065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,2,128,0,1,float16,fp8,0,5.63587760925293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,4,128,0,1,fp8,fp8,0,3.8556321461995444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,4,128,0,1,float16,float16,0,5.706469217936198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,4,128,0,1,float16,fp8,0,5.771018981933594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,40,128,0,1,float16,float16,0,2.9408321380615234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,8,128,0,1,fp8,fp8,0,3.902970631917318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,40,128,0,1,fp8,fp8,0,2.082058588663737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,40,128,0,1,float16,fp8,0,2.945178667704264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,8,128,0,1,float16,float16,0,5.740336100260417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,40,8,128,0,1,float16,fp8,0,5.772970835367839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,1,128,0,1,float16,float16,0,2.9127413431803384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,1,128,0,1,float16,fp8,0,3.0592692693074546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,1,128,0,1,fp8,fp8,0,1.9239253997802734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,2,128,0,1,fp8,fp8,0,1.9437813758850098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,2,128,0,1,float16,float16,0,2.670741399129232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,2,128,0,1,float16,fp8,0,2.847306569417318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,4,128,0,1,float16,float16,0,2.9517494837443032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,4,128,0,1,fp8,fp8,0,1.9768320719401042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,4,128,0,1,float16,fp8,0,2.8808107376098633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,8,128,0,1,float16,float16,0,2.878170649210612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,40,128,0,1,float16,float16,0,1.4827946027119954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,8,128,0,1,fp8,fp8,0,1.9529493649800618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,40,128,0,1,float16,fp8,0,1.5405920346577961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,40,128,0,1,fp8,fp8,0,1.10153063138326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,40,8,128,0,1,float16,fp8,0,2.9494508107503257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,1,128,0,1,float16,float16,0,1.4993492762247722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,1,128,0,1,fp8,fp8,0,1.0566240151723225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,1,128,0,1,float16,fp8,0,1.4393919308980305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,2,128,0,1,float16,float16,0,1.4367200533548992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,2,128,0,1,fp8,fp8,0,1.0508906841278076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,2,128,0,1,float16,fp8,0,1.4480373064676921
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,4,128,0,1,fp8,fp8,0,1.0525226593017578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,4,128,0,1,float16,float16,0,1.443269411722819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,4,128,0,1,float16,fp8,0,1.431045373280843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,8,128,0,1,float16,float16,0,1.4417920112609863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,8,128,0,1,fp8,fp8,0,1.053978681564331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,40,8,128,0,1,float16,fp8,0,1.4480640093485515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,1,128,0,1,fp8,fp8,0,9.094218571980795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,2,128,0,1,fp8,fp8,0,9.042149225870768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,1,128,0,1,float16,float16,0,13.319562276204428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,1,128,0,1,float16,fp8,0,13.154555002848307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,2,128,0,1,float16,float16,0,13.192005157470703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,2,128,0,1,float16,fp8,0,13.112767537434896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,4,128,0,1,float16,fp8,0,13.1910769144694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,4,128,0,1,float16,float16,0,13.340277353922525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,4,128,0,1,fp8,fp8,0,9.129082361857096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,40,128,0,1,fp8,fp8,0,4.854325294494629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,40,128,0,1,float16,float16,0,6.965936024983724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,40,128,0,1,float16,fp8,0,6.786122639973958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,1,128,0,1,float16,float16,0,6.706357320149739
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,8,128,0,1,fp8,fp8,0,9.189226786295572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,8,128,0,1,float16,float16,0,13.210197448730469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,1,128,0,1,fp8,fp8,0,4.51857058207194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,1,128,0,1,float16,fp8,0,6.554704030354817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,2,128,0,1,fp8,fp8,0,4.6168212890625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,40,8,128,0,1,float16,fp8,0,13.756239573160807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,2,128,0,1,float16,float16,0,6.676490783691406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,2,128,0,1,float16,fp8,0,6.497669219970703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,4,128,0,1,fp8,fp8,0,4.54965337117513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,4,128,0,1,float16,float16,0,7.065637588500977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,4,128,0,1,float16,fp8,0,6.992757161458333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,40,128,0,1,float16,float16,0,3.801637331644694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,40,128,0,1,float16,fp8,0,3.7589600880940757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,8,128,0,1,fp8,fp8,0,4.924703915913899
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,40,128,0,1,fp8,fp8,0,2.5827199618021646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,8,128,0,1,float16,float16,0,7.536261240641276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,1,128,0,1,float16,float16,0,3.449295997619629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,40,8,128,0,1,float16,fp8,0,7.487765630086263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,1,128,0,1,float16,fp8,0,3.4572267532348633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,1,128,0,1,fp8,fp8,0,2.2926559448242188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,2,128,0,1,fp8,fp8,0,2.240608056386312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,2,128,0,1,float16,float16,0,3.3050559361775718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,2,128,0,1,float16,fp8,0,3.350858688354492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,4,128,0,1,float16,float16,0,3.3032639821370444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,4,128,0,1,fp8,fp8,0,2.283141295115153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,4,128,0,1,float16,fp8,0,3.278623898824056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,40,128,0,1,float16,float16,0,1.6739253997802734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,8,128,0,1,float16,float16,0,3.3613974253336587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,8,128,0,1,fp8,fp8,0,2.2999679247538247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,40,128,0,1,fp8,fp8,0,1.2830026944478352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,40,128,0,1,float16,fp8,0,1.683274745941162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,40,8,128,0,1,float16,fp8,0,3.3751840591430664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,1,128,0,1,float16,float16,0,1.6620480219523113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,1,128,0,1,fp8,fp8,0,1.1924106280008953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,1,128,0,1,float16,fp8,0,1.643514633178711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,2,128,0,1,fp8,fp8,0,1.1746559937795003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,2,128,0,1,float16,float16,0,1.6148266792297363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,2,128,0,1,float16,fp8,0,1.654607931772868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,4,128,0,1,float16,float16,0,1.6947253545125325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,4,128,0,1,fp8,fp8,0,1.1796159744262695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,4,128,0,1,float16,fp8,0,1.617301305135091
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,8,128,0,1,float16,float16,0,1.597285270690918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,8,128,0,1,fp8,fp8,0,1.1853439807891846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,40,8,128,0,1,float16,fp8,0,1.6161173184712727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,40,128,0,1,float16,float16,0,0.8967626889546713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,40,128,0,1,fp8,fp8,0,0.68504532178243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,1,128,0,1,float16,fp8,0,0.8965226809183756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,40,128,0,1,float16,fp8,0,0.9214826424916586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,1,128,0,1,float16,float16,0,0.8803306420644125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,1,128,0,1,fp8,fp8,0,0.629045327504476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,2,128,0,1,float16,float16,0,0.8731359640757242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,2,128,0,1,fp8,fp8,0,0.6290239890416464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,2,128,0,1,float16,fp8,0,0.8808693091074625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,4,128,0,1,float16,float16,0,0.8825333118438721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,4,128,0,1,float16,fp8,0,0.8819999694824219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,4,128,0,1,fp8,fp8,0,0.6311839818954468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,8,128,0,1,float16,float16,0,0.8831199804941813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,8,128,0,1,float16,fp8,0,0.8822666803995768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,40,8,128,0,1,fp8,fp8,0,0.6342879931131998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,1,128,0,1,fp8,fp8,0,9.040741602579752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,2,128,0,1,fp8,fp8,0,8.971888224283854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,1,128,0,1,float16,float16,0,12.625391642252604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,1,128,0,1,float16,fp8,0,12.621200561523438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,2,128,0,1,float16,float16,0,12.60711415608724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,2,128,0,1,float16,fp8,0,12.907669067382812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,4,128,0,1,float16,float16,0,12.580448150634766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,40,128,0,1,fp8,fp8,0,4.885562578837077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,4,128,0,1,fp8,fp8,0,8.938496271769205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,40,128,0,1,float16,float16,0,6.6208852132161455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,40,128,0,1,float16,fp8,0,6.857061386108398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,8,128,0,1,fp8,fp8,0,9.015381495157877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,4,128,0,1,float16,fp8,0,12.844987233479818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,8,128,0,1,float16,float16,0,12.886496225992838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,40,8,128,0,1,float16,fp8,0,12.791770935058594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,1,128,0,1,fp8,fp8,0,4.457824071248372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,1,128,0,1,float16,float16,0,6.3061173756917315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,2,128,0,1,fp8,fp8,0,4.561360041300456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,1,128,0,1,float16,fp8,0,6.273445129394531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,2,128,0,1,float16,float16,0,6.4460798899332685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,2,128,0,1,float16,fp8,0,6.278479894002278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,4,128,0,1,float16,float16,0,6.330085118611653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,4,128,0,1,fp8,fp8,0,4.518426577250163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,4,128,0,1,float16,fp8,0,7.348778406778972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,40,128,0,1,float16,float16,0,3.652768135070801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,40,128,0,1,float16,fp8,0,3.6060158411661782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,8,128,0,1,fp8,fp8,0,4.5597333908081055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,40,128,0,1,fp8,fp8,0,2.4112745920817056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,8,128,0,1,float16,float16,0,6.979728062947591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,40,8,128,0,1,float16,fp8,0,7.163583755493164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,1,128,0,1,float16,float16,0,3.182799975077311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,1,128,0,1,fp8,fp8,0,2.265519936879476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,1,128,0,1,float16,fp8,0,3.1374505360921225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,2,128,0,1,float16,float16,0,3.1583681106567383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,2,128,0,1,fp8,fp8,0,2.201466719309489
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,2,128,0,1,float16,fp8,0,3.105034510294596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,4,128,0,1,fp8,fp8,0,2.218170642852783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,4,128,0,1,float16,float16,0,3.0564371744791665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,4,128,0,1,float16,fp8,0,3.1468000411987305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,40,128,0,1,float16,float16,0,1.614202658335368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,8,128,0,1,fp8,fp8,0,2.253712018330892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,40,128,0,1,float16,fp8,0,1.6299039522806804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,8,128,0,1,float16,float16,0,3.1551574071248374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,40,128,0,1,fp8,fp8,0,1.2352906862894695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,40,8,128,0,1,float16,fp8,0,3.145930608113607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,1,128,0,1,float16,float16,0,1.5253225962320964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,1,128,0,1,fp8,fp8,0,1.139024019241333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,1,128,0,1,float16,fp8,0,1.566757361094157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,2,128,0,1,float16,float16,0,1.524720033009847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,2,128,0,1,float16,fp8,0,1.5146026611328125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,2,128,0,1,fp8,fp8,0,1.1788907051086426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,4,128,0,1,float16,float16,0,1.5020532608032227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,4,128,0,1,fp8,fp8,0,1.1370986302693684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,4,128,0,1,float16,fp8,0,1.5320960680643718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,8,128,0,1,float16,float16,0,1.5187093416849773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,8,128,0,1,float16,fp8,0,1.5309333801269531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,40,8,128,0,1,fp8,fp8,0,1.1483306884765625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,40,128,0,1,float16,float16,0,0.8302559852600098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,40,128,0,1,float16,fp8,0,0.8492373625437418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,40,128,0,1,fp8,fp8,0,0.6704266866048177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,1,128,0,1,float16,float16,0,0.8006506760915121
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,1,128,0,1,float16,fp8,0,0.8073333104451498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,2,128,0,1,fp8,fp8,0,0.6166719992955526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,1,128,0,1,fp8,fp8,0,0.6137226819992065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,2,128,0,1,float16,float16,0,0.8077600002288818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,8,128,0,1,float16,float16,0,0.8109707037607828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,8,128,0,1,fp8,fp8,0,0.623631993929545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,8,128,0,1,float16,fp8,0,0.8119573593139648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,2,128,0,1,float16,fp8,0,0.8078560034434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,4,128,0,1,float16,float16,0,0.8071839809417725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,4,128,0,1,float16,fp8,0,0.8127733071645101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,40,4,128,0,1,fp8,fp8,0,0.6183679898579916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,1,128,0,1,float16,fp8,0,0.4579999844233195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,2,128,0,1,float16,float16,0,0.4644320011138916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,40,128,0,1,float16,float16,0,0.469925324122111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,40,128,0,1,float16,fp8,0,0.47780267397562665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,40,128,0,1,fp8,fp8,0,0.3605653444925944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,1,128,0,1,float16,float16,0,0.460752010345459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,1,128,0,1,fp8,fp8,0,0.3364479939142863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,2,128,0,1,float16,fp8,0,0.4589386781056722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,2,128,0,1,fp8,fp8,0,0.3357119957605998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,4,128,0,1,float16,float16,0,0.45850666364034015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,4,128,0,1,float16,fp8,0,0.46193599700927734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,4,128,0,1,fp8,fp8,0,0.3354880015055339
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,8,128,0,1,float16,float16,0,0.458842674891154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,8,128,0,1,float16,fp8,0,0.45980266729990643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,40,8,128,0,1,fp8,fp8,0,0.3377813498179118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,1,128,0,1,fp8,fp8,0,5.508277257283528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,1,128,0,1,float16,float16,0,7.636245091756185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,2,128,0,1,fp8,fp8,0,5.628943761189778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,1,128,0,1,float16,fp8,0,7.654703776041667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,2,128,0,1,float16,float16,0,7.716469446818034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,2,128,0,1,float16,fp8,0,7.679824193318685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,4,128,0,1,float16,float16,0,7.612341562906901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,4,128,0,1,float16,fp8,0,7.83131726582845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,4,128,0,1,fp8,fp8,0,5.621461232503255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,40,128,0,1,float16,float16,0,4.087375958760579
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,40,128,0,1,fp8,fp8,0,2.9788853327433267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,40,128,0,1,float16,fp8,0,4.136453310648601
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,8,128,0,1,fp8,fp8,0,5.538031895955403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,1,128,0,1,float16,float16,0,3.8514719009399414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,8,128,0,1,float16,float16,0,7.693994522094727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,40,8,128,0,1,float16,fp8,0,7.7772477467854815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,1,128,0,1,fp8,fp8,0,2.720730781555176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,1,128,0,1,float16,fp8,0,3.8067092895507812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,2,128,0,1,fp8,fp8,0,2.796485265096029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,2,128,0,1,float16,float16,0,3.7529493967692056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,2,128,0,1,float16,fp8,0,3.6069119771321616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,4,128,0,1,float16,float16,0,3.772869427998861
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,4,128,0,1,float16,fp8,0,3.7098185221354165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,4,128,0,1,fp8,fp8,0,2.7473227183024087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,40,128,0,1,float16,float16,0,1.9319574038187664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,8,128,0,1,fp8,fp8,0,2.768810590108236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,8,128,0,1,float16,float16,0,3.870730717976888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,40,128,0,1,fp8,fp8,0,1.537541389465332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,40,128,0,1,float16,fp8,0,1.9645759264628093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,40,8,128,0,1,float16,fp8,0,3.9186134338378906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,1,128,0,1,float16,float16,0,1.8440106709798176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,1,128,0,1,float16,fp8,0,1.8496960004170735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,1,128,0,1,fp8,fp8,0,1.3688693046569824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,2,128,0,1,float16,float16,0,1.7753760019938152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,2,128,0,1,fp8,fp8,0,1.3660799662272136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,2,128,0,1,float16,fp8,0,1.856112003326416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,4,128,0,1,float16,float16,0,1.80950927734375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,4,128,0,1,fp8,fp8,0,1.3807786305745442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,4,128,0,1,float16,fp8,0,1.8173920313517253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,8,128,0,1,float16,float16,0,1.8046560287475586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,40,128,0,1,float16,float16,0,0.9919093449910482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,8,128,0,1,fp8,fp8,0,1.396176020304362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,40,8,128,0,1,float16,fp8,0,1.8722880681355794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,40,128,0,1,float16,fp8,0,1.0068639914194744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,40,128,0,1,fp8,fp8,0,0.8162240187327067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,1,128,0,1,float16,float16,0,0.9298506577809652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,1,128,0,1,float16,fp8,0,0.9434879620869955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,1,128,0,1,fp8,fp8,0,0.7452053229014078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,2,128,0,1,float16,float16,0,0.9308373133341471
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,2,128,0,1,float16,fp8,0,0.9336480299631754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,2,128,0,1,fp8,fp8,0,0.7238879998524984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,4,128,0,1,float16,float16,0,0.9374667008717855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,4,128,0,1,float16,fp8,0,0.9369440078735352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,4,128,0,1,fp8,fp8,0,0.7282826900482178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,8,128,0,1,float16,float16,0,0.9397493203481039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,40,128,0,1,fp8,fp8,0,0.43008001645406085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,8,128,0,1,float16,fp8,0,0.9461440245310465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,1,128,0,1,float16,float16,0,0.5068426529566447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,40,128,0,1,float16,float16,0,0.5278079907099406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,40,8,128,0,1,fp8,fp8,0,0.7347413698832194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,40,128,0,1,float16,fp8,0,0.5383466482162476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,1,128,0,1,fp8,fp8,0,0.3839413324991862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,1,128,0,1,float16,fp8,0,0.5117013454437256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,2,128,0,1,float16,float16,0,0.5104000171025594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,2,128,0,1,fp8,fp8,0,0.3858720064163208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,2,128,0,1,float16,fp8,0,0.5150826772054037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,4,128,0,1,float16,float16,0,0.5102560122807821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,4,128,0,1,float16,fp8,0,0.5110346476236979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,4,128,0,1,fp8,fp8,0,0.3861173391342163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,8,128,0,1,float16,float16,0,0.5164053440093994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,8,128,0,1,float16,fp8,0,0.5157066583633423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,40,8,128,0,1,fp8,fp8,0,0.38995734850565594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,40,128,0,1,float16,float16,0,0.2874720096588135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,40,128,0,1,float16,fp8,0,0.2943466703097026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,40,128,0,1,fp8,fp8,0,0.24126400550206503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,1,128,0,1,float16,float16,0,0.2725813388824463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,1,128,0,1,float16,fp8,0,0.27585599819819134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,1,128,0,1,fp8,fp8,0,0.22380266586939493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,2,128,0,1,float16,float16,0,0.2709386746088664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,2,128,0,1,float16,fp8,0,0.2757546703020732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,2,128,0,1,fp8,fp8,0,0.22376000881195068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,4,128,0,1,float16,float16,0,0.2749920090039571
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,4,128,0,1,float16,fp8,0,0.2743893265724182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,4,128,0,1,fp8,fp8,0,0.2237066626548767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,8,128,0,1,float16,float16,0,0.2732959985733032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,8,128,0,1,float16,fp8,0,0.27851200103759766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,40,8,128,0,1,fp8,fp8,0,0.22340800364812216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,1,128,0,1,fp8,fp8,0,5.8633066813151045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,2,128,0,1,fp8,fp8,0,5.8913224538167315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,1,128,0,1,float16,float16,0,7.759093602498372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,1,128,0,1,float16,fp8,0,7.730826695760091
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,2,128,0,1,float16,float16,0,7.749754587809245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,2,128,0,1,float16,fp8,0,7.747274398803711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,4,128,0,1,float16,float16,0,7.869269053141276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,4,128,0,1,float16,fp8,0,7.736975987752278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,4,128,0,1,fp8,fp8,0,5.921114603678386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,40,128,0,1,float16,float16,0,4.226757367451985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,40,128,0,1,float16,fp8,0,4.244415918986003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,40,128,0,1,fp8,fp8,0,3.3713547388712564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,1,128,0,1,float16,float16,0,3.7169440587361655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,8,128,0,1,fp8,fp8,0,6.048543930053711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,8,128,0,1,float16,float16,0,7.848042805989583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,40,8,128,0,1,float16,fp8,0,7.967621485392253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,1,128,0,1,fp8,fp8,0,2.93559996287028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,1,128,0,1,float16,fp8,0,3.7758665084838867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,2,128,0,1,fp8,fp8,0,2.8926773071289062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,2,128,0,1,float16,float16,0,3.8856000900268555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,2,128,0,1,float16,fp8,0,3.882848103841146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,4,128,0,1,float16,float16,0,3.825968106587728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,4,128,0,1,fp8,fp8,0,2.9614454905192056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,4,128,0,1,float16,fp8,0,3.89575990041097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,40,128,0,1,float16,float16,0,2.051647981007894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,8,128,0,1,float16,float16,0,3.8609491984049478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,8,128,0,1,fp8,fp8,0,2.9785439173380532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,40,128,0,1,float16,fp8,0,2.0357866287231445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,40,8,128,0,1,float16,fp8,0,3.8812853495279946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,40,128,0,1,fp8,fp8,0,1.643162727355957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,1,128,0,1,float16,float16,0,1.8276106516520183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,1,128,0,1,fp8,fp8,0,1.48416535059611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,1,128,0,1,float16,fp8,0,1.816383997599284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,2,128,0,1,float16,float16,0,1.7942132949829102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,2,128,0,1,fp8,fp8,0,1.4679199854532878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,2,128,0,1,float16,fp8,0,1.820133368174235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,4,128,0,1,float16,float16,0,1.8518293698628743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,4,128,0,1,float16,fp8,0,1.816442648569743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,4,128,0,1,fp8,fp8,0,1.4761759440104167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,8,128,0,1,float16,float16,0,1.8682773907979329
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,8,128,0,1,float16,fp8,0,1.8443573315938313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,40,128,0,1,float16,float16,0,1.0187573432922363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,40,8,128,0,1,fp8,fp8,0,1.4860639572143555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,40,128,0,1,float16,fp8,0,1.0285600026448567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,40,128,0,1,fp8,fp8,0,0.8568373521169027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,1,128,0,1,float16,float16,0,0.9255092938741049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,1,128,0,1,float16,fp8,0,0.9436000188191732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,1,128,0,1,fp8,fp8,0,0.7694506645202637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,2,128,0,1,float16,float16,0,0.9385333061218262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,2,128,0,1,float16,fp8,0,0.9349973201751709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,2,128,0,1,fp8,fp8,0,0.7831146717071533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,4,128,0,1,float16,float16,0,0.9344639778137207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,4,128,0,1,float16,fp8,0,0.9380213419596354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,8,128,0,1,float16,fp8,0,0.943674643834432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,4,128,0,1,fp8,fp8,0,0.7608746687571207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,8,128,0,1,float16,float16,0,0.9345920085906982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,40,128,0,1,float16,float16,0,0.5283786853154501
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,40,8,128,0,1,fp8,fp8,0,0.7768800258636475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,40,128,0,1,float16,fp8,0,0.5391039848327637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,40,128,0,1,fp8,fp8,0,0.45476798216501874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,1,128,0,1,float16,float16,0,0.49219731489817303
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,2,128,0,1,float16,fp8,0,0.5015146732330322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,1,128,0,1,float16,fp8,0,0.4960586627324422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,1,128,0,1,fp8,fp8,0,0.4053013324737549
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,2,128,0,1,float16,float16,0,0.49582401911417645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,2,128,0,1,fp8,fp8,0,0.4073493480682373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,4,128,0,1,float16,float16,0,0.49717334906260174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,4,128,0,1,float16,fp8,0,0.49988798300425213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,4,128,0,1,fp8,fp8,0,0.41316266854604083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,8,128,0,1,float16,float16,0,0.5006986856460571
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,8,128,0,1,float16,fp8,0,0.5049706697463989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,40,8,128,0,1,fp8,fp8,0,0.41409067312876385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,40,128,0,1,float16,float16,0,0.2905813256899516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,40,128,0,1,float16,fp8,0,0.29953600962956745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,40,128,0,1,fp8,fp8,0,0.24664533138275146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,1,128,0,1,float16,float16,0,0.2791999975840251
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,1,128,0,1,float16,fp8,0,0.2791840036710103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,1,128,0,1,fp8,fp8,0,0.21694932381312051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,2,128,0,1,float16,float16,0,0.27801599105199176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,2,128,0,1,float16,fp8,0,0.2823946674664815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,2,128,0,1,fp8,fp8,0,0.21709867318471274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,8,128,0,1,fp8,fp8,0,0.22155199448267618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,40,128,0,1,float16,float16,0,0.16174933314323425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,40,128,0,1,float16,fp8,0,0.16381866733233133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,4,128,0,1,float16,float16,0,0.2810080051422119
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,40,128,0,1,fp8,fp8,0,0.142384002606074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,1,128,0,1,float16,fp8,0,0.15366400281588236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,4,128,0,1,float16,fp8,0,0.28277333577473956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,4,128,0,1,fp8,fp8,0,0.21709332863489786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,8,128,0,1,float16,float16,0,0.28014934062957764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,40,8,128,0,1,float16,fp8,0,0.28441067536671955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,1,128,0,1,float16,float16,0,0.15331733226776123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,4,128,0,1,float16,fp8,0,0.1553439994653066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,1,128,0,1,fp8,fp8,0,0.1288053294022878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,2,128,0,1,float16,float16,0,0.1530933380126953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,2,128,0,1,float16,fp8,0,0.15334933002789816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,2,128,0,1,fp8,fp8,0,0.12893866499265036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,4,128,0,1,float16,float16,0,0.1530133287111918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,4,128,0,1,fp8,fp8,0,0.13130133350690207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,8,128,0,1,float16,float16,0,0.15478400389353433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,8,128,0,1,float16,fp8,0,0.1563093364238739
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,40,8,128,0,1,fp8,fp8,0,0.13225600123405457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,1,128,0,1,fp8,fp8,0,3.823376019795736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,1,128,0,1,float16,float16,0,4.814181327819824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,1,128,0,1,float16,fp8,0,4.743029276529948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,2,128,0,1,fp8,fp8,0,3.870944023132324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,2,128,0,1,float16,float16,0,4.862511952718099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,2,128,0,1,float16,fp8,0,4.727866808573405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,4,128,0,1,float16,float16,0,4.860325177510579
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,4,128,0,1,float16,fp8,0,4.8837385177612305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,40,128,0,1,float16,float16,0,2.6136533419291177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,4,128,0,1,fp8,fp8,0,3.925333340962728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,40,128,0,1,float16,fp8,0,2.6130453745524087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,40,128,0,1,fp8,fp8,0,2.2303466796875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,8,128,0,1,float16,float16,0,4.922560056050618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,1,128,0,1,float16,float16,0,2.3697546323140464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,8,128,0,1,fp8,fp8,0,3.9627040227254233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,40,8,128,0,1,float16,fp8,0,4.975786526997884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,1,128,0,1,fp8,fp8,0,1.929632027943929
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,1,128,0,1,float16,fp8,0,2.38265593846639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,2,128,0,1,float16,float16,0,2.3121652603149414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,2,128,0,1,fp8,fp8,0,1.9729280471801758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,2,128,0,1,float16,fp8,0,2.3570079803466797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,4,128,0,1,float16,float16,0,2.389402707417806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,4,128,0,1,float16,fp8,0,2.305386702219645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,4,128,0,1,fp8,fp8,0,1.951365311940511
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,8,128,0,1,float16,float16,0,2.3848160107930503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,8,128,0,1,fp8,fp8,0,1.9841972986857097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,40,8,128,0,1,float16,fp8,0,2.3659733136494956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,40,128,0,1,float16,float16,0,1.2852853139241536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,40,128,0,1,fp8,fp8,0,1.132794698079427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,40,128,0,1,float16,fp8,0,1.3123359680175781
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,1,128,0,1,float16,float16,0,1.1494399706522624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,1,128,0,1,float16,fp8,0,1.1660053730010986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,1,128,0,1,fp8,fp8,0,0.9815519650777181
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,2,128,0,1,float16,float16,0,1.1589226722717285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,2,128,0,1,float16,fp8,0,1.175317366917928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,2,128,0,1,fp8,fp8,0,0.9832853476206461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,4,128,0,1,float16,fp8,0,1.1660959720611572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,4,128,0,1,float16,float16,0,1.1679573059082031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,4,128,0,1,fp8,fp8,0,0.9912213484446207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,8,128,0,1,float16,float16,0,1.1682240168253581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,8,128,0,1,float16,fp8,0,1.1795093218485515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,40,8,128,0,1,fp8,fp8,0,1.0181653499603271
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,40,128,0,1,float16,float16,0,0.662389318148295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,40,128,0,1,float16,fp8,0,0.6677119731903076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,40,128,0,1,fp8,fp8,0,0.5855786800384521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,1,128,0,1,float16,float16,0,0.5990560054779053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,1,128,0,1,float16,fp8,0,0.6086399952570597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,1,128,0,1,fp8,fp8,0,0.5135466655095419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,2,128,0,1,float16,float16,0,0.5999626715977987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,2,128,0,1,float16,fp8,0,0.6075733502705892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,2,128,0,1,fp8,fp8,0,0.5193653504053751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,4,128,0,1,float16,float16,0,0.6034133434295654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,4,128,0,1,float16,fp8,0,0.6099466482798258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,4,128,0,1,fp8,fp8,0,0.5204586585362753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,8,128,0,1,float16,float16,0,0.6093066533406576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,8,128,0,1,float16,fp8,0,0.6210240125656128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,1,128,0,1,float16,float16,0,0.32638933261235553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,40,8,128,0,1,fp8,fp8,0,0.5270666678746542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,40,128,0,1,float16,float16,0,0.3476746479670207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,40,128,0,1,fp8,fp8,0,0.3119093378384908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,40,128,0,1,float16,fp8,0,0.3564480145772298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,1,128,0,1,float16,fp8,0,0.32850666840871173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,1,128,0,1,fp8,fp8,0,0.26390933990478516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,2,128,0,1,float16,float16,0,0.3285920023918152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,2,128,0,1,float16,fp8,0,0.3304160038630168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,2,128,0,1,fp8,fp8,0,0.26480533679326373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,4,128,0,1,float16,float16,0,0.3240426580111186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,4,128,0,1,float16,fp8,0,0.33055466413497925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,4,128,0,1,fp8,fp8,0,0.2661813298861186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,8,128,0,1,float16,float16,0,0.32943467299143475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,8,128,0,1,float16,fp8,0,0.33432531356811523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,40,8,128,0,1,fp8,fp8,0,0.27190933624903363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,40,128,0,1,float16,float16,0,0.19588800271352133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,40,128,0,1,float16,fp8,0,0.2004106640815735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,40,128,0,1,fp8,fp8,0,0.1699733336766561
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,1,128,0,1,float16,float16,0,0.17780800660451254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,4,128,0,1,float16,float16,0,0.17997866868972778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,1,128,0,1,float16,fp8,0,0.17918932437896729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,1,128,0,1,fp8,fp8,0,0.15186132987340292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,2,128,0,1,float16,float16,0,0.17861332496007284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,2,128,0,1,float16,fp8,0,0.17919466892878214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,2,128,0,1,fp8,fp8,0,0.14970133701960245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,40,128,0,1,float16,float16,0,0.1106773316860199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,4,128,0,1,float16,fp8,0,0.17993066708246866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,4,128,0,1,fp8,fp8,0,0.15173332889874777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,8,128,0,1,float16,float16,0,0.18106132745742798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,8,128,0,1,float16,fp8,0,0.18242132663726807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,40,8,128,0,1,fp8,fp8,0,0.15377599994341531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,40,128,0,1,float16,fp8,0,0.11401066184043884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,40,128,0,1,fp8,fp8,0,0.1027786632378896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,1,128,0,1,float16,float16,0,0.10518933335940044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,1,128,0,1,float16,fp8,0,0.10588266452153523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,1,128,0,1,fp8,fp8,0,0.09246933460235596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,4,128,0,1,fp8,fp8,0,0.09344533085823059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,2,128,0,1,float16,float16,0,0.10592533151308696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,2,128,0,1,float16,fp8,0,0.10681600371996562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,8,128,0,1,fp8,fp8,0,0.09636267026265462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,2,128,0,1,fp8,fp8,0,0.0928053359190623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,4,128,0,1,float16,float16,0,0.10501333077748616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,4,128,0,1,float16,fp8,0,0.10635200142860413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,8,128,0,1,float16,float16,0,0.10598933696746826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,40,8,128,0,1,float16,fp8,0,0.10729599992434184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,1,128,0,1,float16,float16,0,4.882944107055664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,1,128,0,1,fp8,fp8,0,4.233759880065918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,1,128,0,1,float16,fp8,0,4.8577226003011065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,2,128,0,1,float16,float16,0,4.956128120422363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,2,128,0,1,fp8,fp8,0,4.264410654703776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,2,128,0,1,float16,fp8,0,4.980650583902995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,4,128,0,1,float16,float16,0,5.1477813720703125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,4,128,0,1,float16,fp8,0,5.216245333353679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,40,128,0,1,float16,float16,0,2.907109260559082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,40,128,0,1,float16,fp8,0,2.8396533330281577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,4,128,0,1,fp8,fp8,0,4.344901402791341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,40,128,0,1,fp8,fp8,0,2.4742132822672525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,1,128,0,1,float16,float16,0,2.4351679484049478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,8,128,0,1,float16,float16,0,5.256074587504069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,8,128,0,1,fp8,fp8,0,4.413018544514974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,40,8,128,0,1,float16,fp8,0,5.209360122680664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,1,128,0,1,fp8,fp8,0,2.15885337193807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,1,128,0,1,float16,fp8,0,2.42411740620931
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,2,128,0,1,float16,float16,0,2.4524265925089517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,2,128,0,1,float16,fp8,0,2.383024056752523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,2,128,0,1,fp8,fp8,0,2.1391199429829917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,4,128,0,1,float16,float16,0,2.447381337483724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,4,128,0,1,float16,fp8,0,2.413877328236898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,4,128,0,1,fp8,fp8,0,2.174762725830078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,40,128,0,1,float16,float16,0,1.4331893920898438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,8,128,0,1,float16,float16,0,2.581450621287028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,40,128,0,1,float16,fp8,0,1.4029812812805176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,40,128,0,1,fp8,fp8,0,1.2477013270060222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,1,128,0,1,float16,float16,0,1.1971360047658284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,8,128,0,1,fp8,fp8,0,2.210533301035563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,40,8,128,0,1,float16,fp8,0,2.5584746996561685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,1,128,0,1,float16,fp8,0,1.1994933287302654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,1,128,0,1,fp8,fp8,0,1.0663572947184246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,2,128,0,1,float16,float16,0,1.2038666407267253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,2,128,0,1,float16,fp8,0,1.2049439748128254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,2,128,0,1,fp8,fp8,0,1.0684160391489665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,4,128,0,1,float16,float16,0,1.2105333010355632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,4,128,0,1,fp8,fp8,0,1.0903200308481853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,4,128,0,1,float16,fp8,0,1.2124053637186687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,8,128,0,1,float16,float16,0,1.2462560335795085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,8,128,0,1,float16,fp8,0,1.2541440327962239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,40,128,0,1,float16,float16,0,0.7237813472747803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,40,8,128,0,1,fp8,fp8,0,1.1051733493804932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,40,128,0,1,float16,fp8,0,0.705514669418335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,40,128,0,1,fp8,fp8,0,0.6273226737976074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,1,128,0,1,float16,float16,0,0.6097493171691895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,1,128,0,1,float16,fp8,0,0.6128640174865723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,1,128,0,1,fp8,fp8,0,0.5318133433659872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,2,128,0,1,float16,float16,0,0.6111466487248739
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,2,128,0,1,float16,fp8,0,0.6149119933446249
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,2,128,0,1,fp8,fp8,0,0.5319519837697347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,4,128,0,1,float16,float16,0,0.6146453221638998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,4,128,0,1,float16,fp8,0,0.6202826499938965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,4,128,0,1,fp8,fp8,0,0.55458664894104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,8,128,0,1,float16,float16,0,0.6308693488438925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,8,128,0,1,fp8,fp8,0,0.5528213183085123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,40,128,0,1,float16,float16,0,0.37569598356882733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,40,8,128,0,1,float16,fp8,0,0.6273920138676962
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,40,128,0,1,float16,fp8,0,0.3657919963200887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,2,128,0,1,float16,float16,0,0.3171306649843852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,40,128,0,1,fp8,fp8,0,0.3213546673456828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,1,128,0,1,float16,float16,0,0.3161333401997884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,1,128,0,1,float16,fp8,0,0.3178986708323161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,1,128,0,1,fp8,fp8,0,0.2780639926592509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,2,128,0,1,float16,fp8,0,0.32081600030263263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,2,128,0,1,fp8,fp8,0,0.27802133560180664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,4,128,0,1,float16,float16,0,0.320853332678477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,4,128,0,1,float16,fp8,0,0.3229920069376628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,4,128,0,1,fp8,fp8,0,0.28428266445795697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,8,128,0,1,float16,float16,0,0.3270240028699239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,8,128,0,1,float16,fp8,0,0.32545600334803265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,40,8,128,0,1,fp8,fp8,0,0.28437334299087524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,40,128,0,1,float16,float16,0,0.20149334271748862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,40,128,0,1,float16,fp8,0,0.19728533426920572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,40,128,0,1,fp8,fp8,0,0.16029333074887595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,1,128,0,1,float16,float16,0,0.1710666616757711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,1,128,0,1,float16,fp8,0,0.1715573271115621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,1,128,0,1,fp8,fp8,0,0.13449600338935852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,2,128,0,1,float16,float16,0,0.1721173326174418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,2,128,0,1,float16,fp8,0,0.17384000619252524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,2,128,0,1,fp8,fp8,0,0.13607466220855713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,4,128,0,1,float16,float16,0,0.17381866772969565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,4,128,0,1,float16,fp8,0,0.1737013260523478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,4,128,0,1,fp8,fp8,0,0.1384266714255015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,8,128,0,1,float16,float16,0,0.17574399709701538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,8,128,0,1,float16,fp8,0,0.17633599042892456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,1,128,0,1,float16,fp8,0,0.09075199564297994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,40,8,128,0,1,fp8,fp8,0,0.1399626632531484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,40,128,0,1,float16,float16,0,0.10765332976977031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,40,128,0,1,float16,fp8,0,0.10642133156458537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,40,128,0,1,fp8,fp8,0,0.08954133590062459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,4,128,0,1,float16,float16,0,0.09197866916656494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,1,128,0,1,float16,float16,0,0.08865599830945332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,1,128,0,1,fp8,fp8,0,0.07521600027879079
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,2,128,0,1,float16,float16,0,0.09052266677220662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,2,128,0,1,float16,fp8,0,0.09143466750780742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,8,128,0,1,fp8,fp8,0,0.07925866544246674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,40,128,0,1,float16,float16,0,0.059952000776926674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,2,128,0,1,fp8,fp8,0,0.07635200023651123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,4,128,0,1,float16,fp8,0,0.09155733386675517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,4,128,0,1,fp8,fp8,0,0.07788800199826558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,8,128,0,1,float16,float16,0,0.09317866961161296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,40,8,128,0,1,float16,fp8,0,0.09259200096130371
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,40,128,0,1,float16,fp8,0,0.06003733476003011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,2,128,0,1,float16,fp8,0,0.05215999980767568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,40,128,0,1,fp8,fp8,0,0.05279466509819031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,1,128,0,1,float16,float16,0,0.05186666548252106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,1,128,0,1,float16,fp8,0,0.052245333790779114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,1,128,0,1,fp8,fp8,0,0.044549331068992615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,2,128,0,1,float16,float16,0,0.05287466446558634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,2,128,0,1,fp8,fp8,0,0.044735997915267944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,4,128,0,1,float16,float16,0,0.0528053343296051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,4,128,0,1,float16,fp8,0,0.05297600229581197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,4,128,0,1,fp8,fp8,0,0.046154667933781944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,8,128,0,1,float16,float16,0,0.053264002005259194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,8,128,0,1,float16,fp8,0,0.05383466680844625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,40,8,128,0,1,fp8,fp8,0,0.04820266862710317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,1,128,0,1,float16,float16,0,3.717733383178711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,1,128,0,1,fp8,fp8,0,3.4160000483194985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,1,128,0,1,float16,fp8,0,3.719034512837728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,2,128,0,1,float16,float16,0,3.7203680674235025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,2,128,0,1,fp8,fp8,0,3.457200050354004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,2,128,0,1,float16,fp8,0,3.7778027852376304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,4,128,0,1,float16,float16,0,3.802618662516276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,4,128,0,1,float16,fp8,0,3.8544108072916665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,4,128,0,1,fp8,fp8,0,3.5029118855794272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,40,128,0,1,float16,float16,0,2.333199977874756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,40,128,0,1,float16,fp8,0,2.2892533938090005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,40,128,0,1,fp8,fp8,0,2.069706598917643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,8,128,0,1,fp8,fp8,0,3.5752105712890625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,8,128,0,1,float16,fp8,0,3.964069366455078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,40,8,128,0,1,float16,float16,0,3.9858665466308594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,1,128,0,1,float16,float16,0,1.832896073659261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,1,128,0,1,float16,fp8,0,1.8410879770914714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,1,128,0,1,fp8,fp8,0,1.748586654663086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,2,128,0,1,float16,float16,0,1.8593974113464355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,2,128,0,1,float16,fp8,0,1.8397812843322754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,2,128,0,1,fp8,fp8,0,1.726245403289795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,4,128,0,1,float16,float16,0,1.8726773262023926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,4,128,0,1,float16,fp8,0,1.8772106170654297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,4,128,0,1,fp8,fp8,0,1.7475147247314453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,40,128,0,1,float16,float16,0,1.1541706720987956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,8,128,0,1,float16,float16,0,1.9942399660746257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,8,128,0,1,float16,fp8,0,1.9890666007995605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,40,8,128,0,1,fp8,fp8,0,1.8046560287475586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,40,128,0,1,float16,fp8,0,1.1298186779022217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,1,128,0,1,float16,float16,0,0.9272053241729736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,40,128,0,1,fp8,fp8,0,1.040981372197469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,1,128,0,1,float16,fp8,0,0.9277919928232828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,1,128,0,1,fp8,fp8,0,0.8309973080952963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,2,128,0,1,float16,float16,0,0.936959981918335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,4,128,0,1,float16,fp8,0,0.9405653476715088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,2,128,0,1,float16,fp8,0,0.9409386316935221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,2,128,0,1,fp8,fp8,0,0.8582506974538168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,8,128,0,1,float16,float16,0,0.966917355855306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,4,128,0,1,float16,float16,0,0.9449493090311686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,4,128,0,1,fp8,fp8,0,0.8867733478546143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,8,128,0,1,float16,fp8,0,0.9911519686381022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,40,128,0,1,float16,float16,0,0.5889546473821005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,40,8,128,0,1,fp8,fp8,0,0.9007519880930582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,40,128,0,1,float16,fp8,0,0.5755360126495361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,40,128,0,1,fp8,fp8,0,0.5212426582972208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,1,128,0,1,float16,float16,0,0.47091201941172284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,1,128,0,1,float16,fp8,0,0.47073598702748615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,1,128,0,1,fp8,fp8,0,0.42420268058776855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,2,128,0,1,float16,float16,0,0.47415467103322345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,2,128,0,1,float16,fp8,0,0.47352532545725506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,2,128,0,1,fp8,fp8,0,0.4328320026397705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,4,128,0,1,float16,float16,0,0.4799199899037679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,4,128,0,1,float16,fp8,0,0.4817440112431844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,4,128,0,1,fp8,fp8,0,0.4368853171666463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,8,128,0,1,float16,float16,0,0.48818135261535645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,8,128,0,1,float16,fp8,0,0.4891680081685384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,40,8,128,0,1,fp8,fp8,0,0.44802133242289227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,40,128,0,1,float16,float16,0,0.30587732791900635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,40,128,0,1,float16,fp8,0,0.29974933465321857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,40,128,0,1,fp8,fp8,0,0.26844267050425213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,1,128,0,1,float16,float16,0,0.24544533093770346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,1,128,0,1,float16,fp8,0,0.24514132738113403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,1,128,0,1,fp8,fp8,0,0.22090667486190796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,2,128,0,1,float16,float16,0,0.24928534030914307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,2,128,0,1,float16,fp8,0,0.24755199750264487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,2,128,0,1,fp8,fp8,0,0.22387200593948364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,4,128,0,1,float16,float16,0,0.2524213393529256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,4,128,0,1,float16,fp8,0,0.2536853353182475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,4,128,0,1,fp8,fp8,0,0.22758400440216064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,8,128,0,1,float16,float16,0,0.2558986743291219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,8,128,0,1,float16,fp8,0,0.254202663898468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,40,8,128,0,1,fp8,fp8,0,0.22993600368499756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,40,128,0,1,float16,float16,0,0.16354667147000632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,40,128,0,1,float16,fp8,0,0.1604586640993754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,40,128,0,1,fp8,fp8,0,0.13707199692726135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,1,128,0,1,float16,float16,0,0.13319999972979227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,1,128,0,1,float16,fp8,0,0.1339040001233419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,4,128,0,1,float16,fp8,0,0.1362773378690084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,1,128,0,1,fp8,fp8,0,0.11338667074839275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,2,128,0,1,float16,float16,0,0.1334933340549469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,2,128,0,1,float16,fp8,0,0.13562666376431784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,2,128,0,1,fp8,fp8,0,0.11430933078130086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,4,128,0,1,float16,float16,0,0.13573333621025085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,4,128,0,1,fp8,fp8,0,0.11512000362078349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,8,128,0,1,float16,float16,0,0.13725866874059042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,8,128,0,1,float16,fp8,0,0.13723733027776083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,40,8,128,0,1,fp8,fp8,0,0.1179093321164449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,40,128,0,1,float16,float16,0,0.09269866347312927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,40,128,0,1,float16,fp8,0,0.09026133020718892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,40,128,0,1,fp8,fp8,0,0.07769600053628285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,1,128,0,1,float16,float16,0,0.07282666862010956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,1,128,0,1,float16,fp8,0,0.07449066638946533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,1,128,0,1,fp8,fp8,0,0.0637546678384145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,2,128,0,1,float16,float16,0,0.07409599920113881
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,2,128,0,1,float16,fp8,0,0.07422933479150136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,2,128,0,1,fp8,fp8,0,0.0641546646753947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,4,128,0,1,float16,float16,0,0.0745066652695338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,4,128,0,1,float16,fp8,0,0.075013334552447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,4,128,0,1,fp8,fp8,0,0.06596266726652782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,8,128,0,1,float16,float16,0,0.07622933387756348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,8,128,0,1,float16,fp8,0,0.07665066421031952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,40,8,128,0,1,fp8,fp8,0,0.06769066552321117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,40,128,0,1,float16,float16,0,0.051728000243504844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,40,128,0,1,float16,fp8,0,0.050000001986821495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,40,128,0,1,fp8,fp8,0,0.04576000074545542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,1,128,0,1,float16,float16,0,0.04346133271853129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,1,128,0,1,float16,fp8,0,0.04381333291530609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,1,128,0,1,fp8,fp8,0,0.03748266647259394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,2,128,0,1,float16,float16,0,0.043151999513308205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,2,128,0,1,float16,fp8,0,0.04460800190766653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,2,128,0,1,fp8,fp8,0,0.03765333443880081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,4,128,0,1,float16,float16,0,0.04498666524887085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,8,128,0,1,fp8,fp8,0,0.042277331153551735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,4,128,0,1,float16,fp8,0,0.04475200176239014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,4,128,0,1,fp8,fp8,0,0.03935466706752777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,8,128,0,1,float16,float16,0,0.045141334335009255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,40,8,128,0,1,float16,fp8,0,0.044693330923716225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,40,128,0,1,float16,float16,0,0.030559999247392017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,40,128,0,1,float16,fp8,0,0.0315733328461647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,40,128,0,1,fp8,fp8,0,0.029872000217437744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,1,128,0,1,float16,float16,0,0.028837333122889202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,2,128,0,1,fp8,fp8,0,0.027098665634791057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,1,128,0,1,float16,fp8,0,0.028565332293510437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,4,128,0,1,float16,fp8,0,0.029546665648619335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,1,128,0,1,fp8,fp8,0,0.02664533257484436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,8,128,0,1,float16,float16,0,0.029626667499542236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,2,128,0,1,float16,float16,0,0.028351999819278717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,2,128,0,1,float16,fp8,0,0.028938665986061096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,4,128,0,1,float16,float16,0,0.029152000943819683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,4,128,0,1,fp8,fp8,0,0.02759466568628947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,8,128,0,1,float16,fp8,0,0.030031998952229817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,40,8,128,0,1,fp8,fp8,0,0.02775999903678894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,1,128,0,1,float16,float16,0,1.5605279604593914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,1,128,0,1,float16,fp8,0,1.5625012715657551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,1,128,0,1,fp8,fp8,0,1.3467893600463867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,2,128,0,1,float16,float16,0,1.5710506439208984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,2,128,0,1,fp8,fp8,0,1.3769440650939941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,2,128,0,1,float16,fp8,0,1.5699626604715984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,4,128,0,1,float16,float16,0,1.594165325164795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,4,128,0,1,float16,fp8,0,1.5919520060221355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,4,128,0,1,fp8,fp8,0,1.4059359232584636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,8,128,0,1,float16,float16,0,1.719823996225993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,40,128,0,1,float16,float16,0,1.0185173352559407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,8,128,0,1,float16,fp8,0,1.7015199661254883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,40,128,0,1,float16,fp8,0,0.9886399904886881
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,40,128,0,1,fp8,fp8,0,0.8723039627075195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,40,8,128,0,1,fp8,fp8,0,1.4424692789713542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,1,128,0,1,float16,float16,0,0.79258131980896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,1,128,0,1,float16,fp8,0,0.7933440208435059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,1,128,0,1,fp8,fp8,0,0.6600906848907471
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,2,128,0,1,float16,float16,0,0.7952586809794108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,2,128,0,1,float16,fp8,0,0.7943039735158285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,2,128,0,1,fp8,fp8,0,0.6929492950439453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,4,128,0,1,float16,float16,0,0.8067146937052408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,4,128,0,1,float16,fp8,0,0.8038079738616943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,4,128,0,1,fp8,fp8,0,0.7054133415222168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,8,128,0,1,float16,float16,0,0.8546826839447021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,8,128,0,1,float16,fp8,0,0.851967970530192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,40,128,0,1,float16,float16,0,0.520853320757548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,40,8,128,0,1,fp8,fp8,0,0.7230292956034342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,40,128,0,1,float16,fp8,0,0.5049493312835693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,40,128,0,1,fp8,fp8,0,0.4370453357696533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,1,128,0,1,float16,float16,0,0.40275732676188153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,1,128,0,1,float16,fp8,0,0.4021120071411133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,1,128,0,1,fp8,fp8,0,0.3381386597951253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,2,128,0,1,float16,float16,0,0.40572798252105713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,2,128,0,1,float16,fp8,0,0.40537599722544354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,2,128,0,1,fp8,fp8,0,0.34293333689371747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,4,128,0,1,float16,float16,0,0.41355733076731366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,4,128,0,1,float16,fp8,0,0.41363199551900226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,4,128,0,1,fp8,fp8,0,0.3550186554590861
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,8,128,0,1,float16,float16,0,0.4219839970270793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,8,128,0,1,float16,fp8,0,0.420629342397054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,40,8,128,0,1,fp8,fp8,0,0.3640799919764201
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,40,128,0,1,float16,float16,0,0.271504004796346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,40,128,0,1,float16,fp8,0,0.26503467559814453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,40,128,0,1,fp8,fp8,0,0.22717867294947305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,1,128,0,1,float16,float16,0,0.20901866753896078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,1,128,0,1,float16,fp8,0,0.20995734135309854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,1,128,0,1,fp8,fp8,0,0.17931199073791504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,2,128,0,1,float16,float16,0,0.21144000689188638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,2,128,0,1,float16,fp8,0,0.21092800299326578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,2,128,0,1,fp8,fp8,0,0.17988799015680948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,8,128,0,1,fp8,fp8,0,0.18897066513697305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,4,128,0,1,float16,float16,0,0.21638933817545572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,4,128,0,1,float16,fp8,0,0.21654399236043295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,4,128,0,1,fp8,fp8,0,0.1834239959716797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,8,128,0,1,float16,float16,0,0.22131200631459555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,40,8,128,0,1,float16,fp8,0,0.21859200795491537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,40,128,0,1,float16,float16,0,0.14537066221237183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,40,128,0,1,float16,fp8,0,0.14246933658917746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,40,128,0,1,fp8,fp8,0,0.12525332967440286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,1,128,0,1,float16,float16,0,0.1149173378944397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,1,128,0,1,float16,fp8,0,0.11532800396283467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,1,128,0,1,fp8,fp8,0,0.10016533732414246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,2,128,0,1,float16,float16,0,0.11558399597803752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,2,128,0,1,float16,fp8,0,0.11638933420181274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,2,128,0,1,fp8,fp8,0,0.100490669409434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,4,128,0,1,float16,float16,0,0.11801600456237793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,4,128,0,1,float16,fp8,0,0.11772800485293071
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,4,128,0,1,fp8,fp8,0,0.10417067011197408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,8,128,0,1,float16,float16,0,0.11999467015266418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,8,128,0,1,float16,fp8,0,0.11946666240692139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,40,8,128,0,1,fp8,fp8,0,0.10612799723943074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,1,128,0,1,float16,fp8,0,0.06540800134340923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,40,128,0,1,float16,float16,0,0.08659199873606364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,40,128,0,1,float16,fp8,0,0.08601599931716919
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,40,128,0,1,fp8,fp8,0,0.07204799850781758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,1,128,0,1,float16,float16,0,0.06489066779613495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,1,128,0,1,fp8,fp8,0,0.056176001826922096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,2,128,0,1,float16,float16,0,0.06592533489068349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,2,128,0,1,float16,fp8,0,0.06576000154018402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,2,128,0,1,fp8,fp8,0,0.05749333401521047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,4,128,0,1,float16,float16,0,0.06619200110435486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,4,128,0,1,float16,fp8,0,0.06588799754778545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,4,128,0,1,fp8,fp8,0,0.058431997895240784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,8,128,0,1,float16,float16,0,0.0680320014556249
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,8,128,0,1,float16,fp8,0,0.0687306672334671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,1,128,0,1,float16,float16,0,0.03989866624275843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,40,8,128,0,1,fp8,fp8,0,0.06061333417892456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,40,128,0,1,float16,float16,0,0.04738666613896688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,40,128,0,1,float16,fp8,0,0.04598933458328247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,40,128,0,1,fp8,fp8,0,0.04318933188915253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,1,128,0,1,float16,fp8,0,0.040362666050593056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,1,128,0,1,fp8,fp8,0,0.03615466753641764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,2,128,0,1,float16,float16,0,0.040181333820025124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,2,128,0,1,float16,fp8,0,0.040976000328858696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,2,128,0,1,fp8,fp8,0,0.035573333501815796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,4,128,0,1,float16,float16,0,0.0408693328499794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,4,128,0,1,float16,fp8,0,0.04146133363246918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,4,128,0,1,fp8,fp8,0,0.03669333209594091
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,8,128,0,1,float16,float16,0,0.04155199974775314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,8,128,0,1,float16,fp8,0,0.04227200150489807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,40,8,128,0,1,fp8,fp8,0,0.039434666434923805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,40,128,0,1,float16,float16,0,0.030224000414212544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,40,128,0,1,float16,fp8,0,0.029487999776999157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,2,128,0,1,float16,fp8,0,0.028255999088287354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,40,128,0,1,fp8,fp8,0,0.028501334289709728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,1,128,0,1,float16,float16,0,0.02749866743882497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,1,128,0,1,float16,fp8,0,0.028143999477227528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,2,128,0,1,float16,float16,0,0.027903998891512554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,1,128,0,1,fp8,fp8,0,0.02693866689999898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,2,128,0,1,fp8,fp8,0,0.026026666164398193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,4,128,0,1,float16,float16,0,0.02777066578467687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,4,128,0,1,float16,fp8,0,0.028351999819278717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,8,128,0,1,float16,float16,0,0.028581333657105763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,4,128,0,1,fp8,fp8,0,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,8,128,0,1,fp8,fp8,0,0.027647999425729115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,40,8,128,0,1,float16,fp8,0,0.029018667836983997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,40,128,0,1,float16,float16,0,0.02248000105222066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,40,128,0,1,float16,fp8,0,0.022005334496498108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,40,128,0,1,fp8,fp8,0,0.02182399978240331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,1,128,0,1,float16,float16,0,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,1,128,0,1,float16,fp8,0,0.021221332252025604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,1,128,0,1,fp8,fp8,0,0.019610666980346043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,4,128,0,1,fp8,fp8,0,0.020773333807786305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,2,128,0,1,float16,float16,0,0.02056533346573512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,2,128,0,1,float16,fp8,0,0.02201066662867864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,2,128,0,1,fp8,fp8,0,0.019941333681344986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,4,128,0,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,4,128,0,1,float16,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,8,128,0,1,float16,float16,0,0.020869334538777668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,8,128,0,1,float16,fp8,0,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,40,8,128,0,1,fp8,fp8,0,0.021573332448800404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,1,128,0,1,float16,float16,0,0.7285919984181722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,1,128,0,1,float16,fp8,0,0.727679967880249
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,1,128,0,1,fp8,fp8,0,0.6533279816309611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,2,128,0,1,float16,float16,0,0.7332800229390463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,2,128,0,1,float16,fp8,0,0.7313120365142822
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,2,128,0,1,fp8,fp8,0,0.6879306634267172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,4,128,0,1,float16,float16,0,0.7392746607462565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,4,128,0,1,float16,fp8,0,0.738368034362793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,4,128,0,1,fp8,fp8,0,0.7066720326741537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,8,128,0,1,float16,float16,0,0.7757973670959473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,8,128,0,1,float16,fp8,0,0.7783146699269613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,40,8,128,0,1,fp8,fp8,0,0.7178986867268881
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,40,128,0,1,float16,float16,0,0.5002079804738363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,40,128,0,1,float16,fp8,0,0.4848533471425374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,40,128,0,1,fp8,fp8,0,0.43589866161346436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,1,128,0,1,float16,float16,0,0.3722879886627197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,1,128,0,1,float16,fp8,0,0.37193600336710614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,1,128,0,1,fp8,fp8,0,0.3378666639328003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,2,128,0,1,float16,float16,0,0.3755146662394206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,2,128,0,1,float16,fp8,0,0.3725706736246745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,2,128,0,1,fp8,fp8,0,0.3394879897435506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,4,128,0,1,float16,float16,0,0.3808799982070923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,4,128,0,1,float16,fp8,0,0.3787519931793213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,4,128,0,1,fp8,fp8,0,0.3481866518656413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,8,128,0,1,float16,float16,0,0.3921813170115153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,8,128,0,1,float16,fp8,0,0.39188798268636066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,40,128,0,1,float16,float16,0,0.26450133323669434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,40,8,128,0,1,fp8,fp8,0,0.365231990814209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,40,128,0,1,float16,fp8,0,0.2568906744321187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,40,128,0,1,fp8,fp8,0,0.22613332668940225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,1,128,0,1,float16,float16,0,0.19269333283106485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,1,128,0,1,float16,fp8,0,0.1930560072263082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,1,128,0,1,fp8,fp8,0,0.17806933323542276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,2,128,0,1,float16,float16,0,0.19674134254455566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,2,128,0,1,float16,fp8,0,0.19527467091878256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,2,128,0,1,fp8,fp8,0,0.18146665891011557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,4,128,0,1,float16,float16,0,0.2002399961153666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,4,128,0,1,float16,fp8,0,0.20198933283487955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,40,128,0,1,float16,fp8,0,0.14139733711878458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,4,128,0,1,fp8,fp8,0,0.1837493379910787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,8,128,0,1,float16,float16,0,0.20578666528066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,8,128,0,1,float16,fp8,0,0.20446399847666422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,40,8,128,0,1,fp8,fp8,0,0.19006933768590292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,40,128,0,1,float16,float16,0,0.1450933317343394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,40,128,0,1,fp8,fp8,0,0.12343466281890869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,1,128,0,1,float16,float16,0,0.10710400342941284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,1,128,0,1,float16,fp8,0,0.10666666428248088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,1,128,0,1,fp8,fp8,0,0.09891200065612793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,2,128,0,1,float16,float16,0,0.1076586643854777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,2,128,0,1,float16,fp8,0,0.10939199725786845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,2,128,0,1,fp8,fp8,0,0.10017599662144978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,4,128,0,1,float16,float16,0,0.1109386682510376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,4,128,0,1,float16,fp8,0,0.11124266187349956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,4,128,0,1,fp8,fp8,0,0.10146133104960124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,8,128,0,1,float16,float16,0,0.11272000273068745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,8,128,0,1,float16,fp8,0,0.11284266908963521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,40,8,128,0,1,fp8,fp8,0,0.10514133175214131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,40,128,0,1,float16,float16,0,0.08342400193214417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,40,128,0,1,float16,fp8,0,0.0839466651280721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,40,128,0,1,fp8,fp8,0,0.07272000114123027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,1,128,0,1,float16,float16,0,0.06108266611893972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,1,128,0,1,float16,fp8,0,0.06235733131567637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,1,128,0,1,fp8,fp8,0,0.055861334005991616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,2,128,0,1,float16,float16,0,0.062021334966023765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,2,128,0,1,float16,fp8,0,0.0618399977684021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,2,128,0,1,fp8,fp8,0,0.056794668237368263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,4,128,0,1,float16,float16,0,0.06313600142796834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,4,128,0,1,float16,fp8,0,0.06267733375231425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,4,128,0,1,fp8,fp8,0,0.057909334699312844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,8,128,0,1,float16,float16,0,0.06464533507823944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,8,128,0,1,float16,fp8,0,0.06482133269309998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,40,8,128,0,1,fp8,fp8,0,0.060032000144322716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,40,128,0,1,float16,float16,0,0.045791998505592346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,40,128,0,1,float16,fp8,0,0.04435733457406362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,40,128,0,1,fp8,fp8,0,0.042581334710121155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,1,128,0,1,float16,float16,0,0.03855466594298681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,1,128,0,1,float16,fp8,0,0.03822399924198786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,1,128,0,1,fp8,fp8,0,0.035936000446478523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,2,128,0,1,float16,float16,0,0.03821333249409994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,2,128,0,1,float16,fp8,0,0.0386559988061587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,2,128,0,1,fp8,fp8,0,0.035775999228159584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,4,128,0,1,float16,float16,0,0.03920000046491623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,4,128,0,1,float16,fp8,0,0.03915733347336451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,4,128,0,1,fp8,fp8,0,0.037589333951473236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,8,128,0,1,float16,float16,0,0.03917866696914037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,1,128,0,1,float16,float16,0,0.02752000093460083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,8,128,0,1,float16,fp8,0,0.040661332507928215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,40,8,128,0,1,fp8,fp8,0,0.038362666964530945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,40,128,0,1,float16,float16,0,0.029781334102153778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,40,128,0,1,float16,fp8,0,0.030106666187445324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,40,128,0,1,fp8,fp8,0,0.02939733366171519
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,1,128,0,1,float16,fp8,0,0.028463999430338543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,1,128,0,1,fp8,fp8,0,0.025792000194390614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,2,128,0,1,float16,float16,0,0.027610667049884796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,2,128,0,1,float16,fp8,0,0.028512001037597656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,2,128,0,1,fp8,fp8,0,0.02585600068171819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,4,128,0,1,float16,float16,0,0.027653334041436512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,4,128,0,1,float16,fp8,0,0.028789333999156952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,4,128,0,1,fp8,fp8,0,0.02869333326816559
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,8,128,0,1,float16,float16,0,0.028938665986061096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,8,128,0,1,float16,fp8,0,0.028570666909217834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,40,8,128,0,1,fp8,fp8,0,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,40,128,0,1,float16,float16,0,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,40,128,0,1,float16,fp8,0,0.02142400046189626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,40,128,0,1,fp8,fp8,0,0.020784000555674236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,1,128,0,1,float16,float16,0,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,1,128,0,1,float16,fp8,0,0.020293333878119785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,1,128,0,1,fp8,fp8,0,0.020330666253964107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,4,128,0,1,fp8,fp8,0,0.019904000063737232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,2,128,0,1,float16,float16,0,0.019530666371186573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,2,128,0,1,float16,fp8,0,0.019941333681344986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,2,128,0,1,fp8,fp8,0,0.020741333564122517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,40,128,0,1,float16,float16,0,0.017701332767804463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,4,128,0,1,float16,float16,0,0.0199946661790212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,4,128,0,1,float16,fp8,0,0.01977066695690155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,8,128,0,1,float16,float16,0,0.02037866661945979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,8,128,0,1,float16,fp8,0,0.02041600023706754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,40,8,128,0,1,fp8,fp8,0,0.020938667158285778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,40,128,0,1,float16,fp8,0,0.0176959993938605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,40,128,0,1,fp8,fp8,0,0.018751999984184902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,1,128,0,1,float16,float16,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,1,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,1,128,0,1,fp8,fp8,0,0.017616000026464462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,2,128,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,2,128,0,1,float16,fp8,0,0.017423999806245167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,2,128,0,1,fp8,fp8,0,0.017770666629076004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,4,128,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,4,128,0,1,float16,fp8,0,0.01773333301146825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,4,128,0,1,fp8,fp8,0,0.01800000046690305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,8,128,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,1,128,0,1,float16,float16,0,0.4365066687266032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,1,128,0,1,float16,fp8,0,0.43619732062021893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,8,128,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,40,8,128,0,1,fp8,fp8,0,0.018373332917690277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,1,128,0,1,fp8,fp8,0,0.4813493490219116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,2,128,0,1,float16,float16,0,0.441429336865743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,2,128,0,1,float16,fp8,0,0.44042134284973145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,2,128,0,1,fp8,fp8,0,0.48578667640686035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,4,128,0,1,float16,fp8,0,0.44573867321014404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,4,128,0,1,float16,float16,0,0.4467039903004964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,4,128,0,1,fp8,fp8,0,0.4936586618423462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,8,128,0,1,float16,float16,0,0.4586293299992879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,8,128,0,1,float16,fp8,0,0.4538559913635254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,40,8,128,0,1,fp8,fp8,0,0.5052640040715536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,40,128,0,1,float16,float16,0,0.2879146734873454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,40,128,0,1,float16,fp8,0,0.28114134073257446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,40,128,0,1,fp8,fp8,0,0.2937866648038228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,1,128,0,1,float16,float16,0,0.2215893268585205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,1,128,0,1,float16,fp8,0,0.22229333718617758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,1,128,0,1,fp8,fp8,0,0.24901866912841797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,2,128,0,1,float16,float16,0,0.2265119949976603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,2,128,0,1,float16,fp8,0,0.22408533096313477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,2,128,0,1,fp8,fp8,0,0.25017066796620685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,8,128,0,1,float16,float16,0,0.2345973253250122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,4,128,0,1,float16,float16,0,0.2305813431739807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,4,128,0,1,float16,fp8,0,0.22949333985646567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,4,128,0,1,fp8,fp8,0,0.2568959991137187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,8,128,0,1,float16,fp8,0,0.2340959906578064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,40,8,128,0,1,fp8,fp8,0,0.25705599784851074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,40,128,0,1,float16,float16,0,0.15291733543078104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,40,128,0,1,float16,fp8,0,0.1486026644706726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,40,128,0,1,fp8,fp8,0,0.15795733531316122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,1,128,0,1,float16,float16,0,0.11980799833933513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,1,128,0,1,float16,fp8,0,0.1202880044778188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,1,128,0,1,fp8,fp8,0,0.1362666686375936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,2,128,0,1,float16,float16,0,0.11995200316111247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,2,128,0,1,float16,fp8,0,0.12007466952006023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,2,128,0,1,fp8,fp8,0,0.13648000359535217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,4,128,0,1,float16,float16,0,0.12269333004951477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,4,128,0,1,float16,fp8,0,0.12308800220489502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,4,128,0,1,fp8,fp8,0,0.13823466499646506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,40,128,0,1,fp8,fp8,0,0.0885813335577647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,8,128,0,1,float16,float16,0,0.12449066837628682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,8,128,0,1,float16,fp8,0,0.12592533230781555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,40,8,128,0,1,fp8,fp8,0,0.13853866855303446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,40,128,0,1,float16,float16,0,0.08551999926567078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,40,128,0,1,float16,fp8,0,0.0835040012995402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,1,128,0,1,float16,float16,0,0.06665599842866261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,1,128,0,1,float16,fp8,0,0.06739733119805653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,1,128,0,1,fp8,fp8,0,0.07513600091139476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,2,128,0,1,float16,float16,0,0.06782400111357371
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,2,128,0,1,float16,fp8,0,0.06790400048096974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,2,128,0,1,fp8,fp8,0,0.07572799921035767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,4,128,0,1,float16,float16,0,0.06751466790835063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,4,128,0,1,float16,fp8,0,0.06832000116507213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,4,128,0,1,fp8,fp8,0,0.07681066791216533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,8,128,0,1,float16,float16,0,0.07025066514809926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,8,128,0,1,float16,fp8,0,0.07007466753323872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,40,8,128,0,1,fp8,fp8,0,0.07878399888674419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,1,128,0,1,fp8,fp8,0,0.04331733286380768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,40,128,0,1,float16,float16,0,0.045461331804593406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,40,128,0,1,float16,fp8,0,0.04417066772778829
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,40,128,0,1,fp8,fp8,0,0.05086933573087057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,1,128,0,1,float16,float16,0,0.038506666819254555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,1,128,0,1,float16,fp8,0,0.03885333240032196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,2,128,0,1,float16,float16,0,0.039781334499518074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,2,128,0,1,float16,fp8,0,0.038618666430314384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,2,128,0,1,fp8,fp8,0,0.04363733530044556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,4,128,0,1,float16,float16,0,0.039733332892258964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,4,128,0,1,float16,fp8,0,0.03980266551176707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,4,128,0,1,fp8,fp8,0,0.04510400195916494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,8,128,0,1,float16,float16,0,0.04045866678158442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,8,128,0,1,float16,fp8,0,0.03995733211437861
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,40,8,128,0,1,fp8,fp8,0,0.04740799963474274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,40,128,0,1,float16,float16,0,0.029557332396507263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,40,128,0,1,float16,fp8,0,0.030293333033720653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,40,128,0,1,fp8,fp8,0,0.032986665765444435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,1,128,0,1,float16,float16,0,0.027664000789324444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,1,128,0,1,float16,fp8,0,0.027653334041436512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,1,128,0,1,fp8,fp8,0,0.030453334252039593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,2,128,0,1,float16,float16,0,0.027482666075229645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,2,128,0,1,float16,fp8,0,0.027808000644048054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,8,128,0,1,float16,fp8,0,0.02918400118748347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,2,128,0,1,fp8,fp8,0,0.029904000461101532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,4,128,0,1,float16,float16,0,0.02810666710138321
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,4,128,0,1,float16,fp8,0,0.028618666032950085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,4,128,0,1,fp8,fp8,0,0.03072533259789149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,8,128,0,1,float16,float16,0,0.028255999088287354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,40,8,128,0,1,fp8,fp8,0,0.03181866556406021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,40,128,0,1,float16,float16,0,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,40,128,0,1,float16,fp8,0,0.022997332115968067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,40,128,0,1,fp8,fp8,0,0.024735999604066212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,1,128,0,1,float16,float16,0,0.022405333817005157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,4,128,0,1,float16,float16,0,0.021781332790851593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,1,128,0,1,float16,fp8,0,0.022469334304332733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,1,128,0,1,fp8,fp8,0,0.023706667125225067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,2,128,0,1,float16,float16,0,0.02181333303451538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,2,128,0,1,float16,fp8,0,0.02187199890613556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,2,128,0,1,fp8,fp8,0,0.0234400009115537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,4,128,0,1,float16,fp8,0,0.022096000611782074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,4,128,0,1,fp8,fp8,0,0.023760000864664715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,8,128,0,1,float16,float16,0,0.022085333863894146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,8,128,0,1,float16,fp8,0,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,40,8,128,0,1,fp8,fp8,0,0.023818666736284893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,40,128,0,1,float16,float16,0,0.01674666628241539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,40,128,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,40,128,0,1,fp8,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,1,128,0,1,float16,float16,0,0.016234666109085083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,1,128,0,1,float16,fp8,0,0.016544000556071598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,1,128,0,1,fp8,fp8,0,0.017429333180189133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,2,128,0,1,float16,float16,0,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,2,128,0,1,float16,fp8,0,0.015930666277805965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,2,128,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,4,128,0,1,float16,float16,0,0.01613866661985715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,4,128,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,4,128,0,1,fp8,fp8,0,0.018170667191346485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,8,128,0,1,float16,float16,0,0.01632533346613248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,8,128,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,40,8,128,0,1,fp8,fp8,0,0.01783466711640358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,40,128,0,1,float16,float16,0,0.015397333850463232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,40,128,0,1,float16,fp8,0,0.016106666376193363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,40,128,0,1,fp8,fp8,0,0.017530667285124462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,1,128,0,1,float16,float16,0,0.01587733378012975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,1,128,0,1,float16,fp8,0,0.016197333733240765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,1,128,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,2,128,0,1,float16,float16,0,0.015423999478419622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,2,128,0,1,float16,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,2,128,0,1,fp8,fp8,0,0.01637866720557213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,4,128,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,4,128,0,1,float16,fp8,0,0.016016000260909397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,4,128,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,8,128,0,1,float16,float16,0,0.015626666446526844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,8,128,0,1,float16,fp8,0,0.015658666690190632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,40,8,128,0,1,fp8,fp8,0,0.01658133293191592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,1,128,0,1,float16,float16,0,0.2986186742782593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,1,128,0,1,float16,fp8,0,0.30002133051554364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,1,128,0,1,fp8,fp8,0,0.38818132877349854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,2,128,0,1,float16,float16,0,0.3036800026893616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,2,128,0,1,float16,fp8,0,0.3042186697324117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,2,128,0,1,fp8,fp8,0,0.38815999031066895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,4,128,0,1,float16,float16,0,0.3097066680590312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,4,128,0,1,float16,fp8,0,0.3088746666908264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,4,128,0,1,fp8,fp8,0,0.3930453459421794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,8,128,0,1,float16,float16,0,0.3139520088831584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,8,128,0,1,float16,fp8,0,0.31350932518641156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,40,128,0,1,float16,float16,0,0.19218132893244425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,40,8,128,0,1,fp8,fp8,0,0.39666132132212323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,40,128,0,1,float16,fp8,0,0.18989866971969604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,2,128,0,1,float16,float16,0,0.15892266233762106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,1,128,0,1,fp8,fp8,0,0.20380266507466635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,40,128,0,1,fp8,fp8,0,0.22456000248591104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,1,128,0,1,float16,float16,0,0.15754666924476624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,1,128,0,1,float16,fp8,0,0.158160001039505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,2,128,0,1,float16,fp8,0,0.15845866998036703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,2,128,0,1,fp8,fp8,0,0.20570133129755655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,8,128,0,1,fp8,fp8,0,0.20860799153645834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,4,128,0,1,float16,float16,0,0.16089066863059998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,4,128,0,1,float16,fp8,0,0.16319466630617777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,4,128,0,1,fp8,fp8,0,0.2065546711285909
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,8,128,0,1,float16,float16,0,0.1638879974683126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,40,8,128,0,1,float16,fp8,0,0.16407466928164163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,40,128,0,1,float16,float16,0,0.10505599776903789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,40,128,0,1,float16,fp8,0,0.10416533549626668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,2,128,0,1,float16,fp8,0,0.08674133817354839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,40,128,0,1,fp8,fp8,0,0.12308800220489502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,1,128,0,1,float16,float16,0,0.08581866820653279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,1,128,0,1,float16,fp8,0,0.0869599978129069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,1,128,0,1,fp8,fp8,0,0.10993066430091858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,2,128,0,1,float16,float16,0,0.08646399776140849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,2,128,0,1,fp8,fp8,0,0.11025599638621013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,4,128,0,1,float16,fp8,0,0.08768533666928609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,4,128,0,1,float16,float16,0,0.08756267031033833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,4,128,0,1,fp8,fp8,0,0.11213333408037822
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,40,128,0,1,fp8,fp8,0,0.06817600131034851
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,8,128,0,1,float16,float16,0,0.08887466788291931
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,1,128,0,1,float16,fp8,0,0.04861866434415182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,1,128,0,1,fp8,fp8,0,0.061039999127388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,8,128,0,1,float16,fp8,0,0.08949866890907288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,40,8,128,0,1,fp8,fp8,0,0.11399466792742412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,40,128,0,1,float16,float16,0,0.054698665936787925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,40,128,0,1,float16,fp8,0,0.05443733433882395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,1,128,0,1,float16,float16,0,0.04896000027656555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,2,128,0,1,float16,float16,0,0.04775999983151754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,2,128,0,1,float16,fp8,0,0.04872000217437744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,8,128,0,1,float16,fp8,0,0.05087466537952423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,2,128,0,1,fp8,fp8,0,0.06100266675154368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,4,128,0,1,float16,float16,0,0.049029335379600525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,4,128,0,1,float16,fp8,0,0.04903466502825419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,4,128,0,1,fp8,fp8,0,0.06251200040181477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,8,128,0,1,float16,float16,0,0.04933333396911621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,40,8,128,0,1,fp8,fp8,0,0.06363733112812042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,40,128,0,1,float16,float16,0,0.033600000043710075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,40,128,0,1,float16,fp8,0,0.03374933451414108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,40,128,0,1,fp8,fp8,0,0.0397119993964831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,1,128,0,1,float16,float16,0,0.030133334298928578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,1,128,0,1,float16,fp8,0,0.030671998858451843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,1,128,0,1,fp8,fp8,0,0.03690666705369949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,2,128,0,1,float16,float16,0,0.0301706666747729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,2,128,0,1,float16,fp8,0,0.030373332401116688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,2,128,0,1,fp8,fp8,0,0.037818667789300285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,4,128,0,1,float16,float16,0,0.03141866624355316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,4,128,0,1,float16,fp8,0,0.03053866575161616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,40,128,0,1,float16,fp8,0,0.02446399877468745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,4,128,0,1,fp8,fp8,0,0.038160001238187156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,8,128,0,1,float16,float16,0,0.031162666777769726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,8,128,0,1,float16,fp8,0,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,40,8,128,0,1,fp8,fp8,0,0.0390133336186409
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,40,128,0,1,float16,float16,0,0.02380266785621643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,40,128,0,1,fp8,fp8,0,0.02985599885384242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,1,128,0,1,float16,float16,0,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,1,128,0,1,float16,fp8,0,0.02348266790310542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,1,128,0,1,fp8,fp8,0,0.02754133443037669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,2,128,0,1,float16,float16,0,0.023237332701683044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,2,128,0,1,float16,fp8,0,0.022634667654832203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,2,128,0,1,fp8,fp8,0,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,4,128,0,1,float16,float16,0,0.022863999009132385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,4,128,0,1,float16,fp8,0,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,4,128,0,1,fp8,fp8,0,0.02874133239189784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,8,128,0,1,float16,float16,0,0.023205332458019257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,8,128,0,1,float16,fp8,0,0.02404266595840454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,40,8,128,0,1,fp8,fp8,0,0.02758399893840154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,40,128,0,1,float16,float16,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,40,128,0,1,float16,fp8,0,0.019546666493018467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,40,128,0,1,fp8,fp8,0,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,2,128,0,1,float16,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,1,128,0,1,float16,float16,0,0.018698666244745255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,1,128,0,1,float16,fp8,0,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,1,128,0,1,fp8,fp8,0,0.02170666555563609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,2,128,0,1,float16,float16,0,0.01829333355029424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,2,128,0,1,fp8,fp8,0,0.0215786670645078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,4,128,0,1,float16,float16,0,0.018330667167901993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,4,128,0,1,float16,fp8,0,0.01855466639002164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,4,128,0,1,fp8,fp8,0,0.022005334496498108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,8,128,0,1,float16,float16,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,8,128,0,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,40,8,128,0,1,fp8,fp8,0,0.02149333308140437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,40,128,0,1,float16,float16,0,0.016074666132529575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,40,128,0,1,float16,fp8,0,0.015658666690190632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,40,128,0,1,fp8,fp8,0,0.017551999539136887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,1,128,0,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,1,128,0,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,1,128,0,1,fp8,fp8,0,0.017429333180189133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,2,128,0,1,float16,float16,0,0.015423999478419622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,2,128,0,1,float16,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,2,128,0,1,fp8,fp8,0,0.017690667261679966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,4,128,0,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,4,128,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,4,128,0,1,fp8,fp8,0,0.017445333302021027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,8,128,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,8,128,0,1,float16,fp8,0,0.015578666081031164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,40,8,128,0,1,fp8,fp8,0,0.017680000513792038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,40,128,0,1,float16,float16,0,0.014192000031471252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,40,128,0,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,40,128,0,1,fp8,fp8,0,0.01635733370979627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,1,128,0,1,float16,float16,0,0.013893333574136099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,1,128,0,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,1,128,0,1,fp8,fp8,0,0.01628799984852473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,2,128,0,1,float16,float16,0,0.014405333747466406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,2,128,0,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,2,128,0,1,fp8,fp8,0,0.016197333733240765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,4,128,0,1,float16,float16,0,0.014474666366974512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,4,128,0,1,float16,fp8,0,0.014671999961137772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,4,128,0,1,fp8,fp8,0,0.016415999581416447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,8,128,0,1,float16,float16,0,0.014597332725922266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,8,128,0,1,float16,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,40,8,128,0,1,fp8,fp8,0,0.01635733370979627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,1,128,0,1,float16,float16,0,0.2456586758295695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,1,128,0,1,float16,fp8,0,0.24549333254496256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,1,128,0,1,fp8,fp8,0,0.343664010365804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,2,128,0,1,float16,float16,0,0.24958932399749756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,2,128,0,1,float16,fp8,0,0.24770132700602213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,4,128,0,1,float16,float16,0,0.25013866027196247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,2,128,0,1,fp8,fp8,0,0.3452266852060954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,4,128,0,1,float16,fp8,0,0.2511253356933594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,4,128,0,1,fp8,fp8,0,0.3455253442128499
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,8,128,0,1,float16,float16,0,0.25489600499471027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,1,128,0,1,float16,float16,0,0.13004266222318014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,8,128,0,1,float16,fp8,0,0.2544586658477783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,40,128,0,1,float16,float16,0,0.1523253321647644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,40,8,128,0,1,fp8,fp8,0,0.34835731983184814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,40,128,0,1,float16,fp8,0,0.14798399806022644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,40,128,0,1,fp8,fp8,0,0.19214399655659994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,1,128,0,1,float16,fp8,0,0.12970667084058127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,1,128,0,1,fp8,fp8,0,0.17874133586883545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,2,128,0,1,float16,float16,0,0.12949867049853006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,2,128,0,1,float16,fp8,0,0.1295093297958374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,2,128,0,1,fp8,fp8,0,0.17951999107996622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,4,128,0,1,float16,float16,0,0.1305333375930786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,4,128,0,1,float16,fp8,0,0.12989866733551025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,4,128,0,1,fp8,fp8,0,0.18202666441599527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,8,128,0,1,float16,float16,0,0.1323093374570211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,8,128,0,1,float16,fp8,0,0.13249599933624268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,40,8,128,0,1,fp8,fp8,0,0.18347734212875366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,1,128,0,1,fp8,fp8,0,0.09676266709963481
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,40,128,0,1,float16,float16,0,0.07818666597207387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,40,128,0,1,float16,fp8,0,0.07587199906508128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,40,128,0,1,fp8,fp8,0,0.10327466328938802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,1,128,0,1,float16,float16,0,0.07015466690063477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,1,128,0,1,float16,fp8,0,0.07126399874687195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,2,128,0,1,float16,float16,0,0.07067733506361644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,2,128,0,1,float16,fp8,0,0.07065066695213318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,2,128,0,1,fp8,fp8,0,0.09691733121871948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,4,128,0,1,float16,float16,0,0.07136533161004384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,4,128,0,1,float16,fp8,0,0.07107200225194295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,4,128,0,1,fp8,fp8,0,0.09815999865531921
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,8,128,0,1,float16,float16,0,0.07116266588370006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,8,128,0,1,float16,fp8,0,0.07230400045712788
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,40,8,128,0,1,fp8,fp8,0,0.09987733761469524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,40,128,0,1,float16,float16,0,0.043920000394185386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,40,128,0,1,float16,fp8,0,0.04389866689840952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,40,128,0,1,fp8,fp8,0,0.058335999647776283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,1,128,0,1,float16,float16,0,0.04186133543650309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,1,128,0,1,float16,fp8,0,0.04268800218900045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,1,128,0,1,fp8,fp8,0,0.055258666475613914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,2,128,0,1,float16,float16,0,0.04190400242805481
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,2,128,0,1,float16,fp8,0,0.04152533411979675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,2,128,0,1,fp8,fp8,0,0.054666668176651
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,4,128,0,1,float16,float16,0,0.04206933577855428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,4,128,0,1,float16,fp8,0,0.04222933451334635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,4,128,0,1,fp8,fp8,0,0.05620799958705902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,8,128,0,1,float16,float16,0,0.04234666625658671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,8,128,0,1,float16,fp8,0,0.043237333496411644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,40,8,128,0,1,fp8,fp8,0,0.05657599866390228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,40,128,0,1,float16,float16,0,0.02881066749493281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,40,128,0,1,float16,fp8,0,0.02887466549873352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,40,128,0,1,fp8,fp8,0,0.03629866739114126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,1,128,0,1,float16,float16,0,0.027973333994547527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,1,128,0,1,float16,fp8,0,0.027845333019892376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,1,128,0,1,fp8,fp8,0,0.03427733232577642
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,2,128,0,1,float16,float16,0,0.027786667148272198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,2,128,0,1,float16,fp8,0,0.02812266598145167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,2,128,0,1,fp8,fp8,0,0.03514666606982549
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,4,128,0,1,float16,float16,0,0.027717334528764088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,8,128,0,1,fp8,fp8,0,0.034917332231998444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,4,128,0,1,float16,fp8,0,0.02805333336194356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,4,128,0,1,fp8,fp8,0,0.03550933301448822
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,8,128,0,1,float16,float16,0,0.027813332776228588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,40,8,128,0,1,float16,fp8,0,0.028421332438786823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,40,128,0,1,float16,float16,0,0.02195200075705846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,40,128,0,1,float16,fp8,0,0.02181333303451538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,40,128,0,1,fp8,fp8,0,0.026170666019121807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,1,128,0,1,float16,float16,0,0.020586666961510975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,1,128,0,1,float16,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,1,128,0,1,fp8,fp8,0,0.025583999852339428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,2,128,0,1,float16,float16,0,0.02049066623051961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,2,128,0,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,2,128,0,1,fp8,fp8,0,0.02649066597223282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,4,128,0,1,float16,float16,0,0.02085866779088974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,4,128,0,1,float16,fp8,0,0.02080533280968666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,4,128,0,1,fp8,fp8,0,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,8,128,0,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,8,128,0,1,float16,fp8,0,0.020549333343903225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,40,8,128,0,1,fp8,fp8,0,0.025568000972270966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,40,128,0,1,float16,float16,0,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,40,128,0,1,float16,fp8,0,0.018207999567190807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,40,128,0,1,fp8,fp8,0,0.02236266682545344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,1,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,4,128,0,1,float16,float16,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,1,128,0,1,float16,fp8,0,0.01764800027012825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,1,128,0,1,fp8,fp8,0,0.02089600016673406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,2,128,0,1,float16,float16,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,2,128,0,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,8,128,0,1,fp8,fp8,0,0.0216799999276797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,2,128,0,1,fp8,fp8,0,0.02081599955757459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,4,128,0,1,float16,fp8,0,0.017637333522240322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,4,128,0,1,fp8,fp8,0,0.02089066555102666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,8,128,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,40,8,128,0,1,float16,fp8,0,0.018522666146357853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,40,128,0,1,float16,float16,0,0.014335999886194864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,40,128,0,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,40,128,0,1,fp8,fp8,0,0.01764800027012825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,1,128,0,1,float16,float16,0,0.013989333063364029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,1,128,0,1,float16,fp8,0,0.014650666465361914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,1,128,0,1,fp8,fp8,0,0.016688000410795212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,2,128,0,1,float16,float16,0,0.0143306665122509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,2,128,0,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,2,128,0,1,fp8,fp8,0,0.01653333380818367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,4,128,0,1,float16,float16,0,0.014287999520699183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,4,128,0,1,float16,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,4,128,0,1,fp8,fp8,0,0.016586666305859882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,8,128,0,1,float16,float16,0,0.014597332725922266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,8,128,0,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,40,8,128,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,40,128,0,1,float16,float16,0,0.014309333016475042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,40,128,0,1,float16,fp8,0,0.014570667097965876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,40,128,0,1,fp8,fp8,0,0.016303999970356624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,1,128,0,1,float16,float16,0,0.014650666465361914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,1,128,0,1,float16,fp8,0,0.014186666657527288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,1,128,0,1,fp8,fp8,0,0.01569066693385442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,2,128,0,1,float16,float16,0,0.013642666240533194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,2,128,0,1,float16,fp8,0,0.014592000593741735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,8,128,0,1,fp8,fp8,0,0.016250666230916977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,2,128,0,1,fp8,fp8,0,0.016330666840076447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,4,128,0,1,float16,float16,0,0.014261333892742792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,4,128,0,1,float16,fp8,0,0.013978666315476099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,4,128,0,1,fp8,fp8,0,0.016528000434239704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,8,128,0,1,float16,float16,0,0.013669333110253016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,40,8,128,0,1,float16,fp8,0,0.01413333291808764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,1,128,0,1,fp8,fp8,0,22.308827718098957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,2,128,0,1,fp8,fp8,0,22.193323771158855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,1,128,0,1,float16,float16,0,34.740587870279946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,1,128,0,1,float16,fp8,0,35.4947255452474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,2,128,0,1,float16,float16,0,35.65420277913412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,2,128,0,1,float16,fp8,0,35.9355723063151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,4,128,0,1,float16,float16,0,35.01995086669922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,4,128,0,1,float16,fp8,0,35.35985565185547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,4,128,0,1,fp8,fp8,0,22.55592091878255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,32,128,0,1,fp8,fp8,0,11.527039845784506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,32,128,0,1,float16,float16,0,17.269893646240234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,1,128,0,1,float16,float16,0,16.77941385904948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,32,128,0,1,float16,fp8,0,17.381434122721355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,8,128,0,1,fp8,fp8,0,22.263450622558594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,8,128,0,1,float16,float16,0,33.96464538574219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,1,128,0,1,fp8,fp8,0,11.265268961588541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,1,128,0,1,float16,fp8,0,16.725418090820312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,2,128,0,1,fp8,fp8,0,11.257039388020834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,32,8,128,0,1,float16,fp8,0,34.36669921875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,2,128,0,1,float16,float16,0,17.944037119547527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,2,128,0,1,float16,fp8,0,18.137962341308594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,4,128,0,1,float16,float16,0,17.494815826416016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,4,128,0,1,float16,fp8,0,17.203712463378906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,4,128,0,1,fp8,fp8,0,11.563034057617188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,8,128,0,1,fp8,fp8,0,11.245109558105469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,32,128,0,1,float16,float16,0,8.719888051350912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,32,128,0,1,float16,fp8,0,8.90004793802897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,32,128,0,1,fp8,fp8,0,5.852816263834636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,8,128,0,1,float16,float16,0,17.25751495361328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,1,128,0,1,float16,float16,0,8.411861419677734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,32,8,128,0,1,float16,fp8,0,16.954928080240887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,1,128,0,1,float16,fp8,0,8.633471806844076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,1,128,0,1,fp8,fp8,0,5.622826894124349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,2,128,0,1,fp8,fp8,0,5.678549448649089
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,2,128,0,1,float16,float16,0,8.549818674723307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,2,128,0,1,float16,fp8,0,8.86086400349935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,4,128,0,1,float16,float16,0,8.554357528686523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,4,128,0,1,fp8,fp8,0,6.117919921875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,4,128,0,1,float16,fp8,0,8.758111953735352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,8,128,0,1,fp8,fp8,0,5.711498896280925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,32,128,0,1,float16,float16,0,4.428048133850098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,8,128,0,1,float16,float16,0,8.628549575805664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,32,8,128,0,1,float16,fp8,0,8.428880055745443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,32,128,0,1,float16,fp8,0,4.49400520324707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,32,128,0,1,fp8,fp8,0,3.008005460103353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,1,128,0,1,float16,float16,0,4.162490526835124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,1,128,0,1,fp8,fp8,0,2.8782666524251304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,2,128,0,1,fp8,fp8,0,2.895930608113607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,1,128,0,1,float16,fp8,0,4.339402516682942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,2,128,0,1,float16,float16,0,4.307157198588054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,2,128,0,1,float16,fp8,0,4.291824022928874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,4,128,0,1,float16,float16,0,4.469818751017253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,4,128,0,1,float16,fp8,0,4.392378807067871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,4,128,0,1,fp8,fp8,0,2.9062347412109375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,8,128,0,1,float16,float16,0,4.324165344238281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,8,128,0,1,fp8,fp8,0,2.863429387410482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,32,8,128,0,1,float16,fp8,0,4.382330576578776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,1,128,0,1,fp8,fp8,0,13.479872385660807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,2,128,0,1,fp8,fp8,0,13.31884765625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,1,128,0,1,float16,float16,0,19.621344248453777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,1,128,0,1,float16,fp8,0,19.74901835123698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,2,128,0,1,float16,float16,0,19.84343973795573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,2,128,0,1,float16,fp8,0,19.444442749023438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,4,128,0,1,float16,float16,0,19.066543579101562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,4,128,0,1,float16,fp8,0,19.46329625447591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,4,128,0,1,fp8,fp8,0,13.311749776204428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,32,128,0,1,fp8,fp8,0,6.858570734659831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,32,128,0,1,float16,float16,0,10.19659169514974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,32,128,0,1,float16,fp8,0,10.49134381612142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,1,128,0,1,float16,float16,0,9.836725234985352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,8,128,0,1,fp8,fp8,0,13.542784372965494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,8,128,0,1,float16,float16,0,19.64626185099284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,1,128,0,1,fp8,fp8,0,6.8656158447265625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,1,128,0,1,float16,fp8,0,10.053178787231445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,32,8,128,0,1,float16,fp8,0,19.50332768758138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,2,128,0,1,fp8,fp8,0,6.46723747253418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,2,128,0,1,float16,float16,0,9.719743728637695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,2,128,0,1,float16,fp8,0,9.834954579671225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,4,128,0,1,float16,float16,0,9.823674519856771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,4,128,0,1,fp8,fp8,0,6.548837025960286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,4,128,0,1,float16,fp8,0,9.733029047648111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,32,128,0,1,float16,float16,0,5.056341489156087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,8,128,0,1,fp8,fp8,0,6.514149347941081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,32,128,0,1,float16,fp8,0,5.0397599538167315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,32,128,0,1,fp8,fp8,0,3.4917494455973306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,8,128,0,1,float16,float16,0,10.218010584513346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,32,8,128,0,1,float16,fp8,0,9.971477508544922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,1,128,0,1,float16,float16,0,4.980538686116536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,1,128,0,1,fp8,fp8,0,3.301088015238444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,1,128,0,1,float16,fp8,0,4.8791093826293945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,2,128,0,1,fp8,fp8,0,3.200357437133789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,2,128,0,1,float16,float16,0,5.115749359130859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,2,128,0,1,float16,fp8,0,4.980234781901042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,4,128,0,1,fp8,fp8,0,3.358501434326172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,4,128,0,1,float16,float16,0,5.06935469309489
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,4,128,0,1,float16,fp8,0,4.992186546325684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,32,128,0,1,float16,float16,0,2.5282559394836426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,8,128,0,1,fp8,fp8,0,3.4236319859822593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,8,128,0,1,float16,float16,0,4.918821334838867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,32,128,0,1,fp8,fp8,0,1.7361067136128743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,32,128,0,1,float16,fp8,0,2.5265599886576333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,32,8,128,0,1,float16,fp8,0,4.9599307378133135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,1,128,0,1,float16,float16,0,2.587360064188639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,1,128,0,1,float16,fp8,0,2.4355039596557617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,1,128,0,1,fp8,fp8,0,1.7379733721415203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,2,128,0,1,fp8,fp8,0,1.7130452791849773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,2,128,0,1,float16,float16,0,2.531973361968994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,2,128,0,1,float16,fp8,0,2.5191733042399087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,4,128,0,1,fp8,fp8,0,1.7217547098795574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,4,128,0,1,float16,float16,0,2.5519839922587075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,4,128,0,1,float16,fp8,0,2.405296007792155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,8,128,0,1,float16,float16,0,2.4713919957478843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,8,128,0,1,float16,fp8,0,2.465541362762451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,32,8,128,0,1,fp8,fp8,0,1.7005492846171062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,1,128,0,1,fp8,fp8,0,9.272250493367514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,2,128,0,1,fp8,fp8,0,9.222869237263998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,1,128,0,1,float16,float16,0,13.76259740193685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,1,128,0,1,float16,fp8,0,13.702938079833984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,2,128,0,1,float16,float16,0,13.639957427978516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,2,128,0,1,float16,fp8,0,13.66928482055664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,4,128,0,1,float16,float16,0,13.62398910522461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,4,128,0,1,fp8,fp8,0,9.29647445678711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,32,128,0,1,fp8,fp8,0,4.831637382507324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,32,128,0,1,float16,float16,0,7.05894406636556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,4,128,0,1,float16,fp8,0,13.83145014444987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,32,128,0,1,float16,fp8,0,7.085440317789714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,8,128,0,1,fp8,fp8,0,9.51473617553711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,8,128,0,1,float16,float16,0,13.570351918538412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,1,128,0,1,float16,float16,0,7.3806718190511065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,32,8,128,0,1,float16,fp8,0,14.497056325276693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,1,128,0,1,fp8,fp8,0,4.610906600952148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,2,128,0,1,fp8,fp8,0,4.785909334818522
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,1,128,0,1,float16,fp8,0,6.810288111368815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,2,128,0,1,float16,float16,0,7.042117436726888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,2,128,0,1,float16,fp8,0,6.778858820597331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,4,128,0,1,float16,float16,0,6.991098403930664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,4,128,0,1,fp8,fp8,0,4.958730697631836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,32,128,0,1,float16,float16,0,3.6914666493733725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,32,128,0,1,float16,fp8,0,3.8277387619018555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,8,128,0,1,fp8,fp8,0,4.7993119557698565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,4,128,0,1,float16,fp8,0,7.231269200642903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,8,128,0,1,float16,float16,0,7.310314814249675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,32,128,0,1,fp8,fp8,0,2.5372907320658364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,32,8,128,0,1,float16,fp8,0,6.954720179239909
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,1,128,0,1,float16,float16,0,3.5164000193277993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,1,128,0,1,fp8,fp8,0,2.4133119583129883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,1,128,0,1,float16,fp8,0,3.464789390563965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,2,128,0,1,fp8,fp8,0,2.317471981048584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,2,128,0,1,float16,float16,0,3.4983625411987305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,2,128,0,1,float16,fp8,0,3.3257548014322915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,4,128,0,1,fp8,fp8,0,2.3718719482421875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,4,128,0,1,float16,float16,0,3.386490821838379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,4,128,0,1,float16,fp8,0,3.4280265172322593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,8,128,0,1,float16,float16,0,3.4794079462687173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,32,128,0,1,float16,float16,0,1.8031466801961262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,8,128,0,1,fp8,fp8,0,2.401146729787191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,32,128,0,1,float16,fp8,0,1.816208044687907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,32,128,0,1,fp8,fp8,0,1.2824959754943848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,32,8,128,0,1,float16,fp8,0,3.6724799474080405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,1,128,0,1,float16,float16,0,1.7259146372477214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,1,128,0,1,float16,fp8,0,1.7577759424845378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,1,128,0,1,fp8,fp8,0,1.2659306526184082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,2,128,0,1,float16,float16,0,1.709338665008545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,2,128,0,1,fp8,fp8,0,1.2763840357462566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,2,128,0,1,float16,fp8,0,1.7061120669047039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,4,128,0,1,float16,float16,0,1.7424906094868977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,4,128,0,1,fp8,fp8,0,1.262437343597412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,4,128,0,1,float16,fp8,0,1.7524587313334148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,8,128,0,1,float16,float16,0,1.729658603668213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,8,128,0,1,float16,fp8,0,1.71396271387736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,32,8,128,0,1,fp8,fp8,0,1.245850642522176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,1,128,0,1,fp8,fp8,0,12.2806396484375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,2,128,0,1,fp8,fp8,0,12.16692860921224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,1,128,0,1,float16,float16,0,17.94324239095052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,1,128,0,1,float16,fp8,0,17.754107157389324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,2,128,0,1,float16,float16,0,17.73953628540039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,2,128,0,1,float16,fp8,0,18.04362614949544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,4,128,0,1,float16,float16,0,17.718096415201824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,4,128,0,1,float16,fp8,0,17.82077916463216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,4,128,0,1,fp8,fp8,0,12.564112345377604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,32,128,0,1,fp8,fp8,0,6.480581283569336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,32,128,0,1,float16,float16,0,9.368997573852539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,32,128,0,1,float16,fp8,0,9.454458872477213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,1,128,0,1,float16,float16,0,8.986394882202148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,8,128,0,1,fp8,fp8,0,12.927909851074219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,8,128,0,1,float16,float16,0,19.316404978434246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,1,128,0,1,fp8,fp8,0,6.643765131632487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,1,128,0,1,float16,fp8,0,9.776725133260092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,2,128,0,1,fp8,fp8,0,6.510490417480469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,32,8,128,0,1,float16,fp8,0,20.092559814453125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,2,128,0,1,float16,float16,0,9.17307217915853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,2,128,0,1,float16,fp8,0,9.382528305053711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,4,128,0,1,fp8,fp8,0,6.222741444905599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,4,128,0,1,float16,float16,0,9.200106938680014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,4,128,0,1,float16,fp8,0,9.102469126383463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,32,128,0,1,float16,float16,0,4.672762552897136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,32,128,0,1,fp8,fp8,0,3.2124268213907876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,8,128,0,1,fp8,fp8,0,6.237424214680989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,32,128,0,1,float16,fp8,0,4.769258817036946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,8,128,0,1,float16,float16,0,9.20845858256022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,32,8,128,0,1,float16,fp8,0,9.067365646362305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,1,128,0,1,fp8,fp8,0,3.0344212849934897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,1,128,0,1,float16,float16,0,4.465013186136882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,1,128,0,1,float16,fp8,0,4.542304039001465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,2,128,0,1,fp8,fp8,0,3.0847625732421875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,2,128,0,1,float16,float16,0,4.463818550109863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,2,128,0,1,float16,fp8,0,4.59662405649821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,4,128,0,1,float16,float16,0,4.5845333735148115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,4,128,0,1,fp8,fp8,0,3.0471200942993164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,4,128,0,1,float16,fp8,0,4.442271868387858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,32,128,0,1,float16,float16,0,2.3498826026916504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,8,128,0,1,fp8,fp8,0,3.160597483317057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,8,128,0,1,float16,float16,0,4.517375946044922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,32,8,128,0,1,float16,fp8,0,4.610618591308594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,32,128,0,1,float16,fp8,0,2.3695093790690103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,32,128,0,1,fp8,fp8,0,1.7208800315856934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,1,128,0,1,fp8,fp8,0,1.586309274037679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,1,128,0,1,float16,float16,0,2.2084426879882812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,1,128,0,1,float16,fp8,0,2.270437399546305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,2,128,0,1,float16,float16,0,2.1803359985351562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,2,128,0,1,fp8,fp8,0,1.5443679491678874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,2,128,0,1,float16,fp8,0,2.2222240765889487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,4,128,0,1,float16,float16,0,2.153600056966146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,4,128,0,1,fp8,fp8,0,1.5625492731730144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,4,128,0,1,float16,fp8,0,2.171738624572754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,32,128,0,1,float16,float16,0,1.2128053506215413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,8,128,0,1,fp8,fp8,0,1.5719200770060222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,8,128,0,1,float16,float16,0,2.2504639625549316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,32,128,0,1,float16,fp8,0,1.2065119743347168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,32,8,128,0,1,float16,fp8,0,2.237738609313965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,32,128,0,1,fp8,fp8,0,0.8847200075785319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,1,128,0,1,float16,float16,0,1.1667893727620442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,1,128,0,1,float16,fp8,0,1.1613600254058838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,1,128,0,1,fp8,fp8,0,0.8548586368560791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,2,128,0,1,float16,float16,0,1.1693440278371174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,2,128,0,1,float16,fp8,0,1.166042645772298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,2,128,0,1,fp8,fp8,0,0.8563626607259115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,4,128,0,1,float16,float16,0,1.1574560006459553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,4,128,0,1,float16,fp8,0,1.1683413187662761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,4,128,0,1,fp8,fp8,0,0.8549706935882568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,8,128,0,1,fp8,fp8,0,0.8605066935221354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,8,128,0,1,float16,float16,0,1.1660373210906982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,32,8,128,0,1,float16,fp8,0,1.155893325805664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,1,128,0,1,fp8,fp8,0,7.228218714396159
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,2,128,0,1,fp8,fp8,0,7.2935943603515625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,1,128,0,1,float16,float16,0,10.50052261352539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,1,128,0,1,float16,fp8,0,10.475557327270508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,2,128,0,1,float16,float16,0,10.624042510986328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,2,128,0,1,float16,fp8,0,10.504714965820312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,4,128,0,1,float16,float16,0,10.643877029418945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,4,128,0,1,float16,fp8,0,10.432661056518555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,4,128,0,1,fp8,fp8,0,7.286736170450847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,32,128,0,1,fp8,fp8,0,3.927146593729655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,32,128,0,1,float16,float16,0,5.563733418782552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,32,128,0,1,float16,fp8,0,5.511349360148112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,1,128,0,1,float16,float16,0,5.206144014994304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,8,128,0,1,fp8,fp8,0,7.421834945678711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,8,128,0,1,float16,float16,0,10.505184173583984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,32,8,128,0,1,float16,fp8,0,10.637882868448893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,1,128,0,1,fp8,fp8,0,3.694016138712565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,1,128,0,1,float16,fp8,0,5.183546702067058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,2,128,0,1,fp8,fp8,0,3.6653226216634116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,2,128,0,1,float16,float16,0,5.395615895589192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,2,128,0,1,float16,fp8,0,5.233039855957031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,4,128,0,1,float16,float16,0,5.28441588083903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,4,128,0,1,fp8,fp8,0,3.6749706268310547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,4,128,0,1,float16,fp8,0,5.230266571044922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,32,128,0,1,float16,float16,0,2.766074816385905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,8,128,0,1,fp8,fp8,0,3.704832077026367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,32,128,0,1,float16,fp8,0,2.729311943054199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,8,128,0,1,float16,float16,0,5.332549413045247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,32,8,128,0,1,float16,fp8,0,5.23582394917806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,32,128,0,1,fp8,fp8,0,1.9331893920898438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,1,128,0,1,float16,float16,0,2.582021395365397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,1,128,0,1,float16,fp8,0,2.4982080459594727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,1,128,0,1,fp8,fp8,0,1.8395519256591797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,2,128,0,1,float16,float16,0,2.6109546025594077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,2,128,0,1,fp8,fp8,0,1.8235467274983723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,2,128,0,1,float16,fp8,0,2.582101345062256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,4,128,0,1,fp8,fp8,0,1.8818507194519043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,4,128,0,1,float16,float16,0,2.5827733675638833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,4,128,0,1,float16,fp8,0,2.5848053296407065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,8,128,0,1,float16,float16,0,2.636352062225342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,32,128,0,1,float16,float16,0,1.3354934056599934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,8,128,0,1,float16,fp8,0,2.8151254653930664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,32,8,128,0,1,fp8,fp8,0,1.8507893880208333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,32,128,0,1,float16,fp8,0,1.3766773541768391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,32,128,0,1,fp8,fp8,0,1.0295999844868977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,1,128,0,1,float16,float16,0,1.3177440166473389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,1,128,0,1,fp8,fp8,0,0.9756960074106852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,1,128,0,1,float16,fp8,0,1.2947039604187012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,2,128,0,1,float16,float16,0,1.3091519673665364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,2,128,0,1,fp8,fp8,0,0.9547413190205892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,2,128,0,1,float16,fp8,0,1.3057226339975994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,4,128,0,1,float16,float16,0,1.294373353322347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,4,128,0,1,float16,fp8,0,1.301968018213908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,4,128,0,1,fp8,fp8,0,0.9947306315104166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,8,128,0,1,fp8,fp8,0,0.9575253327687582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,8,128,0,1,float16,float16,0,1.2890880107879639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,32,128,0,1,float16,float16,0,0.7252000172932943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,32,8,128,0,1,float16,fp8,0,1.3088586330413818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,32,128,0,1,float16,fp8,0,0.7372586727142334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,32,128,0,1,fp8,fp8,0,0.5345120032628378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,1,128,0,1,float16,float16,0,0.7161866823832194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,1,128,0,1,float16,fp8,0,0.7167466481526693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,1,128,0,1,fp8,fp8,0,0.502133329709371
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,2,128,0,1,float16,float16,0,0.7227253119150797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,2,128,0,1,float16,fp8,0,0.7150506973266602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,2,128,0,1,fp8,fp8,0,0.5120746692021688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,4,128,0,1,float16,float16,0,0.7160267035166422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,4,128,0,1,float16,fp8,0,0.7179520130157471
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,4,128,0,1,fp8,fp8,0,0.5072266658147176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,8,128,0,1,float16,float16,0,0.7169226805369059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,8,128,0,1,float16,fp8,0,0.7191999753316244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,32,8,128,0,1,fp8,fp8,0,0.5152053435643514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,1,128,0,1,fp8,fp8,0,7.15171750386556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,2,128,0,1,fp8,fp8,0,7.1996103922526045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,1,128,0,1,float16,float16,0,9.993301391601562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,1,128,0,1,float16,fp8,0,10.19166374206543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,2,128,0,1,float16,float16,0,10.082511901855469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,2,128,0,1,float16,fp8,0,10.033119837443033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,4,128,0,1,float16,float16,0,10.007909138997396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,32,128,0,1,fp8,fp8,0,3.86080010732015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,4,128,0,1,fp8,fp8,0,7.239834467569987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,32,128,0,1,float16,float16,0,5.311797459920247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,32,128,0,1,float16,fp8,0,5.264538764953613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,4,128,0,1,float16,fp8,0,10.144543965657553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,8,128,0,1,fp8,fp8,0,7.450223922729492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,8,128,0,1,float16,float16,0,10.075957616170248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,32,8,128,0,1,float16,fp8,0,10.3023681640625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,1,128,0,1,fp8,fp8,0,3.510437329610189
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,1,128,0,1,float16,float16,0,4.991392135620117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,1,128,0,1,float16,fp8,0,5.049413363138835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,2,128,0,1,fp8,fp8,0,3.614687919616699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,2,128,0,1,float16,float16,0,5.010608037312825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,2,128,0,1,float16,fp8,0,5.048373222351074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,4,128,0,1,float16,float16,0,5.026874542236328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,4,128,0,1,float16,fp8,0,5.141839981079102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,4,128,0,1,fp8,fp8,0,3.571125348409017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,32,128,0,1,float16,float16,0,2.534623940785726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,32,128,0,1,float16,fp8,0,2.6258293787638345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,8,128,0,1,fp8,fp8,0,3.687199910481771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,32,128,0,1,fp8,fp8,0,1.9118506113688152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,8,128,0,1,float16,float16,0,5.082544008890788
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,32,8,128,0,1,float16,fp8,0,5.1779359181722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,1,128,0,1,float16,float16,0,2.429663976033529
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,1,128,0,1,fp8,fp8,0,1.7565439542134602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,1,128,0,1,float16,fp8,0,2.419413407643636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,2,128,0,1,fp8,fp8,0,1.7598506609598796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,2,128,0,1,float16,float16,0,2.432448069254557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,2,128,0,1,float16,fp8,0,2.4706293741861978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,4,128,0,1,float16,float16,0,2.394053300221761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,4,128,0,1,float16,fp8,0,2.506122589111328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,4,128,0,1,fp8,fp8,0,1.9020586013793945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,32,128,0,1,float16,float16,0,1.2813173135121663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,8,128,0,1,fp8,fp8,0,1.85153595606486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,32,128,0,1,fp8,fp8,0,1.019173304239909
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,8,128,0,1,float16,float16,0,2.476384003957113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,32,128,0,1,float16,fp8,0,1.3221226533253987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,32,8,128,0,1,float16,fp8,0,2.4242399533589682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,1,128,0,1,float16,float16,0,1.228000005086263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,1,128,0,1,fp8,fp8,0,0.9528533617655436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,1,128,0,1,float16,fp8,0,1.229845364888509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,2,128,0,1,float16,float16,0,1.2092586358388264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,2,128,0,1,float16,fp8,0,1.2137119770050049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,2,128,0,1,fp8,fp8,0,0.9306346575419108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,4,128,0,1,float16,float16,0,1.2188053131103516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,4,128,0,1,fp8,fp8,0,0.9590240319569906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,4,128,0,1,float16,fp8,0,1.2249173323313396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,8,128,0,1,float16,float16,0,1.2225066820780437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,8,128,0,1,float16,fp8,0,1.2331146399180095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,32,8,128,0,1,fp8,fp8,0,0.932090679804484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,32,128,0,1,float16,float16,0,0.6794026692708334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,32,128,0,1,fp8,fp8,0,0.5362079938252767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,32,128,0,1,float16,fp8,0,0.6847306887308756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,1,128,0,1,float16,float16,0,0.6495680014292399
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,2,128,0,1,float16,fp8,0,0.6551733414332072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,1,128,0,1,fp8,fp8,0,0.5049440066019694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,1,128,0,1,float16,fp8,0,0.6516533295313517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,2,128,0,1,float16,float16,0,0.6565013329188029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,2,128,0,1,fp8,fp8,0,0.5036906798680624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,4,128,0,1,float16,float16,0,0.6536586682001749
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,4,128,0,1,float16,fp8,0,0.6558346748352051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,4,128,0,1,fp8,fp8,0,0.502128005027771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,8,128,0,1,float16,float16,0,0.6548106670379639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,8,128,0,1,float16,fp8,0,0.6594239870707194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,32,8,128,0,1,fp8,fp8,0,0.5105653206507365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,32,128,0,1,float16,float16,0,0.3852906624476115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,32,128,0,1,float16,fp8,0,0.39192001024882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,32,128,0,1,fp8,fp8,0,0.2913386623064677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,1,128,0,1,float16,float16,0,0.3774293263753255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,1,128,0,1,float16,fp8,0,0.3787093162536621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,1,128,0,1,fp8,fp8,0,0.2725653251012166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,2,128,0,1,float16,float16,0,0.3782613277435303
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,2,128,0,1,fp8,fp8,0,0.2736746668815613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,2,128,0,1,float16,fp8,0,0.37836265563964844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,4,128,0,1,float16,float16,0,0.3811839818954468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,4,128,0,1,float16,fp8,0,0.3803093433380127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,4,128,0,1,fp8,fp8,0,0.2762239972750346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,8,128,0,1,float16,float16,0,0.3803360064824422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,8,128,0,1,float16,fp8,0,0.3796159823735555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,32,8,128,0,1,fp8,fp8,0,0.27938665946324664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,1,128,0,1,fp8,fp8,0,4.396181424458821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,2,128,0,1,fp8,fp8,0,4.473391850789388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,1,128,0,1,float16,float16,0,6.136778513590495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,1,128,0,1,float16,fp8,0,6.061546961466472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,2,128,0,1,float16,float16,0,6.149487813313802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,2,128,0,1,float16,fp8,0,6.074703852335612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,4,128,0,1,float16,float16,0,6.112815856933594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,4,128,0,1,float16,fp8,0,6.167866388956706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,4,128,0,1,fp8,fp8,0,4.378554662068685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,32,128,0,1,float16,float16,0,3.2377761205037436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,32,128,0,1,float16,fp8,0,3.2205654780069985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,32,128,0,1,fp8,fp8,0,2.422394593556722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,8,128,0,1,fp8,fp8,0,4.507594744364421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,8,128,0,1,float16,float16,0,6.213706970214844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,32,8,128,0,1,float16,fp8,0,6.339066823323567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,1,128,0,1,float16,float16,0,2.9710238774617515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,1,128,0,1,fp8,fp8,0,2.178448041280111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,1,128,0,1,float16,fp8,0,3.015194574991862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,2,128,0,1,float16,float16,0,2.968165397644043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,2,128,0,1,float16,fp8,0,2.943471908569336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,2,128,0,1,fp8,fp8,0,2.2048586209615073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,4,128,0,1,float16,float16,0,3.021087964375814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,4,128,0,1,float16,fp8,0,2.9520371754964194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,4,128,0,1,fp8,fp8,0,2.2109227180480957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,8,128,0,1,float16,float16,0,3.0912532806396484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,32,128,0,1,float16,float16,0,1.5402132670084636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,32,128,0,1,float16,fp8,0,1.543941338857015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,32,128,0,1,fp8,fp8,0,1.2595946788787842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,8,128,0,1,fp8,fp8,0,2.244053363800049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,32,8,128,0,1,float16,fp8,0,3.05077330271403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,1,128,0,1,float16,float16,0,1.4401334126790364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,1,128,0,1,float16,fp8,0,1.4312586784362793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,1,128,0,1,fp8,fp8,0,1.1295359929402669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,2,128,0,1,float16,float16,0,1.4492640495300293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,2,128,0,1,fp8,fp8,0,1.1087466875712078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,2,128,0,1,float16,fp8,0,1.4244747161865234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,4,128,0,1,float16,float16,0,1.4313599268595378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,4,128,0,1,fp8,fp8,0,1.1125919818878174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,4,128,0,1,float16,fp8,0,1.4390986760457356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,32,128,0,1,float16,float16,0,0.7936960061391195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,8,128,0,1,float16,float16,0,1.441109339396159
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,8,128,0,1,fp8,fp8,0,1.13809068997701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,32,8,128,0,1,float16,fp8,0,1.4891146024068196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,32,128,0,1,float16,fp8,0,0.7953759829203287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,32,128,0,1,fp8,fp8,0,0.6469066540400187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,1,128,0,1,float16,float16,0,0.7511040369669596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,1,128,0,1,float16,fp8,0,0.767685333887736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,1,128,0,1,fp8,fp8,0,0.5855840047200521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,2,128,0,1,float16,float16,0,0.7594079971313477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,2,128,0,1,float16,fp8,0,0.7533013025919596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,4,128,0,1,float16,fp8,0,0.7598240375518799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,2,128,0,1,fp8,fp8,0,0.5872213443120321
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,4,128,0,1,float16,float16,0,0.7462133566538492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,4,128,0,1,fp8,fp8,0,0.5928959846496582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,8,128,0,1,float16,float16,0,0.7565813064575195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,8,128,0,1,float16,fp8,0,0.763802687327067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,32,8,128,0,1,fp8,fp8,0,0.599946657816569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,32,128,0,1,float16,float16,0,0.4286880095799764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,32,128,0,1,float16,fp8,0,0.4395413398742676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,32,128,0,1,fp8,fp8,0,0.34509865442911786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,1,128,0,1,float16,float16,0,0.41325334707895917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,1,128,0,1,float16,fp8,0,0.4172373215357463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,1,128,0,1,fp8,fp8,0,0.31169599294662476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,2,128,0,1,float16,float16,0,0.4156053463617961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,2,128,0,1,float16,fp8,0,0.41730133692423504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,2,128,0,1,fp8,fp8,0,0.31152000029881793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,4,128,0,1,float16,float16,0,0.4158773422241211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,4,128,0,1,float16,fp8,0,0.4172533353169759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,4,128,0,1,fp8,fp8,0,0.3116319974263509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,8,128,0,1,float16,float16,0,0.4161866505940755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,8,128,0,1,float16,fp8,0,0.42132266362508136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,32,8,128,0,1,fp8,fp8,0,0.3156426747639974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,32,128,0,1,float16,float16,0,0.2295786738395691
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,32,128,0,1,float16,fp8,0,0.23440533876419067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,32,128,0,1,fp8,fp8,0,0.19495999813079834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,1,128,0,1,float16,float16,0,0.21969066063563028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,1,128,0,1,float16,fp8,0,0.21978133916854858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,1,128,0,1,fp8,fp8,0,0.18037867546081543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,2,128,0,1,float16,float16,0,0.22158400217692056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,2,128,0,1,float16,fp8,0,0.22007467349370322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,2,128,0,1,fp8,fp8,0,0.18190399805704752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,4,128,0,1,float16,float16,0,0.2196106712023417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,4,128,0,1,float16,fp8,0,0.22064000368118286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,4,128,0,1,fp8,fp8,0,0.18010665973027548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,8,128,0,1,float16,float16,0,0.219760000705719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,8,128,0,1,float16,fp8,0,0.22230400641759238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,32,8,128,0,1,fp8,fp8,0,0.1821813384691874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,1,128,0,1,fp8,fp8,0,4.677994728088379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,1,128,0,1,float16,float16,0,6.230277379353841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,2,128,0,1,fp8,fp8,0,4.660922686258952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,1,128,0,1,float16,fp8,0,6.057701110839844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,2,128,0,1,float16,float16,0,6.150506973266602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,2,128,0,1,float16,fp8,0,6.15506108601888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,4,128,0,1,float16,float16,0,6.18507194519043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,4,128,0,1,float16,fp8,0,6.168917338053386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,4,128,0,1,fp8,fp8,0,4.784799893697103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,32,128,0,1,fp8,fp8,0,2.6322666803995767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,32,128,0,1,float16,float16,0,3.310474713643392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,32,128,0,1,float16,fp8,0,3.349232037862142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,8,128,0,1,fp8,fp8,0,4.811248143513997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,1,128,0,1,float16,float16,0,2.8699518839518228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,8,128,0,1,float16,float16,0,6.304138819376628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,32,8,128,0,1,float16,fp8,0,6.211631774902344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,1,128,0,1,fp8,fp8,0,2.3422773679097495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,1,128,0,1,float16,fp8,0,3.0033439000447593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,2,128,0,1,fp8,fp8,0,2.3275359471639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,2,128,0,1,float16,float16,0,3.00382391611735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,2,128,0,1,float16,fp8,0,2.9814399083455405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,4,128,0,1,float16,float16,0,3.041520118713379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,4,128,0,1,fp8,fp8,0,2.3426292737325034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,4,128,0,1,float16,fp8,0,2.9421173731486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,8,128,0,1,float16,float16,0,3.100341478983561
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,32,128,0,1,float16,float16,0,1.5850826899210613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,8,128,0,1,float16,fp8,0,3.1259946823120117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,32,128,0,1,fp8,fp8,0,1.3285120328267415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,32,8,128,0,1,fp8,fp8,0,2.386336008707682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,32,128,0,1,float16,fp8,0,1.605573336283366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,1,128,0,1,float16,float16,0,1.47051207224528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,1,128,0,1,fp8,fp8,0,1.19050137201945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,1,128,0,1,float16,fp8,0,1.4507145881652832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,2,128,0,1,float16,float16,0,1.4842880566914876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,2,128,0,1,float16,fp8,0,1.4406293233235676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,2,128,0,1,fp8,fp8,0,1.175493319829305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,4,128,0,1,float16,float16,0,1.4370026588439941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,4,128,0,1,fp8,fp8,0,1.1877333323160808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,4,128,0,1,float16,fp8,0,1.4701120058695476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,8,128,0,1,float16,float16,0,1.455125331878662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,8,128,0,1,fp8,fp8,0,1.2073012987772624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,32,128,0,1,float16,float16,0,0.8055093288421631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,32,8,128,0,1,float16,fp8,0,1.482133388519287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,32,128,0,1,float16,fp8,0,0.8199840386708578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,32,128,0,1,fp8,fp8,0,0.6870880126953125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,1,128,0,1,float16,float16,0,0.7500480016072592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,1,128,0,1,float16,fp8,0,0.7487040360768636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,1,128,0,1,fp8,fp8,0,0.6090506712595621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,2,128,0,1,float16,float16,0,0.7481066385904948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,2,128,0,1,float16,fp8,0,0.7468533515930176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,2,128,0,1,fp8,fp8,0,0.6125653187433878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,4,128,0,1,float16,float16,0,0.746298631032308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,4,128,0,1,float16,fp8,0,0.7529280185699463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,8,128,0,1,float16,float16,0,0.7469866275787354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,4,128,0,1,fp8,fp8,0,0.6236480077107748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,8,128,0,1,float16,fp8,0,0.7616159915924072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,32,128,0,1,float16,float16,0,0.42553067207336426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,32,8,128,0,1,fp8,fp8,0,0.6260639826456705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,32,128,0,1,float16,fp8,0,0.43372801939646405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,32,128,0,1,fp8,fp8,0,0.3696639935175578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,1,128,0,1,float16,float16,0,0.39828264713287354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,1,128,0,1,float16,fp8,0,0.40679999192555744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,1,128,0,1,fp8,fp8,0,0.3288266658782959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,4,128,0,1,float16,float16,0,0.4021279811859131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,2,128,0,1,float16,float16,0,0.401311993598938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,2,128,0,1,float16,fp8,0,0.4036266803741455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,2,128,0,1,fp8,fp8,0,0.33267199993133545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,4,128,0,1,float16,fp8,0,0.4048159917195638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,4,128,0,1,fp8,fp8,0,0.33876800537109375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,8,128,0,1,float16,float16,0,0.40321600437164307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,8,128,0,1,float16,fp8,0,0.41152532895406085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,32,128,0,1,float16,float16,0,0.2398293415705363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,32,8,128,0,1,fp8,fp8,0,0.34061865011850995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,32,128,0,1,float16,fp8,0,0.2457866668701172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,32,128,0,1,fp8,fp8,0,0.1971199909845988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,1,128,0,1,float16,float16,0,0.22610666354497275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,1,128,0,1,float16,fp8,0,0.2270560065905253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,1,128,0,1,fp8,fp8,0,0.17732266585032144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,2,128,0,1,float16,float16,0,0.2293813427289327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,2,128,0,1,float16,fp8,0,0.22915732860565186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,2,128,0,1,fp8,fp8,0,0.1775253415107727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,4,128,0,1,float16,float16,0,0.22919466098149618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,4,128,0,1,float16,fp8,0,0.22937599817911783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,4,128,0,1,fp8,fp8,0,0.1783519983291626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,8,128,0,1,float16,float16,0,0.23064533869425455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,8,128,0,1,float16,fp8,0,0.23265065749486288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,1,128,0,1,float16,fp8,0,0.12326400478680928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,32,8,128,0,1,fp8,fp8,0,0.1804800033569336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,32,128,0,1,float16,float16,0,0.12965866923332214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,32,128,0,1,float16,fp8,0,0.1332373321056366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,32,128,0,1,fp8,fp8,0,0.1164479951063792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,4,128,0,1,float16,float16,0,0.1232319970925649
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,1,128,0,1,float16,float16,0,0.12336533268292744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,1,128,0,1,fp8,fp8,0,0.1051626702149709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,2,128,0,1,float16,fp8,0,0.12427199880282085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,2,128,0,1,float16,float16,0,0.124208003282547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,2,128,0,1,fp8,fp8,0,0.10543466607729594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,4,128,0,1,float16,fp8,0,0.12377066413561504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,4,128,0,1,fp8,fp8,0,0.10712533195813496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,8,128,0,1,float16,float16,0,0.12411200006802876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,8,128,0,1,float16,fp8,0,0.12571733196576437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,32,8,128,0,1,fp8,fp8,0,0.11028266946474712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,1,128,0,1,float16,float16,0,3.7207838694254556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,1,128,0,1,fp8,fp8,0,3.0802186330159507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,1,128,0,1,float16,fp8,0,3.7816267013549805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,2,128,0,1,fp8,fp8,0,3.0941012700398765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,2,128,0,1,float16,float16,0,3.781034787495931
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,2,128,0,1,float16,fp8,0,3.8224852879842124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,4,128,0,1,float16,float16,0,3.883472124735514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,4,128,0,1,float16,fp8,0,3.8733174006144204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,4,128,0,1,fp8,fp8,0,3.1344480514526367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,32,128,0,1,fp8,fp8,0,1.7866454124450684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,32,128,0,1,float16,float16,0,2.0629067420959473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,1,128,0,1,float16,float16,0,1.8230400085449219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,32,128,0,1,float16,fp8,0,2.0503573417663574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,8,128,0,1,fp8,fp8,0,3.1641918818155923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,8,128,0,1,float16,float16,0,3.8889547983805337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,32,8,128,0,1,float16,fp8,0,3.910170555114746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,1,128,0,1,float16,fp8,0,1.8266773223876953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,1,128,0,1,fp8,fp8,0,1.5539679527282715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,2,128,0,1,float16,float16,0,1.8192480405171711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,2,128,0,1,float16,fp8,0,1.8231786092122395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,2,128,0,1,fp8,fp8,0,1.5605440139770508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,4,128,0,1,float16,float16,0,1.836191972096761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,4,128,0,1,float16,fp8,0,1.8580106099446614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,4,128,0,1,fp8,fp8,0,1.576837380727132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,8,128,0,1,float16,float16,0,1.8900532722473145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,32,128,0,1,float16,float16,0,1.0277173519134521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,8,128,0,1,fp8,fp8,0,1.6181492805480957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,32,8,128,0,1,float16,fp8,0,1.8791626294453938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,32,128,0,1,float16,fp8,0,1.0366986592610676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,32,128,0,1,fp8,fp8,0,0.9099626541137695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,1,128,0,1,float16,float16,0,0.9193653265635172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,1,128,0,1,float16,fp8,0,0.9241600036621094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,1,128,0,1,fp8,fp8,0,0.7910772959391276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,2,128,0,1,float16,float16,0,0.9168639977773031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,2,128,0,1,float16,fp8,0,0.9382773240407308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,2,128,0,1,fp8,fp8,0,0.796890656153361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,4,128,0,1,float16,float16,0,0.924079974492391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,4,128,0,1,float16,fp8,0,0.933685302734375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,4,128,0,1,fp8,fp8,0,0.8032320340474447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,8,128,0,1,float16,float16,0,0.9304426511128744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,8,128,0,1,float16,fp8,0,0.9413866996765137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,32,128,0,1,float16,float16,0,0.5308159987131754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,32,8,128,0,1,fp8,fp8,0,0.818506638209025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,32,128,0,1,float16,fp8,0,0.5379733244578043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,32,128,0,1,fp8,fp8,0,0.47490131855010986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,1,128,0,1,float16,float16,0,0.48286934693654376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,1,128,0,1,float16,fp8,0,0.49048535029093426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,1,128,0,1,fp8,fp8,0,0.4197973410288493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,2,128,0,1,float16,float16,0,0.48292267322540283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,2,128,0,1,float16,fp8,0,0.48708800474802655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,2,128,0,1,fp8,fp8,0,0.4188266595204671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,4,128,0,1,float16,float16,0,0.4883519808451335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,4,128,0,1,float16,fp8,0,0.48872534434000653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,32,128,0,1,float16,float16,0,0.283786674340566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,4,128,0,1,fp8,fp8,0,0.4216853380203247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,8,128,0,1,float16,float16,0,0.48748799165089923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,8,128,0,1,float16,fp8,0,0.49764267603556317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,32,8,128,0,1,fp8,fp8,0,0.42955199877421063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,32,128,0,1,float16,fp8,0,0.29179733991622925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,32,128,0,1,fp8,fp8,0,0.25085333983103436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,1,128,0,1,float16,float16,0,0.263264000415802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,1,128,0,1,float16,fp8,0,0.2673226594924927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,1,128,0,1,fp8,fp8,0,0.2147093415260315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,2,128,0,1,float16,float16,0,0.2650880018870036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,2,128,0,1,float16,fp8,0,0.26781866947809857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,2,128,0,1,fp8,fp8,0,0.21686400969823202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,4,128,0,1,float16,float16,0,0.2641493280728658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,4,128,0,1,float16,fp8,0,0.2664746642112732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,4,128,0,1,fp8,fp8,0,0.21811733643213907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,8,128,0,1,float16,float16,0,0.2670240004857381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,8,128,0,1,float16,fp8,0,0.2717919945716858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,32,8,128,0,1,fp8,fp8,0,0.22273600101470947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,32,128,0,1,float16,float16,0,0.15448533495267233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,32,128,0,1,float16,fp8,0,0.1590666671593984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,32,128,0,1,fp8,fp8,0,0.14014933506647745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,1,128,0,1,float16,float16,0,0.14097066720326742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,4,128,0,1,float16,float16,0,0.14147733648618063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,1,128,0,1,float16,fp8,0,0.1415733297665914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,1,128,0,1,fp8,fp8,0,0.12319466471672058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,2,128,0,1,float16,float16,0,0.14181333780288696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,2,128,0,1,float16,fp8,0,0.14129066467285156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,2,128,0,1,fp8,fp8,0,0.12273066242535909
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,4,128,0,1,float16,fp8,0,0.14338133732477823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,4,128,0,1,fp8,fp8,0,0.12353600064913432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,8,128,0,1,float16,float16,0,0.1429333289464315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,8,128,0,1,float16,fp8,0,0.14503467082977295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,32,8,128,0,1,fp8,fp8,0,0.12743999560674033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,32,128,0,1,float16,float16,0,0.09107200304667155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,32,128,0,1,float16,fp8,0,0.09285866220792134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,32,128,0,1,fp8,fp8,0,0.08502399921417236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,1,128,0,1,float16,float16,0,0.08685866991678874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,4,128,0,1,float16,float16,0,0.08673600355784099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,1,128,0,1,float16,fp8,0,0.08634666601816814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,1,128,0,1,fp8,fp8,0,0.07648000121116638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,8,128,0,1,float16,float16,0,0.08739733695983887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,2,128,0,1,float16,float16,0,0.08623466889063518
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,2,128,0,1,float16,fp8,0,0.08724266290664673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,2,128,0,1,fp8,fp8,0,0.07634666562080383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,4,128,0,1,float16,fp8,0,0.08693333466847737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,4,128,0,1,fp8,fp8,0,0.07780799766381581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,8,128,0,1,float16,fp8,0,0.08757866422335307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,32,8,128,0,1,fp8,fp8,0,0.0784800002972285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,1,128,0,1,float16,float16,0,3.8823038736979165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,1,128,0,1,fp8,fp8,0,3.4013760884602866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,1,128,0,1,float16,fp8,0,3.7016372680664062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,2,128,0,1,float16,float16,0,3.940575917561849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,2,128,0,1,float16,fp8,0,3.910090764363607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,2,128,0,1,fp8,fp8,0,3.478351910909017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,4,128,0,1,float16,float16,0,4.009231885274251
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,4,128,0,1,float16,fp8,0,3.9304641087849936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,32,128,0,1,float16,float16,0,2.2873546282450357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,4,128,0,1,fp8,fp8,0,3.6133387883504233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,32,128,0,1,float16,fp8,0,2.246335983276367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,32,128,0,1,fp8,fp8,0,1.9960427284240723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,8,128,0,1,float16,float16,0,4.175488154093425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,8,128,0,1,fp8,fp8,0,3.673914591471354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,1,128,0,1,float16,float16,0,1.8924533526102703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,32,8,128,0,1,float16,fp8,0,4.063530604044597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,1,128,0,1,float16,fp8,0,1.8756799697875977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,1,128,0,1,fp8,fp8,0,1.7331040700276692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,2,128,0,1,float16,float16,0,1.8892107009887695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,2,128,0,1,float16,fp8,0,1.9056800206502278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,2,128,0,1,fp8,fp8,0,1.7741440137227376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,4,128,0,1,float16,float16,0,1.9062026341756184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,4,128,0,1,float16,fp8,0,1.9134400685628254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,4,128,0,1,fp8,fp8,0,1.7824212710062664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,8,128,0,1,float16,float16,0,1.9549813270568848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,32,128,0,1,float16,float16,0,1.145248015721639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,8,128,0,1,float16,fp8,0,1.9334719975789387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,32,8,128,0,1,fp8,fp8,0,1.8426399230957031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,32,128,0,1,float16,fp8,0,1.121333360671997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,32,128,0,1,fp8,fp8,0,1.0045973459879558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,1,128,0,1,float16,float16,0,0.9562826951344808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,1,128,0,1,float16,fp8,0,0.956549326578776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,1,128,0,1,fp8,fp8,0,0.8597173690795898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,2,128,0,1,float16,float16,0,0.9597226778666178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,2,128,0,1,float16,fp8,0,0.9607946872711182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,2,128,0,1,fp8,fp8,0,0.8888373374938965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,4,128,0,1,float16,float16,0,0.963530699412028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,4,128,0,1,float16,fp8,0,0.9637173016866049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,4,128,0,1,fp8,fp8,0,0.9103573163350424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,8,128,0,1,float16,float16,0,0.9785652955373129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,8,128,0,1,float16,fp8,0,0.9760426680246989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,32,128,0,1,float16,float16,0,0.5804160038630167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,32,8,128,0,1,fp8,fp8,0,0.9268319606781006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,32,128,0,1,float16,fp8,0,0.5675466855367025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,32,128,0,1,fp8,fp8,0,0.5089333454767863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,1,128,0,1,float16,float16,0,0.4889119863510132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,1,128,0,1,float16,fp8,0,0.49187199274698895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,1,128,0,1,fp8,fp8,0,0.43615468343098956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,2,128,0,1,float16,float16,0,0.4920320113499959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,2,128,0,1,float16,fp8,0,0.49088001251220703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,2,128,0,1,fp8,fp8,0,0.45394134521484375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,4,128,0,1,float16,float16,0,0.49430398146311444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,4,128,0,1,float16,fp8,0,0.49779733022054035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,4,128,0,1,fp8,fp8,0,0.4514133135477702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,8,128,0,1,float16,float16,0,0.5026400089263916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,8,128,0,1,float16,fp8,0,0.5014933347702026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,32,128,0,1,float16,float16,0,0.3040693402290344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,32,8,128,0,1,fp8,fp8,0,0.47707732518513996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,32,128,0,1,float16,fp8,0,0.2981119950612386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,2,128,0,1,float16,float16,0,0.2593653400739034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,32,128,0,1,fp8,fp8,0,0.263264000415802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,2,128,0,1,fp8,fp8,0,0.23455466826756796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,1,128,0,1,float16,float16,0,0.25538132588068646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,1,128,0,1,float16,fp8,0,0.25623999039332074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,1,128,0,1,fp8,fp8,0,0.23326400915781656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,2,128,0,1,float16,fp8,0,0.25940799713134766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,4,128,0,1,float16,float16,0,0.25900799036026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,4,128,0,1,float16,fp8,0,0.2614506681760152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,4,128,0,1,fp8,fp8,0,0.23825067281723022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,8,128,0,1,float16,float16,0,0.2632159988085429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,8,128,0,1,float16,fp8,0,0.26280534267425537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,32,8,128,0,1,fp8,fp8,0,0.2441493272781372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,32,128,0,1,float16,float16,0,0.16589333613713583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,32,128,0,1,float16,fp8,0,0.1627840002377828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,32,128,0,1,fp8,fp8,0,0.12980799873669943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,1,128,0,1,float16,float16,0,0.13942399621009827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,1,128,0,1,float16,fp8,0,0.14057599504788718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,1,128,0,1,fp8,fp8,0,0.11187733213106792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,2,128,0,1,float16,float16,0,0.14098667105038962
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,2,128,0,1,float16,fp8,0,0.14129066467285156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,2,128,0,1,fp8,fp8,0,0.11351466178894043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,4,128,0,1,float16,float16,0,0.14334932963053384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,4,128,0,1,float16,fp8,0,0.14316800236701965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,4,128,0,1,fp8,fp8,0,0.11404266953468323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,8,128,0,1,float16,float16,0,0.14645333091417947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,8,128,0,1,float16,fp8,0,0.14565333724021912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,32,8,128,0,1,fp8,fp8,0,0.11801066994667053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,32,128,0,1,float16,float16,0,0.08581866820653279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,32,128,0,1,float16,fp8,0,0.08471999565760295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,32,128,0,1,fp8,fp8,0,0.07439466814200084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,1,128,0,1,float16,float16,0,0.07270933190981548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,1,128,0,1,float16,fp8,0,0.07275733351707458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,1,128,0,1,fp8,fp8,0,0.06117333471775055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,2,128,0,1,float16,float16,0,0.0730453332265218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,2,128,0,1,float16,fp8,0,0.07228266696135204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,2,128,0,1,fp8,fp8,0,0.06161599854628245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,4,128,0,1,float16,float16,0,0.07322133580843608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,4,128,0,1,float16,fp8,0,0.07363200187683105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,4,128,0,1,fp8,fp8,0,0.062309334675470986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,8,128,0,1,float16,float16,0,0.07529066503047943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,8,128,0,1,float16,fp8,0,0.0767626663049062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,32,8,128,0,1,fp8,fp8,0,0.06642666459083557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,32,128,0,1,float16,float16,0,0.04696000119050344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,32,128,0,1,float16,fp8,0,0.04665599763393402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,32,128,0,1,fp8,fp8,0,0.04256533086299896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,1,128,0,1,float16,float16,0,0.04201599955558777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,1,128,0,1,float16,fp8,0,0.04191466669241587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,4,128,0,1,float16,fp8,0,0.04297066728274027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,1,128,0,1,fp8,fp8,0,0.036687999963760376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,2,128,0,1,float16,float16,0,0.04196799794832865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,2,128,0,1,float16,fp8,0,0.04292266567548116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,2,128,0,1,fp8,fp8,0,0.036357333262761436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,4,128,0,1,float16,float16,0,0.04225599765777588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,4,128,0,1,fp8,fp8,0,0.03670933345953623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,8,128,0,1,float16,float16,0,0.042992000778516136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,8,128,0,1,float16,fp8,0,0.04324266811211904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,32,8,128,0,1,fp8,fp8,0,0.03853866706291834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,1,128,0,1,float16,float16,0,2.8944905598958335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,1,128,0,1,float16,fp8,0,2.8759679794311523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,1,128,0,1,fp8,fp8,0,2.756629308064779
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,2,128,0,1,float16,float16,0,2.972778638203939
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,2,128,0,1,float16,fp8,0,2.945717175801595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,2,128,0,1,fp8,fp8,0,2.815546671549479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,4,128,0,1,float16,float16,0,2.9896958669026694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,4,128,0,1,float16,fp8,0,3.043951988220215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,4,128,0,1,fp8,fp8,0,2.8909600575764975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,32,128,0,1,float16,float16,0,1.8533120155334473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,8,128,0,1,float16,float16,0,3.1373494466145835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,8,128,0,1,float16,fp8,0,3.0868212381998696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,32,128,0,1,float16,fp8,0,1.812943935394287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,32,8,128,0,1,fp8,fp8,0,3.026975949605306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,32,128,0,1,fp8,fp8,0,1.6776587168375652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,1,128,0,1,float16,float16,0,1.445797284444173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,1,128,0,1,float16,fp8,0,1.4464586575826008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,1,128,0,1,fp8,fp8,0,1.3873173395792644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,2,128,0,1,float16,float16,0,1.4739519755045574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,2,128,0,1,float16,fp8,0,1.4729760487874348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,2,128,0,1,fp8,fp8,0,1.4326027234395344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,4,128,0,1,float16,float16,0,1.4957760175069172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,4,128,0,1,float16,fp8,0,1.4935359954833984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,4,128,0,1,fp8,fp8,0,1.4446345965067546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,8,128,0,1,float16,float16,0,1.5217653910319011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,32,128,0,1,float16,float16,0,0.9276853402455648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,8,128,0,1,float16,fp8,0,1.5330133438110352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,32,128,0,1,float16,fp8,0,0.8935893376668295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,32,8,128,0,1,fp8,fp8,0,1.5177812576293945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,32,128,0,1,fp8,fp8,0,0.847055991490682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,1,128,0,1,float16,float16,0,0.7359306812286377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,1,128,0,1,float16,fp8,0,0.733125368754069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,1,128,0,1,fp8,fp8,0,0.6860746542612711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,2,128,0,1,float16,float16,0,0.7440213362375895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,2,128,0,1,float16,fp8,0,0.7475039958953857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,2,128,0,1,fp8,fp8,0,0.707482655843099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,4,128,0,1,float16,float16,0,0.7525280316670736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,4,128,0,1,float16,fp8,0,0.7508906523386637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,4,128,0,1,fp8,fp8,0,0.7244746685028076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,8,128,0,1,float16,float16,0,0.7592480182647705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,8,128,0,1,float16,fp8,0,0.7646559874216715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,32,128,0,1,float16,float16,0,0.47071464856465656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,32,8,128,0,1,fp8,fp8,0,0.7573386828104655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,32,128,0,1,float16,fp8,0,0.45951465765635174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,32,128,0,1,fp8,fp8,0,0.42637864748636883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,1,128,0,1,float16,float16,0,0.3775999943415324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,1,128,0,1,float16,fp8,0,0.3786986668904622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,1,128,0,1,fp8,fp8,0,0.3493653138478597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,2,128,0,1,float16,float16,0,0.3818826675415039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,2,128,0,1,float16,fp8,0,0.38062934080759686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,2,128,0,1,fp8,fp8,0,0.36090131600697833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,4,128,0,1,float16,float16,0,0.38448532422383624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,4,128,0,1,float16,fp8,0,0.38682134946187335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,4,128,0,1,fp8,fp8,0,0.3704213301340739
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,8,128,0,1,float16,float16,0,0.39185599486033124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,8,128,0,1,float16,fp8,0,0.39053865273793537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,32,8,128,0,1,fp8,fp8,0,0.38822933038075763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,32,128,0,1,float16,float16,0,0.24779200553894043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,32,128,0,1,float16,fp8,0,0.24268800020217896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,32,128,0,1,fp8,fp8,0,0.21906665960947672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,1,128,0,1,float16,float16,0,0.19934932390848795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,1,128,0,1,float16,fp8,0,0.20086934169133505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,1,128,0,1,fp8,fp8,0,0.1853813330332438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,2,128,0,1,float16,float16,0,0.20100265741348267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,2,128,0,1,float16,fp8,0,0.20039467016855875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,2,128,0,1,fp8,fp8,0,0.1904159982999166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,4,128,0,1,float16,float16,0,0.20365333557128906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,4,128,0,1,float16,fp8,0,0.2037173310915629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,4,128,0,1,fp8,fp8,0,0.1927679975827535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,8,128,0,1,float16,float16,0,0.20614933967590332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,8,128,0,1,float16,fp8,0,0.2072533369064331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,32,8,128,0,1,fp8,fp8,0,0.1986186703046163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,32,128,0,1,float16,float16,0,0.13461866974830627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,32,128,0,1,float16,fp8,0,0.13147200147310892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,32,128,0,1,fp8,fp8,0,0.11302399635314941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,1,128,0,1,float16,float16,0,0.10980266332626343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,1,128,0,1,float16,fp8,0,0.1088693340619405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,4,128,0,1,float16,fp8,0,0.1106826663017273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,4,128,0,1,fp8,fp8,0,0.09864532947540283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,1,128,0,1,fp8,fp8,0,0.09377599755922954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,2,128,0,1,float16,float16,0,0.11013333002726237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,2,128,0,1,float16,fp8,0,0.10993066430091858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,2,128,0,1,fp8,fp8,0,0.0953546663125356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,4,128,0,1,float16,float16,0,0.11170132954915364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,8,128,0,1,float16,float16,0,0.11427733302116394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,8,128,0,1,float16,fp8,0,0.11362666885058086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,1,128,0,1,float16,fp8,0,0.05860800047715505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,32,8,128,0,1,fp8,fp8,0,0.09983999530474345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,32,128,0,1,float16,float16,0,0.07329600056012471
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,32,128,0,1,float16,fp8,0,0.07145600020885468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,32,128,0,1,fp8,fp8,0,0.06445333361625671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,1,128,0,1,float16,float16,0,0.059205333391825356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,1,128,0,1,fp8,fp8,0,0.051557332277297974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,2,128,0,1,float16,float16,0,0.058975999553998314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,2,128,0,1,float16,fp8,0,0.058592001597086586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,2,128,0,1,fp8,fp8,0,0.05259733398755392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,4,128,0,1,float16,float16,0,0.06124266485373179
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,4,128,0,1,float16,fp8,0,0.0606826643149058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,4,128,0,1,fp8,fp8,0,0.05352533360322317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,32,128,0,1,fp8,fp8,0,0.037946666280428566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,8,128,0,1,float16,float16,0,0.06181333462397257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,8,128,0,1,float16,fp8,0,0.06223999957243601
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,32,8,128,0,1,fp8,fp8,0,0.057674666245778404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,32,128,0,1,float16,float16,0,0.04043200115362803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,32,128,0,1,float16,fp8,0,0.03907199949026108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,1,128,0,1,float16,float16,0,0.03586133321126302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,1,128,0,1,float16,fp8,0,0.035802667339642845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,1,128,0,1,fp8,fp8,0,0.03136533250411352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,2,128,0,1,float16,float16,0,0.03571200122435888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,2,128,0,1,float16,fp8,0,0.03596800069014231
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,2,128,0,1,fp8,fp8,0,0.031888000667095184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,4,128,0,1,float16,float16,0,0.036117332677046456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,4,128,0,1,float16,fp8,0,0.036101333796978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,4,128,0,1,fp8,fp8,0,0.03268266717592875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,8,128,0,1,float16,float16,0,0.03616533428430557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,8,128,0,1,float16,fp8,0,0.03619199991226196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,32,8,128,0,1,fp8,fp8,0,0.03321066747109095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,32,128,0,1,float16,float16,0,0.02752533306678136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,32,128,0,1,float16,fp8,0,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,32,128,0,1,fp8,fp8,0,0.02607999990383784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,1,128,0,1,float16,float16,0,0.025733334322770435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,1,128,0,1,float16,fp8,0,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,1,128,0,1,fp8,fp8,0,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,2,128,0,1,float16,float16,0,0.02535466601451238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,2,128,0,1,float16,fp8,0,0.025674665967623394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,2,128,0,1,fp8,fp8,0,0.02293333411216736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,4,128,0,1,float16,float16,0,0.02624000112215678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,4,128,0,1,float16,fp8,0,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,4,128,0,1,fp8,fp8,0,0.023887999355793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,8,128,0,1,float16,float16,0,0.025637333591779072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,8,128,0,1,float16,fp8,0,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,32,8,128,0,1,fp8,fp8,0,0.024197332561016083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,1,128,0,1,float16,float16,0,1.2403199672698975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,1,128,0,1,fp8,fp8,0,1.096127986907959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,1,128,0,1,float16,fp8,0,1.2357973257700603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,2,128,0,1,float16,float16,0,1.251530647277832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,2,128,0,1,float16,fp8,0,1.2541226545969646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,4,128,0,1,float16,float16,0,1.2694239616394043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,2,128,0,1,fp8,fp8,0,1.1352799733479817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,4,128,0,1,float16,fp8,0,1.2910292943318684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,4,128,0,1,fp8,fp8,0,1.1718826293945312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,8,128,0,1,float16,float16,0,1.30348801612854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,32,128,0,1,float16,float16,0,0.822165330251058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,8,128,0,1,fp8,fp8,0,1.2303573290507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,32,128,0,1,float16,fp8,0,0.7896052996317545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,32,8,128,0,1,float16,fp8,0,1.307535966237386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,32,128,0,1,fp8,fp8,0,0.7089973290761312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,1,128,0,1,float16,float16,0,0.6265920003255209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,1,128,0,1,float16,fp8,0,0.6287893454233805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,1,128,0,1,fp8,fp8,0,0.5468159914016724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,2,128,0,1,float16,float16,0,0.6330346663792928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,2,128,0,1,float16,fp8,0,0.638538678487142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,2,128,0,1,fp8,fp8,0,0.572218656539917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,4,128,0,1,float16,float16,0,0.6405066649119059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,4,128,0,1,float16,fp8,0,0.6434773206710815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,4,128,0,1,fp8,fp8,0,0.5919680198033651
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,8,128,0,1,float16,float16,0,0.6582186619440714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,8,128,0,1,float16,fp8,0,0.6519840161005656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,32,8,128,0,1,fp8,fp8,0,0.6223040024439493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,32,128,0,1,float16,float16,0,0.4169653256734212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,32,128,0,1,float16,fp8,0,0.4054400126139323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,32,128,0,1,fp8,fp8,0,0.35983999570210773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,1,128,0,1,float16,float16,0,0.3236959973971049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,1,128,0,1,float16,fp8,0,0.32285867134730023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,1,128,0,1,fp8,fp8,0,0.2829280098279317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,2,128,0,1,float16,float16,0,0.3274506727854411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,2,128,0,1,float16,fp8,0,0.3256693283716838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,2,128,0,1,fp8,fp8,0,0.29731200138727826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,8,128,0,1,float16,fp8,0,0.33739733695983887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,4,128,0,1,float16,float16,0,0.33026667435963947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,4,128,0,1,float16,fp8,0,0.3309173385302226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,4,128,0,1,fp8,fp8,0,0.29894934097925824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,8,128,0,1,float16,float16,0,0.3365226586659749
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,32,8,128,0,1,fp8,fp8,0,0.32235199213027954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,32,128,0,1,float16,float16,0,0.22021865844726562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,32,128,0,1,float16,fp8,0,0.21358933051427206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,32,128,0,1,fp8,fp8,0,0.1877066691716512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,1,128,0,1,float16,float16,0,0.17286932468414307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,1,128,0,1,float16,fp8,0,0.1722453236579895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,1,128,0,1,fp8,fp8,0,0.15123200416564941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,2,128,0,1,float16,float16,0,0.171941339969635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,2,128,0,1,float16,fp8,0,0.1742080052693685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,2,128,0,1,fp8,fp8,0,0.15408000349998474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,4,128,0,1,float16,float16,0,0.1746506690979004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,4,128,0,1,float16,fp8,0,0.17540266116460165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,4,128,0,1,fp8,fp8,0,0.1579093337059021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,8,128,0,1,float16,float16,0,0.17922665675481161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,8,128,0,1,float16,fp8,0,0.17989333470662436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,32,8,128,0,1,fp8,fp8,0,0.1653279960155487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,32,128,0,1,float16,float16,0,0.12104533116022746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,32,128,0,1,float16,fp8,0,0.11788266897201538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,32,128,0,1,fp8,fp8,0,0.10549333691596985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,1,128,0,1,float16,float16,0,0.09429867068926494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,1,128,0,1,float16,fp8,0,0.09471999605496724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,1,128,0,1,fp8,fp8,0,0.08339732885360718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,2,128,0,1,float16,float16,0,0.0946560005346934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,2,128,0,1,float16,fp8,0,0.09557867050170898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,2,128,0,1,fp8,fp8,0,0.08501866459846497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,4,128,0,1,float16,float16,0,0.09674132863680522
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,4,128,0,1,float16,fp8,0,0.09548266728719075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,4,128,0,1,fp8,fp8,0,0.08719467123349507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,8,128,0,1,float16,float16,0,0.09902933239936829
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,8,128,0,1,float16,fp8,0,0.09981866677602132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,32,8,128,0,1,fp8,fp8,0,0.08964799841245015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,32,128,0,1,float16,float16,0,0.07070399820804596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,1,128,0,1,fp8,fp8,0,0.046757335464159645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,32,128,0,1,float16,fp8,0,0.06902933120727539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,32,128,0,1,fp8,fp8,0,0.06070933242638906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,1,128,0,1,float16,float16,0,0.05332266787687937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,1,128,0,1,float16,fp8,0,0.05407466491063436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,2,128,0,1,float16,float16,0,0.05449600021044413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,2,128,0,1,float16,fp8,0,0.053898667295773826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,8,128,0,1,float16,float16,0,0.057578667998313904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,2,128,0,1,fp8,fp8,0,0.04833599925041199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,4,128,0,1,float16,float16,0,0.05400000015894572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,4,128,0,1,float16,fp8,0,0.05574933191140493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,4,128,0,1,fp8,fp8,0,0.04840533435344696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,8,128,0,1,float16,fp8,0,0.05729066828886668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,32,8,128,0,1,fp8,fp8,0,0.053173333406448364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,32,128,0,1,float16,float16,0,0.038032000263532005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,32,128,0,1,float16,fp8,0,0.03703466554482778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,32,128,0,1,fp8,fp8,0,0.0354720006386439
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,1,128,0,1,float16,float16,0,0.03328000009059906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,1,128,0,1,float16,fp8,0,0.033413333197434746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,1,128,0,1,fp8,fp8,0,0.02958933264017105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,2,128,0,1,float16,float16,0,0.03378133227427801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,2,128,0,1,float16,fp8,0,0.03380800038576126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,2,128,0,1,fp8,fp8,0,0.030042665700117748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,4,128,0,1,float16,float16,0,0.033258666594823204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,4,128,0,1,float16,fp8,0,0.03334933271010717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,4,128,0,1,fp8,fp8,0,0.029717333614826202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,8,128,0,1,float16,float16,0,0.033887999753157295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,8,128,0,1,float16,fp8,0,0.03420799970626831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,32,8,128,0,1,fp8,fp8,0,0.03182933231194814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,32,128,0,1,float16,float16,0,0.024714666108290356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,1,128,0,1,fp8,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,32,128,0,1,float16,fp8,0,0.02443733314673106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,32,128,0,1,fp8,fp8,0,0.022661333282788593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,1,128,0,1,float16,float16,0,0.022143999735514324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,4,128,0,1,float16,fp8,0,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,1,128,0,1,float16,fp8,0,0.023157333334287006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,2,128,0,1,float16,float16,0,0.0223786657055219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,2,128,0,1,float16,fp8,0,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,2,128,0,1,fp8,fp8,0,0.020869334538777668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,4,128,0,1,float16,float16,0,0.023872000475724537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,4,128,0,1,fp8,fp8,0,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,8,128,0,1,float16,float16,0,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,8,128,0,1,float16,fp8,0,0.023247999449570973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,32,8,128,0,1,fp8,fp8,0,0.022272000710169475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,32,128,0,1,float16,float16,0,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,32,128,0,1,float16,fp8,0,0.02075200031201045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,32,128,0,1,fp8,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,2,128,0,1,fp8,fp8,0,0.01854933301607768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,1,128,0,1,float16,float16,0,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,1,128,0,1,float16,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,1,128,0,1,fp8,fp8,0,0.018437333405017853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,8,128,0,1,float16,float16,0,0.020448000480731327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,2,128,0,1,float16,float16,0,0.019733333339293797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,2,128,0,1,float16,fp8,0,0.019498666127522785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,4,128,0,1,float16,float16,0,0.01959466685851415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,4,128,0,1,float16,fp8,0,0.02004266654451688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,4,128,0,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,8,128,0,1,float16,fp8,0,0.020330666253964107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,32,8,128,0,1,fp8,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,1,128,0,1,float16,float16,0,0.575823982556661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,1,128,0,1,float16,fp8,0,0.574842651685079
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,1,128,0,1,fp8,fp8,0,0.5443360010782877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,2,128,0,1,float16,float16,0,0.5832373301188151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,2,128,0,1,float16,fp8,0,0.5851626793543497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,2,128,0,1,fp8,fp8,0,0.5695893367131551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,4,128,0,1,float16,float16,0,0.5880906581878662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,4,128,0,1,float16,fp8,0,0.592960000038147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,4,128,0,1,fp8,fp8,0,0.5953813393910726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,8,128,0,1,fp8,fp8,0,0.6196800072987875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,8,128,0,1,float16,float16,0,0.6058346827824911
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,32,8,128,0,1,float16,fp8,0,0.6112159887949625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,32,128,0,1,float16,float16,0,0.39878400166829425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,32,128,0,1,float16,fp8,0,0.38469334443410236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,32,128,0,1,fp8,fp8,0,0.3590186834335327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,1,128,0,1,float16,float16,0,0.2980746626853943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,1,128,0,1,float16,fp8,0,0.2978559931119283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,1,128,0,1,fp8,fp8,0,0.28226667642593384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,4,128,0,1,float16,fp8,0,0.30584534009297687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,2,128,0,1,float16,float16,0,0.3001386721928914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,2,128,0,1,float16,fp8,0,0.2999573349952698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,2,128,0,1,fp8,fp8,0,0.2956906755765279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,4,128,0,1,float16,float16,0,0.3049013415972392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,4,128,0,1,fp8,fp8,0,0.29950400193532306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,8,128,0,1,float16,float16,0,0.3137493332227071
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,8,128,0,1,float16,fp8,0,0.31246399879455566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,32,8,128,0,1,fp8,fp8,0,0.32042133808135986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,32,128,0,1,float16,float16,0,0.2160373330116272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,32,128,0,1,float16,fp8,0,0.20787199338277182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,32,128,0,1,fp8,fp8,0,0.18428800503412882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,1,128,0,1,float16,float16,0,0.15895467003186545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,1,128,0,1,float16,fp8,0,0.15916267037391663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,1,128,0,1,fp8,fp8,0,0.15186666448911032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,2,128,0,1,float16,float16,0,0.16107199589411417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,2,128,0,1,float16,fp8,0,0.15947733322779337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,2,128,0,1,fp8,fp8,0,0.15537066260973612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,4,128,0,1,float16,float16,0,0.16364266475041708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,4,128,0,1,float16,fp8,0,0.1637226641178131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,4,128,0,1,fp8,fp8,0,0.15892799695332846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,8,128,0,1,float16,float16,0,0.1689280072848002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,8,128,0,1,float16,fp8,0,0.16810667514801025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,1,128,0,1,float16,float16,0,0.0876693328221639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,32,8,128,0,1,fp8,fp8,0,0.1646773318449656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,32,128,0,1,float16,float16,0,0.12171733379364014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,32,128,0,1,float16,fp8,0,0.1167626678943634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,32,128,0,1,fp8,fp8,0,0.10245866576830547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,1,128,0,1,float16,fp8,0,0.08892800410588582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,1,128,0,1,fp8,fp8,0,0.08496000369389851
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,2,128,0,1,float16,float16,0,0.08933867017428081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,2,128,0,1,float16,fp8,0,0.09005866448084514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,2,128,0,1,fp8,fp8,0,0.0842133363087972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,4,128,0,1,float16,float16,0,0.09113599856694539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,4,128,0,1,float16,fp8,0,0.09115733702977498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,4,128,0,1,fp8,fp8,0,0.08675199747085571
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,8,128,0,1,float16,float16,0,0.09344533085823059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,8,128,0,1,float16,fp8,0,0.09327999750773112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,32,8,128,0,1,fp8,fp8,0,0.09080533186594646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,32,128,0,1,float16,float16,0,0.07047999898592631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,32,128,0,1,float16,fp8,0,0.06723733246326447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,32,128,0,1,fp8,fp8,0,0.06005333364009857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,1,128,0,1,float16,float16,0,0.05156266689300537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,1,128,0,1,float16,fp8,0,0.05208000044027964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,1,128,0,1,fp8,fp8,0,0.0465280016263326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,2,128,0,1,float16,float16,0,0.05179733534653982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,4,128,0,1,fp8,fp8,0,0.04937600096066793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,2,128,0,1,float16,fp8,0,0.0521919975678126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,2,128,0,1,fp8,fp8,0,0.04749333361784617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,4,128,0,1,float16,float16,0,0.05235200126965841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,4,128,0,1,float16,fp8,0,0.05258666475613912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,8,128,0,1,float16,float16,0,0.053727999329566956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,8,128,0,1,float16,fp8,0,0.053488001227378845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,32,8,128,0,1,fp8,fp8,0,0.05213333169619242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,32,128,0,1,float16,float16,0,0.03697066754102707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,32,128,0,1,float16,fp8,0,0.03640533238649368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,32,128,0,1,fp8,fp8,0,0.03515200068553289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,1,128,0,1,float16,float16,0,0.03260799994071325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,1,128,0,1,float16,fp8,0,0.03224000086386999
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,1,128,0,1,fp8,fp8,0,0.02908266584078471
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,2,128,0,1,float16,float16,0,0.032325332363446556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,2,128,0,1,float16,fp8,0,0.03268799930810928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,2,128,0,1,fp8,fp8,0,0.030320001145203907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,4,128,0,1,float16,float16,0,0.03350399931271871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,4,128,0,1,float16,fp8,0,0.03292799989382426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,4,128,0,1,fp8,fp8,0,0.030069333811601002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,8,128,0,1,float16,float16,0,0.033029332756996155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,8,128,0,1,float16,fp8,0,0.03299733251333237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,32,8,128,0,1,fp8,fp8,0,0.030752000709374745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,32,128,0,1,float16,float16,0,0.023647998770078022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,32,128,0,1,float16,fp8,0,0.023978665471076965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,32,128,0,1,fp8,fp8,0,0.022367998957633972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,4,128,0,1,float16,float16,0,0.02293866624434789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,1,128,0,1,float16,float16,0,0.021509334444999695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,1,128,0,1,float16,fp8,0,0.022143999735514324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,1,128,0,1,fp8,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,2,128,0,1,float16,float16,0,0.02195200075705846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,2,128,0,1,float16,fp8,0,0.021754667162895203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,2,128,0,1,fp8,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,4,128,0,1,float16,fp8,0,0.022069332500298817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,4,128,0,1,fp8,fp8,0,0.022384000321229298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,8,128,0,1,float16,float16,0,0.02183466653029124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,8,128,0,1,float16,fp8,0,0.022869333624839783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,32,8,128,0,1,fp8,fp8,0,0.02223466585079829
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,32,128,0,1,float16,float16,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,32,128,0,1,float16,fp8,0,0.018437333405017853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,32,128,0,1,fp8,fp8,0,0.019498666127522785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,1,128,0,1,float16,float16,0,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,1,128,0,1,float16,fp8,0,0.0183999997874101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,1,128,0,1,fp8,fp8,0,0.018053332964579265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,2,128,0,1,float16,float16,0,0.018191999445358913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,2,128,0,1,float16,fp8,0,0.017477333545684814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,2,128,0,1,fp8,fp8,0,0.01807466646035512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,4,128,0,1,float16,float16,0,0.01749333366751671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,4,128,0,1,float16,fp8,0,0.018405333161354065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,4,128,0,1,fp8,fp8,0,0.018805333723624546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,8,128,0,1,float16,float16,0,0.018618666877349217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,8,128,0,1,float16,fp8,0,0.01798933371901512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,32,8,128,0,1,fp8,fp8,0,0.019466667125622433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,32,128,0,1,float16,float16,0,0.016597333053747814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,32,128,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,32,128,0,1,fp8,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,1,128,0,1,float16,float16,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,1,128,0,1,float16,fp8,0,0.01646399994691213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,1,128,0,1,fp8,fp8,0,0.01782400036851565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,2,128,0,1,float16,float16,0,0.016629333297411602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,2,128,0,1,float16,fp8,0,0.017429333180189133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,2,128,0,1,fp8,fp8,0,0.017386666188637417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,4,128,0,1,float16,float16,0,0.01624533285697301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,4,128,0,1,float16,fp8,0,0.016714667280515034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,4,128,0,1,fp8,fp8,0,0.018160000443458557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,8,128,0,1,float16,float16,0,0.016442666451136272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,8,128,0,1,float16,fp8,0,0.017525333911180496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,32,8,128,0,1,fp8,fp8,0,0.01752000053723653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,1,128,0,1,float16,fp8,0,0.34466667970021564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,1,128,0,1,float16,float16,0,0.34510401884714764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,1,128,0,1,fp8,fp8,0,0.39105598131815594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,4,128,0,1,float16,fp8,0,0.35370667775472003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,2,128,0,1,float16,float16,0,0.34834667046864826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,2,128,0,1,float16,fp8,0,0.34994133313496906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,2,128,0,1,fp8,fp8,0,0.4008479913075765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,4,128,0,1,float16,float16,0,0.3531893491744995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,4,128,0,1,fp8,fp8,0,0.4095360040664673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,8,128,0,1,float16,float16,0,0.36179200808207196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,8,128,0,1,float16,fp8,0,0.3603146473566691
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,32,8,128,0,1,fp8,fp8,0,0.43036266167958576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,32,128,0,1,float16,float16,0,0.23465599616368613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,32,128,0,1,float16,fp8,0,0.22750399510065714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,32,128,0,1,fp8,fp8,0,0.24219733476638794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,1,128,0,1,float16,float16,0,0.18061333894729614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,1,128,0,1,float16,fp8,0,0.18036266167958578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,1,128,0,1,fp8,fp8,0,0.20676799615224203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,2,128,0,1,float16,float16,0,0.18131200472513834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,2,128,0,1,float16,fp8,0,0.1827413241068522
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,2,128,0,1,fp8,fp8,0,0.21185600757598877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,4,128,0,1,float16,float16,0,0.18544532855351767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,4,128,0,1,float16,fp8,0,0.18406933546066284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,4,128,0,1,fp8,fp8,0,0.21443732579549155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,8,128,0,1,float16,float16,0,0.19008533159891763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,8,128,0,1,float16,fp8,0,0.18818666537602743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,32,8,128,0,1,fp8,fp8,0,0.21969600518544516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,32,128,0,1,float16,float16,0,0.1277653376261393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,32,128,0,1,float16,fp8,0,0.12325867017110188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,32,128,0,1,fp8,fp8,0,0.13006400068600973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,1,128,0,1,float16,float16,0,0.09844266374905904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,1,128,0,1,float16,fp8,0,0.09777067104975383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,1,128,0,1,fp8,fp8,0,0.11313066879908244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,2,128,0,1,float16,float16,0,0.09895466764767964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,2,128,0,1,float16,fp8,0,0.09829333424568176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,2,128,0,1,fp8,fp8,0,0.1150933305422465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,4,128,0,1,float16,float16,0,0.10090667009353638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,4,128,0,1,float16,fp8,0,0.10002666711807251
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,4,128,0,1,fp8,fp8,0,0.116976002852122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,8,128,0,1,float16,float16,0,0.10385599732398987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,8,128,0,1,float16,fp8,0,0.10246400038401286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,32,8,128,0,1,fp8,fp8,0,0.11876799662907918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,32,128,0,1,float16,float16,0,0.0703413337469101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,32,128,0,1,float16,fp8,0,0.07029333213965099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,32,128,0,1,fp8,fp8,0,0.0745066652695338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,1,128,0,1,float16,float16,0,0.054560000697771706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,1,128,0,1,float16,fp8,0,0.05527999997138977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,1,128,0,1,fp8,fp8,0,0.06201600035031637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,2,128,0,1,float16,float16,0,0.05508799850940704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,2,128,0,1,float16,fp8,0,0.05555733541647593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,2,128,0,1,fp8,fp8,0,0.06266133487224579
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,4,128,0,1,float16,float16,0,0.05630933245023092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,4,128,0,1,float16,fp8,0,0.0565280020236969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,4,128,0,1,fp8,fp8,0,0.0640533318122228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,8,128,0,1,float16,float16,0,0.05778666834036509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,8,128,0,1,float16,fp8,0,0.057061334451039634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,32,8,128,0,1,fp8,fp8,0,0.06679466863473256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,32,128,0,1,float16,float16,0,0.03861333429813385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,32,128,0,1,float16,fp8,0,0.03686933219432831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,32,128,0,1,fp8,fp8,0,0.04338666796684265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,1,128,0,1,float16,float16,0,0.034154665966828666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,1,128,0,1,float16,fp8,0,0.03357866654793421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,1,128,0,1,fp8,fp8,0,0.03718400001525879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,2,128,0,1,float16,float16,0,0.03325333446264267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,2,128,0,1,float16,fp8,0,0.033376000821590424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,2,128,0,1,fp8,fp8,0,0.03728000074625015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,4,128,0,1,float16,float16,0,0.03373866776625315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,4,128,0,1,float16,fp8,0,0.03417066733042399
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,4,128,0,1,fp8,fp8,0,0.03833066672086716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,8,128,0,1,float16,float16,0,0.034304000437259674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,8,128,0,1,float16,fp8,0,0.03484266748030981
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,32,8,128,0,1,fp8,fp8,0,0.03815466662247976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,32,128,0,1,float16,float16,0,0.02363733450571696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,32,128,0,1,float16,fp8,0,0.024725332856178284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,32,128,0,1,fp8,fp8,0,0.025946666797002155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,1,128,0,1,float16,float16,0,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,1,128,0,1,float16,fp8,0,0.023647998770078022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,1,128,0,1,fp8,fp8,0,0.024469333390394848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,2,128,0,1,float16,float16,0,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,2,128,0,1,float16,fp8,0,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,2,128,0,1,fp8,fp8,0,0.02386133372783661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,4,128,0,1,float16,float16,0,0.02386133372783661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,4,128,0,1,float16,fp8,0,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,4,128,0,1,fp8,fp8,0,0.025839999318122864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,8,128,0,1,float16,float16,0,0.023887999355793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,8,128,0,1,float16,fp8,0,0.023631999890009563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,32,8,128,0,1,fp8,fp8,0,0.026330667237440746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,32,128,0,1,float16,float16,0,0.01860800012946129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,32,128,0,1,float16,fp8,0,0.01811733345190684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,32,128,0,1,fp8,fp8,0,0.02011200040578842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,1,128,0,1,float16,float16,0,0.01746133342385292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,1,128,0,1,float16,fp8,0,0.01833600054184596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,1,128,0,1,fp8,fp8,0,0.01855466639002164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,2,128,0,1,float16,float16,0,0.01762666677435239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,2,128,0,1,float16,fp8,0,0.01814933369557063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,2,128,0,1,fp8,fp8,0,0.018277333428462345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,4,128,0,1,float16,float16,0,0.017759999881188076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,4,128,0,1,float16,fp8,0,0.01836799954374631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,4,128,0,1,fp8,fp8,0,0.01876266673207283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,8,128,0,1,float16,float16,0,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,8,128,0,1,float16,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,32,8,128,0,1,fp8,fp8,0,0.01860800012946129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,32,128,0,1,float16,float16,0,0.016117333124081295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,32,128,0,1,float16,fp8,0,0.016384000579516094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,32,128,0,1,fp8,fp8,0,0.017717332889636356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,1,128,0,1,float16,float16,0,0.015717333803574245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,1,128,0,1,float16,fp8,0,0.015850666910409927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,1,128,0,1,fp8,fp8,0,0.017477333545684814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,2,128,0,1,float16,float16,0,0.01634666696190834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,2,128,0,1,float16,fp8,0,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,2,128,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,4,128,0,1,float16,float16,0,0.015637333194414776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,4,128,0,1,float16,fp8,0,0.016197333733240765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,4,128,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,8,128,0,1,float16,float16,0,0.015909332782030106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,8,128,0,1,float16,fp8,0,0.016762666404247284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,32,8,128,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,32,128,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,32,128,0,1,float16,fp8,0,0.015610666324694952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,32,128,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,1,128,0,1,float16,float16,0,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,1,128,0,1,float16,fp8,0,0.015599999576807022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,1,128,0,1,fp8,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,2,128,0,1,float16,float16,0,0.01470400020480156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,2,128,0,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,2,128,0,1,fp8,fp8,0,0.016165333489576977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,4,128,0,1,float16,float16,0,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,4,128,0,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,4,128,0,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,8,128,0,1,float16,float16,0,0.01573333392540614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,8,128,0,1,float16,fp8,0,0.015370666980743408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,32,8,128,0,1,fp8,fp8,0,0.016250666230916977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,1,128,0,1,float16,float16,0,0.23959465821584067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,1,128,0,1,float16,fp8,0,0.2411466638247172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,2,128,0,1,float16,fp8,0,0.24611733357111612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,1,128,0,1,fp8,fp8,0,0.3158026734987895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,2,128,0,1,float16,float16,0,0.24373332659403482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,2,128,0,1,fp8,fp8,0,0.321941335995992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,4,128,0,1,float16,float16,0,0.2453493277231852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,4,128,0,1,float16,fp8,0,0.24475200970967612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,4,128,0,1,fp8,fp8,0,0.32338666915893555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,8,128,0,1,float16,float16,0,0.25362666447957355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,8,128,0,1,float16,fp8,0,0.25200533866882324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,32,128,0,1,float16,float16,0,0.15846932927767435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,32,8,128,0,1,fp8,fp8,0,0.32917332649230957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,32,128,0,1,float16,fp8,0,0.15563199917475382
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,32,128,0,1,fp8,fp8,0,0.1839039921760559
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,1,128,0,1,float16,float16,0,0.12821333607037863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,4,128,0,1,float16,fp8,0,0.1302773356437683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,1,128,0,1,float16,fp8,0,0.12917332847913107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,1,128,0,1,fp8,fp8,0,0.16800532738367716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,2,128,0,1,float16,float16,0,0.129120002190272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,2,128,0,1,float16,fp8,0,0.13014933466911316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,8,128,0,1,float16,fp8,0,0.13462932904561362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,2,128,0,1,fp8,fp8,0,0.16910399993260702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,4,128,0,1,float16,float16,0,0.13025066256523132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,4,128,0,1,fp8,fp8,0,0.17150932550430298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,8,128,0,1,float16,float16,0,0.13435199856758118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,32,8,128,0,1,fp8,fp8,0,0.17349867026011148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,32,128,0,1,float16,float16,0,0.08738133311271667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,32,128,0,1,float16,fp8,0,0.08690667152404785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,32,128,0,1,fp8,fp8,0,0.10231467088063557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,1,128,0,1,float16,float16,0,0.06887466708819072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,1,128,0,1,float16,fp8,0,0.07020266850789388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,1,128,0,1,fp8,fp8,0,0.090421328941981
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,2,128,0,1,float16,float16,0,0.07010133564472198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,2,128,0,1,float16,fp8,0,0.07035199801127116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,2,128,0,1,fp8,fp8,0,0.09007466832796733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,4,128,0,1,float16,float16,0,0.07144533097743988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,4,128,0,1,float16,fp8,0,0.0722453345855077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,4,128,0,1,fp8,fp8,0,0.09143466750780742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,8,128,0,1,float16,float16,0,0.0727040022611618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,8,128,0,1,float16,fp8,0,0.07291199763615926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,32,8,128,0,1,fp8,fp8,0,0.09474133451779683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,32,128,0,1,float16,float16,0,0.044677332043647766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,32,128,0,1,float16,fp8,0,0.044293334086736046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,32,128,0,1,fp8,fp8,0,0.0572320024172465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,1,128,0,1,float16,float16,0,0.041290665666262306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,1,128,0,1,float16,fp8,0,0.04088533421357473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,1,128,0,1,fp8,fp8,0,0.05454400181770325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,2,128,0,1,float16,float16,0,0.041002665956815086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,2,128,0,1,float16,fp8,0,0.04102933406829834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,2,128,0,1,fp8,fp8,0,0.05113600194454193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,4,128,0,1,float16,float16,0,0.04072533299525579
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,4,128,0,1,float16,fp8,0,0.04142399877309799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,4,128,0,1,fp8,fp8,0,0.05259199937184652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,8,128,0,1,float16,float16,0,0.04119999955097834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,8,128,0,1,float16,fp8,0,0.04204266766707102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,32,8,128,0,1,fp8,fp8,0,0.05333866675694784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,32,128,0,1,float16,float16,0,0.028016000986099243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,32,128,0,1,float16,fp8,0,0.028058665494124096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,32,128,0,1,fp8,fp8,0,0.03355200091997782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,1,128,0,1,float16,float16,0,0.026362667481104534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,1,128,0,1,float16,fp8,0,0.026522666215896606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,1,128,0,1,fp8,fp8,0,0.03201599915822347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,2,128,0,1,float16,float16,0,0.026799999177455902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,2,128,0,1,float16,fp8,0,0.026533332963784535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,2,128,0,1,fp8,fp8,0,0.03238933285077413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,4,128,0,1,float16,float16,0,0.02649066597223282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,4,128,0,1,float16,fp8,0,0.02672533442576726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,4,128,0,1,fp8,fp8,0,0.03299733251333237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,8,128,0,1,float16,float16,0,0.027306665976842243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,1,128,0,1,float16,float16,0,0.018533332894245785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,8,128,0,1,float16,fp8,0,0.026421333352724712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,32,8,128,0,1,fp8,fp8,0,0.03377600014209747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,32,128,0,1,float16,float16,0,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,2,128,0,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,32,128,0,1,float16,fp8,0,0.01979200045267741
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,32,128,0,1,fp8,fp8,0,0.02369066576162974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,4,128,0,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,1,128,0,1,float16,fp8,0,0.019674666225910187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,1,128,0,1,fp8,fp8,0,0.022853332261244457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,8,128,0,1,float16,float16,0,0.019472000499566395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,2,128,0,1,float16,float16,0,0.018645333747069042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,2,128,0,1,fp8,fp8,0,0.022997332115968067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,4,128,0,1,float16,fp8,0,0.018709332992633183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,4,128,0,1,fp8,fp8,0,0.02306666721900304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,8,128,0,1,float16,fp8,0,0.019904000063737232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,32,8,128,0,1,fp8,fp8,0,0.024005333582560223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,32,128,0,1,float16,float16,0,0.01617066686352094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,32,128,0,1,float16,fp8,0,0.015850666910409927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,32,128,0,1,fp8,fp8,0,0.01876266673207283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,2,128,0,1,float16,fp8,0,0.01613866661985715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,1,128,0,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,1,128,0,1,float16,fp8,0,0.015530666957298914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,1,128,0,1,fp8,fp8,0,0.017722666263580322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,2,128,0,1,float16,float16,0,0.01569066693385442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,2,128,0,1,fp8,fp8,0,0.01758933315674464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,4,128,0,1,float16,float16,0,0.015392000476519266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,4,128,0,1,float16,fp8,0,0.0162773331006368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,4,128,0,1,fp8,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,8,128,0,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,8,128,0,1,float16,fp8,0,0.016250666230916977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,32,8,128,0,1,fp8,fp8,0,0.01826133330663045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,32,128,0,1,float16,float16,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,32,128,0,1,float16,fp8,0,0.015381333728631338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,2,128,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,32,128,0,1,fp8,fp8,0,0.017781333376963932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,1,128,0,1,float16,float16,0,0.014080000420411428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,1,128,0,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,1,128,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,2,128,0,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,2,128,0,1,fp8,fp8,0,0.016757333030303318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,4,128,0,1,float16,float16,0,0.01545599972208341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,4,128,0,1,float16,fp8,0,0.014581333845853806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,4,128,0,1,fp8,fp8,0,0.017551999539136887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,8,128,0,1,float16,float16,0,0.014682666709025701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,8,128,0,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,32,8,128,0,1,fp8,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,32,128,0,1,float16,float16,0,0.01422400027513504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,32,128,0,1,float16,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,32,128,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,1,128,0,1,float16,float16,0,0.014080000420411428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,1,128,0,1,float16,fp8,0,0.014682666709025701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,1,128,0,1,fp8,fp8,0,0.016154666741689045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,2,128,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,2,128,0,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,2,128,0,1,fp8,fp8,0,0.016154666741689045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,4,128,0,1,float16,float16,0,0.01458666721979777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,4,128,0,1,float16,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,4,128,0,1,fp8,fp8,0,0.01629866659641266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,8,128,0,1,float16,float16,0,0.01444799949725469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,8,128,0,1,float16,fp8,0,0.014442666123310724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,32,8,128,0,1,fp8,fp8,0,0.01594666639963786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,1,128,0,1,float16,float16,0,0.20017600059509277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,1,128,0,1,float16,fp8,0,0.2002453406651815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,4,128,0,1,float16,fp8,0,0.20286399126052856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,1,128,0,1,fp8,fp8,0,0.27987732489903766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,2,128,0,1,float16,float16,0,0.2027733325958252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,2,128,0,1,float16,fp8,0,0.20154666900634766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,2,128,0,1,fp8,fp8,0,0.2797226707140605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,4,128,0,1,float16,float16,0,0.20388267437616983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,4,128,0,1,fp8,fp8,0,0.2811093330383301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,8,128,0,1,float16,float16,0,0.20759467283884683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,8,128,0,1,float16,fp8,0,0.20670400063196817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,32,8,128,0,1,fp8,fp8,0,0.2863679925600688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,32,128,0,1,float16,float16,0,0.12370666861534119
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,32,128,0,1,float16,fp8,0,0.12081066767374675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,32,128,0,1,fp8,fp8,0,0.15756799777348837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,1,128,0,1,float16,float16,0,0.10674132903416951
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,1,128,0,1,float16,fp8,0,0.10560533404350281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,2,128,0,1,float16,float16,0,0.10598933696746826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,1,128,0,1,fp8,fp8,0,0.14627733826637268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,2,128,0,1,float16,fp8,0,0.10598400235176086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,2,128,0,1,fp8,fp8,0,0.14855999747912088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,4,128,0,1,float16,float16,0,0.10684266686439514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,4,128,0,1,float16,fp8,0,0.10732266306877136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,4,128,0,1,fp8,fp8,0,0.1479200025399526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,8,128,0,1,float16,float16,0,0.1076639990011851
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,8,128,0,1,float16,fp8,0,0.1083679993947347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,32,8,128,0,1,fp8,fp8,0,0.15105600158373514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,32,128,0,1,float16,float16,0,0.06387733419736226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,32,128,0,1,float16,fp8,0,0.06307733555634816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,32,128,0,1,fp8,fp8,0,0.08503466844558716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,1,128,0,1,float16,float16,0,0.059605335195859276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,4,128,0,1,float16,float16,0,0.05979733169078827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,1,128,0,1,float16,fp8,0,0.05867200096448263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,1,128,0,1,fp8,fp8,0,0.07995733122030894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,2,128,0,1,float16,float16,0,0.059343998630841575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,2,128,0,1,float16,fp8,0,0.059024001161257424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,2,128,0,1,fp8,fp8,0,0.08059733112653096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,32,128,0,1,float16,float16,0,0.036464000741640724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,4,128,0,1,float16,fp8,0,0.059215997656186424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,4,128,0,1,fp8,fp8,0,0.08050133287906647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,8,128,0,1,float16,float16,0,0.06053866446018219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,8,128,0,1,float16,fp8,0,0.0591893345117569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,32,8,128,0,1,fp8,fp8,0,0.08163733283678691
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,32,128,0,1,float16,fp8,0,0.03681600093841553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,32,128,0,1,fp8,fp8,0,0.0492799977461497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,1,128,0,1,float16,float16,0,0.03513599932193756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,1,128,0,1,float16,fp8,0,0.03562133262554804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,1,128,0,1,fp8,fp8,0,0.04828266799449921
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,2,128,0,1,float16,float16,0,0.03580799947182337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,2,128,0,1,float16,fp8,0,0.036661334335803986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,2,128,0,1,fp8,fp8,0,0.045941332976023354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,4,128,0,1,float16,float16,0,0.03638399889071783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,4,128,0,1,float16,fp8,0,0.03594133257865906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,4,128,0,1,fp8,fp8,0,0.04781866570313772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,8,128,0,1,float16,float16,0,0.03649600098530451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,8,128,0,1,float16,fp8,0,0.036271999279658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,32,8,128,0,1,fp8,fp8,0,0.04875733455022176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,32,128,0,1,float16,float16,0,0.024720000723997753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,32,128,0,1,float16,fp8,0,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,32,128,0,1,fp8,fp8,0,0.0305226668715477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,1,128,0,1,float16,float16,0,0.02436800052722295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,1,128,0,1,float16,fp8,0,0.02390933285156886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,1,128,0,1,fp8,fp8,0,0.029738667110602062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,2,128,0,1,float16,float16,0,0.024682665864626568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,2,128,0,1,float16,fp8,0,0.024432001014550526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,2,128,0,1,fp8,fp8,0,0.02991466720898946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,4,128,0,1,float16,float16,0,0.02369600037733714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,4,128,0,1,float16,fp8,0,0.02499199906984965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,4,128,0,1,fp8,fp8,0,0.030666666726271313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,8,128,0,1,float16,float16,0,0.023925334215164185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,8,128,0,1,float16,fp8,0,0.024613333245118458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,32,8,128,0,1,fp8,fp8,0,0.030565333863099415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,32,128,0,1,float16,float16,0,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,32,128,0,1,float16,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,32,128,0,1,fp8,fp8,0,0.02277333289384842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,1,128,0,1,float16,float16,0,0.018629333625237148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,1,128,0,1,float16,fp8,0,0.01841066653529803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,1,128,0,1,fp8,fp8,0,0.02081599955757459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,2,128,0,1,float16,float16,0,0.018618666877349217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,2,128,0,1,float16,fp8,0,0.01823466643691063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,2,128,0,1,fp8,fp8,0,0.021898667017618816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,4,128,0,1,float16,float16,0,0.017765333255132038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,4,128,0,1,float16,fp8,0,0.018330667167901993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,4,128,0,1,fp8,fp8,0,0.022250667214393616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,8,128,0,1,float16,float16,0,0.018330667167901993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,8,128,0,1,float16,fp8,0,0.017994667092959087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,32,8,128,0,1,fp8,fp8,0,0.0229066660006841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,32,128,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,2,128,0,1,float16,float16,0,0.01458666721979777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,32,128,0,1,float16,fp8,0,0.016069332758585613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,32,128,0,1,fp8,fp8,0,0.017727999637524288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,1,128,0,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,1,128,0,1,float16,fp8,0,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,1,128,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,2,128,0,1,float16,fp8,0,0.016058667252461117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,2,128,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,4,128,0,1,float16,float16,0,0.014511999984582266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,4,128,0,1,float16,fp8,0,0.015728000551462173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,4,128,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,8,128,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,8,128,0,1,float16,fp8,0,0.015856000284353893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,32,8,128,0,1,fp8,fp8,0,0.017514667163292568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,32,128,0,1,float16,float16,0,0.01471466695268949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,32,128,0,1,float16,fp8,0,0.01451733335852623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,32,128,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,1,128,0,1,float16,float16,0,0.014783999572197596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,1,128,0,1,float16,fp8,0,0.01470400020480156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,1,128,0,1,fp8,fp8,0,0.01635733370979627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,2,128,0,1,float16,float16,0,0.014522666732470194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,2,128,0,1,float16,fp8,0,0.014607999473810196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,2,128,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,4,128,0,1,float16,float16,0,0.013914667069911957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,4,128,0,1,float16,fp8,0,0.01458666721979777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,4,128,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,1,128,0,1,float16,float16,0,0.013797332843144735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,8,128,0,1,float16,float16,0,0.013936000565687815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,8,128,0,1,float16,fp8,0,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,32,8,128,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,32,128,0,1,float16,float16,0,0.014085333794355392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,32,128,0,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,32,128,0,1,fp8,fp8,0,0.016037333756685257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,1,128,0,1,float16,fp8,0,0.01440000037352244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,1,128,0,1,fp8,fp8,0,0.01579733317097028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,2,128,0,1,float16,float16,0,0.013642666240533194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,2,128,0,1,float16,fp8,0,0.014357333381970724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,2,128,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,4,128,0,1,float16,float16,0,0.013973332941532135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,4,128,0,1,float16,fp8,0,0.014490666488806406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,4,128,0,1,fp8,fp8,0,0.01628799984852473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,8,128,0,1,float16,float16,0,0.014149333039919535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,8,128,0,1,float16,fp8,0,0.014474666366974512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,32,8,128,0,1,fp8,fp8,0,0.016261332978804905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,1,128,0,1,fp8,fp8,0,16.607264200846355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,2,128,0,1,fp8,fp8,0,16.725173950195312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,1,128,0,1,float16,float16,0,24.903406778971355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,1,128,0,1,float16,fp8,0,25.01060740152995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,2,128,0,1,float16,float16,0,25.013280232747395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,2,128,0,1,float16,fp8,0,25.28814442952474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,4,128,0,1,float16,float16,0,25.313392639160156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,4,128,0,1,float16,fp8,0,25.094187418619793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,4,128,0,1,fp8,fp8,0,17.99225107828776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,24,128,0,1,fp8,fp8,0,9.902608235677084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,24,128,0,1,float16,float16,0,13.028245290120443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,24,128,0,1,float16,fp8,0,13.2181765238444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,8,128,0,1,fp8,fp8,0,16.88858159383138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,8,128,0,1,float16,float16,0,25.66967519124349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,1,128,0,1,fp8,fp8,0,8.435882568359375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,1,128,0,1,float16,float16,0,12.63577651977539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,1,128,0,1,float16,fp8,0,12.665167490641275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,24,8,128,0,1,float16,fp8,0,25.325302124023438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,2,128,0,1,fp8,fp8,0,8.411338806152344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,2,128,0,1,float16,float16,0,12.728949228922525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,2,128,0,1,float16,fp8,0,12.924544016520182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,4,128,0,1,fp8,fp8,0,8.46125348409017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,4,128,0,1,float16,float16,0,12.78780746459961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,4,128,0,1,float16,fp8,0,12.63387680053711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,24,128,0,1,float16,float16,0,6.58460807800293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,24,128,0,1,float16,fp8,0,6.691253026326497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,24,128,0,1,fp8,fp8,0,4.412543932596843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,8,128,0,1,fp8,fp8,0,8.460522969563803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,8,128,0,1,float16,float16,0,12.690320332845053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,24,8,128,0,1,float16,fp8,0,12.733163197835287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,1,128,0,1,float16,float16,0,6.416794459025065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,1,128,0,1,fp8,fp8,0,4.205535888671875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,1,128,0,1,float16,fp8,0,6.4016157786051435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,2,128,0,1,fp8,fp8,0,4.231456120808919
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,2,128,0,1,float16,float16,0,6.549941380818685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,2,128,0,1,float16,fp8,0,6.483338673909505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,4,128,0,1,float16,float16,0,7.285962422688802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,4,128,0,1,fp8,fp8,0,4.230031967163086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,4,128,0,1,float16,fp8,0,6.6157175699869795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,24,128,0,1,float16,float16,0,3.4080158869425454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,8,128,0,1,fp8,fp8,0,4.28110408782959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,24,128,0,1,fp8,fp8,0,2.266005357106527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,24,128,0,1,float16,fp8,0,3.3071839014689126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,8,128,0,1,float16,float16,0,6.429962793986003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,24,8,128,0,1,float16,fp8,0,6.654661178588867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,1,128,0,1,float16,float16,0,3.2684427897135415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,1,128,0,1,float16,fp8,0,3.228032112121582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,1,128,0,1,fp8,fp8,0,2.2188000679016113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,2,128,0,1,fp8,fp8,0,2.157898743947347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,2,128,0,1,float16,float16,0,3.2775627772013345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,2,128,0,1,float16,fp8,0,3.3172054290771484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,4,128,0,1,fp8,fp8,0,2.173898696899414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,4,128,0,1,float16,float16,0,3.2274131774902344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,4,128,0,1,float16,fp8,0,3.2859681447347007
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,8,128,0,1,fp8,fp8,0,2.190165360768636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,8,128,0,1,float16,float16,0,3.3579254150390625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,24,8,128,0,1,float16,fp8,0,3.3345120747884116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,1,128,0,1,fp8,fp8,0,9.777669270833334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,2,128,0,1,fp8,fp8,0,9.743775685628256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,1,128,0,1,float16,float16,0,14.48306655883789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,1,128,0,1,float16,fp8,0,14.61147689819336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,2,128,0,1,float16,float16,0,14.534656524658203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,2,128,0,1,float16,fp8,0,14.55145009358724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,4,128,0,1,float16,float16,0,14.447909037272135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,4,128,0,1,float16,fp8,0,14.776106516520182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,4,128,0,1,fp8,fp8,0,9.84553591410319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,24,128,0,1,fp8,fp8,0,5.062485376993815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,24,128,0,1,float16,float16,0,7.513797124226888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,24,128,0,1,float16,fp8,0,7.460266749064128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,8,128,0,1,fp8,fp8,0,10.068682352701822
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,1,128,0,1,float16,float16,0,7.386112213134766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,8,128,0,1,float16,float16,0,14.442549387613932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,1,128,0,1,fp8,fp8,0,4.917711893717448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,1,128,0,1,float16,fp8,0,7.327930450439453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,24,8,128,0,1,float16,fp8,0,15.424084981282553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,2,128,0,1,fp8,fp8,0,5.085952123006185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,2,128,0,1,float16,float16,0,7.297232309977214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,2,128,0,1,float16,fp8,0,7.323285420735677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,4,128,0,1,float16,float16,0,7.535818735758464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,4,128,0,1,fp8,fp8,0,5.02891190846761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,4,128,0,1,float16,fp8,0,7.940095901489258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,8,128,0,1,fp8,fp8,0,5.038469314575195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,24,128,0,1,float16,float16,0,3.8000640869140625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,8,128,0,1,float16,float16,0,7.658698399861653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,24,128,0,1,fp8,fp8,0,2.5711466471354165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,24,128,0,1,float16,fp8,0,3.51417605082194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,24,8,128,0,1,float16,fp8,0,7.398111979166667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,1,128,0,1,float16,float16,0,3.7225494384765625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,1,128,0,1,float16,fp8,0,3.6432746251424155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,1,128,0,1,fp8,fp8,0,2.4298453330993652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,2,128,0,1,fp8,fp8,0,2.492389361063639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,2,128,0,1,float16,float16,0,3.664581298828125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,4,128,0,1,fp8,fp8,0,2.455146630605062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,2,128,0,1,float16,fp8,0,3.6810506184895835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,4,128,0,1,float16,float16,0,3.6593494415283203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,4,128,0,1,float16,fp8,0,3.715365409851074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,8,128,0,1,float16,float16,0,3.699573198954264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,8,128,0,1,float16,fp8,0,3.813141187032064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,24,8,128,0,1,fp8,fp8,0,2.543989340464274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,24,128,0,1,float16,float16,0,1.8312373161315918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,24,128,0,1,float16,fp8,0,1.9288214047749836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,24,128,0,1,fp8,fp8,0,1.3473386764526367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,1,128,0,1,float16,float16,0,1.8383572896321614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,1,128,0,1,fp8,fp8,0,1.4116800626118977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,1,128,0,1,float16,fp8,0,1.8297173182169597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,2,128,0,1,float16,float16,0,1.840437412261963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,2,128,0,1,float16,fp8,0,1.9020427068074544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,2,128,0,1,fp8,fp8,0,1.2985546588897705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,4,128,0,1,float16,float16,0,1.8712159792582195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,4,128,0,1,fp8,fp8,0,1.3121600151062012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,4,128,0,1,float16,fp8,0,1.8203199704488118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,8,128,0,1,float16,float16,0,1.8361706733703613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,8,128,0,1,fp8,fp8,0,1.3230453332265217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,24,8,128,0,1,float16,fp8,0,1.8249386151631672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,1,128,0,1,fp8,fp8,0,6.913829167683919
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,2,128,0,1,fp8,fp8,0,6.988351821899414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,1,128,0,1,float16,float16,0,10.192970911661783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,1,128,0,1,float16,fp8,0,10.25056521097819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,2,128,0,1,float16,fp8,0,10.135061264038086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,2,128,0,1,float16,float16,0,10.291568120320639
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,4,128,0,1,float16,float16,0,10.276917139689127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,4,128,0,1,fp8,fp8,0,7.004816055297852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,24,128,0,1,fp8,fp8,0,3.6418774922688804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,24,128,0,1,float16,float16,0,5.22160530090332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,24,128,0,1,float16,fp8,0,5.393429438273112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,8,128,0,1,fp8,fp8,0,7.01917839050293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,4,128,0,1,float16,fp8,0,10.29475212097168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,8,128,0,1,float16,float16,0,10.45798428853353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,1,128,0,1,fp8,fp8,0,3.453157424926758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,24,8,128,0,1,float16,fp8,0,10.276069641113281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,1,128,0,1,float16,float16,0,5.225797335306804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,1,128,0,1,float16,fp8,0,5.158133188883464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,2,128,0,1,fp8,fp8,0,3.4623680114746094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,2,128,0,1,float16,float16,0,5.191647847493489
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,2,128,0,1,float16,fp8,0,5.19653860727946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,4,128,0,1,fp8,fp8,0,3.378602663675944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,4,128,0,1,float16,float16,0,5.241552035013835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,4,128,0,1,float16,fp8,0,5.201829274495442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,24,128,0,1,float16,float16,0,2.6282347043355307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,8,128,0,1,fp8,fp8,0,3.4721813201904297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,24,128,0,1,float16,fp8,0,2.6595306396484375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,8,128,0,1,float16,float16,0,5.251018524169922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,24,8,128,0,1,float16,fp8,0,5.271466573079427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,24,128,0,1,fp8,fp8,0,1.8271466890970867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,1,128,0,1,fp8,fp8,0,1.7665066719055176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,1,128,0,1,float16,float16,0,2.642031987508138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,1,128,0,1,float16,fp8,0,2.603877385457357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,2,128,0,1,fp8,fp8,0,1.7540213267008464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,2,128,0,1,float16,float16,0,2.5808374087015786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,2,128,0,1,float16,fp8,0,2.619706630706787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,4,128,0,1,float16,float16,0,2.5769119262695312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,4,128,0,1,float16,fp8,0,2.6244853337605796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,4,128,0,1,fp8,fp8,0,1.7567092577616374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,24,128,0,1,float16,float16,0,1.3383253415425618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,8,128,0,1,fp8,fp8,0,1.7502506573994954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,8,128,0,1,float16,float16,0,2.6668052673339844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,24,8,128,0,1,float16,fp8,0,2.703514734903971
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,24,128,0,1,float16,fp8,0,1.344330628712972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,24,128,0,1,fp8,fp8,0,0.9437599976857504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,1,128,0,1,float16,float16,0,1.3715039889017742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,1,128,0,1,float16,fp8,0,1.3214560349782307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,1,128,0,1,fp8,fp8,0,0.9047679901123047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,2,128,0,1,float16,float16,0,1.3247946898142497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,2,128,0,1,fp8,fp8,0,0.922000010808309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,2,128,0,1,float16,fp8,0,1.3816426595052083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,4,128,0,1,float16,float16,0,1.3186826705932617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,4,128,0,1,fp8,fp8,0,0.9029813607533773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,8,128,0,1,float16,fp8,0,1.3200213114420574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,4,128,0,1,float16,fp8,0,1.3067359924316406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,8,128,0,1,float16,float16,0,1.3195786476135254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,24,8,128,0,1,fp8,fp8,0,0.8999893665313721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,1,128,0,1,fp8,fp8,0,9.105290730794271
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,2,128,0,1,fp8,fp8,0,9.141034444173178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,1,128,0,1,float16,float16,0,13.220458984375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,1,128,0,1,float16,fp8,0,13.582698822021484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,2,128,0,1,float16,float16,0,13.57314682006836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,2,128,0,1,float16,fp8,0,13.426447550455729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,4,128,0,1,float16,float16,0,13.545711517333984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,4,128,0,1,float16,fp8,0,13.646811167399088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,4,128,0,1,fp8,fp8,0,9.161888122558594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,24,128,0,1,fp8,fp8,0,4.916586558024089
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,24,128,0,1,float16,float16,0,6.982389450073242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,24,128,0,1,float16,fp8,0,7.046613057454427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,1,128,0,1,float16,float16,0,6.800240198771159
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,8,128,0,1,fp8,fp8,0,9.351066589355469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,8,128,0,1,float16,float16,0,13.525588989257812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,1,128,0,1,fp8,fp8,0,4.6282345453898115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,1,128,0,1,float16,fp8,0,6.748837153116862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,24,8,128,0,1,float16,fp8,0,13.453685760498047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,2,128,0,1,fp8,fp8,0,4.68448543548584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,2,128,0,1,float16,fp8,0,6.771439870198567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,2,128,0,1,float16,float16,0,6.796133041381836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,4,128,0,1,float16,float16,0,7.006730397542317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,4,128,0,1,fp8,fp8,0,4.683504104614258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,4,128,0,1,float16,fp8,0,6.818357467651367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,24,128,0,1,float16,float16,0,3.9276746114095054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,8,128,0,1,fp8,fp8,0,4.726101239522298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,24,128,0,1,float16,fp8,0,3.852191925048828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,24,128,0,1,fp8,fp8,0,2.64302396774292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,8,128,0,1,float16,float16,0,7.465311686197917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,24,8,128,0,1,float16,fp8,0,7.575642903645833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,1,128,0,1,float16,float16,0,3.4700905481974282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,1,128,0,1,float16,fp8,0,3.556943893432617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,1,128,0,1,fp8,fp8,0,2.2653706868489585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,2,128,0,1,fp8,fp8,0,2.230618635813395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,2,128,0,1,float16,float16,0,3.438096046447754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,2,128,0,1,float16,fp8,0,3.290874799092611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,4,128,0,1,float16,float16,0,3.4510294596354165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,4,128,0,1,float16,fp8,0,3.358501434326172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,4,128,0,1,fp8,fp8,0,2.2980640729268393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,24,128,0,1,float16,float16,0,1.7262132962544758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,8,128,0,1,float16,float16,0,3.4594507217407227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,8,128,0,1,fp8,fp8,0,2.3302559852600098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,24,8,128,0,1,float16,fp8,0,3.439274787902832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,24,128,0,1,float16,fp8,0,1.6919946670532227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,24,128,0,1,fp8,fp8,0,1.229199965794881
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,1,128,0,1,float16,float16,0,1.6308372815450032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,1,128,0,1,float16,fp8,0,1.6533013979593914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,1,128,0,1,fp8,fp8,0,1.1804107030232747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,2,128,0,1,float16,float16,0,1.632970650990804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,2,128,0,1,fp8,fp8,0,1.1902080376942952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,2,128,0,1,float16,fp8,0,1.6455787022908528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,4,128,0,1,fp8,fp8,0,1.1813226540883381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,4,128,0,1,float16,float16,0,1.6447092692057292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,4,128,0,1,float16,fp8,0,1.6664640108744304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,8,128,0,1,float16,float16,0,1.6297972997029622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,8,128,0,1,fp8,fp8,0,1.1914079984029133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,24,128,0,1,float16,float16,0,0.9066186745961508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,24,8,128,0,1,float16,fp8,0,1.6850825945536296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,24,128,0,1,float16,fp8,0,0.9305439790089926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,24,128,0,1,fp8,fp8,0,0.648202657699585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,1,128,0,1,float16,fp8,0,0.888709306716919
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,1,128,0,1,float16,float16,0,0.8999573389689127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,2,128,0,1,float16,float16,0,0.8930400212605795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,1,128,0,1,fp8,fp8,0,0.6134986480077108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,2,128,0,1,fp8,fp8,0,0.6118293205897013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,2,128,0,1,float16,fp8,0,0.9084533055623373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,4,128,0,1,float16,float16,0,0.8903573354085287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,8,128,0,1,float16,fp8,0,0.9003787040710449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,4,128,0,1,float16,fp8,0,0.8888586362202963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,4,128,0,1,fp8,fp8,0,0.6136746803919474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,8,128,0,1,float16,float16,0,0.895967960357666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,24,8,128,0,1,fp8,fp8,0,0.6194986502329508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,1,128,0,1,fp8,fp8,0,5.4818776448567705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,2,128,0,1,fp8,fp8,0,5.613136291503906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,1,128,0,1,float16,float16,0,8.031557083129883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,1,128,0,1,float16,fp8,0,7.768954594930013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,2,128,0,1,float16,float16,0,7.861349105834961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,2,128,0,1,float16,fp8,0,7.902026494344075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,4,128,0,1,float16,float16,0,7.9558664957682295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,4,128,0,1,float16,fp8,0,8.006256103515625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,4,128,0,1,fp8,fp8,0,5.525391896565755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,24,128,0,1,fp8,fp8,0,2.883280118306478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,24,128,0,1,float16,float16,0,4.1399946212768555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,24,128,0,1,float16,fp8,0,4.063098589579265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,8,128,0,1,fp8,fp8,0,5.532335917154948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,1,128,0,1,float16,float16,0,3.984421412150065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,8,128,0,1,float16,float16,0,8.041578928629557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,1,128,0,1,fp8,fp8,0,2.669402758280436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,24,8,128,0,1,float16,fp8,0,8.10379727681478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,1,128,0,1,float16,fp8,0,3.930570602416992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,2,128,0,1,fp8,fp8,0,2.696949323018392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,2,128,0,1,float16,float16,0,3.92958927154541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,2,128,0,1,float16,fp8,0,3.9729385375976562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,4,128,0,1,float16,float16,0,3.9005438486735025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,4,128,0,1,fp8,fp8,0,2.7496585845947266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,4,128,0,1,float16,fp8,0,4.026080131530762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,24,128,0,1,float16,float16,0,1.9863413174947102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,24,128,0,1,float16,fp8,0,2.0027732849121094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,8,128,0,1,fp8,fp8,0,2.766421318054199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,24,128,0,1,fp8,fp8,0,1.595743974049886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,8,128,0,1,float16,float16,0,4.0863040288289385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,24,8,128,0,1,float16,fp8,0,3.900767962137858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,1,128,0,1,float16,float16,0,1.922287940979004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,1,128,0,1,fp8,fp8,0,1.3724266688028972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,1,128,0,1,float16,fp8,0,1.9371733665466309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,2,128,0,1,float16,float16,0,1.8754879633585613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,2,128,0,1,fp8,fp8,0,1.366005261739095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,2,128,0,1,float16,fp8,0,1.9027946790059407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,4,128,0,1,float16,float16,0,1.876431941986084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,4,128,0,1,fp8,fp8,0,1.4037334124247234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,4,128,0,1,float16,fp8,0,1.8829280535380046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,24,128,0,1,float16,float16,0,1.0159839789072673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,8,128,0,1,fp8,fp8,0,1.3990079561869304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,8,128,0,1,float16,float16,0,1.9620213508605957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,24,128,0,1,float16,fp8,0,1.022602637608846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,24,8,128,0,1,float16,fp8,0,1.9086720148722331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,24,128,0,1,fp8,fp8,0,0.7768533229827881
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,1,128,0,1,float16,float16,0,0.9842399756113688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,1,128,0,1,fp8,fp8,0,0.7257653077443441
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,1,128,0,1,float16,fp8,0,0.9941333134969076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,2,128,0,1,float16,float16,0,0.980837345123291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,2,128,0,1,float16,fp8,0,0.9858346780141195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,2,128,0,1,fp8,fp8,0,0.72762131690979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,4,128,0,1,float16,float16,0,0.9978613058725992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,4,128,0,1,float16,fp8,0,0.9973440170288086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,4,128,0,1,fp8,fp8,0,0.7346239884694418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,8,128,0,1,float16,float16,0,0.9968533515930176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,8,128,0,1,fp8,fp8,0,0.739786704381307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,24,8,128,0,1,float16,fp8,0,0.9991733233133951
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,1,128,0,1,float16,fp8,0,0.5581973393758138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,24,128,0,1,float16,float16,0,0.5635999838511149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,24,128,0,1,float16,fp8,0,0.5670453310012817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,24,128,0,1,fp8,fp8,0,0.40476266543070477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,1,128,0,1,float16,float16,0,0.5546559890111288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,1,128,0,1,fp8,fp8,0,0.38465599219004315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,2,128,0,1,float16,float16,0,0.558895985285441
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,2,128,0,1,float16,fp8,0,0.5581706762313843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,2,128,0,1,fp8,fp8,0,0.38763733704884845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,4,128,0,1,float16,float16,0,0.5555733442306519
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,4,128,0,1,float16,fp8,0,0.5584746599197388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,4,128,0,1,fp8,fp8,0,0.3864479859670003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,8,128,0,1,float16,float16,0,0.5629599889119467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,8,128,0,1,float16,fp8,0,0.5617653528849283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,24,8,128,0,1,fp8,fp8,0,0.39192001024882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,1,128,0,1,fp8,fp8,0,5.385898590087891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,1,128,0,1,float16,float16,0,7.557781219482422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,2,128,0,1,fp8,fp8,0,5.494714736938477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,1,128,0,1,float16,fp8,0,7.558122634887695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,2,128,0,1,float16,float16,0,7.658512115478516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,2,128,0,1,float16,fp8,0,7.726896286010742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,4,128,0,1,float16,float16,0,7.598965326944987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,4,128,0,1,fp8,fp8,0,5.5020802815755205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,24,128,0,1,float16,float16,0,4.063813209533691
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,24,128,0,1,fp8,fp8,0,2.8638881047566733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,24,128,0,1,float16,fp8,0,4.094703992207845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,8,128,0,1,fp8,fp8,0,5.427770614624023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,4,128,0,1,float16,fp8,0,7.650341033935547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,8,128,0,1,float16,float16,0,7.721696217854817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,24,8,128,0,1,float16,fp8,0,7.7973283131917315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,1,128,0,1,fp8,fp8,0,2.6497012774149575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,1,128,0,1,float16,float16,0,3.5996907552083335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,1,128,0,1,float16,fp8,0,3.724757194519043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,2,128,0,1,fp8,fp8,0,2.6052586237589517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,2,128,0,1,float16,float16,0,3.794432004292806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,2,128,0,1,float16,fp8,0,3.7743094762166343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,4,128,0,1,float16,float16,0,3.825141270955404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,4,128,0,1,float16,fp8,0,3.8338187535603843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,4,128,0,1,fp8,fp8,0,2.7128426233927407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,24,128,0,1,float16,float16,0,1.9372639656066895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,24,128,0,1,float16,fp8,0,1.8870347340901692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,8,128,0,1,fp8,fp8,0,2.7341972986857095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,8,128,0,1,float16,float16,0,3.854384104410807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,24,128,0,1,fp8,fp8,0,1.4787786801656086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,24,8,128,0,1,float16,fp8,0,3.8931894302368164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,1,128,0,1,float16,float16,0,1.8413653373718262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,1,128,0,1,float16,fp8,0,1.7832266489664714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,1,128,0,1,fp8,fp8,0,1.3508639335632324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,2,128,0,1,float16,float16,0,1.8263840675354004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,2,128,0,1,fp8,fp8,0,1.3493866920471191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,2,128,0,1,float16,fp8,0,1.7712480227152507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,4,128,0,1,float16,float16,0,1.8061919212341309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,4,128,0,1,fp8,fp8,0,1.3457493782043457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,4,128,0,1,float16,fp8,0,1.8007680575052898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,8,128,0,1,fp8,fp8,0,1.3653920491536458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,8,128,0,1,float16,float16,0,1.8800266583760579
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,24,128,0,1,float16,float16,0,0.9646346569061279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,24,8,128,0,1,float16,fp8,0,1.8655680020650227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,24,128,0,1,fp8,fp8,0,0.7713440259297689
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,24,128,0,1,float16,fp8,0,0.9737333456675211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,1,128,0,1,float16,float16,0,0.9241387049357096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,1,128,0,1,fp8,fp8,0,0.7069439888000488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,2,128,0,1,fp8,fp8,0,0.7151839733123779
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,2,128,0,1,float16,fp8,0,0.9242453575134277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,1,128,0,1,float16,fp8,0,0.9429759979248047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,2,128,0,1,float16,float16,0,0.9303680260976156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,4,128,0,1,float16,float16,0,0.9270079930623373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,4,128,0,1,float16,fp8,0,0.9253333409627279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,4,128,0,1,fp8,fp8,0,0.7051680088043213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,8,128,0,1,float16,float16,0,0.9323999881744385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,8,128,0,1,float16,fp8,0,0.9468533198038737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,24,8,128,0,1,fp8,fp8,0,0.7182400226593018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,24,128,0,1,float16,float16,0,0.5217333237330118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,24,128,0,1,fp8,fp8,0,0.39827199776967365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,2,128,0,1,float16,float16,0,0.5097973346710205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,24,128,0,1,float16,fp8,0,0.5261333386103312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,1,128,0,1,float16,float16,0,0.5043359994888306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,1,128,0,1,float16,fp8,0,0.5064586798350016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,1,128,0,1,fp8,fp8,0,0.3646986484527588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,2,128,0,1,float16,fp8,0,0.5109493335088094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,2,128,0,1,fp8,fp8,0,0.36451200644175213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,4,128,0,1,float16,float16,0,0.5075039863586426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,4,128,0,1,float16,fp8,0,0.5097546577453613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,4,128,0,1,fp8,fp8,0,0.369978666305542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,8,128,0,1,float16,float16,0,0.5064213275909424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,8,128,0,1,float16,fp8,0,0.5129706859588623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,24,8,128,0,1,fp8,fp8,0,0.37212268511454266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,24,128,0,1,float16,float16,0,0.2727253238360087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,24,128,0,1,float16,fp8,0,0.2765760024388631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,24,128,0,1,fp8,fp8,0,0.22382932901382446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,1,128,0,1,float16,float16,0,0.2624266743659973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,1,128,0,1,float16,fp8,0,0.2642186681429545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,1,128,0,1,fp8,fp8,0,0.2093120018641154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,2,128,0,1,float16,float16,0,0.2608319918314616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,2,128,0,1,float16,fp8,0,0.26289600133895874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,2,128,0,1,fp8,fp8,0,0.21067732572555542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,4,128,0,1,float16,float16,0,0.2646239995956421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,4,128,0,1,float16,fp8,0,0.2637333273887634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,4,128,0,1,fp8,fp8,0,0.21196266015370688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,8,128,0,1,float16,float16,0,0.26368000109990436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,8,128,0,1,float16,fp8,0,0.26527466376622516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,24,8,128,0,1,fp8,fp8,0,0.21051732699076334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,1,128,0,1,fp8,fp8,0,3.3261760075887046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,1,128,0,1,float16,float16,0,4.5439252853393555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,1,128,0,1,float16,fp8,0,4.55896536509196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,2,128,0,1,fp8,fp8,0,3.287045478820801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,2,128,0,1,float16,float16,0,4.507376035054524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,2,128,0,1,float16,fp8,0,4.469162623087565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,4,128,0,1,float16,float16,0,4.57151985168457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,4,128,0,1,float16,fp8,0,4.668581326802571
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,4,128,0,1,fp8,fp8,0,3.382490793863932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,24,128,0,1,float16,float16,0,2.4021493593851724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,8,128,0,1,fp8,fp8,0,3.366005261739095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,24,128,0,1,fp8,fp8,0,1.801813284556071
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,8,128,0,1,float16,float16,0,4.67085329691569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,24,128,0,1,float16,fp8,0,2.3006985982259116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,24,8,128,0,1,float16,fp8,0,4.708101272583008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,1,128,0,1,float16,float16,0,2.11900266011556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,1,128,0,1,fp8,fp8,0,1.6450293858846028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,1,128,0,1,float16,fp8,0,2.2141332626342773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,2,128,0,1,float16,float16,0,2.224789301554362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,2,128,0,1,fp8,fp8,0,1.6357706387837727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,2,128,0,1,float16,fp8,0,2.2190346717834473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,4,128,0,1,float16,float16,0,2.1544906298319497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,4,128,0,1,float16,fp8,0,2.221264044443766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,4,128,0,1,fp8,fp8,0,1.6522666613260906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,24,128,0,1,float16,float16,0,1.178597370783488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,8,128,0,1,float16,float16,0,2.2805867195129395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,8,128,0,1,fp8,fp8,0,1.683029333750407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,24,128,0,1,float16,fp8,0,1.1806293328603108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,24,8,128,0,1,float16,fp8,0,2.296016057332357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,24,128,0,1,fp8,fp8,0,0.9370826880137125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,1,128,0,1,float16,float16,0,1.085920015970866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,1,128,0,1,fp8,fp8,0,0.8569653034210205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,1,128,0,1,float16,fp8,0,1.088368018468221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,2,128,0,1,float16,float16,0,1.0966133276621501
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,2,128,0,1,float16,fp8,0,1.094271977742513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,2,128,0,1,fp8,fp8,0,0.876858631769816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,4,128,0,1,float16,float16,0,1.0948692957560222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,4,128,0,1,fp8,fp8,0,0.8489973545074463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,4,128,0,1,float16,fp8,0,1.1018933455149333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,8,128,0,1,fp8,fp8,0,0.8662613232930502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,8,128,0,1,float16,float16,0,1.0983359813690186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,24,128,0,1,float16,float16,0,0.6057546536127726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,24,8,128,0,1,float16,fp8,0,1.1151680151621501
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,1,128,0,1,float16,fp8,0,0.5801706711451212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,24,128,0,1,float16,fp8,0,0.6186346610387167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,24,128,0,1,fp8,fp8,0,0.49407466252644855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,1,128,0,1,float16,float16,0,0.5753066539764404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,1,128,0,1,fp8,fp8,0,0.4516426722208659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,4,128,0,1,float16,float16,0,0.5774773359298706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,2,128,0,1,float16,float16,0,0.5746933221817017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,2,128,0,1,float16,fp8,0,0.5797813336054484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,2,128,0,1,fp8,fp8,0,0.4583626588185628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,4,128,0,1,float16,fp8,0,0.5825333197911581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,4,128,0,1,fp8,fp8,0,0.4564533233642578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,8,128,0,1,float16,float16,0,0.5832213163375854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,8,128,0,1,float16,fp8,0,0.5891733169555664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,24,8,128,0,1,fp8,fp8,0,0.463701327641805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,24,128,0,1,float16,float16,0,0.33398934205373126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,24,128,0,1,float16,fp8,0,0.34226131439208984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,24,128,0,1,fp8,fp8,0,0.26332799593607586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,1,128,0,1,float16,float16,0,0.32170132795969647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,1,128,0,1,float16,fp8,0,0.3245226740837097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,1,128,0,1,fp8,fp8,0,0.23634666204452515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,2,128,0,1,float16,float16,0,0.3222399950027466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,2,128,0,1,float16,fp8,0,0.32706133524576825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,2,128,0,1,fp8,fp8,0,0.23762667179107666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,4,128,0,1,float16,float16,0,0.32365866502126056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,4,128,0,1,float16,fp8,0,0.3288106719652812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,4,128,0,1,fp8,fp8,0,0.23894399404525757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,8,128,0,1,float16,float16,0,0.32920533418655396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,8,128,0,1,float16,fp8,0,0.3285813331604004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,24,8,128,0,1,fp8,fp8,0,0.2421813408533732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,24,128,0,1,float16,float16,0,0.17997866868972778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,24,128,0,1,float16,fp8,0,0.1810879906018575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,24,128,0,1,fp8,fp8,0,0.15101866920789084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,1,128,0,1,float16,float16,0,0.17162134250005087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,1,128,0,1,float16,fp8,0,0.1713599960009257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,1,128,0,1,fp8,fp8,0,0.139765332142512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,2,128,0,1,float16,float16,0,0.17098132769266763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,2,128,0,1,float16,fp8,0,0.17219199736913046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,2,128,0,1,fp8,fp8,0,0.13917866349220276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,4,128,0,1,float16,float16,0,0.17068266868591309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,4,128,0,1,float16,fp8,0,0.17230399449666342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,4,128,0,1,fp8,fp8,0,0.14245866735776266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,8,128,0,1,float16,float16,0,0.17272533973058066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,8,128,0,1,float16,fp8,0,0.1747573415438334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,24,8,128,0,1,fp8,fp8,0,0.1434933344523112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,1,128,0,1,fp8,fp8,0,3.4973440170288086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,1,128,0,1,float16,float16,0,4.607354799906413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,1,128,0,1,float16,fp8,0,4.47704537709554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,2,128,0,1,fp8,fp8,0,3.521984100341797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,2,128,0,1,float16,float16,0,4.646410624186198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,2,128,0,1,float16,fp8,0,4.608997344970703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,4,128,0,1,float16,float16,0,4.750938733418782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,4,128,0,1,float16,fp8,0,4.7129866282145185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,4,128,0,1,fp8,fp8,0,3.568549474080404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,24,128,0,1,float16,float16,0,2.441626707712809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,24,128,0,1,float16,fp8,0,2.475503921508789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,24,128,0,1,fp8,fp8,0,1.9742186864217122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,1,128,0,1,float16,float16,0,2.2781119346618652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,8,128,0,1,fp8,fp8,0,3.596149444580078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,8,128,0,1,float16,float16,0,4.733509381612142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,24,8,128,0,1,float16,fp8,0,4.757765452067058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,1,128,0,1,fp8,fp8,0,1.7811840375264485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,1,128,0,1,float16,fp8,0,2.214314619700114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,2,128,0,1,float16,float16,0,2.237823963165283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,2,128,0,1,fp8,fp8,0,1.7782986958821614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,2,128,0,1,float16,fp8,0,2.233861287434896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,4,128,0,1,float16,float16,0,2.2953227361043296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,4,128,0,1,fp8,fp8,0,1.791162649790446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,4,128,0,1,float16,fp8,0,2.234858671824137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,8,128,0,1,float16,float16,0,2.241429328918457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,24,128,0,1,float16,float16,0,1.205674648284912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,8,128,0,1,fp8,fp8,0,1.8149174054463704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,24,128,0,1,fp8,fp8,0,1.001914660135905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,24,8,128,0,1,float16,fp8,0,2.309391975402832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,24,128,0,1,float16,fp8,0,1.2196640173594158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,1,128,0,1,float16,float16,0,1.1032479604085286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,1,128,0,1,float16,fp8,0,1.102405309677124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,1,128,0,1,fp8,fp8,0,0.8881066640218099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,2,128,0,1,float16,float16,0,1.0960373083750408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,2,128,0,1,float16,fp8,0,1.1024426619211833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,2,128,0,1,fp8,fp8,0,0.9099466800689697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,4,128,0,1,float16,float16,0,1.1012372970581055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,4,128,0,1,fp8,fp8,0,0.9121706485748291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,4,128,0,1,float16,fp8,0,1.1108960310618083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,8,128,0,1,float16,float16,0,1.1148906548817952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,24,128,0,1,float16,float16,0,0.6193333466847738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,8,128,0,1,float16,fp8,0,1.1361440022786458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,24,8,128,0,1,fp8,fp8,0,0.922223965326945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,24,128,0,1,float16,fp8,0,0.6267146666844686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,24,128,0,1,fp8,fp8,0,0.5269813140233358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,1,128,0,1,float16,float16,0,0.5679839849472046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,2,128,0,1,float16,float16,0,0.5787733395894369
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,1,128,0,1,fp8,fp8,0,0.4668000141779582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,1,128,0,1,float16,fp8,0,0.5765173435211182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,2,128,0,1,float16,fp8,0,0.5775893529256185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,2,128,0,1,fp8,fp8,0,0.4676266511281331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,4,128,0,1,float16,float16,0,0.5776853164037069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,4,128,0,1,float16,fp8,0,0.5794666608174642
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,4,128,0,1,fp8,fp8,0,0.4729173183441162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,8,128,0,1,float16,float16,0,0.5811893145243326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,8,128,0,1,float16,fp8,0,0.5891679922739664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,24,8,128,0,1,fp8,fp8,0,0.4816533327102661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,24,128,0,1,float16,float16,0,0.33079999685287476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,24,128,0,1,float16,fp8,0,0.3373440106709798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,24,128,0,1,fp8,fp8,0,0.27777065833409625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,2,128,0,1,float16,float16,0,0.308624009291331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,1,128,0,1,float16,float16,0,0.3094559907913208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,1,128,0,1,float16,fp8,0,0.31429866949717206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,1,128,0,1,fp8,fp8,0,0.2416426738103231
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,2,128,0,1,float16,fp8,0,0.31436800956726074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,2,128,0,1,fp8,fp8,0,0.24265599250793457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,4,128,0,1,float16,float16,0,0.3146880070368449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,4,128,0,1,float16,fp8,0,0.316048006216685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,4,128,0,1,fp8,fp8,0,0.24433066447575888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,8,128,0,1,float16,float16,0,0.31695467233657837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,8,128,0,1,float16,fp8,0,0.31995199124018353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,24,8,128,0,1,fp8,fp8,0,0.24915200471878052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,1,128,0,1,fp8,fp8,0,0.13748266299565634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,24,128,0,1,float16,float16,0,0.17493333419164023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,24,128,0,1,float16,fp8,0,0.18013334274291992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,24,128,0,1,fp8,fp8,0,0.15260266264279684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,1,128,0,1,float16,float16,0,0.164192001024882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,1,128,0,1,float16,fp8,0,0.16296533743540445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,2,128,0,1,float16,float16,0,0.16379732886950174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,2,128,0,1,float16,fp8,0,0.16455466548601785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,2,128,0,1,fp8,fp8,0,0.13826666275660196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,4,128,0,1,float16,float16,0,0.16391467054684958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,4,128,0,1,float16,fp8,0,0.16658133268356323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,4,128,0,1,fp8,fp8,0,0.13863466183344522
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,8,128,0,1,float16,float16,0,0.16596800088882446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,8,128,0,1,float16,fp8,0,0.167903999487559
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,24,8,128,0,1,fp8,fp8,0,0.14206399520238241
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,24,128,0,1,float16,float16,0,0.1034346620241801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,24,128,0,1,float16,fp8,0,0.10545600454012553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,24,128,0,1,fp8,fp8,0,0.09276266892751057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,1,128,0,1,float16,float16,0,0.09900266925493877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,1,128,0,1,float16,fp8,0,0.09965333342552185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,4,128,0,1,float16,fp8,0,0.10033599535624187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,1,128,0,1,fp8,fp8,0,0.08410666386286418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,2,128,0,1,float16,float16,0,0.09960533181826274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,2,128,0,1,float16,fp8,0,0.09994666775067647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,2,128,0,1,fp8,fp8,0,0.08399466673533122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,4,128,0,1,float16,float16,0,0.09915733337402344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,4,128,0,1,fp8,fp8,0,0.08387200037638347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,8,128,0,1,float16,float16,0,0.10006933410962422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,8,128,0,1,float16,fp8,0,0.10174933075904846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,24,8,128,0,1,fp8,fp8,0,0.0860053300857544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,1,128,0,1,float16,float16,0,2.8403358459472656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,1,128,0,1,fp8,fp8,0,2.318607966105143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,1,128,0,1,float16,fp8,0,2.855274518330892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,2,128,0,1,float16,float16,0,2.865546544392904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,2,128,0,1,fp8,fp8,0,2.3686399459838867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,2,128,0,1,float16,fp8,0,2.7979307174682617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,4,128,0,1,float16,float16,0,2.833871841430664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,4,128,0,1,float16,fp8,0,2.810672124226888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,4,128,0,1,fp8,fp8,0,2.3750880559285483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,24,128,0,1,float16,float16,0,1.5419732729593914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,24,128,0,1,float16,fp8,0,1.5575520197550456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,24,128,0,1,fp8,fp8,0,1.3483999570210774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,1,128,0,1,float16,float16,0,1.3901813824971516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,8,128,0,1,float16,float16,0,2.988186518351237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,8,128,0,1,float16,fp8,0,2.958266576131185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,24,8,128,0,1,fp8,fp8,0,2.459658622741699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,1,128,0,1,float16,fp8,0,1.3934240341186523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,1,128,0,1,fp8,fp8,0,1.2042453289031982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,2,128,0,1,float16,float16,0,1.3894933064778645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,2,128,0,1,float16,fp8,0,1.3976906140645344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,2,128,0,1,fp8,fp8,0,1.1820800304412842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,4,128,0,1,float16,float16,0,1.4063679377237956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,4,128,0,1,float16,fp8,0,1.408448060353597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,4,128,0,1,fp8,fp8,0,1.1992479960123699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,8,128,0,1,float16,float16,0,1.4427893956502278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,24,128,0,1,float16,float16,0,0.7811040083567301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,8,128,0,1,fp8,fp8,0,1.2338666915893555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,24,8,128,0,1,float16,fp8,0,1.437178611755371
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,24,128,0,1,fp8,fp8,0,0.6892480055491129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,24,128,0,1,float16,fp8,0,0.7917813460032145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,1,128,0,1,float16,float16,0,0.7048160235087076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,1,128,0,1,float16,fp8,0,0.7170026302337646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,1,128,0,1,fp8,fp8,0,0.6030506690343221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,2,128,0,1,float16,float16,0,0.7071253458658854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,2,128,0,1,float16,fp8,0,0.7175412972768148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,2,128,0,1,fp8,fp8,0,0.6081333160400391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,4,128,0,1,float16,float16,0,0.7109546661376953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,4,128,0,1,float16,fp8,0,0.7205813725789388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,4,128,0,1,fp8,fp8,0,0.6224266688028971
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,8,128,0,1,float16,float16,0,0.720858653386434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,8,128,0,1,float16,fp8,0,0.7313653628031412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,24,8,128,0,1,fp8,fp8,0,0.6296159823735555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,24,128,0,1,float16,float16,0,0.4086986780166626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,24,128,0,1,float16,fp8,0,0.41328001022338867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,24,128,0,1,fp8,fp8,0,0.36188801129659015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,1,128,0,1,float16,float16,0,0.37486398220062256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,1,128,0,1,float16,fp8,0,0.37701865037282306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,1,128,0,1,fp8,fp8,0,0.3203253348668416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,2,128,0,1,float16,float16,0,0.3730400005976359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,2,128,0,1,float16,fp8,0,0.38060800234476727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,2,128,0,1,fp8,fp8,0,0.3219626744588216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,4,128,0,1,float16,float16,0,0.37588798999786377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,4,128,0,1,float16,fp8,0,0.38235731919606525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,4,128,0,1,fp8,fp8,0,0.32528533538182575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,8,128,0,1,float16,float16,0,0.379472017288208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,8,128,0,1,float16,fp8,0,0.38628800710042316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,24,8,128,0,1,fp8,fp8,0,0.33581332365671795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,24,128,0,1,float16,float16,0,0.22477867205937704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,24,128,0,1,float16,fp8,0,0.227290670077006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,24,128,0,1,fp8,fp8,0,0.1906933387120565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,1,128,0,1,float16,float16,0,0.20660799741744995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,1,128,0,1,float16,fp8,0,0.20642133553822836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,1,128,0,1,fp8,fp8,0,0.16499732931454977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,4,128,0,1,float16,fp8,0,0.21202133099238077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,2,128,0,1,float16,float16,0,0.20779200394948324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,2,128,0,1,float16,fp8,0,0.2085813283920288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,2,128,0,1,fp8,fp8,0,0.16698666413625082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,4,128,0,1,float16,float16,0,0.2081813414891561
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,4,128,0,1,fp8,fp8,0,0.16962667306264242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,8,128,0,1,float16,float16,0,0.21130132675170898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,8,128,0,1,float16,fp8,0,0.21491734186808267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,24,8,128,0,1,fp8,fp8,0,0.17243200540542603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,24,128,0,1,float16,float16,0,0.12110933661460876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,24,128,0,1,float16,fp8,0,0.12272533774375916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,24,128,0,1,fp8,fp8,0,0.10971732934315999
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,1,128,0,1,float16,float16,0,0.11079999804496765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,1,128,0,1,float16,fp8,0,0.11193066835403442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,1,128,0,1,fp8,fp8,0,0.09654933214187622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,2,128,0,1,float16,float16,0,0.1123306651910146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,2,128,0,1,float16,fp8,0,0.11356799801190694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,2,128,0,1,fp8,fp8,0,0.09614400068918864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,4,128,0,1,float16,float16,0,0.11267200112342834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,4,128,0,1,float16,fp8,0,0.1136799951394399
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,4,128,0,1,fp8,fp8,0,0.09966933727264404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,8,128,0,1,float16,float16,0,0.11459733049074809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,8,128,0,1,float16,fp8,0,0.11507733662923177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,24,8,128,0,1,fp8,fp8,0,0.10113599896430969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,24,128,0,1,float16,float16,0,0.07445333401362102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,24,128,0,1,float16,fp8,0,0.07540266712506612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,24,128,0,1,fp8,fp8,0,0.06853866577148438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,1,128,0,1,float16,float16,0,0.07138133545716603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,4,128,0,1,float16,float16,0,0.0714026689529419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,1,128,0,1,float16,fp8,0,0.07154133419195811
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,1,128,0,1,fp8,fp8,0,0.06154666841030121
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,2,128,0,1,float16,float16,0,0.07171733180681865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,2,128,0,1,float16,fp8,0,0.0716480016708374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,2,128,0,1,fp8,fp8,0,0.06227200229962667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,4,128,0,1,float16,fp8,0,0.0724533349275589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,4,128,0,1,fp8,fp8,0,0.06227200229962667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,8,128,0,1,float16,float16,0,0.07108800113201141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,8,128,0,1,float16,fp8,0,0.07218133409818013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,24,8,128,0,1,fp8,fp8,0,0.06250133117039998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,1,128,0,1,float16,float16,0,2.8389012018839517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,1,128,0,1,float16,fp8,0,2.9156106313069663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,1,128,0,1,fp8,fp8,0,2.6218132972717285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,2,128,0,1,float16,float16,0,2.9401172002156577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,2,128,0,1,fp8,fp8,0,2.6338292757670083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,2,128,0,1,float16,fp8,0,2.866485277811686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,4,128,0,1,float16,float16,0,3.085594813028971
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,4,128,0,1,float16,fp8,0,3.172698656717936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,4,128,0,1,fp8,fp8,0,2.888234774271647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,24,128,0,1,float16,float16,0,1.731269359588623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,24,128,0,1,fp8,fp8,0,1.504458745320638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,24,128,0,1,float16,fp8,0,1.6981706619262695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,1,128,0,1,float16,float16,0,1.4580480257670085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,8,128,0,1,float16,float16,0,3.1441386540730796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,8,128,0,1,fp8,fp8,0,2.9225387573242188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,24,8,128,0,1,float16,fp8,0,3.137589454650879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,1,128,0,1,float16,fp8,0,1.4450507164001465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,1,128,0,1,fp8,fp8,0,1.3162986437479656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,2,128,0,1,float16,float16,0,1.4642186164855957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,2,128,0,1,float16,fp8,0,1.456650733947754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,2,128,0,1,fp8,fp8,0,1.3533973693847656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,4,128,0,1,float16,float16,0,1.5519733428955078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,4,128,0,1,float16,fp8,0,1.5376747449239094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,4,128,0,1,fp8,fp8,0,1.453221321105957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,8,128,0,1,float16,float16,0,1.4978453318277996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,24,128,0,1,float16,float16,0,0.8734613259633383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,8,128,0,1,float16,fp8,0,1.4867466290791829
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,24,8,128,0,1,fp8,fp8,0,1.4674347241719563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,24,128,0,1,float16,fp8,0,0.8491520086924235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,24,128,0,1,fp8,fp8,0,0.7595307032267252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,1,128,0,1,float16,float16,0,0.7331413427988688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,1,128,0,1,float16,fp8,0,0.7383413314819336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,1,128,0,1,fp8,fp8,0,0.6712480386098226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,2,128,0,1,float16,fp8,0,0.7404320240020752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,2,128,0,1,float16,float16,0,0.7450026671091715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,2,128,0,1,fp8,fp8,0,0.672048012415568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,4,128,0,1,float16,float16,0,0.7701120376586914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,4,128,0,1,float16,fp8,0,0.760213295618693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,4,128,0,1,fp8,fp8,0,0.7388853232065836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,8,128,0,1,float16,float16,0,0.76800537109375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,8,128,0,1,float16,fp8,0,0.7570239702860514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,24,8,128,0,1,fp8,fp8,0,0.7435733477274576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,24,128,0,1,float16,float16,0,0.4434880018234253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,24,128,0,1,float16,fp8,0,0.4363946517308553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,24,128,0,1,fp8,fp8,0,0.38520534833272296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,1,128,0,1,float16,float16,0,0.38012266159057617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,1,128,0,1,float16,fp8,0,0.3842293421427409
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,2,128,0,1,fp8,fp8,0,0.3470720052719116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,1,128,0,1,fp8,fp8,0,0.34273600578308105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,2,128,0,1,float16,float16,0,0.38277868429819745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,2,128,0,1,float16,fp8,0,0.3845706780751546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,4,128,0,1,float16,float16,0,0.39288000265757245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,4,128,0,1,float16,fp8,0,0.3949439922968547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,4,128,0,1,fp8,fp8,0,0.38647464911142987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,8,128,0,1,float16,float16,0,0.3954133192698161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,8,128,0,1,float16,fp8,0,0.39339200655619305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,24,8,128,0,1,fp8,fp8,0,0.37664000193277997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,24,128,0,1,float16,float16,0,0.23695466915766397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,24,128,0,1,float16,fp8,0,0.2291733423868815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,24,128,0,1,fp8,fp8,0,0.19156799713770548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,1,128,0,1,float16,float16,0,0.2013173302014669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,1,128,0,1,float16,fp8,0,0.20492267608642578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,1,128,0,1,fp8,fp8,0,0.16150400042533875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,4,128,0,1,fp8,fp8,0,0.1783413290977478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,2,128,0,1,float16,float16,0,0.20465066035588583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,2,128,0,1,float16,fp8,0,0.20293333133061728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,2,128,0,1,fp8,fp8,0,0.163482666015625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,4,128,0,1,float16,float16,0,0.2091360092163086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,4,128,0,1,float16,fp8,0,0.20870399475097656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,8,128,0,1,float16,float16,0,0.2125599980354309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,8,128,0,1,float16,fp8,0,0.2107893427213033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,24,8,128,0,1,fp8,fp8,0,0.1813066601753235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,24,128,0,1,float16,float16,0,0.12756799658139548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,24,128,0,1,float16,fp8,0,0.12429866194725037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,2,128,0,1,float16,fp8,0,0.10837866862614949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,24,128,0,1,fp8,fp8,0,0.10255466898282369
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,4,128,0,1,float16,float16,0,0.11077866951624553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,4,128,0,1,float16,fp8,0,0.11125333110491435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,4,128,0,1,fp8,fp8,0,0.09714667002360027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,1,128,0,1,float16,float16,0,0.10673066973686218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,1,128,0,1,float16,fp8,0,0.1072106659412384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,1,128,0,1,fp8,fp8,0,0.08996267120043437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,2,128,0,1,float16,float16,0,0.10806933045387268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,2,128,0,1,fp8,fp8,0,0.0902346670627594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,24,128,0,1,fp8,fp8,0,0.05949333310127258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,8,128,0,1,float16,float16,0,0.11275200049082439
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,8,128,0,1,float16,fp8,0,0.11286933223406474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,24,8,128,0,1,fp8,fp8,0,0.09709866841634114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,24,128,0,1,float16,float16,0,0.06735466420650482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,24,128,0,1,float16,fp8,0,0.06596266726652782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,1,128,0,1,float16,float16,0,0.0587360014518102
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,1,128,0,1,float16,fp8,0,0.05862933397293091
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,1,128,0,1,fp8,fp8,0,0.050767997900644936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,2,128,0,1,float16,float16,0,0.05829866727193197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,2,128,0,1,float16,fp8,0,0.05959466596444448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,2,128,0,1,fp8,fp8,0,0.05000533163547516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,4,128,0,1,float16,float16,0,0.06113066772619883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,4,128,0,1,float16,fp8,0,0.05994133154551188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,24,128,0,1,float16,fp8,0,0.037045332292715706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,4,128,0,1,fp8,fp8,0,0.05403733253479004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,8,128,0,1,float16,float16,0,0.06193066636721293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,8,128,0,1,float16,fp8,0,0.06266666452089946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,24,8,128,0,1,fp8,fp8,0,0.054287999868392944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,24,128,0,1,float16,float16,0,0.037791999677817024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,24,128,0,1,fp8,fp8,0,0.035573333501815796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,1,128,0,1,float16,float16,0,0.035002666215101876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,1,128,0,1,float16,fp8,0,0.03524799893299738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,1,128,0,1,fp8,fp8,0,0.032730666299661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,2,128,0,1,float16,float16,0,0.035530666510264076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,2,128,0,1,float16,fp8,0,0.03572266548871994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,2,128,0,1,fp8,fp8,0,0.03201066702604294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,4,128,0,1,float16,float16,0,0.03596800069014231
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,4,128,0,1,float16,fp8,0,0.03682666768630346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,4,128,0,1,fp8,fp8,0,0.033402666449546814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,8,128,0,1,float16,float16,0,0.03646933287382126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,8,128,0,1,float16,fp8,0,0.037061333656311035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,24,8,128,0,1,fp8,fp8,0,0.03452266752719879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,1,128,0,1,float16,float16,0,2.2155839602152505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,1,128,0,1,float16,fp8,0,2.2104479471842446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,1,128,0,1,fp8,fp8,0,2.149082660675049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,2,128,0,1,float16,float16,0,2.3542025883992515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,2,128,0,1,float16,fp8,0,2.3607519467671714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,2,128,0,1,fp8,fp8,0,2.1641546885172525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,4,128,0,1,float16,float16,0,2.485050678253174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,4,128,0,1,fp8,fp8,0,2.383253256479899
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,4,128,0,1,float16,fp8,0,2.4805760383605957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,8,128,0,1,float16,float16,0,2.49507204691569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,8,128,0,1,float16,fp8,0,2.4487147331237793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,24,128,0,1,float16,float16,0,1.3997066815694172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,24,128,0,1,float16,fp8,0,1.3683306376139324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,24,8,128,0,1,fp8,fp8,0,2.410325368245443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,24,128,0,1,fp8,fp8,0,1.2493173281351726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,1,128,0,1,float16,float16,0,1.1189013322194417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,1,128,0,1,fp8,fp8,0,1.058784008026123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,1,128,0,1,float16,fp8,0,1.1277173360188801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,2,128,0,1,float16,float16,0,1.158783992131551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,2,128,0,1,float16,fp8,0,1.1441013018290203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,2,128,0,1,fp8,fp8,0,1.083679993947347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,4,128,0,1,float16,float16,0,1.2297546863555908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,4,128,0,1,float16,fp8,0,1.2283519903818767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,4,128,0,1,fp8,fp8,0,1.1947680314381917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,8,128,0,1,float16,float16,0,1.1954399744669597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,24,128,0,1,float16,float16,0,0.7092853387196859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,8,128,0,1,fp8,fp8,0,1.202624003092448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,24,8,128,0,1,float16,fp8,0,1.2219466368357341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,24,128,0,1,fp8,fp8,0,0.6290239890416464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,24,128,0,1,float16,fp8,0,0.6854346593221029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,1,128,0,1,float16,float16,0,0.5711146593093872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,1,128,0,1,float16,fp8,0,0.5673493146896362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,1,128,0,1,fp8,fp8,0,0.5380693276723226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,2,128,0,1,float16,float16,0,0.5865493218104044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,2,128,0,1,float16,fp8,0,0.5753866831461588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,2,128,0,1,fp8,fp8,0,0.5634133418401083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,4,128,0,1,float16,float16,0,0.6083306471506754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,4,128,0,1,float16,fp8,0,0.6074186563491821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,4,128,0,1,fp8,fp8,0,0.610586682955424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,8,128,0,1,float16,float16,0,0.5993493398030599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,8,128,0,1,float16,fp8,0,0.6052159865697225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,24,128,0,1,float16,float16,0,0.3638026714324951
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,24,8,128,0,1,fp8,fp8,0,0.6163040002187093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,24,128,0,1,float16,fp8,0,0.3502773443857829
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,24,128,0,1,fp8,fp8,0,0.32076799869537354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,1,128,0,1,float16,float16,0,0.2975733280181885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,2,128,0,1,fp8,fp8,0,0.2778506676355998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,1,128,0,1,float16,fp8,0,0.29948800802230835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,1,128,0,1,fp8,fp8,0,0.2740746736526489
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,4,128,0,1,fp8,fp8,0,0.31274133920669556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,2,128,0,1,float16,float16,0,0.30222400029500324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,2,128,0,1,float16,fp8,0,0.29872000217437744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,4,128,0,1,float16,float16,0,0.31215999523798627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,4,128,0,1,float16,fp8,0,0.3073173364003499
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,8,128,0,1,float16,float16,0,0.31361067295074463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,8,128,0,1,float16,fp8,0,0.3084266583124797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,24,8,128,0,1,fp8,fp8,0,0.3110293348630269
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,24,128,0,1,float16,float16,0,0.19430400927861533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,24,128,0,1,float16,fp8,0,0.1874986688296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,24,128,0,1,fp8,fp8,0,0.1600053310394287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,1,128,0,1,float16,float16,0,0.15706666310628256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,1,128,0,1,float16,fp8,0,0.1579093337059021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,1,128,0,1,fp8,fp8,0,0.1349440018335978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,2,128,0,1,float16,float16,0,0.16099199652671814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,2,128,0,1,float16,fp8,0,0.1591039995352427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,2,128,0,1,fp8,fp8,0,0.13731200496355692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,4,128,0,1,float16,float16,0,0.1659999986489614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,4,128,0,1,float16,fp8,0,0.1638826628526052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,4,128,0,1,fp8,fp8,0,0.14910933375358582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,8,128,0,1,float16,float16,0,0.16582399606704712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,8,128,0,1,float16,fp8,0,0.16451199849446616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,24,8,128,0,1,fp8,fp8,0,0.15263467033704123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,24,128,0,1,float16,float16,0,0.10612266262372334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,24,128,0,1,float16,fp8,0,0.10408000151316325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,24,128,0,1,fp8,fp8,0,0.08808533350626628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,1,128,0,1,float16,float16,0,0.08555733164151509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,1,128,0,1,float16,fp8,0,0.086517333984375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,1,128,0,1,fp8,fp8,0,0.07474666833877563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,2,128,0,1,float16,float16,0,0.08778666456540425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,2,128,0,1,float16,fp8,0,0.087909330924352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,2,128,0,1,fp8,fp8,0,0.07608533402283986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,4,128,0,1,float16,float16,0,0.09032000104586284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,4,128,0,1,float16,fp8,0,0.09125333031018575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,4,128,0,1,fp8,fp8,0,0.08160533507664998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,8,128,0,1,float16,float16,0,0.09173867106437683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,1,128,0,1,float16,float16,0,0.04906666775544485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,8,128,0,1,float16,fp8,0,0.0906933347384135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,24,8,128,0,1,fp8,fp8,0,0.08319466809431712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,24,128,0,1,float16,float16,0,0.058320000767707825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,24,128,0,1,float16,fp8,0,0.05653866628805796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,24,128,0,1,fp8,fp8,0,0.052746668457984924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,1,128,0,1,float16,fp8,0,0.04863999783992767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,1,128,0,1,fp8,fp8,0,0.04268800218900045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,2,128,0,1,float16,float16,0,0.04931733508904775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,2,128,0,1,float16,fp8,0,0.04896533489227295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,2,128,0,1,fp8,fp8,0,0.0435146689414978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,4,128,0,1,float16,float16,0,0.05146133402983347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,4,128,0,1,float16,fp8,0,0.05051200091838837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,4,128,0,1,fp8,fp8,0,0.04714666803677877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,8,128,0,1,float16,float16,0,0.05162133276462555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,8,128,0,1,float16,fp8,0,0.05123200019200643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,24,8,128,0,1,fp8,fp8,0,0.047557334105173744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,24,128,0,1,float16,float16,0,0.034154665966828666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,24,128,0,1,float16,fp8,0,0.033600000043710075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,24,128,0,1,fp8,fp8,0,0.03226666649182638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,1,128,0,1,float16,float16,0,0.031632001201311745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,1,128,0,1,float16,fp8,0,0.03123733401298523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,1,128,0,1,fp8,fp8,0,0.028912000358104706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,2,128,0,1,float16,float16,0,0.03178133318821589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,2,128,0,1,float16,fp8,0,0.03142933299144109
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,2,128,0,1,fp8,fp8,0,0.03017599880695343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,4,128,0,1,float16,float16,0,0.03268799930810928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,4,128,0,1,float16,fp8,0,0.0329120010137558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,4,128,0,1,fp8,fp8,0,0.030826665461063385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,8,128,0,1,float16,float16,0,0.032373333970705666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,8,128,0,1,float16,fp8,0,0.03275733441114426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,24,8,128,0,1,fp8,fp8,0,0.030623999734719593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,24,128,0,1,float16,float16,0,0.02643733223279317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,2,128,0,1,float16,float16,0,0.024986666937669117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,24,128,0,1,float16,fp8,0,0.026378666361172993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,2,128,0,1,float16,fp8,0,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,2,128,0,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,24,128,0,1,fp8,fp8,0,0.02426133304834366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,1,128,0,1,float16,float16,0,0.02481599897146225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,1,128,0,1,float16,fp8,0,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,1,128,0,1,fp8,fp8,0,0.022783999641736347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,4,128,0,1,float16,float16,0,0.025818665822347004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,4,128,0,1,float16,fp8,0,0.025839999318122864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,4,128,0,1,fp8,fp8,0,0.023978665471076965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,8,128,0,1,float16,float16,0,0.025616000096003216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,8,128,0,1,float16,fp8,0,0.025568000972270966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,24,8,128,0,1,fp8,fp8,0,0.023989332218964893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,1,128,0,1,float16,float16,0,0.9561013380686442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,1,128,0,1,float16,fp8,0,0.942191998163859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,1,128,0,1,fp8,fp8,0,0.8510560194651285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,2,128,0,1,float16,float16,0,1.0175360043843586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,2,128,0,1,float16,fp8,0,0.9832800229390463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,2,128,0,1,fp8,fp8,0,0.8745866616566976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,4,128,0,1,float16,float16,0,1.077936013539632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,4,128,0,1,float16,fp8,0,1.0787733395894368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,4,128,0,1,fp8,fp8,0,0.9873493512471517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,24,128,0,1,float16,float16,0,0.6287999947865804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,8,128,0,1,float16,float16,0,1.0599946975708008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,8,128,0,1,fp8,fp8,0,0.9968106746673584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,24,8,128,0,1,float16,fp8,0,1.0834506352742512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,24,128,0,1,float16,fp8,0,0.6036266485850016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,24,128,0,1,fp8,fp8,0,0.5247626701990763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,1,128,0,1,float16,float16,0,0.48441600799560547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,1,128,0,1,float16,fp8,0,0.48311467965443927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,1,128,0,1,fp8,fp8,0,0.42659199237823486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,2,128,0,1,float16,float16,0,0.4984960158665975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,2,128,0,1,float16,fp8,0,0.4946986834208171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,2,128,0,1,fp8,fp8,0,0.43917866547902423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,4,128,0,1,float16,float16,0,0.538266658782959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,4,128,0,1,float16,fp8,0,0.5247626701990763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,4,128,0,1,fp8,fp8,0,0.5022079944610596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,8,128,0,1,float16,float16,0,0.5201226472854614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,8,128,0,1,float16,fp8,0,0.5135680039723715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,24,128,0,1,float16,float16,0,0.3205653429031372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,1,128,0,1,fp8,fp8,0,0.2227733333905538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,24,8,128,0,1,fp8,fp8,0,0.5057493448257446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,24,128,0,1,float16,fp8,0,0.31164799133936566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,24,128,0,1,fp8,fp8,0,0.27240000168482464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,1,128,0,1,float16,float16,0,0.25332266092300415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,4,128,0,1,float16,float16,0,0.270687997341156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,4,128,0,1,float16,fp8,0,0.2709546685218811
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,1,128,0,1,float16,fp8,0,0.2512800097465515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,2,128,0,1,float16,float16,0,0.26125866174697876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,2,128,0,1,float16,fp8,0,0.2575146754582723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,2,128,0,1,fp8,fp8,0,0.22782933712005615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,4,128,0,1,fp8,fp8,0,0.26017600297927856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,8,128,0,1,float16,float16,0,0.26843732595443726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,8,128,0,1,float16,fp8,0,0.2709813316663106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,24,8,128,0,1,fp8,fp8,0,0.2633066574732463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,24,128,0,1,float16,float16,0,0.16972267627716064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,24,128,0,1,float16,fp8,0,0.16698133945465088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,24,128,0,1,fp8,fp8,0,0.1459999978542328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,1,128,0,1,float16,float16,0,0.13564800222714743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,1,128,0,1,float16,fp8,0,0.13495999574661255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,1,128,0,1,fp8,fp8,0,0.11939733227094014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,2,128,0,1,float16,float16,0,0.13809600472450256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,2,128,0,1,float16,fp8,0,0.1386293371518453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,2,128,0,1,fp8,fp8,0,0.12180800239245097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,4,128,0,1,float16,float16,0,0.1437013347943624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,4,128,0,1,float16,fp8,0,0.14291200041770935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,4,128,0,1,fp8,fp8,0,0.13507733742396036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,8,128,0,1,float16,float16,0,0.145306666692098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,8,128,0,1,float16,fp8,0,0.14452800154685974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,24,8,128,0,1,fp8,fp8,0,0.13854400316874185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,24,128,0,1,float16,float16,0,0.0993173321088155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,2,128,0,1,float16,float16,0,0.07919999957084656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,24,128,0,1,float16,fp8,0,0.09602133433024089
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,24,128,0,1,fp8,fp8,0,0.08107199768225352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,1,128,0,1,float16,float16,0,0.07710933188597362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,1,128,0,1,float16,fp8,0,0.07843733330567677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,1,128,0,1,fp8,fp8,0,0.06748266518115997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,2,128,0,1,float16,fp8,0,0.07925866544246674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,2,128,0,1,fp8,fp8,0,0.07051200171311696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,4,128,0,1,float16,float16,0,0.08196266492207845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,4,128,0,1,float16,fp8,0,0.08147199948628743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,4,128,0,1,fp8,fp8,0,0.07506666580835979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,8,128,0,1,float16,float16,0,0.08298133313655853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,8,128,0,1,float16,fp8,0,0.08281066517035167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,24,8,128,0,1,fp8,fp8,0,0.07567999760309856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,24,128,0,1,float16,float16,0,0.052757332722345986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,24,128,0,1,float16,fp8,0,0.051594664653142296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,24,128,0,1,fp8,fp8,0,0.047925333182017006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,1,128,0,1,float16,float16,0,0.04386133452256521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,1,128,0,1,float16,fp8,0,0.04340266684691111
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,1,128,0,1,fp8,fp8,0,0.038773333032925926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,2,128,0,1,float16,float16,0,0.04490133126576742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,2,128,0,1,float16,fp8,0,0.04438399771849314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,2,128,0,1,fp8,fp8,0,0.03832533210515976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,4,128,0,1,float16,float16,0,0.04614399870236715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,8,128,0,1,fp8,fp8,0,0.044112001856168113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,24,128,0,1,float16,float16,0,0.03202133377393087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,4,128,0,1,float16,fp8,0,0.04555733501911163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,4,128,0,1,fp8,fp8,0,0.042021334171295166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,8,128,0,1,float16,float16,0,0.04682666560014089
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,24,8,128,0,1,float16,fp8,0,0.047930667797724404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,24,128,0,1,float16,fp8,0,0.031845333675543465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,24,128,0,1,fp8,fp8,0,0.030799999833106995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,1,128,0,1,float16,float16,0,0.029487999776999157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,1,128,0,1,float16,fp8,0,0.02945599953333537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,1,128,0,1,fp8,fp8,0,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,2,128,0,1,float16,float16,0,0.030346666773160298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,2,128,0,1,float16,fp8,0,0.030986666679382324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,2,128,0,1,fp8,fp8,0,0.02757866680622101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,4,128,0,1,float16,float16,0,0.03147733211517334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,4,128,0,1,float16,fp8,0,0.03072533259789149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,4,128,0,1,fp8,fp8,0,0.028757333755493164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,8,128,0,1,float16,fp8,0,0.03120533376932144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,8,128,0,1,float16,float16,0,0.031189332405726116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,24,8,128,0,1,fp8,fp8,0,0.02914133419593175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,24,128,0,1,float16,float16,0,0.023605334262053173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,24,128,0,1,float16,fp8,0,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,24,128,0,1,fp8,fp8,0,0.02502399931351344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,1,128,0,1,float16,float16,0,0.021962667504946392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,1,128,0,1,float16,fp8,0,0.021781332790851593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,1,128,0,1,fp8,fp8,0,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,2,128,0,1,float16,float16,0,0.02214933435122172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,2,128,0,1,float16,fp8,0,0.022128000855445862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,2,128,0,1,fp8,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,4,128,0,1,float16,float16,0,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,4,128,0,1,float16,fp8,0,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,4,128,0,1,fp8,fp8,0,0.021669333179791767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,8,128,0,1,float16,float16,0,0.022661333282788593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,8,128,0,1,float16,fp8,0,0.02334933231274287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,24,8,128,0,1,fp8,fp8,0,0.021546666820844013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,24,128,0,1,float16,float16,0,0.01987733319401741
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,24,128,0,1,float16,fp8,0,0.019925333559513092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,24,128,0,1,fp8,fp8,0,0.018266666680574417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,1,128,0,1,float16,float16,0,0.01950399950146675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,1,128,0,1,float16,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,1,128,0,1,fp8,fp8,0,0.017984000345071156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,2,128,0,1,float16,float16,0,0.018863999595244724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,2,128,0,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,2,128,0,1,fp8,fp8,0,0.0176959993938605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,4,128,0,1,float16,float16,0,0.019482667247454327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,4,128,0,1,float16,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,4,128,0,1,fp8,fp8,0,0.019882666567961376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,8,128,0,1,float16,float16,0,0.019530666371186573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,8,128,0,1,float16,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,24,8,128,0,1,fp8,fp8,0,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,1,128,0,1,float16,float16,0,0.43587199846903485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,1,128,0,1,float16,fp8,0,0.43748799959818524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,1,128,0,1,fp8,fp8,0,0.41418135166168213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,2,128,0,1,float16,float16,0,0.44757334391276044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,2,128,0,1,float16,fp8,0,0.4419573148091634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,2,128,0,1,fp8,fp8,0,0.43298133214314777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,4,128,0,1,float16,float16,0,0.49456000328063965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,4,128,0,1,float16,fp8,0,0.47095465660095215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,8,128,0,1,float16,fp8,0,0.4715840021769206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,4,128,0,1,fp8,fp8,0,0.49591998259226483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,8,128,0,1,float16,float16,0,0.48206400871276855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,24,8,128,0,1,fp8,fp8,0,0.504810651143392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,24,128,0,1,float16,float16,0,0.3044426639874776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,24,128,0,1,float16,fp8,0,0.2958453297615051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,24,128,0,1,fp8,fp8,0,0.2730986674626668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,1,128,0,1,float16,float16,0,0.2286133368810018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,1,128,0,1,float16,fp8,0,0.22964799404144287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,1,128,0,1,fp8,fp8,0,0.21689067284266153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,2,128,0,1,float16,float16,0,0.23594133059183756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,2,128,0,1,float16,fp8,0,0.23236799240112305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,2,128,0,1,fp8,fp8,0,0.22405866781870523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,4,128,0,1,float16,float16,0,0.2502826650937398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,4,128,0,1,float16,fp8,0,0.24579733610153198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,4,128,0,1,fp8,fp8,0,0.25804267326990765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,8,128,0,1,float16,float16,0,0.24995199839274088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,8,128,0,1,float16,fp8,0,0.24598934253056845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,24,8,128,0,1,fp8,fp8,0,0.2606559991836548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,24,128,0,1,float16,float16,0,0.16714133818944296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,24,128,0,1,float16,fp8,0,0.16245333353678384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,24,128,0,1,fp8,fp8,0,0.14505599935849509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,1,128,0,1,float16,float16,0,0.12507200241088867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,1,128,0,1,float16,fp8,0,0.1250986655553182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,4,128,0,1,float16,float16,0,0.1367946664492289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,1,128,0,1,fp8,fp8,0,0.11626666784286499
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,2,128,0,1,float16,float16,0,0.1283253331979116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,2,128,0,1,float16,fp8,0,0.12819733222325644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,2,128,0,1,fp8,fp8,0,0.12007466952006023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,4,128,0,1,float16,fp8,0,0.13343999783198038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,4,128,0,1,fp8,fp8,0,0.13296000162760416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,24,128,0,1,float16,fp8,0,0.08994133273760478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,8,128,0,1,float16,float16,0,0.139055997133255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,8,128,0,1,float16,fp8,0,0.13633599877357483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,24,8,128,0,1,fp8,fp8,0,0.13566399614016214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,24,128,0,1,float16,float16,0,0.09633599718411763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,24,128,0,1,fp8,fp8,0,0.0803413341442744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,1,128,0,1,float16,float16,0,0.07141866783301036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,1,128,0,1,float16,fp8,0,0.07065600156784058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,1,128,0,1,fp8,fp8,0,0.06683200101057689
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,2,128,0,1,float16,float16,0,0.07322666545708974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,2,128,0,1,float16,fp8,0,0.07256533205509186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,2,128,0,1,fp8,fp8,0,0.06760000189145406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,4,128,0,1,float16,float16,0,0.07642666498819987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,4,128,0,1,float16,fp8,0,0.07657066484292348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,4,128,0,1,fp8,fp8,0,0.07432533303896587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,8,128,0,1,float16,float16,0,0.0774186650911967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,8,128,0,1,float16,fp8,0,0.07791466514269511
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,24,8,128,0,1,fp8,fp8,0,0.0751146674156189
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,24,128,0,1,float16,float16,0,0.05231999854246775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,24,128,0,1,float16,fp8,0,0.049728001157442726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,24,128,0,1,fp8,fp8,0,0.04690133531888326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,1,128,0,1,float16,float16,0,0.04146133363246918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,2,128,0,1,fp8,fp8,0,0.03852800031503042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,1,128,0,1,float16,fp8,0,0.0414986660083135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,1,128,0,1,fp8,fp8,0,0.03819733361403147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,2,128,0,1,float16,float16,0,0.042208001017570496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,2,128,0,1,float16,fp8,0,0.04278400043646494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,4,128,0,1,float16,float16,0,0.04390933116277059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,4,128,0,1,float16,fp8,0,0.04306666553020477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,4,128,0,1,fp8,fp8,0,0.042117332418759666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,8,128,0,1,float16,float16,0,0.04450666904449463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,8,128,0,1,float16,fp8,0,0.044677332043647766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,24,8,128,0,1,fp8,fp8,0,0.04331733286380768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,24,128,0,1,float16,float16,0,0.03105599929889043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,24,128,0,1,float16,fp8,0,0.031541332602500916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,24,128,0,1,fp8,fp8,0,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,1,128,0,1,float16,float16,0,0.029391999046007793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,1,128,0,1,float16,fp8,0,0.029322666426499683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,1,128,0,1,fp8,fp8,0,0.027136000494162243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,2,128,0,1,float16,float16,0,0.02906133234500885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,2,128,0,1,float16,fp8,0,0.0295413335164388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,2,128,0,1,fp8,fp8,0,0.027722666660944622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,8,128,0,1,fp8,fp8,0,0.029189333319664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,4,128,0,1,float16,float16,0,0.030261332790056866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,4,128,0,1,float16,fp8,0,0.03073066721359889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,4,128,0,1,fp8,fp8,0,0.029333333174387615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,8,128,0,1,float16,float16,0,0.030234667162100475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,24,8,128,0,1,float16,fp8,0,0.03031466652949651
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,24,128,0,1,float16,float16,0,0.022015998760859173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,24,128,0,1,float16,fp8,0,0.02216000109910965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,24,128,0,1,fp8,fp8,0,0.022309333086013794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,1,128,0,1,float16,float16,0,0.02048533285657565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,1,128,0,1,float16,fp8,0,0.020725333442290623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,1,128,0,1,fp8,fp8,0,0.020784000555674236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,2,128,0,1,float16,float16,0,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,2,128,0,1,float16,fp8,0,0.020773333807786305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,2,128,0,1,fp8,fp8,0,0.020746666938066483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,4,128,0,1,float16,float16,0,0.02186666677395503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,4,128,0,1,float16,fp8,0,0.022613334159056347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,4,128,0,1,fp8,fp8,0,0.02186133215824763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,8,128,0,1,float16,float16,0,0.021125334004561108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,8,128,0,1,float16,fp8,0,0.0223786657055219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,24,8,128,0,1,fp8,fp8,0,0.022533332308133442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,24,128,0,1,float16,float16,0,0.01770666614174843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,24,128,0,1,float16,fp8,0,0.018165333817402523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,24,128,0,1,fp8,fp8,0,0.01830400029818217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,1,128,0,1,float16,float16,0,0.01762666677435239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,1,128,0,1,float16,fp8,0,0.0174346665541331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,4,128,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,1,128,0,1,fp8,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,2,128,0,1,float16,float16,0,0.01741333305835724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,2,128,0,1,float16,fp8,0,0.01793066660563151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,2,128,0,1,fp8,fp8,0,0.017845333864291508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,4,128,0,1,float16,float16,0,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,4,128,0,1,fp8,fp8,0,0.018613333503405254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,8,128,0,1,float16,float16,0,0.017477333545684814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,8,128,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,24,8,128,0,1,fp8,fp8,0,0.017866666118303936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,24,128,0,1,float16,float16,0,0.016688000410795212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,24,128,0,1,float16,fp8,0,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,24,128,0,1,fp8,fp8,0,0.01754133279124896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,1,128,0,1,float16,float16,0,0.016528000434239704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,1,128,0,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,1,128,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,2,128,0,1,float16,float16,0,0.01602666700879733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,8,128,0,1,float16,float16,0,0.016650666793187458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,2,128,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,2,128,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,4,128,0,1,float16,float16,0,0.01648533344268799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,4,128,0,1,float16,fp8,0,0.016634666671355564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,4,128,0,1,fp8,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,8,128,0,1,float16,fp8,0,0.01655999943614006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,24,8,128,0,1,fp8,fp8,0,0.01746133342385292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,1,128,0,1,float16,fp8,0,0.26525332530339557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,1,128,0,1,float16,float16,0,0.26577067375183105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,1,128,0,1,fp8,fp8,0,0.3025866746902466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,2,128,0,1,float16,float16,0,0.2679520050684611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,2,128,0,1,float16,fp8,0,0.26705066363016766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,2,128,0,1,fp8,fp8,0,0.30880000193913776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,4,128,0,1,float16,float16,0,0.27794132630030316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,4,128,0,1,float16,fp8,0,0.28066666920979816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,8,128,0,1,float16,float16,0,0.28445865710576373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,4,128,0,1,fp8,fp8,0,0.3418826659520467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,8,128,0,1,float16,fp8,0,0.28252800305684406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,24,128,0,1,float16,float16,0,0.17821866273880005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,24,8,128,0,1,fp8,fp8,0,0.34619200229644775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,1,128,0,1,float16,fp8,0,0.14081600308418274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,24,128,0,1,float16,fp8,0,0.17331733306248984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,24,128,0,1,fp8,fp8,0,0.18728532393773398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,1,128,0,1,float16,float16,0,0.14054399728775024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,1,128,0,1,fp8,fp8,0,0.1600106656551361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,2,128,0,1,float16,float16,0,0.14216533303260803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,2,128,0,1,float16,fp8,0,0.14136000474294028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,2,128,0,1,fp8,fp8,0,0.16395733753840128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,4,128,0,1,float16,float16,0,0.14732266465822855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,4,128,0,1,float16,fp8,0,0.14799466729164124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,4,128,0,1,fp8,fp8,0,0.1768959959348043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,24,128,0,1,float16,fp8,0,0.09453333417574565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,8,128,0,1,float16,float16,0,0.15204800168673197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,8,128,0,1,float16,fp8,0,0.14946666359901428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,24,8,128,0,1,fp8,fp8,0,0.1791200041770935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,24,128,0,1,float16,float16,0,0.09533333778381348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,24,128,0,1,fp8,fp8,0,0.10192533334096272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,1,128,0,1,float16,float16,0,0.07754133145014445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,1,128,0,1,float16,fp8,0,0.07807999849319458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,1,128,0,1,fp8,fp8,0,0.08844799796740214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,2,128,0,1,float16,float16,0,0.07899733384450276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,2,128,0,1,float16,fp8,0,0.07851199805736542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,2,128,0,1,fp8,fp8,0,0.09059733152389526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,4,128,0,1,float16,float16,0,0.0817386656999588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,4,128,0,1,float16,fp8,0,0.08140266438325246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,4,128,0,1,fp8,fp8,0,0.09744532903035481
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,8,128,0,1,float16,float16,0,0.08290666838486989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,8,128,0,1,float16,fp8,0,0.08422399560610454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,24,8,128,0,1,fp8,fp8,0,0.09731200337409973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,24,128,0,1,float16,float16,0,0.05393599967161814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,24,128,0,1,float16,fp8,0,0.052613332867622375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,2,128,0,1,float16,fp8,0,0.04498666524887085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,24,128,0,1,fp8,fp8,0,0.05861333509286245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,1,128,0,1,float16,float16,0,0.04401599864164988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,1,128,0,1,float16,fp8,0,0.044863998889923096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,1,128,0,1,fp8,fp8,0,0.049327999353408813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,2,128,0,1,float16,float16,0,0.04562133550643921
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,2,128,0,1,fp8,fp8,0,0.050026665131251015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,4,128,0,1,float16,float16,0,0.046538665890693665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,4,128,0,1,float16,fp8,0,0.046906664967536926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,4,128,0,1,fp8,fp8,0,0.055359999338785805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,8,128,0,1,float16,float16,0,0.047466665506362915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,8,128,0,1,float16,fp8,0,0.047322665651639305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,24,8,128,0,1,fp8,fp8,0,0.05478399991989136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,24,128,0,1,float16,float16,0,0.03123733401298523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,24,128,0,1,float16,fp8,0,0.03030933439731598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,24,128,0,1,fp8,fp8,0,0.03395200024048487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,1,128,0,1,float16,float16,0,0.028218666712443035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,1,128,0,1,float16,fp8,0,0.02847466617822647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,1,128,0,1,fp8,fp8,0,0.030943999687830608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,2,128,0,1,float16,float16,0,0.02886933336655299
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,2,128,0,1,float16,fp8,0,0.02867199977238973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,2,128,0,1,fp8,fp8,0,0.031530665854612984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,4,128,0,1,float16,float16,0,0.029546665648619335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,4,128,0,1,float16,fp8,0,0.029951999584833782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,4,128,0,1,fp8,fp8,0,0.03265066693226496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,8,128,0,1,float16,float16,0,0.02959466725587845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,1,128,0,1,float16,float16,0,0.02279466638962428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,8,128,0,1,float16,fp8,0,0.029893333713213604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,1,128,0,1,fp8,fp8,0,0.024336000283559162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,24,8,128,0,1,fp8,fp8,0,0.033071999748547874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,24,128,0,1,float16,float16,0,0.02385599911212921
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,24,128,0,1,float16,fp8,0,0.02425066630045573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,24,128,0,1,fp8,fp8,0,0.026074667771657307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,1,128,0,1,float16,fp8,0,0.022682666778564453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,2,128,0,1,float16,float16,0,0.02313599983851115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,2,128,0,1,float16,fp8,0,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,2,128,0,1,fp8,fp8,0,0.024143998821576435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,4,128,0,1,float16,float16,0,0.023610666394233704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,4,128,0,1,float16,fp8,0,0.023813332120577495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,4,128,0,1,fp8,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,8,128,0,1,float16,float16,0,0.02365333338578542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,8,128,0,1,float16,fp8,0,0.023946667710940044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,24,8,128,0,1,fp8,fp8,0,0.025807999074459076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,24,128,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,24,128,0,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,24,128,0,1,fp8,fp8,0,0.01859733338157336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,1,128,0,1,float16,float16,0,0.01594666639963786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,1,128,0,1,float16,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,1,128,0,1,fp8,fp8,0,0.017514667163292568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,2,128,0,1,float16,float16,0,0.016538667182127636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,2,128,0,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,2,128,0,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,4,128,0,1,float16,float16,0,0.016234666109085083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,4,128,0,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,4,128,0,1,fp8,fp8,0,0.017386666188637417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,8,128,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,8,128,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,24,8,128,0,1,fp8,fp8,0,0.0183146670460701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,24,128,0,1,float16,float16,0,0.016410666207472484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,24,128,0,1,float16,fp8,0,0.01580799991885821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,24,128,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,2,128,0,1,float16,fp8,0,0.015909332782030106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,1,128,0,1,float16,float16,0,0.015360000232855478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,1,128,0,1,float16,fp8,0,0.015370666980743408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,1,128,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,2,128,0,1,float16,float16,0,0.015637333194414776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,2,128,0,1,fp8,fp8,0,0.016480000068744022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,4,128,0,1,float16,float16,0,0.01552533358335495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,4,128,0,1,float16,fp8,0,0.015925332903862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,4,128,0,1,fp8,fp8,0,0.017690667261679966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,8,128,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,8,128,0,1,float16,fp8,0,0.01655999943614006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,24,8,128,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,24,128,0,1,float16,float16,0,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,24,128,0,1,float16,fp8,0,0.015546667079130808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,24,128,0,1,fp8,fp8,0,0.01646399994691213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,1,128,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,1,128,0,1,float16,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,1,128,0,1,fp8,fp8,0,0.016261332978804905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,2,128,0,1,float16,float16,0,0.014570667097965876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,2,128,0,1,float16,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,2,128,0,1,fp8,fp8,0,0.016458666572968166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,4,128,0,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,4,128,0,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,4,128,0,1,fp8,fp8,0,0.016410666207472484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,8,128,0,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,1,128,0,1,float16,fp8,0,0.18644267320632935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,8,128,0,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,24,8,128,0,1,fp8,fp8,0,0.016496000190575916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,1,128,0,1,float16,float16,0,0.1864159901936849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,1,128,0,1,fp8,fp8,0,0.24503999948501587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,2,128,0,1,float16,float16,0,0.18846400578816733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,2,128,0,1,float16,fp8,0,0.1879040002822876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,4,128,0,1,float16,float16,0,0.19086933135986328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,2,128,0,1,fp8,fp8,0,0.2461706598599752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,4,128,0,1,float16,fp8,0,0.1925706664721171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,4,128,0,1,fp8,fp8,0,0.26023467381795246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,8,128,0,1,float16,float16,0,0.19734932978947958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,8,128,0,1,float16,fp8,0,0.19773866732915243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,24,8,128,0,1,fp8,fp8,0,0.26201067368189496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,24,128,0,1,float16,float16,0,0.12041067083676656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,24,128,0,1,float16,fp8,0,0.1183519959449768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,24,128,0,1,fp8,fp8,0,0.1446399986743927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,1,128,0,1,float16,float16,0,0.10029333829879761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,1,128,0,1,float16,fp8,0,0.10085333387056987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,1,128,0,1,fp8,fp8,0,0.13060800234476724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,2,128,0,1,float16,float16,0,0.10230400164922078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,8,128,0,1,float16,float16,0,0.10677867134412129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,2,128,0,1,float16,fp8,0,0.10157333811124165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,2,128,0,1,fp8,fp8,0,0.13211733102798462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,4,128,0,1,float16,float16,0,0.1042133371035258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,4,128,0,1,float16,fp8,0,0.10508267084757487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,4,128,0,1,fp8,fp8,0,0.13843199610710144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,8,128,0,1,float16,fp8,0,0.10642133156458537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,24,8,128,0,1,fp8,fp8,0,0.1395786702632904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,24,128,0,1,float16,float16,0,0.06540800134340923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,24,128,0,1,float16,fp8,0,0.06380799909432729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,24,128,0,1,fp8,fp8,0,0.07956799864768982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,1,128,0,1,float16,float16,0,0.05509866774082184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,1,128,0,1,float16,fp8,0,0.05621333420276642
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,1,128,0,1,fp8,fp8,0,0.06964266796906789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,2,128,0,1,float16,float16,0,0.05583466589450836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,2,128,0,1,float16,fp8,0,0.05638400216897329
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,2,128,0,1,fp8,fp8,0,0.07178666690985362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,4,128,0,1,float16,float16,0,0.05709333221117655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,24,128,0,1,float16,float16,0,0.035749333600203194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,4,128,0,1,float16,fp8,0,0.05816000203291575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,4,128,0,1,fp8,fp8,0,0.0748586654663086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,8,128,0,1,float16,float16,0,0.05865600208441416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,8,128,0,1,float16,fp8,0,0.058602665861447654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,24,8,128,0,1,fp8,fp8,0,0.07729066908359528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,24,128,0,1,float16,fp8,0,0.03534399966398875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,24,128,0,1,fp8,fp8,0,0.044031997521718345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,1,128,0,1,float16,float16,0,0.03342399994532267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,1,128,0,1,float16,fp8,0,0.03384000062942505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,1,128,0,1,fp8,fp8,0,0.04229333500067393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,2,128,0,1,float16,float16,0,0.03410666684309641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,2,128,0,1,float16,fp8,0,0.03368533402681351
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,2,128,0,1,fp8,fp8,0,0.04248000184694926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,4,128,0,1,float16,float16,0,0.033802665770053864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,4,128,0,1,float16,fp8,0,0.034330666065216064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,4,128,0,1,fp8,fp8,0,0.04417600234349569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,8,128,0,1,float16,float16,0,0.035061334570248924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,8,128,0,1,float16,fp8,0,0.034789333740870156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,24,8,128,0,1,fp8,fp8,0,0.04456533491611481
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,24,128,0,1,float16,float16,0,0.024911999702453613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,24,128,0,1,float16,fp8,0,0.02476799984773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,24,128,0,1,fp8,fp8,0,0.029616000751654308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,1,128,0,1,float16,float16,0,0.023247999449570973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,1,128,0,1,float16,fp8,0,0.02346133440732956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,4,128,0,1,float16,fp8,0,0.023872000475724537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,1,128,0,1,fp8,fp8,0,0.028704000016053517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,2,128,0,1,float16,float16,0,0.023658665517965954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,2,128,0,1,float16,fp8,0,0.02385066697994868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,2,128,0,1,fp8,fp8,0,0.028837333122889202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,4,128,0,1,float16,float16,0,0.0242399995525678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,4,128,0,1,fp8,fp8,0,0.029663999875386555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,8,128,0,1,float16,float16,0,0.024666666984558105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,8,128,0,1,float16,fp8,0,0.024453334510326385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,24,8,128,0,1,fp8,fp8,0,0.03030399978160858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,24,128,0,1,float16,float16,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,24,128,0,1,float16,fp8,0,0.0194560003777345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,24,128,0,1,fp8,fp8,0,0.022511998812357586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,1,128,0,1,float16,float16,0,0.017829333742459614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,1,128,0,1,float16,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,1,128,0,1,fp8,fp8,0,0.021514666577180225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,2,128,0,1,float16,float16,0,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,2,128,0,1,float16,fp8,0,0.01844800015290578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,2,128,0,1,fp8,fp8,0,0.02181866765022278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,8,128,0,1,float16,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,4,128,0,1,float16,float16,0,0.018522666146357853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,4,128,0,1,float16,fp8,0,0.01836266616980235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,4,128,0,1,fp8,fp8,0,0.022175999979178112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,8,128,0,1,float16,float16,0,0.018629333625237148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,24,8,128,0,1,fp8,fp8,0,0.021776000658671062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,24,128,0,1,float16,float16,0,0.015466666469971338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,24,128,0,1,float16,fp8,0,0.016010666886965435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,24,128,0,1,fp8,fp8,0,0.018239999810854595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,2,128,0,1,float16,fp8,0,0.016021333634853363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,1,128,0,1,float16,float16,0,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,1,128,0,1,float16,fp8,0,0.015610666324694952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,1,128,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,2,128,0,1,float16,float16,0,0.014741333822409311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,2,128,0,1,fp8,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,4,128,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,4,128,0,1,float16,fp8,0,0.016165333489576977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,4,128,0,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,8,128,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,8,128,0,1,float16,fp8,0,0.015589332828919092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,24,8,128,0,1,fp8,fp8,0,0.017786666750907898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,24,128,0,1,float16,float16,0,0.014677333335081736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,24,128,0,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,24,128,0,1,fp8,fp8,0,0.016682667036851246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,1,128,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,1,128,0,1,float16,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,1,128,0,1,fp8,fp8,0,0.01611199975013733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,2,128,0,1,float16,float16,0,0.014373333503802618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,2,128,0,1,float16,fp8,0,0.014389333625634512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,2,128,0,1,fp8,fp8,0,0.016442666451136272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,4,128,0,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,4,128,0,1,float16,fp8,0,0.01461333284775416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,4,128,0,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,8,128,0,1,float16,float16,0,0.0145066666106383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,8,128,0,1,float16,fp8,0,0.014554666976133982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,24,8,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,24,128,0,1,float16,float16,0,0.013776000589132309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,24,128,0,1,float16,fp8,0,0.01458666721979777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,24,128,0,1,fp8,fp8,0,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,1,128,0,1,float16,float16,0,0.013946666071812311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,1,128,0,1,float16,fp8,0,0.01451733335852623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,4,128,0,1,float16,fp8,0,0.01443733274936676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,1,128,0,1,fp8,fp8,0,0.016143999993801117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,2,128,0,1,float16,float16,0,0.013424000392357508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,2,128,0,1,float16,fp8,0,0.014522666732470194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,2,128,0,1,fp8,fp8,0,0.015893333901961643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,4,128,0,1,float16,float16,0,0.01392000044385592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,4,128,0,1,fp8,fp8,0,0.016202667107184727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,8,128,0,1,float16,float16,0,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,8,128,0,1,float16,fp8,0,0.014405333747466406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,2,128,0,1,float16,float16,0,0.15432533621788025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,24,8,128,0,1,fp8,fp8,0,0.01611199975013733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,1,128,0,1,float16,float16,0,0.15275733669598898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,1,128,0,1,float16,fp8,0,0.15404799580574036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,1,128,0,1,fp8,fp8,0,0.21338133017222086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,2,128,0,1,float16,fp8,0,0.154639999071757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,2,128,0,1,fp8,fp8,0,0.21649066607157388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,4,128,0,1,float16,float16,0,0.15824000040690103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,4,128,0,1,float16,fp8,0,0.15651733676592508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,4,128,0,1,fp8,fp8,0,0.22150933742523193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,24,128,0,1,float16,fp8,0,0.09226133426030476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,8,128,0,1,float16,float16,0,0.1596160034338633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,8,128,0,1,float16,fp8,0,0.15970666209856668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,24,128,0,1,float16,float16,0,0.09248532851537068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,2,128,0,1,float16,float16,0,0.08309333523114522
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,24,8,128,0,1,fp8,fp8,0,0.22369066874186197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,24,128,0,1,fp8,fp8,0,0.12303466598192851
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,1,128,0,1,float16,float16,0,0.0831413318713506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,1,128,0,1,float16,fp8,0,0.08251733581225078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,1,128,0,1,fp8,fp8,0,0.11380799611409505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,2,128,0,1,float16,fp8,0,0.083146666487058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,2,128,0,1,fp8,fp8,0,0.11442133784294128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,4,128,0,1,float16,float16,0,0.08473066488901775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,4,128,0,1,float16,fp8,0,0.0860053300857544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,4,128,0,1,fp8,fp8,0,0.11914133032162984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,24,128,0,1,fp8,fp8,0,0.06646933158238728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,8,128,0,1,float16,float16,0,0.08695466319719951
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,8,128,0,1,float16,fp8,0,0.08633599678675334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,24,8,128,0,1,fp8,fp8,0,0.11934399604797363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,24,128,0,1,float16,float16,0,0.04946133494377136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,24,128,0,1,float16,fp8,0,0.049600000182787575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,1,128,0,1,float16,float16,0,0.04717866579691569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,1,128,0,1,float16,fp8,0,0.04810666541258494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,1,128,0,1,fp8,fp8,0,0.06320533156394958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,2,128,0,1,float16,float16,0,0.04757333298524221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,2,128,0,1,float16,fp8,0,0.04738133152325948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,2,128,0,1,fp8,fp8,0,0.06461866696675618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,4,128,0,1,float16,float16,0,0.048714667558670044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,4,128,0,1,float16,fp8,0,0.04878933231035868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,4,128,0,1,fp8,fp8,0,0.06509333352247874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,8,128,0,1,float16,float16,0,0.04836266736189524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,8,128,0,1,float16,fp8,0,0.04790399968624115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,24,8,128,0,1,fp8,fp8,0,0.06540800134340923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,24,128,0,1,float16,float16,0,0.03173333406448364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,24,128,0,1,float16,fp8,0,0.03169599920511246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,24,128,0,1,fp8,fp8,0,0.04089066634575526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,1,128,0,1,float16,float16,0,0.030576000610987347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,1,128,0,1,float16,fp8,0,0.031066666046778362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,1,128,0,1,fp8,fp8,0,0.03932799895604452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,2,128,0,1,float16,float16,0,0.030421334008375805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,2,128,0,1,float16,fp8,0,0.030928000807762146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,2,128,0,1,fp8,fp8,0,0.0403466671705246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,4,128,0,1,float16,float16,0,0.031008000175158184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,4,128,0,1,float16,fp8,0,0.03142933299144109
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,4,128,0,1,fp8,fp8,0,0.04030400017897288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,8,128,0,1,float16,float16,0,0.03138133386770884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,8,128,0,1,float16,fp8,0,0.031194667021433514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,1,128,0,1,float16,float16,0,0.02080533280968666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,24,8,128,0,1,fp8,fp8,0,0.040752001106739044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,24,128,0,1,float16,float16,0,0.022218666970729828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,2,128,0,1,float16,float16,0,0.021562665700912476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,24,128,0,1,float16,fp8,0,0.021776000658671062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,24,128,0,1,fp8,fp8,0,0.02647999922434489
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,1,128,0,1,float16,fp8,0,0.02075733368595441
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,1,128,0,1,fp8,fp8,0,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,2,128,0,1,float16,fp8,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,2,128,0,1,fp8,fp8,0,0.026346666117509205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,4,128,0,1,float16,float16,0,0.021402666966120403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,4,128,0,1,float16,fp8,0,0.02144533395767212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,4,128,0,1,fp8,fp8,0,0.02595199892918269
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,8,128,0,1,float16,float16,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,8,128,0,1,float16,fp8,0,0.021701333423455555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,24,8,128,0,1,fp8,fp8,0,0.02659733345111211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,24,128,0,1,float16,float16,0,0.01870399961868922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,24,128,0,1,float16,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,24,128,0,1,fp8,fp8,0,0.022133332987626392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,1,128,0,1,float16,float16,0,0.017477333545684814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,1,128,0,1,float16,fp8,0,0.01810666670401891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,1,128,0,1,fp8,fp8,0,0.020618667205174763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,2,128,0,1,float16,float16,0,0.01800000046690305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,2,128,0,1,float16,fp8,0,0.018496000518401463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,2,128,0,1,fp8,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,4,128,0,1,float16,float16,0,0.018101333330074947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,4,128,0,1,float16,fp8,0,0.018522666146357853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,4,128,0,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,8,128,0,1,float16,float16,0,0.01762666677435239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,8,128,0,1,float16,fp8,0,0.017616000026464462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,24,8,128,0,1,fp8,fp8,0,0.021557333568731945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,24,128,0,1,float16,float16,0,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,24,128,0,1,float16,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,24,128,0,1,fp8,fp8,0,0.017738666385412216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,1,128,0,1,float16,float16,0,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,1,128,0,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,1,128,0,1,fp8,fp8,0,0.016399999459584553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,2,128,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,2,128,0,1,float16,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,2,128,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,4,128,0,1,float16,float16,0,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,4,128,0,1,float16,fp8,0,0.015439999600251516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,4,128,0,1,fp8,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,8,128,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,8,128,0,1,float16,fp8,0,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,24,8,128,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,24,128,0,1,float16,float16,0,0.01413333291808764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,24,128,0,1,float16,fp8,0,0.014266667266686758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,24,128,0,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,1,128,0,1,float16,float16,0,0.014271999398867289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,1,128,0,1,float16,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,1,128,0,1,fp8,fp8,0,0.01602666700879733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,4,128,0,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,2,128,0,1,float16,float16,0,0.013999999811251959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,2,128,0,1,float16,fp8,0,0.01451733335852623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,2,128,0,1,fp8,fp8,0,0.016154666741689045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,4,128,0,1,float16,float16,0,0.013738666971524557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,4,128,0,1,float16,fp8,0,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,8,128,0,1,float16,float16,0,0.014629332969586054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,8,128,0,1,float16,fp8,0,0.014335999886194864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,24,8,128,0,1,fp8,fp8,0,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,24,128,0,1,float16,float16,0,0.013909333695967993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,24,128,0,1,float16,fp8,0,0.01441066712141037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,24,128,0,1,fp8,fp8,0,0.016010666886965435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,1,128,0,1,float16,float16,0,0.013295999417702356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,1,128,0,1,float16,fp8,0,0.013925333817799887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,1,128,0,1,fp8,fp8,0,0.016202667107184727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,2,128,0,1,float16,float16,0,0.013493333011865616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,2,128,0,1,float16,fp8,0,0.014645333091417948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,2,128,0,1,fp8,fp8,0,0.0162773331006368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,4,128,0,1,float16,float16,0,0.013482666263977686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,4,128,0,1,float16,fp8,0,0.014256000518798828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,4,128,0,1,fp8,fp8,0,0.015770666301250458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,8,128,0,1,float16,float16,0,0.014309333016475042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,8,128,0,1,float16,fp8,0,0.014271999398867289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,24,8,128,0,1,fp8,fp8,0,0.016069332758585613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,1,128,0,1,fp8,fp8,0,11.065621693929037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,2,128,0,1,fp8,fp8,0,11.030442555745443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,1,128,0,1,float16,float16,0,16.852784474690754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,1,128,0,1,float16,fp8,0,16.600448608398438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,2,128,0,1,float16,float16,0,16.746405283610027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,2,128,0,1,float16,fp8,0,16.720043182373047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,4,128,0,1,float16,fp8,0,16.726826985677082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,4,128,0,1,float16,float16,0,17.047200520833332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,4,128,0,1,fp8,fp8,0,11.150933583577475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,16,128,0,1,fp8,fp8,0,5.856293360392253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,16,128,0,1,float16,float16,0,8.57634162902832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,16,128,0,1,float16,fp8,0,8.592469533284506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,8,128,0,1,fp8,fp8,0,11.272244771321615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,8,128,0,1,float16,float16,0,16.79956817626953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,1,128,0,1,float16,float16,0,8.50662930806478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,1,128,0,1,fp8,fp8,0,6.0250504811604815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,1,128,0,1,float16,fp8,0,8.931845347086588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,16,8,128,0,1,float16,fp8,0,18.8909174601237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,2,128,0,1,fp8,fp8,0,5.940682729085286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,2,128,0,1,float16,float16,0,8.349525451660156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,2,128,0,1,float16,fp8,0,9.38979721069336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,4,128,0,1,float16,float16,0,8.628906885782877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,4,128,0,1,fp8,fp8,0,5.752010981241862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,16,128,0,1,float16,float16,0,4.281215985616048
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,4,128,0,1,float16,fp8,0,8.600698471069336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,8,128,0,1,fp8,fp8,0,5.731482823689778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,16,128,0,1,float16,fp8,0,4.410410563151042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,16,128,0,1,fp8,fp8,0,2.8794771830240884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,8,128,0,1,float16,float16,0,8.6955197652181
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,16,8,128,0,1,float16,fp8,0,8.713984171549479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,1,128,0,1,float16,float16,0,4.213717460632324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,1,128,0,1,fp8,fp8,0,2.7346827189127603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,2,128,0,1,fp8,fp8,0,2.776890754699707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,1,128,0,1,float16,fp8,0,4.2946774164835615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,2,128,0,1,float16,float16,0,4.289567947387695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,2,128,0,1,float16,fp8,0,4.308101336161296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,4,128,0,1,fp8,fp8,0,2.8888158798217773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,4,128,0,1,float16,float16,0,4.392570813496907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,4,128,0,1,float16,fp8,0,4.21940263112386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,8,128,0,1,fp8,fp8,0,2.903663953145345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,8,128,0,1,float16,float16,0,4.3105119069417315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,16,128,0,1,float16,float16,0,2.24235200881958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,16,128,0,1,fp8,fp8,0,1.5085546175638835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,16,128,0,1,float16,fp8,0,2.2373013496398926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,16,8,128,0,1,float16,fp8,0,4.312741279602051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,1,128,0,1,float16,float16,0,2.1239946683247886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,1,128,0,1,fp8,fp8,0,1.4670079549153645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,1,128,0,1,float16,fp8,0,2.1237759590148926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,2,128,0,1,fp8,fp8,0,1.4590506553649902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,2,128,0,1,float16,float16,0,2.1732959747314453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,2,128,0,1,float16,fp8,0,2.1210080782572427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,4,128,0,1,float16,float16,0,2.1570773124694824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,4,128,0,1,float16,fp8,0,2.0747787157694497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,4,128,0,1,fp8,fp8,0,1.4875946044921875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,8,128,0,1,fp8,fp8,0,1.469861348470052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,8,128,0,1,float16,float16,0,2.1761226654052734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,16,8,128,0,1,float16,fp8,0,2.108949343363444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,1,128,0,1,fp8,fp8,0,6.511941274007161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,2,128,0,1,fp8,fp8,0,6.535189310709636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,1,128,0,1,float16,float16,0,9.638975779215494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,1,128,0,1,float16,fp8,0,9.664368311564127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,2,128,0,1,float16,float16,0,9.629029591878256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,2,128,0,1,float16,fp8,0,9.885775883992514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,4,128,0,1,float16,float16,0,9.701983769734701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,4,128,0,1,float16,fp8,0,9.816421508789062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,4,128,0,1,fp8,fp8,0,6.606047948201497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,16,128,0,1,float16,float16,0,4.935914675394694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,16,128,0,1,fp8,fp8,0,3.4249919255574546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,16,128,0,1,float16,fp8,0,5.120533307393392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,8,128,0,1,fp8,fp8,0,6.606709162394206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,1,128,0,1,float16,float16,0,4.862895965576172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,8,128,0,1,float16,float16,0,9.853338877360025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,16,8,128,0,1,float16,fp8,0,9.776608149210611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,1,128,0,1,float16,fp8,0,4.906384150187175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,1,128,0,1,fp8,fp8,0,3.2477547327677407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,2,128,0,1,fp8,fp8,0,3.101711908976237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,2,128,0,1,float16,float16,0,4.855781237284343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,2,128,0,1,float16,fp8,0,4.837221463521321
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,4,128,0,1,float16,float16,0,4.903744061787923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,4,128,0,1,fp8,fp8,0,3.274592081705729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,4,128,0,1,float16,fp8,0,4.9741973876953125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,16,128,0,1,float16,float16,0,2.51910400390625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,8,128,0,1,fp8,fp8,0,3.3053385416666665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,16,128,0,1,float16,fp8,0,2.556938648223877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,8,128,0,1,float16,float16,0,4.9577226638793945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,16,8,128,0,1,float16,fp8,0,4.817477226257324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,16,128,0,1,fp8,fp8,0,1.7158293724060059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,1,128,0,1,fp8,fp8,0,1.6448319753011067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,1,128,0,1,float16,float16,0,2.306623935699463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,1,128,0,1,float16,fp8,0,2.446623961130778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,2,128,0,1,float16,float16,0,2.4634559949239097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,2,128,0,1,fp8,fp8,0,1.6459520657857258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,2,128,0,1,float16,fp8,0,2.399082660675049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,4,128,0,1,fp8,fp8,0,1.6295413970947266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,4,128,0,1,float16,float16,0,2.3390560150146484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,4,128,0,1,float16,fp8,0,2.4934773445129395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,8,128,0,1,float16,float16,0,2.434389273325602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,16,128,0,1,float16,float16,0,1.2534293333689372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,8,128,0,1,fp8,fp8,0,1.6821866035461426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,16,128,0,1,fp8,fp8,0,0.854144016901652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,16,8,128,0,1,float16,fp8,0,2.5422773361206055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,16,128,0,1,float16,fp8,0,1.2734239896138508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,1,128,0,1,fp8,fp8,0,0.8388213316599528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,1,128,0,1,float16,float16,0,1.234389305114746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,1,128,0,1,float16,fp8,0,1.265775998433431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,2,128,0,1,float16,float16,0,1.2434826691945393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,2,128,0,1,float16,fp8,0,1.2474079926808674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,2,128,0,1,fp8,fp8,0,0.8291680018107096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,4,128,0,1,float16,float16,0,1.2538879712422688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,4,128,0,1,float16,fp8,0,1.2545013427734375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,4,128,0,1,fp8,fp8,0,0.8294880390167236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,8,128,0,1,float16,float16,0,1.2461547056833904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,8,128,0,1,float16,fp8,0,1.2500747044881184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,16,8,128,0,1,fp8,fp8,0,0.836021343866984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,1,128,0,1,fp8,fp8,0,4.706218719482422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,1,128,0,1,float16,float16,0,6.88321050008138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,2,128,0,1,fp8,fp8,0,4.681978543599446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,1,128,0,1,float16,fp8,0,6.902954737345378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,2,128,0,1,float16,float16,0,6.88321050008138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,2,128,0,1,float16,fp8,0,6.789178848266602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,4,128,0,1,float16,float16,0,6.8816477457682295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,4,128,0,1,fp8,fp8,0,4.642933209737142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,16,128,0,1,fp8,fp8,0,2.4117652575174966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,16,128,0,1,float16,float16,0,3.46941343943278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,16,128,0,1,float16,fp8,0,3.5700906117757163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,4,128,0,1,float16,fp8,0,7.0465437571207685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,8,128,0,1,fp8,fp8,0,4.653861363728841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,8,128,0,1,float16,float16,0,6.955301284790039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,16,8,128,0,1,float16,fp8,0,7.0274397532145185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,1,128,0,1,fp8,fp8,0,2.2596747080485025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,1,128,0,1,float16,float16,0,3.4184799194335938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,1,128,0,1,float16,fp8,0,3.315567970275879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,2,128,0,1,fp8,fp8,0,2.294330596923828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,2,128,0,1,float16,float16,0,3.3930400212605796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,2,128,0,1,float16,fp8,0,3.466911951700846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,4,128,0,1,fp8,fp8,0,2.3101332982381186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,4,128,0,1,float16,float16,0,3.5195414225260415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,4,128,0,1,float16,fp8,0,3.4871946970621743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,8,128,0,1,fp8,fp8,0,2.381498654683431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,8,128,0,1,float16,float16,0,3.3262933095296225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,16,128,0,1,float16,float16,0,1.7680479685465496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,16,8,128,0,1,float16,fp8,0,3.432805379231771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,16,128,0,1,fp8,fp8,0,1.230517307917277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,16,128,0,1,float16,fp8,0,1.7284213701883953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,1,128,0,1,float16,float16,0,1.7133386929829915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,1,128,0,1,float16,fp8,0,1.6782506306966145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,1,128,0,1,fp8,fp8,0,1.1875200271606445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,2,128,0,1,float16,float16,0,1.6727627118428547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,2,128,0,1,float16,fp8,0,1.6811092694600422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,2,128,0,1,fp8,fp8,0,1.1814346313476562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,4,128,0,1,float16,float16,0,1.6464959780375164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,4,128,0,1,fp8,fp8,0,1.1845226287841797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,4,128,0,1,float16,fp8,0,1.67522128423055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,8,128,0,1,float16,float16,0,1.6805493036905925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,8,128,0,1,fp8,fp8,0,1.2062506675720215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,16,128,0,1,float16,float16,0,0.9291306336720785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,16,8,128,0,1,float16,fp8,0,1.7366560300191243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,16,128,0,1,fp8,fp8,0,0.6556586821873983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,16,128,0,1,float16,fp8,0,0.9252586364746094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,1,128,0,1,float16,float16,0,0.9039039611816406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,1,128,0,1,fp8,fp8,0,0.6258933146794637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,1,128,0,1,float16,fp8,0,0.9150239626566569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,2,128,0,1,float16,float16,0,0.9021920363108317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,2,128,0,1,float16,fp8,0,0.9093173344930013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,2,128,0,1,fp8,fp8,0,0.609333316485087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,4,128,0,1,float16,float16,0,0.9052639802296957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,4,128,0,1,fp8,fp8,0,0.6141546567281088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,4,128,0,1,float16,fp8,0,0.9109546343485514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,8,128,0,1,float16,float16,0,0.9128053188323975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,8,128,0,1,fp8,fp8,0,0.611135999361674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,16,8,128,0,1,float16,fp8,0,0.9184106985727946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,1,128,0,1,fp8,fp8,0,6.0980478922526045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,2,128,0,1,fp8,fp8,0,6.185696283976237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,1,128,0,1,float16,float16,0,8.993535995483398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,1,128,0,1,float16,fp8,0,8.98906135559082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,2,128,0,1,float16,float16,0,9.046480178833008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,2,128,0,1,float16,fp8,0,9.058464050292969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,4,128,0,1,float16,float16,0,9.007392247517904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,4,128,0,1,float16,fp8,0,9.124741236368815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,4,128,0,1,fp8,fp8,0,6.110026677449544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,16,128,0,1,fp8,fp8,0,3.2032480239868164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,16,128,0,1,float16,float16,0,4.616965293884277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,16,128,0,1,float16,fp8,0,4.643237431844075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,8,128,0,1,fp8,fp8,0,6.240432103474935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,1,128,0,1,float16,float16,0,4.581541379292806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,8,128,0,1,float16,float16,0,9.080368041992188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,1,128,0,1,float16,fp8,0,4.480090777079265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,1,128,0,1,fp8,fp8,0,3.045877456665039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,16,8,128,0,1,float16,fp8,0,9.17746671040853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,2,128,0,1,fp8,fp8,0,3.0483039220174155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,2,128,0,1,float16,float16,0,4.4997866948445635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,2,128,0,1,float16,fp8,0,4.536650657653809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,4,128,0,1,fp8,fp8,0,3.0561278661092124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,4,128,0,1,float16,float16,0,4.542352040608724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,4,128,0,1,float16,fp8,0,4.558778762817383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,16,128,0,1,float16,float16,0,2.2618719736735025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,16,128,0,1,float16,fp8,0,2.266863981882731
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,8,128,0,1,fp8,fp8,0,3.1527840296427407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,8,128,0,1,float16,float16,0,4.559546788533528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,16,8,128,0,1,float16,fp8,0,4.656576156616211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,16,128,0,1,fp8,fp8,0,1.6200373967488606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,1,128,0,1,fp8,fp8,0,1.5269865989685059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,1,128,0,1,float16,float16,0,2.1823466618855796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,1,128,0,1,float16,fp8,0,2.1913493474324546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,2,128,0,1,fp8,fp8,0,1.5185707410176594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,2,128,0,1,float16,float16,0,2.1834774017333984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,2,128,0,1,float16,fp8,0,2.2040586471557617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,4,128,0,1,float16,float16,0,2.2075039545694985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,4,128,0,1,float16,fp8,0,2.1855626106262207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,4,128,0,1,fp8,fp8,0,1.5167360305786133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,8,128,0,1,float16,float16,0,2.258138656616211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,8,128,0,1,float16,fp8,0,2.2977706591288247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,16,128,0,1,float16,float16,0,1.1683519681294758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,16,8,128,0,1,fp8,fp8,0,1.5405333836873372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,16,128,0,1,fp8,fp8,0,0.8736426830291748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,16,128,0,1,float16,fp8,0,1.210309346516927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,1,128,0,1,float16,float16,0,1.1068320274353027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,1,128,0,1,fp8,fp8,0,0.8232800165812174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,1,128,0,1,float16,fp8,0,1.1258666515350342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,2,128,0,1,float16,float16,0,1.1216320196787517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,2,128,0,1,fp8,fp8,0,0.8240373134613037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,2,128,0,1,float16,fp8,0,1.1280319690704346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,4,128,0,1,float16,float16,0,1.1171092987060547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,4,128,0,1,float16,fp8,0,1.1163573265075684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,4,128,0,1,fp8,fp8,0,0.8301493326822916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,8,128,0,1,float16,float16,0,1.1264533201853435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,8,128,0,1,fp8,fp8,0,0.8230079809824625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,16,128,0,1,fp8,fp8,0,0.43562666575113934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,16,128,0,1,float16,float16,0,0.6307200193405151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,16,8,128,0,1,float16,fp8,0,1.1424853006998699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,16,128,0,1,float16,fp8,0,0.6396906773249308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,1,128,0,1,float16,float16,0,0.6200266679128011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,1,128,0,1,fp8,fp8,0,0.4163839817047119
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,1,128,0,1,float16,fp8,0,0.6351093451182047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,2,128,0,1,float16,float16,0,0.6248586575190226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,2,128,0,1,float16,fp8,0,0.6240853468577067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,2,128,0,1,fp8,fp8,0,0.4189066489537557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,4,128,0,1,float16,float16,0,0.6248266696929932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,4,128,0,1,float16,fp8,0,0.6248533328374227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,4,128,0,1,fp8,fp8,0,0.42094401518503827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,8,128,0,1,float16,float16,0,0.6253386735916138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,8,128,0,1,float16,fp8,0,0.6311786572138468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,16,8,128,0,1,fp8,fp8,0,0.425711989402771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,1,128,0,1,fp8,fp8,0,3.6807680130004883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,2,128,0,1,fp8,fp8,0,3.609893480936686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,1,128,0,1,float16,float16,0,5.292991956075032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,1,128,0,1,float16,fp8,0,5.368261337280273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,2,128,0,1,float16,float16,0,5.313178698221843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,2,128,0,1,float16,fp8,0,5.382106781005859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,4,128,0,1,float16,float16,0,5.348389307657878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,4,128,0,1,float16,fp8,0,5.275877316792806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,4,128,0,1,fp8,fp8,0,3.6915305455525718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,16,128,0,1,float16,float16,0,2.6716747283935547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,16,128,0,1,fp8,fp8,0,1.9308853149414062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,16,128,0,1,float16,fp8,0,2.7516425450642905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,8,128,0,1,fp8,fp8,0,3.7517013549804688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,1,128,0,1,float16,float16,0,2.563477357228597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,8,128,0,1,float16,float16,0,5.373253504435222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,16,8,128,0,1,float16,fp8,0,5.369087855021159
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,1,128,0,1,fp8,fp8,0,1.8184746106465657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,1,128,0,1,float16,fp8,0,2.657637278238932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,2,128,0,1,fp8,fp8,0,1.8000319798787434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,2,128,0,1,float16,float16,0,2.5623040199279785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,2,128,0,1,float16,fp8,0,2.687098821004232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,4,128,0,1,float16,float16,0,2.6303200721740723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,4,128,0,1,fp8,fp8,0,1.8174293835957844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,4,128,0,1,float16,fp8,0,2.5674826304117837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,16,128,0,1,float16,float16,0,1.3066720167795818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,8,128,0,1,float16,float16,0,2.651807943979899
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,8,128,0,1,fp8,fp8,0,1.8051573435465496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,16,128,0,1,fp8,fp8,0,1.008677323659261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,16,8,128,0,1,float16,fp8,0,2.696666717529297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,16,128,0,1,float16,fp8,0,1.340394655863444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,1,128,0,1,float16,float16,0,1.2593973477681477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,1,128,0,1,float16,fp8,0,1.3377599716186523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,1,128,0,1,fp8,fp8,0,0.9476853211720785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,2,128,0,1,float16,float16,0,1.271498680114746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,2,128,0,1,float16,fp8,0,1.2741920153299968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,2,128,0,1,fp8,fp8,0,0.9365920225779215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,4,128,0,1,float16,float16,0,1.2833279768625896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,4,128,0,1,float16,fp8,0,1.2763786315917969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,4,128,0,1,fp8,fp8,0,0.9329120318094889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,8,128,0,1,float16,float16,0,1.2689066727956135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,8,128,0,1,fp8,fp8,0,0.952239990234375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,16,8,128,0,1,float16,fp8,0,1.279530684153239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,16,128,0,1,float16,float16,0,0.7033759752909342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,16,128,0,1,float16,fp8,0,0.7044959863026937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,16,128,0,1,fp8,fp8,0,0.5020053386688232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,1,128,0,1,float16,float16,0,0.678160031636556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,1,128,0,1,float16,fp8,0,0.6805226802825928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,1,128,0,1,fp8,fp8,0,0.4718079964319865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,2,128,0,1,float16,float16,0,0.6753333409627279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,2,128,0,1,fp8,fp8,0,0.4697120189666748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,2,128,0,1,float16,fp8,0,0.6901439825693766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,4,128,0,1,float16,float16,0,0.6817386945088705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,8,128,0,1,float16,fp8,0,0.6892906824747721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,4,128,0,1,float16,fp8,0,0.6849173704783121
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,4,128,0,1,fp8,fp8,0,0.47515201568603516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,8,128,0,1,float16,float16,0,0.6854453086853027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,16,8,128,0,1,fp8,fp8,0,0.4817013343175252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,16,128,0,1,float16,float16,0,0.3494346539179484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,16,128,0,1,float16,fp8,0,0.35234665870666504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,16,128,0,1,fp8,fp8,0,0.2775893410046895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,1,128,0,1,float16,float16,0,0.3384586572647095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,1,128,0,1,float16,fp8,0,0.33897598584493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,1,128,0,1,fp8,fp8,0,0.26312534014383954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,2,128,0,1,float16,float16,0,0.3404586712519328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,2,128,0,1,float16,fp8,0,0.33883198102315265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,2,128,0,1,fp8,fp8,0,0.26392533381779987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,4,128,0,1,float16,float16,0,0.3410293261210124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,4,128,0,1,float16,fp8,0,0.3431520064671834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,4,128,0,1,fp8,fp8,0,0.26562132438023883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,8,128,0,1,float16,float16,0,0.34532801310221356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,8,128,0,1,float16,fp8,0,0.3431520064671834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,16,8,128,0,1,fp8,fp8,0,0.2703733245531718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,1,128,0,1,fp8,fp8,0,3.5764373143514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,1,128,0,1,float16,float16,0,5.023082733154297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,1,128,0,1,float16,fp8,0,5.076800028483073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,2,128,0,1,fp8,fp8,0,3.6081600189208984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,2,128,0,1,float16,float16,0,5.025194803873698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,2,128,0,1,float16,fp8,0,5.148325284322103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,4,128,0,1,float16,float16,0,4.994912147521973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,4,128,0,1,fp8,fp8,0,3.567392031351725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,16,128,0,1,fp8,fp8,0,1.8995307286580403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,16,128,0,1,float16,float16,0,2.637690703074137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,16,128,0,1,float16,fp8,0,2.545210679372152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,4,128,0,1,float16,fp8,0,5.138863881429036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,8,128,0,1,fp8,fp8,0,3.690640131632487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,8,128,0,1,float16,float16,0,5.217018763224284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,1,128,0,1,float16,float16,0,2.450688044230143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,16,8,128,0,1,float16,fp8,0,5.156703948974609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,1,128,0,1,fp8,fp8,0,1.7663307189941406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,1,128,0,1,float16,fp8,0,2.398298740386963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,2,128,0,1,float16,float16,0,2.3476160367329917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,2,128,0,1,float16,fp8,0,2.415583928426107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,2,128,0,1,fp8,fp8,0,1.755141258239746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,4,128,0,1,fp8,fp8,0,1.784000078837077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,4,128,0,1,float16,float16,0,2.408170700073242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,4,128,0,1,float16,fp8,0,2.581376075744629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,16,128,0,1,float16,float16,0,1.2591626644134521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,8,128,0,1,fp8,fp8,0,1.842026710510254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,8,128,0,1,float16,float16,0,2.5389280319213867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,16,128,0,1,float16,fp8,0,1.2664693196614583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,16,8,128,0,1,float16,fp8,0,2.5055626233418784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,1,128,0,1,float16,float16,0,1.206485350926717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,16,128,0,1,fp8,fp8,0,0.9769972960154215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,1,128,0,1,fp8,fp8,0,0.9047893683115641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,1,128,0,1,float16,fp8,0,1.2149653434753418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,2,128,0,1,float16,float16,0,1.1959946950276692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,2,128,0,1,fp8,fp8,0,0.9073386987050375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,2,128,0,1,float16,fp8,0,1.2656319936116536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,4,128,0,1,float16,float16,0,1.1971626281738281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,4,128,0,1,fp8,fp8,0,0.9129493236541748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,4,128,0,1,float16,fp8,0,1.2237652937571208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,8,128,0,1,float16,float16,0,1.2172959645589192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,8,128,0,1,fp8,fp8,0,0.9319946765899658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,16,128,0,1,float16,float16,0,0.6556479930877686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,16,8,128,0,1,float16,fp8,0,1.2289386590321858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,16,128,0,1,float16,fp8,0,0.6677227020263672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,16,128,0,1,fp8,fp8,0,0.5178399880727133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,1,128,0,1,float16,float16,0,0.6305386622746786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,1,128,0,1,float16,fp8,0,0.6392480134963989
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,1,128,0,1,fp8,fp8,0,0.47935465971628827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,2,128,0,1,float16,float16,0,0.6320000092188517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,2,128,0,1,float16,fp8,0,0.6366506814956665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,2,128,0,1,fp8,fp8,0,0.4858773152033488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,4,128,0,1,float16,float16,0,0.6329546769460043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,4,128,0,1,float16,fp8,0,0.637877345085144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,4,128,0,1,fp8,fp8,0,0.48845334847768146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,8,128,0,1,float16,float16,0,0.6433653434117635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,8,128,0,1,float16,fp8,0,0.6468266646067301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,16,8,128,0,1,fp8,fp8,0,0.4970560073852539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,16,128,0,1,float16,float16,0,0.36238932609558105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,16,128,0,1,float16,fp8,0,0.3686666488647461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,16,128,0,1,fp8,fp8,0,0.27052799860636395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,1,128,0,1,float16,float16,0,0.35131200154622394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,1,128,0,1,float16,fp8,0,0.35156798362731934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,1,128,0,1,fp8,fp8,0,0.25072532892227173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,2,128,0,1,float16,float16,0,0.35283732414245605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,2,128,0,1,float16,fp8,0,0.3547786474227905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,2,128,0,1,fp8,fp8,0,0.2509866754213969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,4,128,0,1,float16,float16,0,0.35452266534169513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,4,128,0,1,float16,fp8,0,0.35869332154591876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,16,128,0,1,float16,fp8,0,0.1861226757367452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,4,128,0,1,fp8,fp8,0,0.2520479957262675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,8,128,0,1,float16,float16,0,0.35679999987284344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,8,128,0,1,float16,fp8,0,0.36236798763275146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,16,8,128,0,1,fp8,fp8,0,0.2571306626001994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,16,128,0,1,float16,float16,0,0.1829920013745626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,16,128,0,1,fp8,fp8,0,0.15416533748308817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,1,128,0,1,float16,float16,0,0.17867199579874674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,1,128,0,1,float16,fp8,0,0.1788853406906128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,1,128,0,1,fp8,fp8,0,0.1429333289464315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,2,128,0,1,float16,float16,0,0.1771893302599589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,2,128,0,1,float16,fp8,0,0.17945067087809244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,2,128,0,1,fp8,fp8,0,0.14426133036613464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,4,128,0,1,float16,float16,0,0.17770665884017944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,4,128,0,1,float16,fp8,0,0.18076266845067343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,4,128,0,1,fp8,fp8,0,0.1458506683508555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,8,128,0,1,float16,float16,0,0.1797920068105062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,8,128,0,1,float16,fp8,0,0.18140800793965658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,16,8,128,0,1,fp8,fp8,0,0.149509330590566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,1,128,0,1,fp8,fp8,0,2.153989315032959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,1,128,0,1,float16,float16,0,2.900848070780436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,1,128,0,1,float16,fp8,0,2.988250732421875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,2,128,0,1,fp8,fp8,0,2.165269374847412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,2,128,0,1,float16,float16,0,3.0150025685628257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,2,128,0,1,float16,fp8,0,2.935082753499349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,4,128,0,1,float16,float16,0,2.9369119008382163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,4,128,0,1,float16,fp8,0,2.9584852854410806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,4,128,0,1,fp8,fp8,0,2.273029327392578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,16,128,0,1,float16,float16,0,1.549957275390625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,8,128,0,1,float16,float16,0,3.156538645426432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,16,128,0,1,float16,fp8,0,1.5423893928527832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,16,128,0,1,fp8,fp8,0,1.2175146738688152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,8,128,0,1,fp8,fp8,0,2.309658686319987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,16,8,128,0,1,float16,fp8,0,3.1309814453125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,1,128,0,1,float16,float16,0,1.4264319737752278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,1,128,0,1,float16,fp8,0,1.4667894045511882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,1,128,0,1,fp8,fp8,0,1.122330665588379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,2,128,0,1,float16,float16,0,1.413866678873698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,2,128,0,1,float16,fp8,0,1.4482933680216472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,2,128,0,1,fp8,fp8,0,1.1074026425679524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,4,128,0,1,float16,float16,0,1.4407679239908855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,4,128,0,1,float16,fp8,0,1.4494400024414062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,4,128,0,1,fp8,fp8,0,1.1230613390604656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,16,128,0,1,float16,float16,0,0.7849120299021403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,8,128,0,1,float16,fp8,0,1.4996639887491863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,8,128,0,1,float16,float16,0,1.467733383178711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,16,8,128,0,1,fp8,fp8,0,1.1551413536071777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,16,128,0,1,float16,fp8,0,0.7870559692382812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,16,128,0,1,fp8,fp8,0,0.6411786476771036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,1,128,0,1,float16,float16,0,0.7348000208536783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,1,128,0,1,float16,fp8,0,0.7392746607462565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,1,128,0,1,fp8,fp8,0,0.5888106822967529
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,2,128,0,1,float16,float16,0,0.7422346274058024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,2,128,0,1,float16,fp8,0,0.7405333518981934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,4,128,0,1,float16,fp8,0,0.7490453720092773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,2,128,0,1,fp8,fp8,0,0.5754506587982178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,4,128,0,1,float16,float16,0,0.7435519695281982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,4,128,0,1,fp8,fp8,0,0.5843146642049154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,8,128,0,1,float16,float16,0,0.7571039994557699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,16,128,0,1,float16,float16,0,0.41708266735076904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,8,128,0,1,float16,fp8,0,0.7635467052459717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,16,8,128,0,1,fp8,fp8,0,0.5999039808909098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,16,128,0,1,float16,fp8,0,0.42266666889190674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,16,128,0,1,fp8,fp8,0,0.3299573262532552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,1,128,0,1,float16,float16,0,0.39982398351033527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,1,128,0,1,float16,fp8,0,0.39868799845377606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,1,128,0,1,fp8,fp8,0,0.29630400737126666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,2,128,0,1,float16,float16,0,0.3975306749343872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,2,128,0,1,float16,fp8,0,0.4026453495025635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,2,128,0,1,fp8,fp8,0,0.29795199632644653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,4,128,0,1,float16,float16,0,0.40116798877716064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,4,128,0,1,float16,fp8,0,0.4041066567103068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,4,128,0,1,fp8,fp8,0,0.3004106680552165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,8,128,0,1,float16,float16,0,0.4035946528116862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,8,128,0,1,float16,fp8,0,0.41539732615152997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,16,8,128,0,1,fp8,fp8,0,0.30979732672373456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,16,128,0,1,float16,float16,0,0.21276267369588217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,16,128,0,1,float16,fp8,0,0.21716266870498657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,16,128,0,1,fp8,fp8,0,0.17870400349299112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,1,128,0,1,float16,float16,0,0.19987199703852335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,1,128,0,1,float16,fp8,0,0.20055999358495077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,1,128,0,1,fp8,fp8,0,0.16319466630617777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,2,128,0,1,float16,float16,0,0.20197866360346475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,2,128,0,1,float16,fp8,0,0.20305599768956503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,2,128,0,1,fp8,fp8,0,0.16473066806793213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,4,128,0,1,float16,float16,0,0.20111999909083048
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,4,128,0,1,float16,fp8,0,0.2034719983736674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,4,128,0,1,fp8,fp8,0,0.16474666198094687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,8,128,0,1,float16,float16,0,0.2042613426844279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,8,128,0,1,float16,fp8,0,0.20784533023834229
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,1,128,0,1,float16,fp8,0,0.12008532881736755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,16,8,128,0,1,fp8,fp8,0,0.17017600933710733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,16,128,0,1,float16,float16,0,0.12484799822171529
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,16,128,0,1,float16,fp8,0,0.12731732924779257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,16,128,0,1,fp8,fp8,0,0.10603732864061992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,1,128,0,1,float16,float16,0,0.12011200189590454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,1,128,0,1,fp8,fp8,0,0.09826133648554485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,2,128,0,1,float16,float16,0,0.12036266922950745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,2,128,0,1,float16,fp8,0,0.1209493378798167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,2,128,0,1,fp8,fp8,0,0.09866133332252502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,4,128,0,1,float16,float16,0,0.12157866358757019
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,4,128,0,1,float16,fp8,0,0.12130666772524516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,4,128,0,1,fp8,fp8,0,0.09875733653704326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,8,128,0,1,float16,float16,0,0.12268267075220744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,8,128,0,1,float16,fp8,0,0.1244053343931834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,16,8,128,0,1,fp8,fp8,0,0.10212266445159912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,1,128,0,1,fp8,fp8,0,2.3361120223999023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,1,128,0,1,float16,float16,0,3.0428425470987954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,1,128,0,1,float16,fp8,0,3.0170666376749673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,2,128,0,1,float16,float16,0,2.9414453506469727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,2,128,0,1,fp8,fp8,0,2.35915199915568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,2,128,0,1,float16,fp8,0,2.9417012532552085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,4,128,0,1,float16,float16,0,2.9662132263183594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,4,128,0,1,float16,fp8,0,3.0543254216512046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,4,128,0,1,fp8,fp8,0,2.4104960759480796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,16,128,0,1,float16,float16,0,1.5870985984802246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,16,128,0,1,float16,fp8,0,1.620741367340088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,8,128,0,1,fp8,fp8,0,2.485466639200846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,8,128,0,1,float16,float16,0,3.1534878412882485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,16,128,0,1,fp8,fp8,0,1.3425866762797039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,1,128,0,1,float16,float16,0,1.4561813672383626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,16,8,128,0,1,float16,fp8,0,3.192490577697754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,1,128,0,1,fp8,fp8,0,1.190618673960368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,1,128,0,1,float16,fp8,0,1.4620320002237956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,2,128,0,1,float16,float16,0,1.4880213737487793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,2,128,0,1,float16,fp8,0,1.4869972864786785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,2,128,0,1,fp8,fp8,0,1.2270987033843994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,4,128,0,1,float16,float16,0,1.4919573465983074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,4,128,0,1,float16,fp8,0,1.4810080528259277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,4,128,0,1,fp8,fp8,0,1.2135199705759685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,8,128,0,1,float16,float16,0,1.529354731241862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,16,128,0,1,float16,float16,0,0.8080426851908366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,16,128,0,1,fp8,fp8,0,0.6800426642100016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,16,128,0,1,float16,fp8,0,0.8197387059529623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,8,128,0,1,float16,fp8,0,1.5199947357177734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,1,128,0,1,fp8,fp8,0,0.6034773190816244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,16,8,128,0,1,fp8,fp8,0,1.2595252990722656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,1,128,0,1,float16,float16,0,0.7397813002268473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,1,128,0,1,float16,fp8,0,0.7512853145599365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,2,128,0,1,float16,float16,0,0.7402719656626383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,2,128,0,1,float16,fp8,0,0.7465919653574625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,2,128,0,1,fp8,fp8,0,0.6163680156071981
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,4,128,0,1,float16,float16,0,0.7466399669647217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,4,128,0,1,float16,fp8,0,0.752026637395223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,4,128,0,1,fp8,fp8,0,0.626917322476705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,8,128,0,1,float16,float16,0,0.7609600226084391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,8,128,0,1,float16,fp8,0,0.7691840330759684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,16,8,128,0,1,fp8,fp8,0,0.6420693397521973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,16,128,0,1,float16,float16,0,0.4205919901529948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,16,128,0,1,float16,fp8,0,0.42499200503031415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,16,128,0,1,fp8,fp8,0,0.36192532380421955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,1,128,0,1,float16,float16,0,0.3903199831644694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,2,128,0,1,fp8,fp8,0,0.3229973316192627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,1,128,0,1,float16,fp8,0,0.3930026690165202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,1,128,0,1,fp8,fp8,0,0.3208799958229065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,2,128,0,1,float16,float16,0,0.3927520116170247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,2,128,0,1,float16,fp8,0,0.39410134156545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,8,128,0,1,float16,float16,0,0.404090682665507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,4,128,0,1,float16,float16,0,0.39583468437194824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,4,128,0,1,float16,fp8,0,0.3986186583836873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,4,128,0,1,fp8,fp8,0,0.3274186650911967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,8,128,0,1,float16,fp8,0,0.40743998686472577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,16,8,128,0,1,fp8,fp8,0,0.3399306535720825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,16,128,0,1,float16,float16,0,0.23084799448649088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,16,128,0,1,float16,fp8,0,0.2367573380470276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,16,128,0,1,fp8,fp8,0,0.1888586680094401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,1,128,0,1,float16,float16,0,0.21543467044830322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,1,128,0,1,float16,fp8,0,0.21603200833002725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,1,128,0,1,fp8,fp8,0,0.16635732849438986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,2,128,0,1,float16,float16,0,0.21797333161036173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,2,128,0,1,float16,fp8,0,0.22103466590245566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,2,128,0,1,fp8,fp8,0,0.1686506668726603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,4,128,0,1,float16,float16,0,0.22039467096328735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,4,128,0,1,float16,fp8,0,0.22219733397165933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,4,128,0,1,fp8,fp8,0,0.1706506609916687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,8,128,0,1,float16,float16,0,0.22285334269205728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,8,128,0,1,float16,fp8,0,0.22619199752807617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,16,8,128,0,1,fp8,fp8,0,0.17718400557835898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,16,128,0,1,float16,float16,0,0.11966400345166524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,16,128,0,1,float16,fp8,0,0.12123733758926392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,16,128,0,1,fp8,fp8,0,0.10661333799362183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,1,128,0,1,float16,float16,0,0.10945066809654236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,1,128,0,1,float16,fp8,0,0.11076800028483073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,1,128,0,1,fp8,fp8,0,0.09299199779828389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,2,128,0,1,float16,float16,0,0.1106773316860199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,2,128,0,1,float16,fp8,0,0.11317867040634155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,2,128,0,1,fp8,fp8,0,0.0957493285338084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,4,128,0,1,float16,float16,0,0.11209066708882649
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,4,128,0,1,float16,fp8,0,0.11432533462842305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,4,128,0,1,fp8,fp8,0,0.09836266438166301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,16,128,0,1,fp8,fp8,0,0.06493333478768666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,8,128,0,1,float16,float16,0,0.11414933204650879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,8,128,0,1,float16,fp8,0,0.1165706713994344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,16,8,128,0,1,fp8,fp8,0,0.10130133231480916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,2,128,0,1,float16,float16,0,0.06834133466084798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,16,128,0,1,float16,float16,0,0.0703413337469101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,16,128,0,1,float16,fp8,0,0.07275199890136719
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,1,128,0,1,float16,float16,0,0.0684799998998642
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,1,128,0,1,float16,fp8,0,0.06857599814732869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,1,128,0,1,fp8,fp8,0,0.05890133480230967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,2,128,0,1,float16,fp8,0,0.06839466591676076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,8,128,0,1,float16,fp8,0,0.07092800239721934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,2,128,0,1,fp8,fp8,0,0.05958933134873708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,4,128,0,1,float16,float16,0,0.06846933563550313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,4,128,0,1,float16,fp8,0,0.06879466772079468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,4,128,0,1,fp8,fp8,0,0.06002133091290792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,8,128,0,1,float16,float16,0,0.0690773328145345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,16,8,128,0,1,fp8,fp8,0,0.06101333101590475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,1,128,0,1,float16,float16,0,1.8774080276489258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,1,128,0,1,float16,fp8,0,1.868618647257487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,1,128,0,1,fp8,fp8,0,1.5641279220581055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,2,128,0,1,float16,float16,0,1.8599252700805664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,2,128,0,1,fp8,fp8,0,1.5827199618021648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,2,128,0,1,float16,fp8,0,1.904138724009196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,4,128,0,1,float16,float16,0,1.883061408996582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,4,128,0,1,float16,fp8,0,1.8819252649943035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,4,128,0,1,fp8,fp8,0,1.6080907185872395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,16,128,0,1,float16,float16,0,1.0361599922180176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,8,128,0,1,float16,float16,0,1.9776479403177898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,8,128,0,1,fp8,fp8,0,1.669530709584554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,16,8,128,0,1,float16,fp8,0,1.9347519874572754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,16,128,0,1,float16,fp8,0,1.0425972938537598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,16,128,0,1,fp8,fp8,0,0.9150880177815756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,1,128,0,1,float16,float16,0,0.9352266788482666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,1,128,0,1,float16,fp8,0,0.9291893641153971
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,1,128,0,1,fp8,fp8,0,0.8041600386301676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,2,128,0,1,float16,float16,0,0.9453759988149008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,2,128,0,1,float16,fp8,0,0.9409653345743815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,2,128,0,1,fp8,fp8,0,0.8025493621826172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,4,128,0,1,float16,float16,0,0.9488480091094971
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,4,128,0,1,float16,fp8,0,0.952303965886434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,4,128,0,1,fp8,fp8,0,0.8155146439870199
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,8,128,0,1,float16,float16,0,0.9789546330769857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,8,128,0,1,float16,fp8,0,0.976026693979899
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,16,128,0,1,float16,float16,0,0.526037335395813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,16,8,128,0,1,fp8,fp8,0,0.8463253180185953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,16,128,0,1,float16,fp8,0,0.5347573359807333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,16,128,0,1,fp8,fp8,0,0.47114133834838867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,1,128,0,1,float16,float16,0,0.4753919839859009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,1,128,0,1,float16,fp8,0,0.48551468054453534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,1,128,0,1,fp8,fp8,0,0.41894400119781494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,2,128,0,1,float16,float16,0,0.4800106684366862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,4,128,0,1,float16,fp8,0,0.49059200286865234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,2,128,0,1,float16,fp8,0,0.4836053450902303
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,2,128,0,1,fp8,fp8,0,0.41971198717753094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,4,128,0,1,float16,float16,0,0.48370134830474854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,4,128,0,1,fp8,fp8,0,0.42562135060628253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,8,128,0,1,float16,float16,0,0.49566400051116943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,8,128,0,1,float16,fp8,0,0.5011200110117594
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,16,8,128,0,1,fp8,fp8,0,0.43625064690907794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,16,128,0,1,float16,float16,0,0.28064000606536865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,16,128,0,1,float16,fp8,0,0.2850346763928731
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,16,128,0,1,fp8,fp8,0,0.24565333127975464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,1,128,0,1,float16,float16,0,0.2589813272158305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,1,128,0,1,float16,fp8,0,0.26018667221069336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,1,128,0,1,fp8,fp8,0,0.20890132586161295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,2,128,0,1,float16,float16,0,0.2570986747741699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,2,128,0,1,float16,fp8,0,0.26065067450205487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,2,128,0,1,fp8,fp8,0,0.21132266521453857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,4,128,0,1,float16,float16,0,0.2603893280029297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,4,128,0,1,float16,fp8,0,0.26263999938964844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,4,128,0,1,fp8,fp8,0,0.21404266357421875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,8,128,0,1,float16,float16,0,0.2678080002466838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,8,128,0,1,float16,fp8,0,0.2689066727956136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,16,8,128,0,1,fp8,fp8,0,0.22180267175038657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,16,128,0,1,float16,float16,0,0.14684266845385233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,16,128,0,1,float16,fp8,0,0.15110933780670166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,16,128,0,1,fp8,fp8,0,0.1332480013370514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,1,128,0,1,float16,float16,0,0.13198933005332947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,1,128,0,1,float16,fp8,0,0.1339946687221527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,1,128,0,1,fp8,fp8,0,0.1164533297220866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,2,128,0,1,float16,float16,0,0.13315199812253317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,2,128,0,1,float16,fp8,0,0.1349440018335978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,2,128,0,1,fp8,fp8,0,0.11667733391125996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,4,128,0,1,float16,float16,0,0.13544000188509622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,4,128,0,1,float16,fp8,0,0.13798933227856955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,4,128,0,1,fp8,fp8,0,0.11878933509190877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,8,128,0,1,float16,float16,0,0.13845333456993103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,8,128,0,1,float16,fp8,0,0.1399733324845632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,16,8,128,0,1,fp8,fp8,0,0.1232373317082723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,16,128,0,1,float16,float16,0,0.08264000217119853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,16,128,0,1,float16,fp8,0,0.08486933509508769
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,16,128,0,1,fp8,fp8,0,0.0777759999036789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,1,128,0,1,float16,float16,0,0.07653866708278656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,1,128,0,1,float16,fp8,0,0.07623466849327087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,1,128,0,1,fp8,fp8,0,0.06620266536871593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,2,128,0,1,float16,float16,0,0.07563200096289317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,2,128,0,1,float16,fp8,0,0.07727999985218048
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,2,128,0,1,fp8,fp8,0,0.06697600086530049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,4,128,0,1,float16,float16,0,0.07698133091131847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,4,128,0,1,float16,fp8,0,0.07784533500671387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,4,128,0,1,fp8,fp8,0,0.06994666655858357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,8,128,0,1,float16,float16,0,0.07950399816036224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,8,128,0,1,float16,fp8,0,0.08083733419577281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,16,8,128,0,1,fp8,fp8,0,0.0739573339621226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,16,128,0,1,float16,float16,0,0.05178666611512502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,16,128,0,1,float16,fp8,0,0.05279466509819031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,16,128,0,1,fp8,fp8,0,0.04817600051561991
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,1,128,0,1,float16,float16,0,0.051141331593195595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,1,128,0,1,float16,fp8,0,0.051088000337282814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,1,128,0,1,fp8,fp8,0,0.04427733520666758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,2,128,0,1,float16,float16,0,0.05109333495299021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,2,128,0,1,float16,fp8,0,0.05076266825199127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,2,128,0,1,fp8,fp8,0,0.044719999035199486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,8,128,0,1,fp8,fp8,0,0.04565866788228353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,4,128,0,1,float16,float16,0,0.05041066805521647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,4,128,0,1,float16,fp8,0,0.05162666738033295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,4,128,0,1,fp8,fp8,0,0.04526400069395701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,8,128,0,1,float16,float16,0,0.0509493350982666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,16,8,128,0,1,float16,fp8,0,0.052005335688591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,1,128,0,1,float16,float16,0,1.903450647989909
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,1,128,0,1,float16,fp8,0,1.8821439743041992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,1,128,0,1,fp8,fp8,0,1.7507626215616863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,2,128,0,1,float16,float16,0,1.9322452545166016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,2,128,0,1,float16,fp8,0,1.903663953145345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,2,128,0,1,fp8,fp8,0,1.828719933827718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,4,128,0,1,float16,float16,0,2.062021255493164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,4,128,0,1,float16,fp8,0,1.9527734120686848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,4,128,0,1,fp8,fp8,0,1.9754133224487305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,16,128,0,1,float16,float16,0,1.1391572952270508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,16,128,0,1,float16,fp8,0,1.1175466378529866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,8,128,0,1,float16,float16,0,2.040869394938151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,8,128,0,1,float16,fp8,0,2.0247413317362466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,16,8,128,0,1,fp8,fp8,0,1.9697813987731934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,16,128,0,1,fp8,fp8,0,1.057909329732259
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,1,128,0,1,float16,float16,0,0.9721279939015707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,1,128,0,1,float16,fp8,0,0.9629066785176595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,1,128,0,1,fp8,fp8,0,0.8825226624806722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,2,128,0,1,float16,float16,0,0.9689760208129883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,2,128,0,1,float16,fp8,0,0.9851626555124918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,2,128,0,1,fp8,fp8,0,0.9259946346282959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,4,128,0,1,float16,float16,0,1.0032107035319011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,4,128,0,1,float16,fp8,0,0.9924853642781576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,4,128,0,1,fp8,fp8,0,0.9949599901835123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,8,128,0,1,float16,float16,0,1.0174506505330403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,16,128,0,1,float16,float16,0,0.5804640054702759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,8,128,0,1,float16,fp8,0,1.010042667388916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,16,8,128,0,1,fp8,fp8,0,0.9981280167897543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,16,128,0,1,float16,fp8,0,0.5674293438593546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,16,128,0,1,fp8,fp8,0,0.5200746854146322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,1,128,0,1,float16,float16,0,0.49479464689890545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,1,128,0,1,float16,fp8,0,0.4941226641337077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,1,128,0,1,fp8,fp8,0,0.44884268442789715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,2,128,0,1,float16,float16,0,0.49845866362253827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,2,128,0,1,float16,fp8,0,0.49863465627034503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,2,128,0,1,fp8,fp8,0,0.4691093365351359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,4,128,0,1,float16,float16,0,0.5106559991836548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,4,128,0,1,float16,fp8,0,0.5049440066019694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,4,128,0,1,fp8,fp8,0,0.5024640162785848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,16,128,0,1,float16,float16,0,0.30298133691151935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,8,128,0,1,float16,float16,0,0.5227146546045939
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,8,128,0,1,float16,fp8,0,0.5183146794637045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,16,8,128,0,1,fp8,fp8,0,0.5125759840011597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,16,128,0,1,float16,fp8,0,0.2952853242556254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,16,128,0,1,fp8,fp8,0,0.26894932985305786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,1,128,0,1,float16,float16,0,0.26065067450205487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,1,128,0,1,float16,fp8,0,0.26078933477401733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,1,128,0,1,fp8,fp8,0,0.23255467414855957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,2,128,0,1,float16,float16,0,0.26235200961430866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,2,128,0,1,float16,fp8,0,0.2617013255755107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,4,128,0,1,float16,float16,0,0.26629867156346637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,2,128,0,1,fp8,fp8,0,0.2371679941813151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,4,128,0,1,float16,fp8,0,0.2674559950828552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,4,128,0,1,fp8,fp8,0,0.2597279946009318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,8,128,0,1,float16,float16,0,0.2726400097211202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,8,128,0,1,float16,fp8,0,0.2703839937845866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,16,8,128,0,1,fp8,fp8,0,0.2630133430163066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,16,128,0,1,float16,float16,0,0.16370133558909097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,16,128,0,1,float16,fp8,0,0.16074666380882263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,16,128,0,1,fp8,fp8,0,0.132314662138621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,2,128,0,1,fp8,fp8,0,0.11569600303967793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,1,128,0,1,float16,float16,0,0.14194132884343466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,1,128,0,1,float16,fp8,0,0.14106667041778564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,4,128,0,1,fp8,fp8,0,0.12643200159072876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,4,128,0,1,float16,fp8,0,0.14763200283050537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,1,128,0,1,fp8,fp8,0,0.11472533146540324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,2,128,0,1,float16,float16,0,0.14359466234842935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,2,128,0,1,float16,fp8,0,0.1441973348458608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,4,128,0,1,float16,float16,0,0.1479200025399526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,8,128,0,1,float16,float16,0,0.15359999736150107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,8,128,0,1,float16,fp8,0,0.1495466629664103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,16,8,128,0,1,fp8,fp8,0,0.12813867131868997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,16,128,0,1,float16,float16,0,0.08407466610272725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,16,128,0,1,float16,fp8,0,0.08390933275222778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,16,128,0,1,fp8,fp8,0,0.07285866638024648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,1,128,0,1,float16,float16,0,0.07334933181603749
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,1,128,0,1,float16,fp8,0,0.07351466516653697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,1,128,0,1,fp8,fp8,0,0.0626986672480901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,4,128,0,1,fp8,fp8,0,0.0687306672334671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,2,128,0,1,float16,float16,0,0.07397333284219106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,2,128,0,1,float16,fp8,0,0.07451733450094859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,2,128,0,1,fp8,fp8,0,0.06355733176072438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,4,128,0,1,float16,float16,0,0.07787199815114339
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,4,128,0,1,float16,fp8,0,0.07618133227030437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,8,128,0,1,float16,float16,0,0.07879466811815898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,8,128,0,1,float16,fp8,0,0.07843733330567677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,16,8,128,0,1,fp8,fp8,0,0.07041599849859874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,16,128,0,1,float16,float16,0,0.04660800099372864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,16,128,0,1,float16,fp8,0,0.04619733492533366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,16,128,0,1,fp8,fp8,0,0.04311466713746389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,1,128,0,1,float16,float16,0,0.04308266441027323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,1,128,0,1,float16,fp8,0,0.04218133290608724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,1,128,0,1,fp8,fp8,0,0.036464000741640724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,2,128,0,1,float16,float16,0,0.04270400106906891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,2,128,0,1,float16,fp8,0,0.04258666435877482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,4,128,0,1,float16,float16,0,0.04362666606903076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,8,128,0,1,fp8,fp8,0,0.03999999910593033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,2,128,0,1,fp8,fp8,0,0.03830400109291077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,4,128,0,1,float16,fp8,0,0.04386133452256521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,4,128,0,1,fp8,fp8,0,0.039221333960692085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,1,128,0,1,float16,float16,0,0.03265066693226496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,8,128,0,1,float16,float16,0,0.0435146689414978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,16,8,128,0,1,float16,fp8,0,0.044346665342648826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,2,128,0,1,float16,float16,0,0.03283733377854029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,16,128,0,1,float16,float16,0,0.03350399931271871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,16,128,0,1,float16,fp8,0,0.03388266762097677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,16,128,0,1,fp8,fp8,0,0.02939733366171519
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,1,128,0,1,float16,fp8,0,0.032138665517171226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,1,128,0,1,fp8,fp8,0,0.027877333263556164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,2,128,0,1,float16,fp8,0,0.032560000816980995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,2,128,0,1,fp8,fp8,0,0.027466667195161183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,4,128,0,1,float16,float16,0,0.03316800047953924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,4,128,0,1,float16,fp8,0,0.03341866781314214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,4,128,0,1,fp8,fp8,0,0.028714666763941448
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,8,128,0,1,float16,float16,0,0.03293866664171219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,8,128,0,1,float16,fp8,0,0.033301333586374916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,16,8,128,0,1,fp8,fp8,0,0.029637334247430164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,1,128,0,1,float16,float16,0,1.4724159240722656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,1,128,0,1,float16,fp8,0,1.477402687072754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,1,128,0,1,fp8,fp8,0,1.4261919657389324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,2,128,0,1,float16,float16,0,1.5022719701131184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,2,128,0,1,float16,fp8,0,1.4802026748657227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,2,128,0,1,fp8,fp8,0,1.5062559445699055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,4,128,0,1,float16,float16,0,1.573520024617513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,4,128,0,1,float16,fp8,0,1.592074712117513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,4,128,0,1,fp8,fp8,0,1.615242640177409
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,8,128,0,1,float16,float16,0,1.5959146817525227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,16,128,0,1,float16,float16,0,0.9223039944966634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,8,128,0,1,float16,fp8,0,1.588175932566325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,16,8,128,0,1,fp8,fp8,0,1.6481119791666667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,16,128,0,1,float16,fp8,0,0.8989546298980713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,16,128,0,1,fp8,fp8,0,0.8468320369720459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,1,128,0,1,float16,float16,0,0.7462560335795084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,1,128,0,1,float16,fp8,0,0.7497013409932455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,2,128,0,1,float16,float16,0,0.7609919706980387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,1,128,0,1,fp8,fp8,0,0.7051200071970621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,2,128,0,1,float16,fp8,0,0.7508373260498047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,2,128,0,1,fp8,fp8,0,0.7526240348815918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,4,128,0,1,float16,float16,0,0.7813759644826254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,4,128,0,1,float16,fp8,0,0.7670986652374268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,4,128,0,1,fp8,fp8,0,0.8221813042958578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,8,128,0,1,float16,float16,0,0.8092052936553955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,16,128,0,1,float16,float16,0,0.46933865547180176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,8,128,0,1,float16,fp8,0,0.7937599817911783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,16,8,128,0,1,fp8,fp8,0,0.8353760242462158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,16,128,0,1,float16,fp8,0,0.4535413185755412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,16,128,0,1,fp8,fp8,0,0.4325546820958455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,1,128,0,1,float16,float16,0,0.3850826819737752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,1,128,0,1,float16,fp8,0,0.38649598757425946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,1,128,0,1,fp8,fp8,0,0.3601599931716919
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,4,128,0,1,float16,float16,0,0.40171198050181073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,2,128,0,1,float16,float16,0,0.3873279889424642
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,2,128,0,1,float16,fp8,0,0.38630934556325275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,2,128,0,1,fp8,fp8,0,0.3726453383763631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,8,128,0,1,float16,float16,0,0.4142560164133708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,4,128,0,1,float16,fp8,0,0.3977760076522827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,4,128,0,1,fp8,fp8,0,0.4211626847585042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,8,128,0,1,float16,fp8,0,0.4076319932937622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,16,8,128,0,1,fp8,fp8,0,0.42001068592071533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,16,128,0,1,float16,float16,0,0.24679466088612875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,16,128,0,1,fp8,fp8,0,0.2228320042292277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,16,128,0,1,float16,fp8,0,0.2394719918568929
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,1,128,0,1,float16,float16,0,0.20253332455952963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,1,128,0,1,float16,fp8,0,0.20387200514475504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,1,128,0,1,fp8,fp8,0,0.1906826694806417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,2,128,0,1,float16,float16,0,0.20381333430608115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,2,128,0,1,float16,fp8,0,0.20442134141921997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,2,128,0,1,fp8,fp8,0,0.19344000021616617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,4,128,0,1,float16,float16,0,0.21204266945521036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,16,128,0,1,float16,float16,0,0.1336799959341685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,4,128,0,1,float16,fp8,0,0.2104319930076599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,4,128,0,1,fp8,fp8,0,0.2171893318494161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,8,128,0,1,float16,float16,0,0.2170026699701945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,8,128,0,1,float16,fp8,0,0.21694932381312051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,16,8,128,0,1,fp8,fp8,0,0.216538667678833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,16,128,0,1,float16,fp8,0,0.13117866714795431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,16,128,0,1,fp8,fp8,0,0.11319999893506368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,1,128,0,1,float16,float16,0,0.11062933007876079
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,4,128,0,1,float16,float16,0,0.11754133303960164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,1,128,0,1,float16,fp8,0,0.1111946702003479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,1,128,0,1,fp8,fp8,0,0.09566932916641235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,2,128,0,1,float16,float16,0,0.11454400420188904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,2,128,0,1,float16,fp8,0,0.11403733491897583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,8,128,0,1,fp8,fp8,0,0.10738133390744527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,2,128,0,1,fp8,fp8,0,0.09715200463930766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,4,128,0,1,float16,fp8,0,0.11622933546702068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,4,128,0,1,fp8,fp8,0,0.1053546667098999
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,8,128,0,1,float16,float16,0,0.1221386690934499
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,16,8,128,0,1,float16,fp8,0,0.11917866269747417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,16,128,0,1,float16,float16,0,0.07153599957625072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,16,128,0,1,float16,fp8,0,0.0691786656777064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,16,128,0,1,fp8,fp8,0,0.06481066842873891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,1,128,0,1,float16,float16,0,0.05979733169078827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,4,128,0,1,float16,float16,0,0.06474133332570393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,1,128,0,1,float16,fp8,0,0.05932799975077311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,1,128,0,1,fp8,fp8,0,0.05329599976539612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,2,128,0,1,float16,float16,0,0.060517330964406334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,2,128,0,1,float16,fp8,0,0.06149866680304209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,2,128,0,1,fp8,fp8,0,0.054469332098960876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,4,128,0,1,float16,fp8,0,0.0633653352657954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,4,128,0,1,fp8,fp8,0,0.05973866581916809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,8,128,0,1,float16,float16,0,0.06458133459091187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,8,128,0,1,float16,fp8,0,0.06508266429106395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,16,8,128,0,1,fp8,fp8,0,0.06087466577688853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,16,128,0,1,float16,float16,0,0.040778666734695435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,16,128,0,1,float16,fp8,0,0.04025600105524063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,16,128,0,1,fp8,fp8,0,0.03755733370780945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,1,128,0,1,float16,float16,0,0.03562133262554804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,1,128,0,1,float16,fp8,0,0.03573333223660787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,1,128,0,1,fp8,fp8,0,0.032111999889214836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,2,128,0,1,float16,float16,0,0.03619733452796936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,2,128,0,1,float16,fp8,0,0.03586133321126302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,2,128,0,1,fp8,fp8,0,0.032272001107533775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,4,128,0,1,float16,float16,0,0.037530665596326195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,4,128,0,1,float16,fp8,0,0.036858665446440377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,4,128,0,1,fp8,fp8,0,0.03498133271932602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,8,128,0,1,float16,float16,0,0.03743999948104223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,8,128,0,1,float16,fp8,0,0.0388373335202535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,16,8,128,0,1,fp8,fp8,0,0.034634667138258614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,16,128,0,1,float16,float16,0,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,16,128,0,1,float16,fp8,0,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,16,128,0,1,fp8,fp8,0,0.0249439999461174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,1,128,0,1,float16,float16,0,0.026144000391165417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,1,128,0,1,float16,fp8,0,0.02497600018978119
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,1,128,0,1,fp8,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,4,128,0,1,float16,fp8,0,0.026501332720120747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,2,128,0,1,float16,float16,0,0.026000000536441803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,2,128,0,1,float16,fp8,0,0.025829332570234936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,2,128,0,1,fp8,fp8,0,0.023434666295846302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,4,128,0,1,float16,float16,0,0.026352000733216602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,4,128,0,1,fp8,fp8,0,0.024288001159826916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,8,128,0,1,float16,float16,0,0.0266239990790685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,8,128,0,1,float16,fp8,0,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,16,8,128,0,1,fp8,fp8,0,0.0239680012067159
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,16,128,0,1,float16,float16,0,0.02351466566324234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,16,128,0,1,float16,fp8,0,0.023706667125225067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,16,128,0,1,fp8,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,1,128,0,1,float16,float16,0,0.022261333962281544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,1,128,0,1,float16,fp8,0,0.0227360005180041
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,1,128,0,1,fp8,fp8,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,2,128,0,1,float16,float16,0,0.022416000564893086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,2,128,0,1,float16,fp8,0,0.02235200007756551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,2,128,0,1,fp8,fp8,0,0.020736000190178554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,4,128,0,1,float16,float16,0,0.02292799949645996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,4,128,0,1,float16,fp8,0,0.02294933299223582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,4,128,0,1,fp8,fp8,0,0.02081599955757459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,8,128,0,1,float16,float16,0,0.024085332949956257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,8,128,0,1,float16,fp8,0,0.024304000039895374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,16,8,128,0,1,fp8,fp8,0,0.021386665602525074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,1,128,0,1,float16,float16,0,0.6459786494572958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,1,128,0,1,float16,fp8,0,0.6425653298695883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,1,128,0,1,fp8,fp8,0,0.5726933479309082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,2,128,0,1,float16,float16,0,0.6543306509653727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,2,128,0,1,float16,fp8,0,0.648416002591451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,2,128,0,1,fp8,fp8,0,0.6123413244883219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,4,128,0,1,float16,float16,0,0.6818239688873291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,4,128,0,1,float16,fp8,0,0.6712533632914225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,4,128,0,1,fp8,fp8,0,0.6839146614074707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,8,128,0,1,float16,float16,0,0.6978560288747152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,8,128,0,1,float16,fp8,0,0.6870400110880533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,16,128,0,1,float16,float16,0,0.416101336479187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,16,8,128,0,1,fp8,fp8,0,0.693498690923055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,16,128,0,1,float16,fp8,0,0.4033653338750203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,16,128,0,1,fp8,fp8,0,0.3703999916712443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,1,128,0,1,float16,float16,0,0.33081066608428955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,1,128,0,1,float16,fp8,0,0.32999465862909955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,1,128,0,1,fp8,fp8,0,0.2922826608022054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,2,128,0,1,float16,float16,0,0.33296533425649005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,2,128,0,1,float16,fp8,0,0.3322773377100627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,2,128,0,1,fp8,fp8,0,0.30562132596969604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,4,128,0,1,float16,float16,0,0.3479306697845459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,4,128,0,1,float16,fp8,0,0.34692267576853436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,4,128,0,1,fp8,fp8,0,0.35235198338826496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,8,128,0,1,float16,float16,0,0.35968534151713055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,16,128,0,1,fp8,fp8,0,0.19336533546447754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,8,128,0,1,float16,fp8,0,0.3558666706085205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,16,8,128,0,1,fp8,fp8,0,0.3540000120798747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,1,128,0,1,fp8,fp8,0,0.1555519998073578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,16,128,0,1,float16,float16,0,0.2173866629600525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,16,128,0,1,float16,fp8,0,0.21163199345270792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,1,128,0,1,float16,float16,0,0.17674134174982706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,1,128,0,1,float16,fp8,0,0.17403199275334677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,2,128,0,1,float16,float16,0,0.17846399545669556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,2,128,0,1,float16,fp8,0,0.17645865678787231
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,2,128,0,1,fp8,fp8,0,0.158160001039505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,4,128,0,1,float16,float16,0,0.18526933590571085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,4,128,0,1,float16,fp8,0,0.18395733833312988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,4,128,0,1,fp8,fp8,0,0.18156800667444864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,8,128,0,1,float16,float16,0,0.19037334124247232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,8,128,0,1,float16,fp8,0,0.18779732783635458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,16,8,128,0,1,fp8,fp8,0,0.1848586599032084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,16,128,0,1,float16,float16,0,0.12154666582743327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,16,128,0,1,float16,fp8,0,0.11707199613253276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,16,128,0,1,fp8,fp8,0,0.10713600118954976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,1,128,0,1,float16,float16,0,0.09718400239944458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,1,128,0,1,float16,fp8,0,0.09713066617647807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,1,128,0,1,fp8,fp8,0,0.08617599805196126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,2,128,0,1,float16,float16,0,0.09910933176676433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,2,128,0,1,float16,fp8,0,0.10019200046857198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,2,128,0,1,fp8,fp8,0,0.08774933218955994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,4,128,0,1,float16,float16,0,0.10430933038393657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,4,128,0,1,float16,fp8,0,0.1032960017522176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,4,128,0,1,fp8,fp8,0,0.09693866968154907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,8,128,0,1,float16,float16,0,0.10574400424957275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,8,128,0,1,float16,fp8,0,0.10664000113805135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,16,8,128,0,1,fp8,fp8,0,0.09941333532333374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,16,128,0,1,float16,float16,0,0.06869333485762279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,16,128,0,1,float16,fp8,0,0.06705600023269653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,16,128,0,1,fp8,fp8,0,0.060506666700045265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,1,128,0,1,float16,float16,0,0.05496533215045929
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,1,128,0,1,float16,fp8,0,0.05352533360322317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,1,128,0,1,fp8,fp8,0,0.04804799954096476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,2,128,0,1,float16,float16,0,0.05601066847642263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,2,128,0,1,float16,fp8,0,0.05600533386071523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,2,128,0,1,fp8,fp8,0,0.05061866839726766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,4,128,0,1,float16,float16,0,0.05739733576774597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,4,128,0,1,float16,fp8,0,0.05821333328882853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,4,128,0,1,fp8,fp8,0,0.05541333556175232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,8,128,0,1,float16,float16,0,0.05930666625499725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,8,128,0,1,float16,fp8,0,0.060138667623202004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,16,8,128,0,1,fp8,fp8,0,0.05681600173314413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,16,128,0,1,float16,float16,0,0.03806933263937632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,16,128,0,1,float16,fp8,0,0.03654933224121729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,16,128,0,1,fp8,fp8,0,0.03544000039498011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,1,128,0,1,float16,float16,0,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,1,128,0,1,float16,fp8,0,0.03257066756486893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,1,128,0,1,fp8,fp8,0,0.030271999537944794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,2,128,0,1,float16,float16,0,0.032698666055997215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,2,128,0,1,float16,fp8,0,0.033557333052158356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,2,128,0,1,fp8,fp8,0,0.03017599880695343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,4,128,0,1,float16,float16,0,0.03382933388153712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,4,128,0,1,float16,fp8,0,0.03492266684770584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,4,128,0,1,fp8,fp8,0,0.03160533308982849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,8,128,0,1,float16,float16,0,0.03538133452335993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,16,128,0,1,fp8,fp8,0,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,8,128,0,1,float16,fp8,0,0.034234667817751564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,16,8,128,0,1,fp8,fp8,0,0.03458133339881897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,16,128,0,1,float16,float16,0,0.024826665719350178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,16,128,0,1,float16,fp8,0,0.023962666591008503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,1,128,0,1,float16,float16,0,0.022634667654832203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,1,128,0,1,float16,fp8,0,0.023237332701683044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,1,128,0,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,2,128,0,1,float16,float16,0,0.02317333221435547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,2,128,0,1,float16,fp8,0,0.024218666056791942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,2,128,0,1,fp8,fp8,0,0.021397332350413006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,4,128,0,1,float16,float16,0,0.023743999501069386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,4,128,0,1,float16,fp8,0,0.023946667710940044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,4,128,0,1,fp8,fp8,0,0.022698665658632915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,8,128,0,1,float16,float16,0,0.024847999215126038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,8,128,0,1,float16,fp8,0,0.023951999843120575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,16,8,128,0,1,fp8,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,16,128,0,1,float16,float16,0,0.020554666717847187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,16,128,0,1,float16,fp8,0,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,16,128,0,1,fp8,fp8,0,0.019359999646743137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,1,128,0,1,float16,float16,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,1,128,0,1,float16,fp8,0,0.02021866664290428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,1,128,0,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,2,128,0,1,float16,float16,0,0.0194560003777345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,2,128,0,1,float16,fp8,0,0.020362666497627895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,8,128,0,1,float16,fp8,0,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,2,128,0,1,fp8,fp8,0,0.018725333114465077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,4,128,0,1,float16,float16,0,0.019909333437681198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,4,128,0,1,float16,fp8,0,0.020165332903464634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,4,128,0,1,fp8,fp8,0,0.018645333747069042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,8,128,0,1,float16,float16,0,0.020309332758188248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,16,8,128,0,1,fp8,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,16,128,0,1,float16,float16,0,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,16,128,0,1,float16,fp8,0,0.019914666811625164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,16,128,0,1,fp8,fp8,0,0.017743999759356182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,1,128,0,1,float16,float16,0,0.01740266631046931
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,1,128,0,1,float16,fp8,0,0.0183999997874101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,1,128,0,1,fp8,fp8,0,0.017423999806245167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,2,128,0,1,float16,float16,0,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,2,128,0,1,float16,fp8,0,0.018250666558742523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,2,128,0,1,fp8,fp8,0,0.017845333864291508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,4,128,0,1,float16,float16,0,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,4,128,0,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,4,128,0,1,fp8,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,8,128,0,1,float16,float16,0,0.018272000054518383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,8,128,0,1,float16,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,1,128,0,1,fp8,fp8,0,0.2918826738993327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,16,8,128,0,1,fp8,fp8,0,0.01786133274435997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,1,128,0,1,float16,float16,0,0.30342400074005127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,1,128,0,1,float16,fp8,0,0.30300267537434894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,2,128,0,1,float16,float16,0,0.3128746747970581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,2,128,0,1,float16,fp8,0,0.3092693289120992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,2,128,0,1,fp8,fp8,0,0.30933332443237305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,4,128,0,1,float16,float16,0,0.32965866724650067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,4,128,0,1,float16,fp8,0,0.32414400577545166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,4,128,0,1,fp8,fp8,0,0.3500746488571167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,8,128,0,1,float16,float16,0,0.33506667613983154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,8,128,0,1,float16,fp8,0,0.33077865839004517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,16,8,128,0,1,fp8,fp8,0,0.3556640148162842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,16,128,0,1,float16,float16,0,0.21143466234207153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,16,128,0,1,float16,fp8,0,0.20638400316238403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,16,128,0,1,fp8,fp8,0,0.19115199645360312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,1,128,0,1,float16,float16,0,0.16314133008321127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,1,128,0,1,float16,fp8,0,0.16226133704185486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,1,128,0,1,fp8,fp8,0,0.15413332978884378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,2,128,0,1,float16,float16,0,0.1678719917933146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,4,128,0,1,fp8,fp8,0,0.1830079952875773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,2,128,0,1,float16,fp8,0,0.16657066345214844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,2,128,0,1,fp8,fp8,0,0.15921066204706827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,8,128,0,1,fp8,fp8,0,0.18661866585413614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,4,128,0,1,float16,float16,0,0.17482133706410727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,4,128,0,1,float16,fp8,0,0.17269867658615112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,8,128,0,1,float16,float16,0,0.1808799902598063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,16,8,128,0,1,float16,fp8,0,0.17798932393391928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,16,128,0,1,float16,float16,0,0.11868266264597575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,16,128,0,1,float16,fp8,0,0.11617066462834676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,2,128,0,1,float16,fp8,0,0.0940053363641103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,16,128,0,1,fp8,fp8,0,0.10131733616193135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,4,128,0,1,float16,float16,0,0.09890133142471313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,4,128,0,1,float16,fp8,0,0.09859733780225118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,1,128,0,1,float16,float16,0,0.09243200222651164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,1,128,0,1,float16,fp8,0,0.09098133444786072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,1,128,0,1,fp8,fp8,0,0.08596266309420268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,2,128,0,1,float16,float16,0,0.0953386624654134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,2,128,0,1,fp8,fp8,0,0.08960533142089844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,4,128,0,1,fp8,fp8,0,0.09618666768074036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,8,128,0,1,float16,float16,0,0.10228799780209859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,8,128,0,1,float16,fp8,0,0.10280000170071919
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,16,8,128,0,1,fp8,fp8,0,0.09678933024406433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,16,128,0,1,float16,float16,0,0.06468800206979115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,16,128,0,1,float16,fp8,0,0.06389866769313812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,16,128,0,1,fp8,fp8,0,0.05913066864013672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,1,128,0,1,float16,float16,0,0.051738664507865906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,1,128,0,1,float16,fp8,0,0.050981332858403526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,1,128,0,1,fp8,fp8,0,0.04987733562787374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,2,128,0,1,float16,float16,0,0.05251200000445048
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,2,128,0,1,float16,fp8,0,0.05268266797065735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,2,128,0,1,fp8,fp8,0,0.04952000081539154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,4,128,0,1,float16,float16,0,0.055546666185061135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,4,128,0,1,float16,fp8,0,0.05550399919350942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,4,128,0,1,fp8,fp8,0,0.05474133292833964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,8,128,0,1,float16,float16,0,0.05715199808279673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,8,128,0,1,float16,fp8,0,0.05629866818586985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,16,8,128,0,1,fp8,fp8,0,0.05468800167242686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,16,128,0,1,float16,float16,0,0.03583466758330663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,16,128,0,1,float16,fp8,0,0.03499733408292135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,16,128,0,1,fp8,fp8,0,0.03461333364248276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,1,128,0,1,float16,float16,0,0.03190399954716364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,1,128,0,1,float16,fp8,0,0.032127998769283295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,1,128,0,1,fp8,fp8,0,0.029525332152843475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,2,128,0,1,float16,float16,0,0.03224000086386999
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,2,128,0,1,float16,fp8,0,0.03221333275238673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,2,128,0,1,fp8,fp8,0,0.030832000076770782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,4,128,0,1,float16,float16,0,0.03366933266321818
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,4,128,0,1,float16,fp8,0,0.033157333731651306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,4,128,0,1,fp8,fp8,0,0.032032000521818794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,8,128,0,1,float16,float16,0,0.034847999612490334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,8,128,0,1,float16,fp8,0,0.03339733431736628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,16,8,128,0,1,fp8,fp8,0,0.0323786661028862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,16,128,0,1,float16,float16,0,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,16,128,0,1,float16,fp8,0,0.024101334313551586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,16,128,0,1,fp8,fp8,0,0.022789334257443745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,1,128,0,1,float16,float16,0,0.02197866638501485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,1,128,0,1,float16,fp8,0,0.021712000171343487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,1,128,0,1,fp8,fp8,0,0.02165866643190384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,2,128,0,1,float16,float16,0,0.021759999295075733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,2,128,0,1,float16,fp8,0,0.022272000710169475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,2,128,0,1,fp8,fp8,0,0.02181333303451538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,4,128,0,1,float16,float16,0,0.023205332458019257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,4,128,0,1,float16,fp8,0,0.023285334308942158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,4,128,0,1,fp8,fp8,0,0.022261333962281544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,8,128,0,1,float16,float16,0,0.02260799954334895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,8,128,0,1,float16,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,16,8,128,0,1,fp8,fp8,0,0.022266666094462078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,16,128,0,1,float16,float16,0,0.018453333526849747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,16,128,0,1,float16,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,16,128,0,1,fp8,fp8,0,0.019573333362738293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,1,128,0,1,float16,float16,0,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,1,128,0,1,float16,fp8,0,0.01801066721479098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,1,128,0,1,fp8,fp8,0,0.017914666483799618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,2,128,0,1,float16,float16,0,0.017914666483799618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,2,128,0,1,float16,fp8,0,0.018197332819302876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,2,128,0,1,fp8,fp8,0,0.018218666315078735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,8,128,0,1,fp8,fp8,0,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,4,128,0,1,float16,float16,0,0.018730666488409042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,4,128,0,1,float16,fp8,0,0.018709332992633183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,16,128,0,1,fp8,fp8,0,0.01814933369557063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,4,128,0,1,fp8,fp8,0,0.018735999862353008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,8,128,0,1,float16,float16,0,0.018405333161354065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,16,8,128,0,1,float16,fp8,0,0.018378666291634243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,16,128,0,1,float16,float16,0,0.01659199967980385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,16,128,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,2,128,0,1,fp8,fp8,0,0.01754133279124896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,1,128,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,1,128,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,1,128,0,1,fp8,fp8,0,0.017802666872739792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,2,128,0,1,float16,float16,0,0.01634666696190834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,8,128,0,1,float16,fp8,0,0.017658667018016178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,2,128,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,4,128,0,1,float16,float16,0,0.01624533285697301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,4,128,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,4,128,0,1,fp8,fp8,0,0.01766933376590411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,8,128,0,1,float16,float16,0,0.016741332908471424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,16,8,128,0,1,fp8,fp8,0,0.018239999810854595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,16,128,0,1,float16,float16,0,0.01613866661985715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,16,128,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,16,128,0,1,fp8,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,1,128,0,1,float16,float16,0,0.01613866661985715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,1,128,0,1,float16,fp8,0,0.016122666498025257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,1,128,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,2,128,0,1,float16,float16,0,0.01594666639963786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,2,128,0,1,float16,fp8,0,0.016528000434239704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,2,128,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,4,128,0,1,float16,float16,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,4,128,0,1,float16,fp8,0,0.01624533285697301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,4,128,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,8,128,0,1,float16,float16,0,0.015568000574906668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,8,128,0,1,float16,fp8,0,0.016528000434239704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,16,8,128,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,1,128,0,1,float16,float16,0,0.1833440065383911
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,1,128,0,1,float16,fp8,0,0.1825973391532898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,1,128,0,1,fp8,fp8,0,0.2100213368733724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,2,128,0,1,float16,float16,0,0.18745599190394083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,2,128,0,1,float16,fp8,0,0.18396800756454468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,2,128,0,1,fp8,fp8,0,0.21725332736968994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,4,128,0,1,float16,float16,0,0.19734400510787964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,4,128,0,1,float16,fp8,0,0.1955946683883667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,4,128,0,1,fp8,fp8,0,0.235152006149292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,8,128,0,1,float16,fp8,0,0.2004693349202474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,8,128,0,1,float16,float16,0,0.20494933923085532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,16,8,128,0,1,fp8,fp8,0,0.2378773291905721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,16,128,0,1,float16,float16,0,0.12437867124875386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,16,128,0,1,float16,fp8,0,0.12256532907485962
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,16,128,0,1,fp8,fp8,0,0.13153599699338278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,1,128,0,1,float16,float16,0,0.09963200489679973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,1,128,0,1,float16,fp8,0,0.09921600421269734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,2,128,0,1,fp8,fp8,0,0.11769066254297893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,1,128,0,1,fp8,fp8,0,0.11533866326014201
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,2,128,0,1,float16,float16,0,0.10212266445159912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,2,128,0,1,float16,fp8,0,0.10272000233332317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,4,128,0,1,float16,float16,0,0.10919466614723206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,4,128,0,1,float16,fp8,0,0.10809600353240967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,4,128,0,1,fp8,fp8,0,0.1251573363939921
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,8,128,0,1,float16,float16,0,0.11228799819946289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,8,128,0,1,float16,fp8,0,0.11170666416486104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,16,8,128,0,1,fp8,fp8,0,0.12572800119717917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,16,128,0,1,float16,float16,0,0.06860266625881195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,16,128,0,1,float16,fp8,0,0.06761066615581512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,16,128,0,1,fp8,fp8,0,0.07283199826876323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,1,128,0,1,float16,float16,0,0.05589333176612854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,1,128,0,1,float16,fp8,0,0.05635733405749003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,1,128,0,1,fp8,fp8,0,0.0645653357108434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,2,128,0,1,float16,float16,0,0.057376002271970115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,2,128,0,1,float16,fp8,0,0.05673066775004069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,2,128,0,1,fp8,fp8,0,0.06489066779613495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,4,128,0,1,float16,float16,0,0.06051200131575266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,4,128,0,1,float16,fp8,0,0.06044800082842509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,4,128,0,1,fp8,fp8,0,0.06927466889222463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,16,128,0,1,fp8,fp8,0,0.04242133100827535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,8,128,0,1,float16,float16,0,0.061887999375661217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,8,128,0,1,float16,fp8,0,0.06200533111890157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,16,8,128,0,1,fp8,fp8,0,0.07020799815654755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,16,128,0,1,float16,float16,0,0.037477334340413414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,16,128,0,1,float16,fp8,0,0.03708266715208689
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,1,128,0,1,float16,float16,0,0.03401066611210505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,1,128,0,1,float16,fp8,0,0.033770665526390076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,1,128,0,1,fp8,fp8,0,0.03807466725508372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,2,128,0,1,float16,float16,0,0.03453866640726725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,2,128,0,1,float16,fp8,0,0.03495466709136963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,8,128,0,1,float16,fp8,0,0.035674666364987694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,2,128,0,1,fp8,fp8,0,0.038245332737763725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,4,128,0,1,float16,float16,0,0.03477866699298223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,4,128,0,1,float16,fp8,0,0.03514133393764496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,4,128,0,1,fp8,fp8,0,0.03937600056330363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,8,128,0,1,float16,float16,0,0.03531199942032496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,16,8,128,0,1,fp8,fp8,0,0.04027199993530909
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,16,128,0,1,float16,float16,0,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,16,128,0,1,float16,fp8,0,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,16,128,0,1,fp8,fp8,0,0.026416001220544178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,1,128,0,1,float16,float16,0,0.022490667800108593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,1,128,0,1,float16,fp8,0,0.022730665902296703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,1,128,0,1,fp8,fp8,0,0.024330665667851765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,2,128,0,1,float16,float16,0,0.023408000667889912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,2,128,0,1,float16,fp8,0,0.023546665906906128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,2,128,0,1,fp8,fp8,0,0.02532800038655599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,4,128,0,1,float16,float16,0,0.02438933402299881
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,4,128,0,1,float16,fp8,0,0.02372266600529353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,4,128,0,1,fp8,fp8,0,0.025839999318122864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,8,128,0,1,float16,float16,0,0.023711999257405598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,8,128,0,1,float16,fp8,0,0.023706667125225067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,16,8,128,0,1,fp8,fp8,0,0.0262719988822937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,1,128,0,1,fp8,fp8,0,0.017701332767804463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,2,128,0,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,2,128,0,1,float16,fp8,0,0.017509333789348602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,16,128,0,1,float16,float16,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,16,128,0,1,float16,fp8,0,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,16,128,0,1,fp8,fp8,0,0.019541333119074505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,1,128,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,1,128,0,1,float16,fp8,0,0.017370666066805523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,2,128,0,1,fp8,fp8,0,0.018330667167901993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,4,128,0,1,float16,float16,0,0.01766933376590411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,4,128,0,1,float16,fp8,0,0.017887999614079792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,4,128,0,1,fp8,fp8,0,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,8,128,0,1,float16,float16,0,0.018511999398469925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,8,128,0,1,float16,fp8,0,0.018378666291634243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,16,8,128,0,1,fp8,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,16,128,0,1,float16,float16,0,0.015706667055686314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,16,128,0,1,float16,fp8,0,0.016352000335852306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,16,128,0,1,fp8,fp8,0,0.017621333400408428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,1,128,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,1,128,0,1,float16,fp8,0,0.01607999950647354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,1,128,0,1,fp8,fp8,0,0.017504000415404636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,2,128,0,1,float16,float16,0,0.015850666910409927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,8,128,0,1,float16,float16,0,0.015775999675194424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,2,128,0,1,float16,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,2,128,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,4,128,0,1,float16,float16,0,0.015605332950750986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,4,128,0,1,float16,fp8,0,0.01602666700879733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,4,128,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,8,128,0,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,16,8,128,0,1,fp8,fp8,0,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,16,128,0,1,float16,float16,0,0.015781333049138386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,16,128,0,1,float16,fp8,0,0.015557333827018738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,16,128,0,1,fp8,fp8,0,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,1,128,0,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,1,128,0,1,float16,fp8,0,0.015669333438078564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,1,128,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,4,128,0,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,2,128,0,1,float16,float16,0,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,2,128,0,1,float16,fp8,0,0.015861333658297855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,2,128,0,1,fp8,fp8,0,0.016490666816631954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,4,128,0,1,float16,float16,0,0.015541333705186844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,4,128,0,1,fp8,fp8,0,0.016613333175579708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,8,128,0,1,float16,float16,0,0.01545599972208341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,8,128,0,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,16,8,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,16,128,0,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,16,128,0,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,16,128,0,1,fp8,fp8,0,0.016303999970356624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,1,128,0,1,float16,float16,0,0.014629332969586054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,1,128,0,1,float16,fp8,0,0.01545599972208341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,1,128,0,1,fp8,fp8,0,0.016384000579516094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,2,128,0,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,4,128,0,1,fp8,fp8,0,0.016549333930015564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,8,128,0,1,float16,float16,0,0.014463999619086584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,2,128,0,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,8,128,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,2,128,0,1,fp8,fp8,0,0.015824000040690105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,4,128,0,1,float16,float16,0,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,4,128,0,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,16,8,128,0,1,float16,fp8,0,0.01553600033124288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,1,128,0,1,float16,float16,0,0.12866133451461792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,1,128,0,1,float16,fp8,0,0.12994133432706198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,1,128,0,1,fp8,fp8,0,0.16937599579493204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,2,128,0,1,float16,float16,0,0.13193066914876303
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,2,128,0,1,float16,fp8,0,0.13051733374595642
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,2,128,0,1,fp8,fp8,0,0.17153600851694742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,4,128,0,1,float16,fp8,0,0.1368000010649363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,4,128,0,1,fp8,fp8,0,0.18104533354441324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,4,128,0,1,float16,float16,0,0.1376213332017263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,8,128,0,1,float16,float16,0,0.1425440013408661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,8,128,0,1,float16,fp8,0,0.1418506701787313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,16,8,128,0,1,fp8,fp8,0,0.18055999279022217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,16,128,0,1,float16,float16,0,0.08501332998275757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,16,128,0,1,float16,fp8,0,0.08330133557319641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,16,128,0,1,fp8,fp8,0,0.10204799969991048
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,1,128,0,1,float16,float16,0,0.07098133365313213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,1,128,0,1,float16,fp8,0,0.07211733361085255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,1,128,0,1,fp8,fp8,0,0.0912000040213267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,2,128,0,1,float16,float16,0,0.07318933308124542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,2,128,0,1,float16,fp8,0,0.07180266578992207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,2,128,0,1,fp8,fp8,0,0.09347732861836751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,4,128,0,1,float16,float16,0,0.07470933099587758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,4,128,0,1,float16,fp8,0,0.07424533367156982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,4,128,0,1,fp8,fp8,0,0.09763733545939128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,8,128,0,1,float16,float16,0,0.07815999786059062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,8,128,0,1,float16,fp8,0,0.07720533510049184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,16,8,128,0,1,fp8,fp8,0,0.0990133285522461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,1,128,0,1,fp8,fp8,0,0.052517334620157875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,16,128,0,1,float16,float16,0,0.04517866671085358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,16,128,0,1,float16,fp8,0,0.044480000933011375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,1,128,0,1,float16,float16,0,0.04115733255942663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,16,128,0,1,fp8,fp8,0,0.05677866439024607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,1,128,0,1,float16,fp8,0,0.04058133314053217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,2,128,0,1,float16,float16,0,0.041802664597829185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,2,128,0,1,float16,fp8,0,0.04194133480389913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,2,128,0,1,fp8,fp8,0,0.05354666709899902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,4,128,0,1,float16,float16,0,0.0420959989229838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,4,128,0,1,float16,fp8,0,0.04225599765777588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,4,128,0,1,fp8,fp8,0,0.054144000013669334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,8,128,0,1,float16,float16,0,0.043162668744723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,8,128,0,1,float16,fp8,0,0.04276266694068909
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,16,8,128,0,1,fp8,fp8,0,0.05505066613356272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,16,128,0,1,float16,float16,0,0.02831999957561493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,16,128,0,1,float16,fp8,0,0.02834133307139079
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,2,128,0,1,float16,fp8,0,0.027050666511058807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,16,128,0,1,fp8,fp8,0,0.03370666752258936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,1,128,0,1,float16,float16,0,0.0262719988822937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,1,128,0,1,float16,fp8,0,0.026005332668622334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,4,128,0,1,fp8,fp8,0,0.03295466552178065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,1,128,0,1,fp8,fp8,0,0.03260799994071325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,2,128,0,1,float16,float16,0,0.02606933315594991
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,2,128,0,1,fp8,fp8,0,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,4,128,0,1,float16,float16,0,0.027237333357334137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,4,128,0,1,float16,fp8,0,0.027674667537212372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,8,128,0,1,float16,float16,0,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,8,128,0,1,float16,fp8,0,0.027242665489514668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,16,8,128,0,1,fp8,fp8,0,0.03334933271010717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,16,128,0,1,float16,float16,0,0.01972266659140587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,16,128,0,1,float16,fp8,0,0.019674666225910187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,16,128,0,1,fp8,fp8,0,0.023530667026837666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,1,128,0,1,float16,float16,0,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,1,128,0,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,4,128,0,1,float16,fp8,0,0.019658666104078293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,4,128,0,1,fp8,fp8,0,0.023717333873112995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,1,128,0,1,fp8,fp8,0,0.02236266682545344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,2,128,0,1,float16,float16,0,0.01858666663368543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,2,128,0,1,float16,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,2,128,0,1,fp8,fp8,0,0.022309333086013794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,4,128,0,1,float16,float16,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,8,128,0,1,float16,float16,0,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,8,128,0,1,fp8,fp8,0,0.024160000185171764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,16,8,128,0,1,float16,fp8,0,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,16,128,0,1,float16,float16,0,0.015509333461523056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,16,128,0,1,float16,fp8,0,0.015781333049138386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,16,128,0,1,fp8,fp8,0,0.018485333770513535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,1,128,0,1,float16,float16,0,0.015360000232855478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,1,128,0,1,float16,fp8,0,0.016271999726692837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,1,128,0,1,fp8,fp8,0,0.017893332988023758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,4,128,0,1,float16,fp8,0,0.015765332927306492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,2,128,0,1,float16,float16,0,0.015589332828919092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,2,128,0,1,float16,fp8,0,0.015487999965747198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,2,128,0,1,fp8,fp8,0,0.01757866640885671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,4,128,0,1,float16,float16,0,0.015642666568358738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,4,128,0,1,fp8,fp8,0,0.01828266680240631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,8,128,0,1,float16,float16,0,0.015941333025693893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,8,128,0,1,float16,fp8,0,0.01573333392540614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,1,128,0,1,float16,float16,0,0.01469333345691363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,16,8,128,0,1,fp8,fp8,0,0.01883200059334437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,16,128,0,1,float16,float16,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,16,128,0,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,16,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,1,128,0,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,1,128,0,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,2,128,0,1,float16,float16,0,0.014597332725922266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,2,128,0,1,float16,fp8,0,0.015583999454975128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,2,128,0,1,fp8,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,4,128,0,1,float16,float16,0,0.01461333284775416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,4,128,0,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,4,128,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,8,128,0,1,float16,float16,0,0.014666666587193808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,8,128,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,16,8,128,0,1,fp8,fp8,0,0.017557332913080852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,1,128,0,1,fp8,fp8,0,0.016309333344300587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,16,128,0,1,float16,float16,0,0.013946666071812311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,16,128,0,1,float16,fp8,0,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,16,128,0,1,fp8,fp8,0,0.016074666132529575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,1,128,0,1,float16,fp8,0,0.01452800010641416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,1,128,0,1,float16,float16,0,0.01423466702302297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,2,128,0,1,float16,float16,0,0.0141546664138635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,2,128,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,2,128,0,1,fp8,fp8,0,0.016314666718244553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,4,128,0,1,float16,float16,0,0.01379199946920077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,4,128,0,1,float16,fp8,0,0.014192000031471252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,4,128,0,1,fp8,fp8,0,0.015930666277805965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,8,128,0,1,float16,float16,0,0.014170666535695394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,8,128,0,1,float16,fp8,0,0.014736000448465347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,16,8,128,0,1,fp8,fp8,0,0.017658667018016178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,16,128,0,1,float16,float16,0,0.01431999976436297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,16,128,0,1,float16,fp8,0,0.01402666668097178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,16,128,0,1,fp8,fp8,0,0.01595199977358182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,1,128,0,1,float16,float16,0,0.013503999759753546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,1,128,0,1,float16,fp8,0,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,1,128,0,1,fp8,fp8,0,0.01632533346613248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,2,128,0,1,float16,float16,0,0.014117332796255747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,2,128,0,1,float16,fp8,0,0.014544000228246054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,2,128,0,1,fp8,fp8,0,0.0164533331990242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,4,128,0,1,float16,float16,0,0.01360000049074491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,4,128,0,1,float16,fp8,0,0.013962666193644205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,4,128,0,1,fp8,fp8,0,0.015941333025693893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,8,128,0,1,float16,float16,0,0.013690666606028875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,8,128,0,1,float16,fp8,0,0.013978666315476099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,16,8,128,0,1,fp8,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,1,128,0,1,float16,float16,0,0.10813867052396138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,2,128,0,1,fp8,fp8,0,0.14970133701960245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,1,128,0,1,float16,fp8,0,0.1071519951025645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,1,128,0,1,fp8,fp8,0,0.1474133332570394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,2,128,0,1,float16,float16,0,0.10818666219711304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,2,128,0,1,float16,fp8,0,0.10776533683141072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,4,128,0,1,float16,float16,0,0.10998400052388509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,4,128,0,1,float16,fp8,0,0.10997866590817769
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,16,128,0,1,float16,fp8,0,0.06272533535957336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,4,128,0,1,fp8,fp8,0,0.15402133266131082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,8,128,0,1,float16,float16,0,0.11241599917411804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,8,128,0,1,float16,fp8,0,0.11155200004577637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,16,8,128,0,1,fp8,fp8,0,0.15379732847213745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,16,128,0,1,float16,float16,0,0.06399466594060262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,16,128,0,1,fp8,fp8,0,0.0865066647529602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,1,128,0,1,float16,float16,0,0.0599839985370636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,1,128,0,1,float16,fp8,0,0.059418668349583946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,1,128,0,1,fp8,fp8,0,0.08042133351167043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,2,128,0,1,float16,float16,0,0.05938666562239329
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,2,128,0,1,float16,fp8,0,0.06025599936644236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,2,128,0,1,fp8,fp8,0,0.08102400104204814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,4,128,0,1,float16,float16,0,0.06001066664854685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,4,128,0,1,float16,fp8,0,0.060640002290407814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,4,128,0,1,fp8,fp8,0,0.08410132924715678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,8,128,0,1,float16,float16,0,0.060506666700045265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,8,128,0,1,float16,fp8,0,0.06098133325576782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,16,8,128,0,1,fp8,fp8,0,0.08278400202592213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,16,128,0,1,float16,float16,0,0.03742400060097376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,16,128,0,1,float16,fp8,0,0.036831999818483986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,16,128,0,1,fp8,fp8,0,0.04886400202910105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,1,128,0,1,float16,float16,0,0.035258665680885315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,1,128,0,1,float16,fp8,0,0.035674666364987694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,1,128,0,1,fp8,fp8,0,0.046816001335779824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,2,128,0,1,float16,float16,0,0.03562133262554804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,2,128,0,1,float16,fp8,0,0.03568000098069509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,2,128,0,1,fp8,fp8,0,0.04829333225886027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,4,128,0,1,float16,float16,0,0.03605333218971888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,4,128,0,1,float16,fp8,0,0.03604800005753835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,4,128,0,1,fp8,fp8,0,0.04779199759165446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,8,128,0,1,float16,float16,0,0.036933332681655884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,8,128,0,1,float16,fp8,0,0.0365226666132609
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,16,8,128,0,1,fp8,fp8,0,0.04858666658401489
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,16,128,0,1,float16,float16,0,0.02477866659561793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,1,128,0,1,fp8,fp8,0,0.029626667499542236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,16,128,0,1,float16,fp8,0,0.024832000335057575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,16,128,0,1,fp8,fp8,0,0.03035199890534083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,1,128,0,1,float16,float16,0,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,1,128,0,1,float16,fp8,0,0.024373332659403484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,2,128,0,1,float16,float16,0,0.023999998966852825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,2,128,0,1,float16,fp8,0,0.024031999210516613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,2,128,0,1,fp8,fp8,0,0.030016000072161358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,4,128,0,1,float16,float16,0,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,4,128,0,1,float16,fp8,0,0.02388266722361247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,4,128,0,1,fp8,fp8,0,0.029978667696317036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,8,128,0,1,float16,float16,0,0.02420799930890401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,8,128,0,1,float16,fp8,0,0.024693332612514496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,16,8,128,0,1,fp8,fp8,0,0.030970667799313862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,16,128,0,1,float16,float16,0,0.018207999567190807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,16,128,0,1,float16,fp8,0,0.018181333939234417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,16,128,0,1,fp8,fp8,0,0.022287999590237934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,1,128,0,1,float16,float16,0,0.017840000490347546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,1,128,0,1,float16,fp8,0,0.01803733284274737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,1,128,0,1,fp8,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,2,128,0,1,float16,float16,0,0.018021332720915478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,2,128,0,1,float16,fp8,0,0.01786133274435997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,2,128,0,1,fp8,fp8,0,0.02162666618824005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,4,128,0,1,float16,float16,0,0.017749333133300144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,4,128,0,1,float16,fp8,0,0.018250666558742523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,4,128,0,1,fp8,fp8,0,0.021935999393463135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,8,128,0,1,float16,fp8,0,0.018133333573738735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,8,128,0,1,float16,float16,0,0.01814933369557063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,16,8,128,0,1,fp8,fp8,0,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,16,128,0,1,float16,float16,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,16,128,0,1,float16,fp8,0,0.01570133368174235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,16,128,0,1,fp8,fp8,0,0.017477333545684814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,1,128,0,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,1,128,0,1,float16,fp8,0,0.015706667055686314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,1,128,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,2,128,0,1,float16,float16,0,0.01533866673707962
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,2,128,0,1,float16,fp8,0,0.016021333634853363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,2,128,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,4,128,0,1,float16,float16,0,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,16,128,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,4,128,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,4,128,0,1,fp8,fp8,0,0.017786666750907898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,8,128,0,1,float16,float16,0,0.015498666713635126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,8,128,0,1,float16,fp8,0,0.015664000064134598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,16,8,128,0,1,fp8,fp8,0,0.018133333573738735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,16,128,0,1,float16,fp8,0,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,16,128,0,1,fp8,fp8,0,0.01655999943614006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,2,128,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,1,128,0,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,1,128,0,1,float16,fp8,0,0.014256000518798828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,1,128,0,1,fp8,fp8,0,0.016021333634853363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,2,128,0,1,float16,float16,0,0.014741333822409311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,2,128,0,1,float16,fp8,0,0.014416000495354334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,4,128,0,1,float16,float16,0,0.014309333016475042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,4,128,0,1,float16,fp8,0,0.0141546664138635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,4,128,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,8,128,0,1,float16,float16,0,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,8,128,0,1,float16,fp8,0,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,16,8,128,0,1,fp8,fp8,0,0.01632533346613248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,16,128,0,1,float16,float16,0,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,16,128,0,1,float16,fp8,0,0.014192000031471252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,16,128,0,1,fp8,fp8,0,0.01646399994691213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,1,128,0,1,float16,float16,0,0.0136266661187013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,1,128,0,1,float16,fp8,0,0.014229333649079004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,1,128,0,1,fp8,fp8,0,0.016538667182127636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,2,128,0,1,float16,float16,0,0.013914667069911957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,2,128,0,1,float16,fp8,0,0.014186666657527288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,2,128,0,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,4,128,0,1,float16,float16,0,0.01349866638580958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,4,128,0,1,float16,fp8,0,0.014453332871198654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,4,128,0,1,fp8,fp8,0,0.01640533283352852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,8,128,0,1,float16,float16,0,0.014053333550691605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,8,128,0,1,float16,fp8,0,0.015423999478419622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,16,8,128,0,1,fp8,fp8,0,0.01635733370979627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,16,128,0,1,float16,float16,0,0.013717333475748697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,2,128,0,1,float16,float16,0,0.013855999956528345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,16,128,0,1,float16,fp8,0,0.014490666488806406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,16,128,0,1,fp8,fp8,0,0.015765332927306492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,1,128,0,1,float16,float16,0,0.013829333086808523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,1,128,0,1,float16,fp8,0,0.01392000044385592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,1,128,0,1,fp8,fp8,0,0.016176000237464905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,2,128,0,1,float16,fp8,0,0.014111999422311783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,2,128,0,1,fp8,fp8,0,0.016085332880417507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,4,128,0,1,float16,float16,0,0.013893333574136099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,4,128,0,1,float16,fp8,0,0.014362666755914688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,4,128,0,1,fp8,fp8,0,0.01599466676513354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,8,128,0,1,float16,float16,0,0.01452800010641416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,8,128,0,1,float16,fp8,0,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,16,8,128,0,1,fp8,fp8,0,0.016208000481128693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,12,1,128,0,1,fp8,fp8,0,8.286730448404947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,12,2,128,0,1,fp8,fp8,0,8.381290435791016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,12,1,128,0,1,float16,float16,0,12.608416239420572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,12,1,128,0,1,float16,fp8,0,12.601749420166016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,12,2,128,0,1,float16,float16,0,12.69321060180664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,12,2,128,0,1,float16,fp8,0,12.51418685913086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,12,4,128,0,1,float16,float16,0,12.53387705485026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,12,4,128,0,1,float16,fp8,0,12.807931264241537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,12,128,0,1,float16,float16,0,6.371552149454753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,12,4,128,0,1,fp8,fp8,0,8.375120162963867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,12,128,0,1,fp8,fp8,0,4.37226676940918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,12,128,0,1,float16,fp8,0,6.454245249430339
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,1,128,0,1,fp8,fp8,0,4.31553586324056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,1,128,0,1,float16,float16,0,6.341616312662761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,1,128,0,1,float16,fp8,0,6.353498458862305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,2,128,0,1,fp8,fp8,0,4.239952087402344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,2,128,0,1,float16,float16,0,6.396378835042317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,12,128,0,1,float16,float16,0,3.2334025700887046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,12,128,0,1,float16,fp8,0,3.216047922770182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,2,128,0,1,float16,fp8,0,6.3089650472005205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,4,128,0,1,fp8,fp8,0,4.303157488505046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,12,128,0,1,fp8,fp8,0,2.169050693511963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,4,128,0,1,float16,float16,0,6.365552266438802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,12,4,128,0,1,float16,fp8,0,6.522725423177083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,1,128,0,1,fp8,fp8,0,2.1265172958374023
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,1,128,0,1,float16,float16,0,3.175018628438314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,1,128,0,1,float16,fp8,0,3.191413243611654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,2,128,0,1,float16,fp8,0,3.2109705607096353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,2,128,0,1,float16,float16,0,3.0935465494791665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,2,128,0,1,fp8,fp8,0,2.099994659423828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,12,128,0,1,float16,float16,0,1.6724054018656414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,4,128,0,1,fp8,fp8,0,2.119877338409424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,4,128,0,1,float16,float16,0,3.241840044657389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,12,128,0,1,float16,fp8,0,1.6413547197977703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,12,128,0,1,fp8,fp8,0,1.1167679627736409
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,12,4,128,0,1,float16,fp8,0,3.2915093104044595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,1,128,0,1,float16,float16,0,1.6178827285766602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,1,128,0,1,fp8,fp8,0,1.12554136912028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,1,128,0,1,float16,fp8,0,1.5962133407592773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,2,128,0,1,float16,float16,0,1.595578670501709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,2,128,0,1,fp8,fp8,0,1.0462186336517334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,2,128,0,1,float16,fp8,0,1.606869379679362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,4,128,0,1,float16,float16,0,1.712000052134196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,4,128,0,1,fp8,fp8,0,1.1049119631449382
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,12,4,128,0,1,float16,fp8,0,1.6122612953186035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,12,1,128,0,1,fp8,fp8,0,4.865743954976399
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,12,2,128,0,1,fp8,fp8,0,4.886831919352214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,12,1,128,0,1,float16,float16,0,7.262240091959636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,12,1,128,0,1,float16,fp8,0,7.339637120564778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,12,2,128,0,1,float16,float16,0,7.390581130981445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,12,2,128,0,1,float16,fp8,0,7.282501220703125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,12,4,128,0,1,float16,float16,0,7.278079986572266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,12,4,128,0,1,float16,fp8,0,7.513973236083984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,12,128,0,1,float16,float16,0,3.7832053502400718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,12,128,0,1,fp8,fp8,0,2.5242932637532554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,12,4,128,0,1,fp8,fp8,0,4.905632019042969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,12,128,0,1,float16,fp8,0,3.6821492513020835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,1,128,0,1,fp8,fp8,0,2.439919948577881
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,1,128,0,1,float16,float16,0,3.6740907033284507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,1,128,0,1,float16,fp8,0,3.582677205403646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,2,128,0,1,float16,float16,0,3.7199414571126304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,2,128,0,1,fp8,fp8,0,2.4120267232259116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,2,128,0,1,float16,fp8,0,3.7024799982706704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,12,128,0,1,float16,float16,0,1.8785546620686848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,12,128,0,1,float16,fp8,0,1.842133363087972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,4,128,0,1,fp8,fp8,0,2.384618600209554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,4,128,0,1,float16,float16,0,3.656741460164388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,12,4,128,0,1,float16,fp8,0,3.70471986134847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,12,128,0,1,fp8,fp8,0,1.3035039901733398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,1,128,0,1,float16,float16,0,1.7846506436665852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,1,128,0,1,fp8,fp8,0,1.2517653306325276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,1,128,0,1,float16,fp8,0,1.818021297454834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,2,128,0,1,float16,float16,0,1.8033706347147624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,2,128,0,1,fp8,fp8,0,1.348629315694173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,2,128,0,1,float16,fp8,0,1.7938346862792969
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,4,128,0,1,float16,float16,0,1.7785439491271973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,4,128,0,1,float16,fp8,0,1.7986559867858887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,12,128,0,1,float16,float16,0,0.9749759833017985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,12,128,0,1,fp8,fp8,0,0.6833279927571615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,1,128,0,1,float16,float16,0,0.9649013678232828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,12,4,128,0,1,fp8,fp8,0,1.2420106728871663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,12,128,0,1,float16,fp8,0,0.9915839831034342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,1,128,0,1,float16,fp8,0,0.9684960047403971
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,1,128,0,1,fp8,fp8,0,0.6442559957504272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,2,128,0,1,float16,float16,0,0.9784639676411947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,2,128,0,1,float16,fp8,0,0.9678933620452881
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,2,128,0,1,fp8,fp8,0,0.6375519831975301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,4,128,0,1,float16,float16,0,0.9678613344828287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,4,128,0,1,fp8,fp8,0,0.6309226751327515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,12,4,128,0,1,float16,fp8,0,0.9693280061086019
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,12,1,128,0,1,fp8,fp8,0,3.4206345876057944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,12,2,128,0,1,fp8,fp8,0,3.4620800018310547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,12,1,128,0,1,float16,float16,0,5.202266693115234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,12,1,128,0,1,float16,fp8,0,5.215488115946452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,12,2,128,0,1,float16,float16,0,5.2020266850789385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,12,2,128,0,1,float16,fp8,0,5.1746721267700195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,12,4,128,0,1,float16,float16,0,5.182623863220215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,12,4,128,0,1,float16,fp8,0,5.313520113627116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,12,128,0,1,fp8,fp8,0,1.810970624287923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,12,128,0,1,float16,float16,0,2.615407943725586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,12,4,128,0,1,fp8,fp8,0,3.4780747095743814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,12,128,0,1,float16,fp8,0,2.584005355834961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,1,128,0,1,float16,float16,0,2.5432106653849282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,1,128,0,1,fp8,fp8,0,1.7299307187398274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,1,128,0,1,float16,fp8,0,2.524229367574056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,2,128,0,1,float16,float16,0,2.4797919591267905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,2,128,0,1,fp8,fp8,0,1.7124640146891277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,2,128,0,1,float16,fp8,0,2.6023732821146646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,4,128,0,1,fp8,fp8,0,1.7369279861450195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,12,128,0,1,float16,float16,0,1.2954346338907878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,4,128,0,1,float16,float16,0,2.6361653010050454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,12,4,128,0,1,float16,fp8,0,2.5522133509318032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,12,128,0,1,fp8,fp8,0,0.890554666519165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,12,128,0,1,float16,fp8,0,1.3329493204752605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,1,128,0,1,float16,float16,0,1.3897973696390789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,1,128,0,1,fp8,fp8,0,0.8484373092651367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,1,128,0,1,float16,fp8,0,1.273802677790324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,2,128,0,1,float16,float16,0,1.2829653422037761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,2,128,0,1,fp8,fp8,0,0.8544853528340658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,2,128,0,1,float16,fp8,0,1.2724053064982097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,4,128,0,1,float16,float16,0,1.3080533345540364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,4,128,0,1,fp8,fp8,0,0.8570666313171387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,12,128,0,1,float16,float16,0,0.6364320119222006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,12,4,128,0,1,float16,fp8,0,1.2817066510518391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,12,128,0,1,fp8,fp8,0,0.4742826620737712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,12,128,0,1,float16,fp8,0,0.6502613226572672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,1,128,0,1,float16,float16,0,0.6240853468577067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,2,128,0,1,float16,float16,0,0.6269280115763346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,1,128,0,1,float16,fp8,0,0.6238240003585815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,1,128,0,1,fp8,fp8,0,0.4556586742401123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,2,128,0,1,float16,fp8,0,0.6398453315099081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,2,128,0,1,fp8,fp8,0,0.45984001954396564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,4,128,0,1,float16,float16,0,0.6281973520914713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,4,128,0,1,float16,fp8,0,0.624720017115275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,12,4,128,0,1,fp8,fp8,0,0.46348798274993896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,12,1,128,0,1,fp8,fp8,0,4.5633440017700195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,12,2,128,0,1,fp8,fp8,0,4.671231905619304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,12,1,128,0,1,float16,float16,0,6.793189366658528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,12,1,128,0,1,float16,fp8,0,6.783514658610026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,12,2,128,0,1,float16,float16,0,6.793856302897136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,12,2,128,0,1,float16,fp8,0,6.8472105662028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,12,4,128,0,1,float16,float16,0,6.756191889444987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,12,4,128,0,1,float16,fp8,0,6.777983983357747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,12,128,0,1,float16,float16,0,3.3616854349772134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,12,128,0,1,fp8,fp8,0,2.3760852813720703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,12,4,128,0,1,fp8,fp8,0,4.646629333496094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,12,128,0,1,float16,fp8,0,3.460048039754232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,1,128,0,1,float16,float16,0,3.4211254119873047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,1,128,0,1,fp8,fp8,0,2.279088020324707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,1,128,0,1,float16,fp8,0,3.4928159713745117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,2,128,0,1,float16,float16,0,3.288624127705892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,2,128,0,1,fp8,fp8,0,2.2646400133768716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,2,128,0,1,float16,fp8,0,3.231264114379883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,12,128,0,1,float16,float16,0,1.7051092783610027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,4,128,0,1,float16,float16,0,3.3610026041666665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,4,128,0,1,fp8,fp8,0,2.3173386255900064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,12,128,0,1,fp8,fp8,0,1.225669304529826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,12,128,0,1,float16,fp8,0,1.6791359583536785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,12,4,128,0,1,float16,fp8,0,3.4661547342936196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,1,128,0,1,float16,float16,0,1.6022292772928874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,1,128,0,1,fp8,fp8,0,1.1558132966359456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,1,128,0,1,float16,fp8,0,1.628778616587321
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,2,128,0,1,float16,float16,0,1.6085707346598308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,2,128,0,1,fp8,fp8,0,1.1567520300547283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,2,128,0,1,float16,fp8,0,1.6071893374125164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,4,128,0,1,float16,float16,0,1.6272053718566895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,4,128,0,1,fp8,fp8,0,1.1733866532643635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,12,128,0,1,float16,float16,0,0.876810630162557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,12,4,128,0,1,float16,fp8,0,1.6940372784932454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,12,128,0,1,float16,fp8,0,0.8865173657735189
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,12,128,0,1,fp8,fp8,0,0.6305013497670492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,1,128,0,1,float16,float16,0,0.8621760209401449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,1,128,0,1,fp8,fp8,0,0.6108426650365194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,1,128,0,1,float16,fp8,0,0.8588106632232666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,2,128,0,1,float16,float16,0,0.867082675298055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,2,128,0,1,float16,fp8,0,0.8582666714986166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,4,128,0,1,float16,fp8,0,0.8740426699320475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,2,128,0,1,fp8,fp8,0,0.5852320194244385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,4,128,0,1,float16,float16,0,0.8647680282592773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,12,4,128,0,1,fp8,fp8,0,0.5901866753896078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,1,128,0,1,float16,fp8,0,0.4238719940185547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,12,128,0,1,float16,float16,0,0.4379733403523763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,12,128,0,1,float16,fp8,0,0.4347573518753052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,12,128,0,1,fp8,fp8,0,0.32737066348393756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,1,128,0,1,float16,float16,0,0.42134400208791095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,1,128,0,1,fp8,fp8,0,0.3131306568781535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,2,128,0,1,float16,float16,0,0.42241064707438153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,2,128,0,1,fp8,fp8,0,0.3186560074488322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,2,128,0,1,float16,fp8,0,0.423525333404541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,4,128,0,1,float16,float16,0,0.42580266793568927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,4,128,0,1,float16,fp8,0,0.4261973301569621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,12,4,128,0,1,fp8,fp8,0,0.31939733028411865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,12,1,128,0,1,fp8,fp8,0,2.611695925394694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,12,1,128,0,1,float16,float16,0,3.7930399576822915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,12,1,128,0,1,float16,fp8,0,4.0456587473551435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,12,2,128,0,1,fp8,fp8,0,2.723989486694336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,12,2,128,0,1,float16,float16,0,3.9044853846232095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,12,2,128,0,1,float16,fp8,0,3.9832798639933267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,12,4,128,0,1,float16,float16,0,3.9353866577148438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,12,4,128,0,1,float16,fp8,0,3.924309412638346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,12,128,0,1,float16,float16,0,1.931722640991211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,12,128,0,1,fp8,fp8,0,1.4460426966349285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,12,4,128,0,1,fp8,fp8,0,2.7912267049153647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,12,128,0,1,float16,fp8,0,1.9885652860005696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,1,128,0,1,float16,float16,0,1.9104746182759602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,1,128,0,1,float16,fp8,0,1.871829350789388
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,1,128,0,1,fp8,fp8,0,1.3704853057861328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,2,128,0,1,float16,float16,0,1.9034080505371094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,2,128,0,1,float16,fp8,0,1.912277380625407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,2,128,0,1,fp8,fp8,0,1.3559892972310383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,12,128,0,1,float16,float16,0,0.9939946333567301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,4,128,0,1,fp8,fp8,0,1.3710613250732422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,12,128,0,1,float16,fp8,0,1.0300373236338298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,4,128,0,1,float16,float16,0,1.9161279996236165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,12,4,128,0,1,float16,fp8,0,1.9464373588562012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,12,128,0,1,fp8,fp8,0,0.7753012975056967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,1,128,0,1,float16,float16,0,0.9668107032775879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,1,128,0,1,float16,fp8,0,0.9664320151011149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,1,128,0,1,fp8,fp8,0,0.7093280156453451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,2,128,0,1,float16,float16,0,0.9628586769104004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,2,128,0,1,fp8,fp8,0,0.7131360371907552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,2,128,0,1,float16,fp8,0,0.9877173105875651
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,4,128,0,1,fp8,fp8,0,0.7190720240275065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,4,128,0,1,float16,float16,0,0.9679413636525472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,12,4,128,0,1,float16,fp8,0,0.9798933664957682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,12,128,0,1,float16,float16,0,0.5421973466873169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,12,128,0,1,float16,fp8,0,0.5457866589228312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,12,128,0,1,fp8,fp8,0,0.37994666894276935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,1,128,0,1,float16,fp8,0,0.5311093330383301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,2,128,0,1,float16,float16,0,0.5336106618245443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,1,128,0,1,float16,float16,0,0.5264586607615153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,4,128,0,1,float16,float16,0,0.534437338511149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,1,128,0,1,fp8,fp8,0,0.3585919936498006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,2,128,0,1,float16,fp8,0,0.5342186689376831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,2,128,0,1,fp8,fp8,0,0.3599413235982259
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,4,128,0,1,float16,fp8,0,0.5362240076065063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,12,4,128,0,1,fp8,fp8,0,0.3668373425801595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,12,128,0,1,float16,float16,0,0.2712159951527913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,12,128,0,1,float16,fp8,0,0.2737920085589091
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,12,128,0,1,fp8,fp8,0,0.21105599403381348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,1,128,0,1,float16,float16,0,0.262661337852478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,1,128,0,1,float16,fp8,0,0.2643893361091614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,1,128,0,1,fp8,fp8,0,0.2016213337580363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,2,128,0,1,float16,float16,0,0.26473067204157513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,2,128,0,1,float16,fp8,0,0.26574933528900146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,2,128,0,1,fp8,fp8,0,0.2041013240814209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,4,128,0,1,float16,float16,0,0.26637333631515503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,4,128,0,1,float16,fp8,0,0.2665333350499471
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,12,4,128,0,1,fp8,fp8,0,0.20572799444198608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,12,1,128,0,1,fp8,fp8,0,2.606053352355957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,12,1,128,0,1,float16,float16,0,3.8077707290649414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,12,1,128,0,1,float16,fp8,0,3.6852213541666665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,12,2,128,0,1,fp8,fp8,0,2.5913920402526855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,12,2,128,0,1,float16,float16,0,3.792229334513346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,12,2,128,0,1,float16,fp8,0,3.8795251846313477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,12,4,128,0,1,float16,float16,0,3.7304372787475586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,12,4,128,0,1,float16,fp8,0,3.7867520650227866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,12,128,0,1,fp8,fp8,0,1.4284639358520508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,12,128,0,1,float16,float16,0,1.9073012669881184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,12,128,0,1,float16,fp8,0,1.9100267092386882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,12,4,128,0,1,fp8,fp8,0,2.7234185536702475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,1,128,0,1,fp8,fp8,0,1.3495413462320964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,1,128,0,1,float16,float16,0,1.8008960088094075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,1,128,0,1,float16,fp8,0,1.8447252909342449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,2,128,0,1,float16,float16,0,1.8232320149739583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,2,128,0,1,float16,fp8,0,1.7887253761291504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,2,128,0,1,fp8,fp8,0,1.346783955891927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,4,128,0,1,float16,float16,0,1.8550666173299153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,12,128,0,1,float16,float16,0,0.9913866519927979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,4,128,0,1,fp8,fp8,0,1.3534560203552246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,12,4,128,0,1,float16,fp8,0,1.8083359400431316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,12,128,0,1,fp8,fp8,0,0.7525920073191324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,12,128,0,1,float16,fp8,0,0.9702186584472656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,1,128,0,1,float16,float16,0,0.9120693206787109
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,1,128,0,1,float16,fp8,0,0.9129707018534342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,1,128,0,1,fp8,fp8,0,0.69378129641215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,2,128,0,1,float16,float16,0,0.9168639977773031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,2,128,0,1,fp8,fp8,0,0.688917318979899
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,2,128,0,1,float16,fp8,0,0.9137492974599203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,4,128,0,1,float16,float16,0,0.9212373097737631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,4,128,0,1,fp8,fp8,0,0.6996320088704427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,12,4,128,0,1,float16,fp8,0,0.9293920199076334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,12,128,0,1,float16,float16,0,0.5078293482462565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,12,128,0,1,float16,fp8,0,0.5154346625010172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,12,128,0,1,fp8,fp8,0,0.38472533226013184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,1,128,0,1,float16,float16,0,0.4869920015335083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,1,128,0,1,float16,fp8,0,0.4888480106989543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,1,128,0,1,fp8,fp8,0,0.34948798020680744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,2,128,0,1,float16,float16,0,0.4915519952774048
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,2,128,0,1,float16,fp8,0,0.49371735254923504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,2,128,0,1,fp8,fp8,0,0.35388799508412677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,4,128,0,1,float16,float16,0,0.4925599892934163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,4,128,0,1,float16,fp8,0,0.50054931640625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,12,4,128,0,1,fp8,fp8,0,0.3590666850407918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,12,128,0,1,float16,float16,0,0.25410133600234985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,12,128,0,1,float16,fp8,0,0.2589600086212158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,12,128,0,1,fp8,fp8,0,0.20781866709391275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,1,128,0,1,float16,float16,0,0.24382932980855307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,1,128,0,1,float16,fp8,0,0.24653865893681845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,1,128,0,1,fp8,fp8,0,0.1918933391571045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,2,128,0,1,float16,float16,0,0.24649065732955933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,2,128,0,1,float16,fp8,0,0.24676799774169922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,2,128,0,1,fp8,fp8,0,0.1927093267440796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,4,128,0,1,float16,float16,0,0.2462986707687378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,4,128,0,1,float16,fp8,0,0.24923733870188394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,12,4,128,0,1,fp8,fp8,0,0.19645865758260092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,1,128,0,1,float16,fp8,0,0.14443733294804892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,12,128,0,1,float16,float16,0,0.1473973294099172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,12,128,0,1,float16,fp8,0,0.1511306663354238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,12,128,0,1,fp8,fp8,0,0.12307733297348022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,1,128,0,1,float16,float16,0,0.14475199580192566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,1,128,0,1,fp8,fp8,0,0.113045334815979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,2,128,0,1,float16,float16,0,0.14537066221237183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,2,128,0,1,float16,fp8,0,0.14450132846832275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,2,128,0,1,fp8,fp8,0,0.11434666315714519
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,4,128,0,1,float16,float16,0,0.14501866698265076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,4,128,0,1,float16,fp8,0,0.14678933223088583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,12,4,128,0,1,fp8,fp8,0,0.11531200011571248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,12,1,128,0,1,float16,float16,0,2.207749366760254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,12,1,128,0,1,fp8,fp8,0,1.6279733975728352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,12,1,128,0,1,float16,fp8,0,2.227557341257731
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,12,2,128,0,1,float16,float16,0,2.242015997568766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,12,2,128,0,1,fp8,fp8,0,1.6697972615559895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,12,2,128,0,1,float16,fp8,0,2.1551574071248374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,12,4,128,0,1,float16,float16,0,2.3015999794006348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,12,4,128,0,1,fp8,fp8,0,1.6807786623636882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,12,128,0,1,float16,float16,0,1.1713919639587402
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,12,4,128,0,1,float16,fp8,0,2.295072078704834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,12,128,0,1,float16,fp8,0,1.1736746629079182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,12,128,0,1,fp8,fp8,0,0.9205066363016764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,1,128,0,1,float16,float16,0,1.122314691543579
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,1,128,0,1,fp8,fp8,0,0.8435680071512858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,1,128,0,1,float16,fp8,0,1.1520906289418538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,2,128,0,1,float16,float16,0,1.0911786556243896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,2,128,0,1,float16,fp8,0,1.115552028020223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,4,128,0,1,float16,float16,0,1.0869706471761067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,2,128,0,1,fp8,fp8,0,0.8547893365224203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,12,128,0,1,float16,float16,0,0.6005226771036783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,4,128,0,1,fp8,fp8,0,0.8607093493143717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,12,4,128,0,1,float16,fp8,0,1.1144213676452637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,12,128,0,1,float16,fp8,0,0.6081440051396688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,12,128,0,1,fp8,fp8,0,0.4809759855270386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,1,128,0,1,float16,float16,0,0.5709706544876099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,1,128,0,1,float16,fp8,0,0.5720320145289103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,1,128,0,1,fp8,fp8,0,0.4424906571706136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,2,128,0,1,float16,float16,0,0.5682506561279297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,2,128,0,1,float16,fp8,0,0.5725813309351603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,2,128,0,1,fp8,fp8,0,0.4455519914627075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,4,128,0,1,float16,float16,0,0.5753866831461588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,4,128,0,1,fp8,fp8,0,0.45187731583913165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,12,4,128,0,1,float16,fp8,0,0.5811839898427328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,12,128,0,1,float16,float16,0,0.32524800300598145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,12,128,0,1,float16,fp8,0,0.33290666341781616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,12,128,0,1,fp8,fp8,0,0.25045865774154663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,1,128,0,1,float16,float16,0,0.310810665289561
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,1,128,0,1,float16,fp8,0,0.31379733482996625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,1,128,0,1,fp8,fp8,0,0.22688533862431845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,2,128,0,1,float16,float16,0,0.31335999568303424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,2,128,0,1,float16,fp8,0,0.31703466176986694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,2,128,0,1,fp8,fp8,0,0.2273226579030355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,4,128,0,1,float16,float16,0,0.3169333338737488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,4,128,0,1,float16,fp8,0,0.3185013333956401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,12,4,128,0,1,fp8,fp8,0,0.2315359910329183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,12,128,0,1,float16,float16,0,0.16716800133387247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,12,128,0,1,float16,fp8,0,0.17029867569605509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,12,128,0,1,fp8,fp8,0,0.1402400036652883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,1,128,0,1,float16,float16,0,0.15988799929618835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,1,128,0,1,float16,fp8,0,0.1584053337574005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,1,128,0,1,fp8,fp8,0,0.12823466459910074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,2,128,0,1,float16,float16,0,0.1584160029888153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,2,128,0,1,float16,fp8,0,0.15899200240770975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,2,128,0,1,fp8,fp8,0,0.12961066762606302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,4,128,0,1,float16,float16,0,0.15927466750144958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,4,128,0,1,float16,fp8,0,0.162282665570577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,1,128,0,1,float16,fp8,0,0.09524266918500264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,12,4,128,0,1,fp8,fp8,0,0.13084800044695535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,12,128,0,1,float16,float16,0,0.0983786682287852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,12,128,0,1,float16,fp8,0,0.09961600104967754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,12,128,0,1,fp8,fp8,0,0.08346133430798848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,1,128,0,1,float16,float16,0,0.09446932872136433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,4,128,0,1,float16,fp8,0,0.09603733817736308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,1,128,0,1,fp8,fp8,0,0.07726933558781941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,2,128,0,1,float16,float16,0,0.09541333715120952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,2,128,0,1,float16,fp8,0,0.09547199805577596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,2,128,0,1,fp8,fp8,0,0.07797333101431529
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,4,128,0,1,float16,float16,0,0.09524800380071004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,12,4,128,0,1,fp8,fp8,0,0.07746666669845581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,12,1,128,0,1,float16,float16,0,2.204554716746012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,12,1,128,0,1,fp8,fp8,0,1.7495999336242676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,12,1,128,0,1,float16,fp8,0,2.197978655497233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,12,2,128,0,1,float16,float16,0,2.2142507235209146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,12,2,128,0,1,fp8,fp8,0,1.7730132738749187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,12,2,128,0,1,float16,fp8,0,2.299781322479248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,12,4,128,0,1,float16,float16,0,2.2334399223327637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,12,128,0,1,float16,float16,0,1.2115573088328044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,12,4,128,0,1,float16,fp8,0,2.3448373476664224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,12,4,128,0,1,fp8,fp8,0,1.8221707344055176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,12,128,0,1,fp8,fp8,0,1.007914702097575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,12,128,0,1,float16,fp8,0,1.2140639623006184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,1,128,0,1,float16,float16,0,1.1058133443196614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,1,128,0,1,float16,fp8,0,1.1474613348642986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,1,128,0,1,fp8,fp8,0,0.9153013229370117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,2,128,0,1,float16,fp8,0,1.1568480332692463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,2,128,0,1,float16,float16,0,1.1291893323262532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,2,128,0,1,fp8,fp8,0,0.9057333469390869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,4,128,0,1,float16,float16,0,1.1340160369873047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,4,128,0,1,float16,fp8,0,1.141317367553711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,12,128,0,1,float16,float16,0,0.6228640079498291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,12,4,128,0,1,fp8,fp8,0,0.9263306458791097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,12,128,0,1,float16,fp8,0,0.6211093266805013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,12,128,0,1,fp8,fp8,0,0.5193066596984863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,1,128,0,1,float16,float16,0,0.5726879835128784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,1,128,0,1,float16,fp8,0,0.5746826728185018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,1,128,0,1,fp8,fp8,0,0.46956801414489746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,2,128,0,1,float16,float16,0,0.5727946758270264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,2,128,0,1,float16,fp8,0,0.5777279933293661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,2,128,0,1,fp8,fp8,0,0.4690133333206177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,4,128,0,1,float16,float16,0,0.578714648882548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,12,128,0,1,float16,fp8,0,0.3323360085487366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,4,128,0,1,float16,fp8,0,0.5822879870732626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,12,4,128,0,1,fp8,fp8,0,0.48129598299662274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,12,128,0,1,float16,float16,0,0.3288960059483846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,12,128,0,1,fp8,fp8,0,0.27135999997456867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,1,128,0,1,float16,float16,0,0.30364267031351727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,1,128,0,1,float16,fp8,0,0.30769066015879315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,1,128,0,1,fp8,fp8,0,0.23694932460784912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,2,128,0,1,float16,float16,0,0.3044426639874776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,2,128,0,1,float16,fp8,0,0.31060800949732464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,2,128,0,1,fp8,fp8,0,0.24053333202997842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,4,128,0,1,float16,float16,0,0.30982933441797894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,4,128,0,1,float16,fp8,0,0.3109760085741679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,12,4,128,0,1,fp8,fp8,0,0.24521599213282266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,12,128,0,1,float16,float16,0,0.1700213352839152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,12,128,0,1,float16,fp8,0,0.17441066106160483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,12,128,0,1,fp8,fp8,0,0.14759467045466104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,1,128,0,1,float16,float16,0,0.1562399963537852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,1,128,0,1,float16,fp8,0,0.15754666924476624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,1,128,0,1,fp8,fp8,0,0.1302239994208018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,2,128,0,1,float16,float16,0,0.1570080022017161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,2,128,0,1,float16,fp8,0,0.15796800454457602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,2,128,0,1,fp8,fp8,0,0.13121066490809122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,4,128,0,1,float16,float16,0,0.16124799847602844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,4,128,0,1,float16,fp8,0,0.16005333264668783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,12,4,128,0,1,fp8,fp8,0,0.1346453328927358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,1,128,0,1,fp8,fp8,0,0.07493866483370464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,12,128,0,1,float16,float16,0,0.09603733817736308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,12,128,0,1,float16,fp8,0,0.09780800342559814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,12,128,0,1,fp8,fp8,0,0.08516266942024231
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,1,128,0,1,float16,float16,0,0.08890133102734883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,1,128,0,1,float16,fp8,0,0.08917867143948872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,2,128,0,1,float16,float16,0,0.0897173285484314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,2,128,0,1,float16,fp8,0,0.0904853343963623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,2,128,0,1,fp8,fp8,0,0.07576000193754832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,4,128,0,1,float16,float16,0,0.0918293297290802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,4,128,0,1,float16,fp8,0,0.09200533231099446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,12,4,128,0,1,fp8,fp8,0,0.07801066835721333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,12,128,0,1,float16,float16,0,0.05884266893068949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,12,128,0,1,float16,fp8,0,0.05979733169078827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,12,128,0,1,fp8,fp8,0,0.05323199927806854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,1,128,0,1,float16,float16,0,0.058543999989827476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,1,128,0,1,float16,fp8,0,0.057962665955225624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,1,128,0,1,fp8,fp8,0,0.04991999765237173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,2,128,0,1,float16,float16,0,0.05819733440876007
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,2,128,0,1,float16,fp8,0,0.057962665955225624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,2,128,0,1,fp8,fp8,0,0.05012266834576925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,4,128,0,1,float16,float16,0,0.05776533484458923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,4,128,0,1,float16,fp8,0,0.05885866781075796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,12,4,128,0,1,fp8,fp8,0,0.050586665670077004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,12,1,128,0,1,float16,float16,0,1.41431458791097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,12,1,128,0,1,fp8,fp8,0,1.1834932963053386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,12,1,128,0,1,float16,fp8,0,1.415712038675944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,12,2,128,0,1,fp8,fp8,0,1.2012159824371338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,12,2,128,0,1,float16,float16,0,1.4202507336934407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,12,2,128,0,1,float16,fp8,0,1.4188426335652669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,12,4,128,0,1,float16,float16,0,1.4501759211222331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,12,128,0,1,float16,float16,0,0.7892693678538004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,12,4,128,0,1,float16,fp8,0,1.4520853360493977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,12,4,128,0,1,fp8,fp8,0,1.2428800264994304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,12,128,0,1,float16,fp8,0,0.7941599686940511
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,12,128,0,1,fp8,fp8,0,0.682474692662557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,1,128,0,1,float16,float16,0,0.7186079819997152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,1,128,0,1,float16,fp8,0,0.7210559844970703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,1,128,0,1,fp8,fp8,0,0.6073973178863525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,2,128,0,1,float16,float16,0,0.7122933069864908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,4,128,0,1,fp8,fp8,0,0.6323146820068359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,2,128,0,1,fp8,fp8,0,0.6149919827779134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,2,128,0,1,float16,fp8,0,0.7251573403676351
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,4,128,0,1,float16,float16,0,0.7367786566416422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,12,4,128,0,1,float16,fp8,0,0.7343626817067465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,12,128,0,1,float16,float16,0,0.40986132621765137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,12,128,0,1,float16,fp8,0,0.411077340443929
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,12,128,0,1,fp8,fp8,0,0.35679999987284344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,1,128,0,1,float16,float16,0,0.3720853328704834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,1,128,0,1,float16,fp8,0,0.3758293390274048
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,1,128,0,1,fp8,fp8,0,0.32205333312352497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,4,128,0,1,float16,fp8,0,0.3805493513743083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,12,128,0,1,float16,float16,0,0.2182719906171163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,2,128,0,1,float16,float16,0,0.3736960093180339
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,2,128,0,1,float16,fp8,0,0.3755733172098796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,2,128,0,1,fp8,fp8,0,0.32334399223327637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,4,128,0,1,float16,float16,0,0.377402663230896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,1,128,0,1,float16,fp8,0,0.2050079902013143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,12,4,128,0,1,fp8,fp8,0,0.33241599798202515
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,12,128,0,1,float16,fp8,0,0.22451200087865195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,12,128,0,1,fp8,fp8,0,0.18475200732549033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,1,128,0,1,float16,float16,0,0.20102399587631226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,1,128,0,1,fp8,fp8,0,0.16074132919311523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,2,128,0,1,float16,float16,0,0.20497600237528482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,2,128,0,1,float16,fp8,0,0.20523200432459512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,2,128,0,1,fp8,fp8,0,0.1647040049235026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,4,128,0,1,float16,float16,0,0.20658133427302042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,4,128,0,1,float16,fp8,0,0.20907199382781982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,12,4,128,0,1,fp8,fp8,0,0.16737600167592367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,12,128,0,1,float16,float16,0,0.11555733283360799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,12,128,0,1,float16,fp8,0,0.1179039975007375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,12,128,0,1,fp8,fp8,0,0.10517866412798564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,1,128,0,1,float16,float16,0,0.1064906617005666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,1,128,0,1,float16,fp8,0,0.10709866881370544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,1,128,0,1,fp8,fp8,0,0.09128533800443013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,2,128,0,1,float16,float16,0,0.10659733414649963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,2,128,0,1,float16,fp8,0,0.10807466506958008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,2,128,0,1,fp8,fp8,0,0.09373866518338521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,4,128,0,1,float16,float16,0,0.10864532987276714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,4,128,0,1,float16,fp8,0,0.11104533076286316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,1,128,0,1,float16,float16,0,0.06304533282915752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,12,4,128,0,1,fp8,fp8,0,0.09758933385213216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,12,128,0,1,float16,float16,0,0.06724266707897186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,2,128,0,1,float16,float16,0,0.06407466530799866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,12,128,0,1,float16,fp8,0,0.06905066470305125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,12,128,0,1,fp8,fp8,0,0.0613919993241628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,2,128,0,1,fp8,fp8,0,0.05514133473237356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,1,128,0,1,float16,fp8,0,0.06369600196679433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,1,128,0,1,fp8,fp8,0,0.05497066676616669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,2,128,0,1,float16,fp8,0,0.06465066472689311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,4,128,0,1,float16,float16,0,0.06469866633415222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,4,128,0,1,float16,fp8,0,0.0651039977868398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,12,4,128,0,1,fp8,fp8,0,0.05550399919350942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,12,128,0,1,float16,float16,0,0.048058668772379555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,12,128,0,1,float16,fp8,0,0.04850666721661886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,12,128,0,1,fp8,fp8,0,0.04234666625658671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,1,128,0,1,float16,float16,0,0.04753066599369049
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,1,128,0,1,fp8,fp8,0,0.04032533367474874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,1,128,0,1,float16,fp8,0,0.047210668524106346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,2,128,0,1,float16,float16,0,0.0467199981212616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,2,128,0,1,float16,fp8,0,0.04721599817276001
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,2,128,0,1,fp8,fp8,0,0.04160533348719279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,4,128,0,1,float16,float16,0,0.046767999728520714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,4,128,0,1,float16,fp8,0,0.047456001242001854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,12,4,128,0,1,fp8,fp8,0,0.041050667564074196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,12,1,128,0,1,float16,float16,0,1.4529013633728027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,12,1,128,0,1,float16,fp8,0,1.4654879570007324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,12,1,128,0,1,fp8,fp8,0,1.312496026357015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,12,2,128,0,1,float16,float16,0,1.4614027341206868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,12,2,128,0,1,float16,fp8,0,1.4553227424621582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,12,2,128,0,1,fp8,fp8,0,1.3806506792704265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,12,4,128,0,1,float16,float16,0,1.497312068939209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,12,128,0,1,float16,float16,0,0.8704213301340739
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,12,128,0,1,fp8,fp8,0,0.761023998260498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,12,128,0,1,float16,fp8,0,0.8448320229848226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,12,4,128,0,1,float16,fp8,0,1.4891573588053386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,12,4,128,0,1,fp8,fp8,0,1.4681013425191243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,1,128,0,1,float16,float16,0,0.7381172974904379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,1,128,0,1,float16,fp8,0,0.7465013662974039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,1,128,0,1,fp8,fp8,0,0.6588799953460693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,2,128,0,1,float16,float16,0,0.7500747044881185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,2,128,0,1,float16,fp8,0,0.7398239771525065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,2,128,0,1,fp8,fp8,0,0.6964426835378011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,4,128,0,1,float16,float16,0,0.7657066980997721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,4,128,0,1,float16,fp8,0,0.759173313776652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,12,128,0,1,float16,float16,0,0.4451520045598348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,12,4,128,0,1,fp8,fp8,0,0.7434399922688802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,12,128,0,1,float16,fp8,0,0.4359840154647827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,12,128,0,1,fp8,fp8,0,0.39603734016418457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,1,128,0,1,float16,float16,0,0.38264000415802
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,1,128,0,1,float16,fp8,0,0.38226131598154706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,1,128,0,1,fp8,fp8,0,0.34297601381937665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,2,128,0,1,float16,float16,0,0.38676265875498456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,2,128,0,1,float16,fp8,0,0.38682134946187335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,2,128,0,1,fp8,fp8,0,0.3447999954223633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,4,128,0,1,float16,float16,0,0.39986133575439453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,4,128,0,1,float16,fp8,0,0.3980319897333781
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,12,4,128,0,1,fp8,fp8,0,0.3824586470921834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,12,128,0,1,float16,float16,0,0.23413866758346558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,12,128,0,1,float16,fp8,0,0.2328266700108846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,12,128,0,1,fp8,fp8,0,0.19432532787322998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,1,128,0,1,float16,float16,0,0.20522133509318033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,1,128,0,1,float16,fp8,0,0.20487467447916666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,1,128,0,1,fp8,fp8,0,0.16420267025629678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,2,128,0,1,float16,float16,0,0.20908266305923462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,2,128,0,1,float16,fp8,0,0.20622400442759195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,2,128,0,1,fp8,fp8,0,0.16606932878494263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,4,128,0,1,float16,float16,0,0.2134880026181539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,4,128,0,1,float16,fp8,0,0.21077332894007364
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,12,4,128,0,1,fp8,fp8,0,0.18454933166503906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,12,128,0,1,float16,float16,0,0.12706133723258972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,12,128,0,1,float16,fp8,0,0.12524267037709555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,12,128,0,1,fp8,fp8,0,0.10486400127410889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,1,128,0,1,float16,float16,0,0.10700266559918721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,1,128,0,1,float16,fp8,0,0.10717866818110149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,1,128,0,1,fp8,fp8,0,0.0912000040213267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,2,128,0,1,float16,float16,0,0.10875200231870015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,2,128,0,1,float16,fp8,0,0.10943466424942017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,2,128,0,1,fp8,fp8,0,0.09145067135492961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,4,128,0,1,float16,float16,0,0.11282133062680562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,4,128,0,1,float16,fp8,0,0.11223999659220378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,12,4,128,0,1,fp8,fp8,0,0.09777067104975383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,12,128,0,1,float16,float16,0,0.06676800052324931
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,12,128,0,1,float16,fp8,0,0.0668213317791621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,12,128,0,1,fp8,fp8,0,0.058687999844551086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,1,128,0,1,float16,float16,0,0.058304001887639366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,1,128,0,1,float16,fp8,0,0.05860800047715505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,1,128,0,1,fp8,fp8,0,0.05036266644795736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,2,128,0,1,float16,float16,0,0.05922133227189382
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,2,128,0,1,float16,fp8,0,0.059279998143514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,2,128,0,1,fp8,fp8,0,0.05261866748332977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,4,128,0,1,float16,float16,0,0.062080000837643944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,4,128,0,1,float16,fp8,0,0.061434666315714516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,12,4,128,0,1,fp8,fp8,0,0.055674667159716286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,12,128,0,1,float16,float16,0,0.038586666186650596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,12,128,0,1,float16,fp8,0,0.03737599899371465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,12,128,0,1,fp8,fp8,0,0.03510933369398117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,1,128,0,1,float16,float16,0,0.0359253336985906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,1,128,0,1,float16,fp8,0,0.03514133393764496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,1,128,0,1,fp8,fp8,0,0.03190933416287104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,2,128,0,1,float16,float16,0,0.036176001032193504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,2,128,0,1,float16,fp8,0,0.03617066641648611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,2,128,0,1,fp8,fp8,0,0.03224000086386999
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,4,128,0,1,float16,float16,0,0.036464000741640724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,4,128,0,1,float16,fp8,0,0.037178667883078255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,12,4,128,0,1,fp8,fp8,0,0.03381866713364919
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,12,128,0,1,float16,float16,0,0.03257066756486893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,12,128,0,1,float16,fp8,0,0.033941333492596946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,12,128,0,1,fp8,fp8,0,0.02962133288383484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,1,128,0,1,float16,float16,0,0.03164800008138021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,1,128,0,1,float16,fp8,0,0.03173866619666418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,1,128,0,1,fp8,fp8,0,0.028042666614055634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,2,128,0,1,float16,float16,0,0.03267733256022135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,2,128,0,1,float16,fp8,0,0.03245333333810171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,2,128,0,1,fp8,fp8,0,0.027984000742435455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,4,128,0,1,float16,float16,0,0.03339733431736628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,4,128,0,1,float16,fp8,0,0.03342399994532267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,12,4,128,0,1,fp8,fp8,0,0.02826133370399475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,12,1,128,0,1,float16,float16,0,1.1239946683247883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,12,1,128,0,1,float16,fp8,0,1.130405346552531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,12,1,128,0,1,fp8,fp8,0,1.0708746910095215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,12,2,128,0,1,float16,float16,0,1.1459946632385254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,12,2,128,0,1,float16,fp8,0,1.1528586546579997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,12,2,128,0,1,fp8,fp8,0,1.1427466869354248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,12,4,128,0,1,float16,float16,0,1.24290132522583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,12,4,128,0,1,float16,fp8,0,1.2379146416982014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,12,128,0,1,float16,float16,0,0.706496000289917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,12,128,0,1,float16,fp8,0,0.6762026945749918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,12,4,128,0,1,fp8,fp8,0,1.2110346953074138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,12,128,0,1,fp8,fp8,0,0.6333973407745361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,1,128,0,1,float16,float16,0,0.5754293203353882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,1,128,0,1,float16,fp8,0,0.5721813440322876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,1,128,0,1,fp8,fp8,0,0.5281066497166952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,2,128,0,1,float16,float16,0,0.5857119957605997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,2,128,0,1,float16,fp8,0,0.5790186723073324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,2,128,0,1,fp8,fp8,0,0.567909320195516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,4,128,0,1,float16,float16,0,0.6182026863098145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,4,128,0,1,float16,fp8,0,0.6058506568272909
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,12,4,128,0,1,fp8,fp8,0,0.6160213152567545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,12,128,0,1,float16,float16,0,0.36103467146555585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,12,128,0,1,float16,fp8,0,0.35282667477925617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,12,128,0,1,fp8,fp8,0,0.326800008614858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,1,128,0,1,float16,float16,0,0.30050132671991986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,1,128,0,1,float16,fp8,0,0.296506663163503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,1,128,0,1,fp8,fp8,0,0.275002658367157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,2,128,0,1,float16,float16,0,0.3027893304824829
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,2,128,0,1,float16,fp8,0,0.30131200949350995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,2,128,0,1,fp8,fp8,0,0.28385066986083984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,4,128,0,1,float16,float16,0,0.3163359959920247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,4,128,0,1,float16,fp8,0,0.31255465745925903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,12,4,128,0,1,fp8,fp8,0,0.32019199927647907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,12,128,0,1,float16,float16,0,0.19353600343068442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,12,128,0,1,float16,fp8,0,0.1872746745745341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,12,128,0,1,fp8,fp8,0,0.1632319986820221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,1,128,0,1,float16,float16,0,0.1597493290901184
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,1,128,0,1,float16,fp8,0,0.16124799847602844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,1,128,0,1,fp8,fp8,0,0.13545599579811096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,2,128,0,1,float16,float16,0,0.16358400384585062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,2,128,0,1,float16,fp8,0,0.1630400021870931
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,2,128,0,1,fp8,fp8,0,0.13857600092887878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,4,128,0,1,float16,float16,0,0.16704533497492471
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,4,128,0,1,float16,fp8,0,0.16826132933298746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,12,4,128,0,1,fp8,fp8,0,0.15553067127863565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,12,128,0,1,float16,float16,0,0.10690666238466899
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,12,128,0,1,float16,fp8,0,0.10575999816258748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,12,128,0,1,fp8,fp8,0,0.08860799670219421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,1,128,0,1,float16,float16,0,0.08827199538548787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,1,128,0,1,float16,fp8,0,0.08692800005276997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,1,128,0,1,fp8,fp8,0,0.07534933090209961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,4,128,0,1,fp8,fp8,0,0.08318933347860973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,2,128,0,1,float16,float16,0,0.08889599641164143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,2,128,0,1,float16,fp8,0,0.0878613293170929
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,2,128,0,1,fp8,fp8,0,0.07713599999745686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,4,128,0,1,float16,float16,0,0.09227733810742696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,12,4,128,0,1,float16,fp8,0,0.09173333644866943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,12,128,0,1,float16,float16,0,0.058602665861447654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,12,128,0,1,float16,fp8,0,0.05621333420276642
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,12,128,0,1,fp8,fp8,0,0.053264002005259194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,1,128,0,1,float16,float16,0,0.048357332746187844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,1,128,0,1,float16,fp8,0,0.04946133494377136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,1,128,0,1,fp8,fp8,0,0.043653334180514015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,2,128,0,1,float16,float16,0,0.05041066805521647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,2,128,0,1,float16,fp8,0,0.05027733246485392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,2,128,0,1,fp8,fp8,0,0.04523199796676636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,12,128,0,1,float16,fp8,0,0.03405333310365677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,4,128,0,1,float16,float16,0,0.051872000098228455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,4,128,0,1,float16,fp8,0,0.05249066650867462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,12,4,128,0,1,fp8,fp8,0,0.04797866443792979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,12,128,0,1,float16,float16,0,0.03367999941110611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,12,128,0,1,fp8,fp8,0,0.03156800071398417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,1,128,0,1,float16,float16,0,0.03199466566244761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,1,128,0,1,float16,fp8,0,0.031850665807724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,1,128,0,1,fp8,fp8,0,0.02959466725587845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,2,128,0,1,float16,float16,0,0.03158933420976003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,12,128,0,1,float16,float16,0,0.026421333352724712
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,2,128,0,1,float16,fp8,0,0.03149333347876867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,2,128,0,1,fp8,fp8,0,0.029872000217437744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,4,128,0,1,float16,float16,0,0.03221333275238673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,4,128,0,1,float16,fp8,0,0.03347733368476232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,12,4,128,0,1,fp8,fp8,0,0.0314026673634847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,12,128,0,1,float16,fp8,0,0.026693334182103474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,12,128,0,1,fp8,fp8,0,0.02481066683928172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,1,128,0,1,float16,float16,0,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,1,128,0,1,float16,fp8,0,0.025653332471847534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,1,128,0,1,fp8,fp8,0,0.022677332162857056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,2,128,0,1,float16,float16,0,0.02571200082699458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,2,128,0,1,float16,fp8,0,0.025754667818546295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,4,128,0,1,float16,float16,0,0.025706666211287182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,2,128,0,1,fp8,fp8,0,0.023738667368888855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,4,128,0,1,float16,fp8,0,0.02605333427588145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,12,4,128,0,1,fp8,fp8,0,0.023872000475724537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,12,128,0,1,float16,float16,0,0.023525332411130268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,12,128,0,1,float16,fp8,0,0.02317333221435547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,12,128,0,1,fp8,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,1,128,0,1,float16,float16,0,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,1,128,0,1,float16,fp8,0,0.02266666789849599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,1,128,0,1,fp8,fp8,0,0.020773333807786305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,2,128,0,1,float16,fp8,0,0.021802666286627453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,2,128,0,1,float16,float16,0,0.022266666094462078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,2,128,0,1,fp8,fp8,0,0.020682666450738907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,4,128,0,1,float16,float16,0,0.022554665803909302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,4,128,0,1,float16,fp8,0,0.02306666721900304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,12,4,128,0,1,fp8,fp8,0,0.020629333953062694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,12,1,128,0,1,float16,float16,0,0.4857493241628011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,12,1,128,0,1,float16,fp8,0,0.48605867226918537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,12,2,128,0,1,float16,fp8,0,0.48797865708669025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,12,1,128,0,1,fp8,fp8,0,0.41651201248168945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,12,2,128,0,1,float16,float16,0,0.49187731742858887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,12,2,128,0,1,fp8,fp8,0,0.4592053492863973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,12,4,128,0,1,float16,float16,0,0.5250080029169718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,12,4,128,0,1,float16,fp8,0,0.5045226812362671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,1,128,0,1,float16,float16,0,0.25092800458272296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,12,4,128,0,1,fp8,fp8,0,0.5056213140487671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,12,128,0,1,float16,float16,0,0.3197439908981323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,12,128,0,1,float16,fp8,0,0.3098026712735494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,12,128,0,1,fp8,fp8,0,0.2784159978230794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,1,128,0,1,float16,fp8,0,0.250383992989858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,1,128,0,1,fp8,fp8,0,0.21825599670410156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,4,128,0,1,float16,fp8,0,0.2680906653404236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,2,128,0,1,float16,float16,0,0.2579840024312337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,2,128,0,1,float16,fp8,0,0.25416000684102374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,2,128,0,1,fp8,fp8,0,0.23270932833353677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,4,128,0,1,float16,float16,0,0.26977066198984784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,12,4,128,0,1,fp8,fp8,0,0.26081599791844684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,12,128,0,1,float16,float16,0,0.17041067282358804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,12,128,0,1,float16,fp8,0,0.1653279960155487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,12,128,0,1,fp8,fp8,0,0.14799466729164124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,1,128,0,1,float16,float16,0,0.13673599561055502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,1,128,0,1,float16,fp8,0,0.13568533460299173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,1,128,0,1,fp8,fp8,0,0.11903466780980428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,2,128,0,1,float16,float16,0,0.13983999689420065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,2,128,0,1,float16,fp8,0,0.13730133573214212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,2,128,0,1,fp8,fp8,0,0.12382400035858154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,4,128,0,1,float16,float16,0,0.1469386617342631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,4,128,0,1,float16,fp8,0,0.14478933811187744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,12,128,0,1,float16,float16,0,0.09777599573135376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,12,4,128,0,1,fp8,fp8,0,0.1381066640218099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,12,128,0,1,float16,fp8,0,0.09473599990208943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,12,128,0,1,fp8,fp8,0,0.08185600241025288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,1,128,0,1,float16,float16,0,0.07894933223724365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,1,128,0,1,float16,fp8,0,0.07680533329645793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,1,128,0,1,fp8,fp8,0,0.06838933130105336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,2,128,0,1,float16,float16,0,0.07945600152015686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,2,128,0,1,float16,fp8,0,0.07904000083605449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,2,128,0,1,fp8,fp8,0,0.06911466519037883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,4,128,0,1,float16,float16,0,0.08334400256474812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,4,128,0,1,float16,fp8,0,0.08245866497357686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,12,4,128,0,1,fp8,fp8,0,0.07524266839027405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,12,128,0,1,float16,float16,0,0.0525546669960022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,12,128,0,1,float16,fp8,0,0.05227733155091604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,12,128,0,1,fp8,fp8,0,0.048911998669306435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,1,128,0,1,float16,float16,0,0.043552001317342125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,1,128,0,1,float16,fp8,0,0.04389866689840952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,1,128,0,1,fp8,fp8,0,0.03823466598987579
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,2,128,0,1,float16,float16,0,0.04543999830881754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,2,128,0,1,float16,fp8,0,0.04493333399295807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,2,128,0,1,fp8,fp8,0,0.039706667264302574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,4,128,0,1,float16,float16,0,0.04659733176231384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,4,128,0,1,float16,fp8,0,0.047653332352638245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,12,4,128,0,1,fp8,fp8,0,0.043050666650136314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,12,128,0,1,float16,float16,0,0.03252800057331721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,12,128,0,1,float16,fp8,0,0.032101333141326904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,12,128,0,1,fp8,fp8,0,0.03161599983771642
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,1,128,0,1,float16,float16,0,0.030037333567937214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,1,128,0,1,float16,fp8,0,0.030005333324273426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,1,128,0,1,fp8,fp8,0,0.027813332776228588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,2,128,0,1,float16,float16,0,0.029909332593282063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,2,128,0,1,float16,fp8,0,0.031002665559450786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,2,128,0,1,fp8,fp8,0,0.027989332874615986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,4,128,0,1,float16,float16,0,0.03147733211517334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,4,128,0,1,float16,fp8,0,0.031045332551002502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,12,4,128,0,1,fp8,fp8,0,0.029194665451844532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,12,128,0,1,float16,float16,0,0.023951999843120575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,12,128,0,1,float16,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,12,128,0,1,fp8,fp8,0,0.022789334257443745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,2,128,0,1,float16,fp8,0,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,1,128,0,1,float16,float16,0,0.022090665996074677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,4,128,0,1,float16,float16,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,1,128,0,1,float16,fp8,0,0.02253866692384084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,1,128,0,1,fp8,fp8,0,0.020879998803138733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,2,128,0,1,float16,float16,0,0.022410665949185688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,2,128,0,1,fp8,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,4,128,0,1,float16,fp8,0,0.023557332654794056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,1,128,0,1,float16,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,12,4,128,0,1,fp8,fp8,0,0.02223466585079829
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,12,128,0,1,float16,float16,0,0.019754666835069656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,12,128,0,1,float16,fp8,0,0.02032533288002014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,12,128,0,1,fp8,fp8,0,0.01823466643691063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,1,128,0,1,float16,float16,0,0.018768000106016796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,2,128,0,1,float16,float16,0,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,4,128,0,1,float16,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,4,128,0,1,fp8,fp8,0,0.017727999637524288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,1,128,0,1,fp8,fp8,0,0.018496000518401463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,2,128,0,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,2,128,0,1,fp8,fp8,0,0.018085333208243053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,12,4,128,0,1,float16,float16,0,0.01998399943113327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,12,128,0,1,float16,float16,0,0.018511999398469925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,12,128,0,1,float16,fp8,0,0.01871466636657715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,12,128,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,1,128,0,1,float16,float16,0,0.017925333231687546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,1,128,0,1,float16,fp8,0,0.018677332748969395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,1,128,0,1,fp8,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,2,128,0,1,float16,float16,0,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,2,128,0,1,float16,fp8,0,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,2,128,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,4,128,0,1,float16,float16,0,0.017690667261679966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,12,1,128,0,1,float16,float16,0,0.235642671585083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,4,128,0,1,float16,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,12,4,128,0,1,fp8,fp8,0,0.017445333302021027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,12,1,128,0,1,float16,fp8,0,0.23362666368484497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,12,1,128,0,1,fp8,fp8,0,0.21618133783340454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,12,2,128,0,1,float16,fp8,0,0.23758933941523233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,12,4,128,0,1,fp8,fp8,0,0.2648959954579671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,12,2,128,0,1,float16,float16,0,0.23959465821584067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,12,2,128,0,1,fp8,fp8,0,0.2370133399963379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,12,4,128,0,1,float16,float16,0,0.24980799357096353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,12,4,128,0,1,float16,fp8,0,0.24823999404907227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,12,128,0,1,float16,float16,0,0.17171200116475424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,12,128,0,1,float16,fp8,0,0.16149333119392395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,2,128,0,1,float16,fp8,0,0.1306880017121633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,12,128,0,1,fp8,fp8,0,0.14483732978502908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,1,128,0,1,float16,float16,0,0.12773866454760233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,1,128,0,1,float16,fp8,0,0.1288266678651174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,4,128,0,1,float16,fp8,0,0.13845866918563843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,12,128,0,1,float16,float16,0,0.092549333969752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,1,128,0,1,fp8,fp8,0,0.11782933274904887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,2,128,0,1,float16,float16,0,0.13024533788363138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,2,128,0,1,fp8,fp8,0,0.12379200259844463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,4,128,0,1,float16,float16,0,0.13690666357676187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,12,4,128,0,1,fp8,fp8,0,0.1365386644999186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,12,128,0,1,float16,fp8,0,0.09080533186594646
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,1,128,0,1,float16,float16,0,0.07083733379840851
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,12,128,0,1,fp8,fp8,0,0.08171733220418294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,1,128,0,1,float16,fp8,0,0.0714026689529419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,1,128,0,1,fp8,fp8,0,0.06653333206971486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,2,128,0,1,float16,float16,0,0.07357866565386455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,2,128,0,1,float16,fp8,0,0.07311999797821045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,2,128,0,1,fp8,fp8,0,0.07041599849859874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,4,128,0,1,float16,float16,0,0.07745066781838734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,4,128,0,1,float16,fp8,0,0.07638933261235555
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,12,4,128,0,1,fp8,fp8,0,0.07610666751861572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,12,128,0,1,float16,float16,0,0.050767997900644936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,12,128,0,1,float16,fp8,0,0.049973333875338234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,12,128,0,1,fp8,fp8,0,0.04752533137798309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,1,128,0,1,float16,float16,0,0.04162133236726125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,1,128,0,1,float16,fp8,0,0.04147200038035711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,1,128,0,1,fp8,fp8,0,0.039120001097520195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,2,128,0,1,float16,float16,0,0.042261332273483276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,2,128,0,1,float16,fp8,0,0.042853335539499916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,2,128,0,1,fp8,fp8,0,0.04038933416207632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,4,128,0,1,float16,float16,0,0.04427200059096018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,1,128,0,1,float16,float16,0,0.03028800090154012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,4,128,0,1,float16,fp8,0,0.04528533418973287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,12,4,128,0,1,fp8,fp8,0,0.043920000394185386
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,12,128,0,1,float16,float16,0,0.0321066677570343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,12,128,0,1,float16,fp8,0,0.03141866624355316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,12,128,0,1,fp8,fp8,0,0.029696000119050343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,1,128,0,1,float16,fp8,0,0.029535998900731403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,1,128,0,1,fp8,fp8,0,0.027258666853109997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,2,128,0,1,float16,float16,0,0.03089066594839096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,2,128,0,1,float16,fp8,0,0.030741333961486816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,2,128,0,1,fp8,fp8,0,0.02703999976317088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,4,128,0,1,float16,float16,0,0.030879999200503033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,12,128,0,1,fp8,fp8,0,0.022895999252796173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,4,128,0,1,float16,fp8,0,0.03140799949566523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,12,4,128,0,1,fp8,fp8,0,0.028783999383449554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,12,128,0,1,float16,float16,0,0.022229333718617756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,12,128,0,1,float16,fp8,0,0.02367999901374181
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,1,128,0,1,float16,float16,0,0.021338666478792827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,1,128,0,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,1,128,0,1,fp8,fp8,0,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,2,128,0,1,float16,float16,0,0.021397332350413006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,2,128,0,1,float16,fp8,0,0.021402666966120403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,2,128,0,1,fp8,fp8,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,12,128,0,1,float16,fp8,0,0.01825599993268649
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,4,128,0,1,float16,float16,0,0.02266666789849599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,4,128,0,1,float16,fp8,0,0.022853332261244457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,12,4,128,0,1,fp8,fp8,0,0.021727999051411945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,12,128,0,1,float16,float16,0,0.017952000101407368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,12,128,0,1,fp8,fp8,0,0.01829333355029424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,1,128,0,1,float16,float16,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,1,128,0,1,float16,fp8,0,0.017877332866191864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,1,128,0,1,fp8,fp8,0,0.018650667121013004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,2,128,0,1,float16,float16,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,2,128,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,2,128,0,1,fp8,fp8,0,0.018288000176350277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,4,128,0,1,float16,float16,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,4,128,0,1,float16,fp8,0,0.017498667041460674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,12,4,128,0,1,fp8,fp8,0,0.018538666268189747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,12,128,0,1,float16,float16,0,0.01770666614174843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,12,128,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,12,128,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,1,128,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,2,128,0,1,fp8,fp8,0,0.017621333400408428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,1,128,0,1,float16,fp8,0,0.016437333077192307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,1,128,0,1,fp8,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,2,128,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,2,128,0,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,4,128,0,1,float16,float16,0,0.0162773331006368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,4,128,0,1,float16,fp8,0,0.01752000053723653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,12,4,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,12,128,0,1,float16,float16,0,0.015546667079130808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,12,128,0,1,float16,fp8,0,0.01607999950647354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,12,128,0,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,1,128,0,1,float16,float16,0,0.01637866720557213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,1,128,0,1,float16,fp8,0,0.016528000434239704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,1,128,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,2,128,0,1,float16,float16,0,0.015685333559910457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,2,128,0,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,2,128,0,1,fp8,fp8,0,0.01653333380818367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,4,128,0,1,float16,float16,0,0.016143999993801117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,4,128,0,1,float16,fp8,0,0.016762666404247284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,12,4,128,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,12,1,128,0,1,float16,fp8,0,0.14509866635004678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,12,1,128,0,1,float16,float16,0,0.14524799585342407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,12,1,128,0,1,fp8,fp8,0,0.16105066736539206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,12,2,128,0,1,float16,float16,0,0.14645866552988687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,12,2,128,0,1,float16,fp8,0,0.14590400457382202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,12,2,128,0,1,fp8,fp8,0,0.16901866594950357
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,12,4,128,0,1,float16,float16,0,0.1529813309510549
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,12,4,128,0,1,float16,fp8,0,0.15240533153216043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,12,128,0,1,float16,fp8,0,0.09431999921798706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,12,4,128,0,1,fp8,fp8,0,0.1808799902598063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,12,128,0,1,float16,float16,0,0.0981119970480601
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,12,128,0,1,fp8,fp8,0,0.10169066985448201
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,1,128,0,1,float16,float16,0,0.07773866752783458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,1,128,0,1,float16,fp8,0,0.07909333209196727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,1,128,0,1,fp8,fp8,0,0.09076799949010213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,2,128,0,1,float16,float16,0,0.08080000181992848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,2,128,0,1,float16,fp8,0,0.07914666831493378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,2,128,0,1,fp8,fp8,0,0.09341866771380107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,4,128,0,1,float16,float16,0,0.08354133367538452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,4,128,0,1,float16,fp8,0,0.08412266770998637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,12,4,128,0,1,fp8,fp8,0,0.09759466846783955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,12,128,0,1,float16,float16,0,0.053861334919929504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,12,128,0,1,float16,fp8,0,0.05337599913279215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,12,128,0,1,fp8,fp8,0,0.05972266693909963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,1,128,0,1,float16,float16,0,0.04401599864164988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,1,128,0,1,float16,fp8,0,0.044826666514078774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,1,128,0,1,fp8,fp8,0,0.05075199902057648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,2,128,0,1,float16,float16,0,0.0450133333603541
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,2,128,0,1,float16,fp8,0,0.045125335454940796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,2,128,0,1,fp8,fp8,0,0.053039997816085815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,4,128,0,1,float16,float16,0,0.048021331429481506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,1,128,0,1,float16,float16,0,0.02829333394765854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,4,128,0,1,float16,fp8,0,0.04790399968624115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,12,4,128,0,1,fp8,fp8,0,0.05470400055249532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,12,128,0,1,float16,float16,0,0.030933332939942677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,12,128,0,1,float16,fp8,0,0.03128000100453695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,12,128,0,1,fp8,fp8,0,0.03365333378314972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,1,128,0,1,float16,fp8,0,0.028746667007605236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,1,128,0,1,fp8,fp8,0,0.03177600105603536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,2,128,0,1,float16,float16,0,0.028517333169778187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,2,128,0,1,float16,fp8,0,0.02920000006755193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,2,128,0,1,fp8,fp8,0,0.031701333820819855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,4,128,0,1,float16,float16,0,0.029824001093705494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,4,128,0,1,float16,fp8,0,0.03054933249950409
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,12,4,128,0,1,fp8,fp8,0,0.03293866664171219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,12,128,0,1,float16,float16,0,0.02436800052722295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,12,128,0,1,float16,fp8,0,0.02474133421977361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,12,128,0,1,fp8,fp8,0,0.02566933383544286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,1,128,0,1,float16,float16,0,0.023306667804718018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,1,128,0,1,float16,fp8,0,0.02292266736427943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,1,128,0,1,fp8,fp8,0,0.024725332856178284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,2,128,0,1,float16,float16,0,0.02364266663789749
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,2,128,0,1,float16,fp8,0,0.0236160010099411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,2,128,0,1,fp8,fp8,0,0.025263999899228413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,4,128,0,1,float16,float16,0,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,4,128,0,1,float16,fp8,0,0.02366400013367335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,12,4,128,0,1,fp8,fp8,0,0.025706666211287182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,1,128,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,12,128,0,1,float16,float16,0,0.017509333789348602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,12,128,0,1,float16,fp8,0,0.017738666385412216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,12,128,0,1,fp8,fp8,0,0.01807466646035512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,1,128,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,1,128,0,1,fp8,fp8,0,0.01773333301146825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,2,128,0,1,float16,float16,0,0.01669866715868314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,2,128,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,2,128,0,1,fp8,fp8,0,0.018218666315078735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,4,128,0,1,float16,float16,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,4,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,12,4,128,0,1,fp8,fp8,0,0.018351999421914417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,12,128,0,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,12,128,0,1,float16,fp8,0,0.015610666324694952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,12,128,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,1,128,0,1,float16,float16,0,0.015893333901961643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,1,128,0,1,float16,fp8,0,0.016250666230916977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,4,128,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,1,128,0,1,fp8,fp8,0,0.016549333930015564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,2,128,0,1,float16,float16,0,0.016010666886965435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,2,128,0,1,float16,fp8,0,0.015487999965747198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,2,128,0,1,fp8,fp8,0,0.016528000434239704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,4,128,0,1,float16,fp8,0,0.016037333756685257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,12,4,128,0,1,fp8,fp8,0,0.017610666652520496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,12,128,0,1,float16,fp8,0,0.015498666713635126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,12,128,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,12,128,0,1,fp8,fp8,0,0.016085332880417507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,1,128,0,1,float16,float16,0,0.01575999955336253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,1,128,0,1,fp8,fp8,0,0.01597333326935768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,1,128,0,1,float16,fp8,0,0.01587733378012975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,2,128,0,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,2,128,0,1,float16,fp8,0,0.015802666544914246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,2,128,0,1,fp8,fp8,0,0.0163680004576842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,4,128,0,1,float16,float16,0,0.015466666469971338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,4,128,0,1,float16,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,12,4,128,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,12,128,0,1,float16,float16,0,0.014271999398867289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,12,128,0,1,float16,fp8,0,0.015658666690190632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,12,128,0,1,fp8,fp8,0,0.016208000481128693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,1,128,0,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,1,128,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,1,128,0,1,fp8,fp8,0,0.01674666628241539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,4,128,0,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,2,128,0,1,float16,float16,0,0.014746667196353277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,2,128,0,1,float16,fp8,0,0.015504000087579092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,2,128,0,1,fp8,fp8,0,0.01626666635274887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,4,128,0,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,12,4,128,0,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,12,1,128,0,1,float16,float16,0,0.10126933455467224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,12,1,128,0,1,float16,fp8,0,0.10240532954533894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,12,1,128,0,1,fp8,fp8,0,0.13083733121554056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,12,2,128,0,1,float16,float16,0,0.10368000467618306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,12,2,128,0,1,float16,fp8,0,0.10472533106803894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,12,2,128,0,1,fp8,fp8,0,0.13471999764442444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,12,4,128,0,1,float16,float16,0,0.10823999842007954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,12,4,128,0,1,float16,fp8,0,0.10872532924016316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,12,4,128,0,1,fp8,fp8,0,0.13913066188494363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,12,128,0,1,float16,float16,0,0.06678933401902516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,12,128,0,1,float16,fp8,0,0.06524266799290974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,12,128,0,1,fp8,fp8,0,0.07981333136558533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,1,128,0,1,float16,float16,0,0.057589332262674965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,1,128,0,1,float16,fp8,0,0.0561653325955073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,4,128,0,1,float16,fp8,0,0.05907199780146281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,1,128,0,1,fp8,fp8,0,0.07143466671307881
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,2,128,0,1,float16,float16,0,0.05801600217819214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,2,128,0,1,float16,fp8,0,0.056874667604764305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,2,128,0,1,fp8,fp8,0,0.0746666689713796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,4,128,0,1,float16,float16,0,0.05938666562239329
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,12,4,128,0,1,fp8,fp8,0,0.0784746656815211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,12,128,0,1,float16,float16,0,0.03605866680542628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,12,128,0,1,float16,fp8,0,0.03565866748491923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,12,128,0,1,fp8,fp8,0,0.04474133253097534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,1,128,0,1,float16,float16,0,0.034485332667827606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,1,128,0,1,float16,fp8,0,0.033770665526390076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,1,128,0,1,fp8,fp8,0,0.04243200023969015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,2,128,0,1,float16,float16,0,0.0347626656293869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,2,128,0,1,float16,fp8,0,0.03498133271932602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,2,128,0,1,fp8,fp8,0,0.04214400053024292
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,4,128,0,1,float16,float16,0,0.03499733408292135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,4,128,0,1,float16,fp8,0,0.03514133393764496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,12,4,128,0,1,fp8,fp8,0,0.04460266729195913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,12,128,0,1,float16,float16,0,0.024720000723997753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,12,128,0,1,float16,fp8,0,0.02552533398071925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,12,128,0,1,fp8,fp8,0,0.03014400104681651
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,1,128,0,1,float16,float16,0,0.0242399995525678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,1,128,0,1,float16,fp8,0,0.023402666052182514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,1,128,0,1,fp8,fp8,0,0.02845866729815801
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,2,128,0,1,float16,float16,0,0.024143998821576435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,2,128,0,1,float16,fp8,0,0.024143998821576435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,2,128,0,1,fp8,fp8,0,0.029050665597120922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,4,128,0,1,float16,float16,0,0.024608001112937927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,4,128,0,1,float16,fp8,0,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,12,4,128,0,1,fp8,fp8,0,0.0303413321574529
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,12,128,0,1,float16,float16,0,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,12,128,0,1,float16,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,12,128,0,1,fp8,fp8,0,0.02293866624434789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,1,128,0,1,float16,float16,0,0.01851733277241389
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,1,128,0,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,1,128,0,1,fp8,fp8,0,0.022005334496498108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,2,128,0,1,float16,float16,0,0.018805333723624546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,2,128,0,1,float16,fp8,0,0.01869333287080129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,2,128,0,1,fp8,fp8,0,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,12,128,0,1,float16,fp8,0,0.01624533285697301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,4,128,0,1,float16,float16,0,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,4,128,0,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,12,4,128,0,1,fp8,fp8,0,0.022416000564893086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,12,128,0,1,float16,float16,0,0.016122666498025257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,12,128,0,1,fp8,fp8,0,0.0174346665541331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,1,128,0,1,float16,float16,0,0.014741333822409311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,1,128,0,1,float16,fp8,0,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,1,128,0,1,fp8,fp8,0,0.017477333545684814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,2,128,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,2,128,0,1,float16,fp8,0,0.01587733378012975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,2,128,0,1,fp8,fp8,0,0.017850667238235474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,4,128,0,1,float16,float16,0,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,4,128,0,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,12,4,128,0,1,fp8,fp8,0,0.018538666268189747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,12,128,0,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,12,128,0,1,float16,fp8,0,0.014709333578745524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,12,128,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,1,128,0,1,float16,float16,0,0.01471466695268949
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,1,128,0,1,float16,fp8,0,0.01443733274936676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,1,128,0,1,fp8,fp8,0,0.016458666572968166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,2,128,0,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,2,128,0,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,2,128,0,1,fp8,fp8,0,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,4,128,0,1,float16,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,4,128,0,1,float16,float16,0,0.01440000037352244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,12,4,128,0,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,1,128,0,1,fp8,fp8,0,0.01646399994691213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,12,128,0,1,float16,float16,0,0.013669333110253016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,2,128,0,1,float16,fp8,0,0.014650666465361914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,12,128,0,1,float16,fp8,0,0.014495999862750372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,12,128,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,1,128,0,1,float16,float16,0,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,1,128,0,1,float16,fp8,0,0.014485333114862442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,2,128,0,1,float16,float16,0,0.013882666826248169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,12,128,0,1,float16,fp8,0,0.01452800010641416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,2,128,0,1,fp8,fp8,0,0.01640533283352852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,4,128,0,1,float16,float16,0,0.013936000565687815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,4,128,0,1,float16,fp8,0,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,12,4,128,0,1,fp8,fp8,0,0.01621333385507266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,12,128,0,1,float16,float16,0,0.014197333405415217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,12,128,0,1,fp8,fp8,0,0.01632000009218852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,1,128,0,1,float16,float16,0,0.013749333719412485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,1,128,0,1,float16,fp8,0,0.014384000251690546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,1,128,0,1,fp8,fp8,0,0.016623999923467636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,4,128,0,1,fp8,fp8,0,0.016271999726692837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,2,128,0,1,float16,float16,0,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,2,128,0,1,float16,fp8,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,2,128,0,1,fp8,fp8,0,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,4,128,0,1,float16,float16,0,0.014250667144854864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,12,4,128,0,1,float16,fp8,0,0.01403733342885971
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,12,1,128,0,1,float16,float16,0,0.08442133665084839
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,12,1,128,0,1,float16,fp8,0,0.08425066868464152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,12,1,128,0,1,fp8,fp8,0,0.11480533083279927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,12,2,128,0,1,float16,float16,0,0.08400000135103862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,12,2,128,0,1,float16,fp8,0,0.08459200461705525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,12,2,128,0,1,fp8,fp8,0,0.11571733156840007
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,12,4,128,0,1,float16,float16,0,0.08741866548856099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,12,4,128,0,1,float16,fp8,0,0.08568533261617024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,12,128,0,1,float16,float16,0,0.050250664353370667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,12,4,128,0,1,fp8,fp8,0,0.12044266859690349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,12,128,0,1,float16,fp8,0,0.0498986691236496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,12,128,0,1,fp8,fp8,0,0.06688533226648967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,1,128,0,1,float16,float16,0,0.0480320006608963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,1,128,0,1,float16,fp8,0,0.04754666487375895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,1,128,0,1,fp8,fp8,0,0.06525866687297821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,2,128,0,1,float16,float16,0,0.04781333108743032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,2,128,0,1,float16,fp8,0,0.048469334840774536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,2,128,0,1,fp8,fp8,0,0.06458133459091187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,12,128,0,1,fp8,fp8,0,0.04114133367935816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,4,128,0,1,float16,float16,0,0.0491946687301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,4,128,0,1,float16,fp8,0,0.0483893354733785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,12,4,128,0,1,fp8,fp8,0,0.06566933294137318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,12,128,0,1,float16,float16,0,0.0315786674618721
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,12,128,0,1,float16,fp8,0,0.032218667368094124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,1,128,0,1,float16,float16,0,0.03081600119670232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,1,128,0,1,fp8,fp8,0,0.03986666599909464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,1,128,0,1,float16,fp8,0,0.03146133323510488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,2,128,0,1,float16,float16,0,0.03128000100453695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,2,128,0,1,float16,fp8,0,0.030581332743167877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,2,128,0,1,fp8,fp8,0,0.04036800066630045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,4,128,0,1,float16,float16,0,0.031093334158261616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,4,128,0,1,float16,fp8,0,0.03218133250872294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,1,128,0,1,float16,fp8,0,0.021541332205136616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,12,4,128,0,1,fp8,fp8,0,0.041008000572522484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,12,128,0,1,float16,float16,0,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,12,128,0,1,float16,fp8,0,0.0216799999276797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,12,128,0,1,fp8,fp8,0,0.02666666607062022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,1,128,0,1,float16,float16,0,0.02021866664290428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,1,128,0,1,fp8,fp8,0,0.025781333446502686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,2,128,0,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,2,128,0,1,float16,fp8,0,0.021850667893886566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,2,128,0,1,fp8,fp8,0,0.025797332326571148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,4,128,0,1,float16,float16,0,0.020799999435742695
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,4,128,0,1,float16,fp8,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,12,4,128,0,1,fp8,fp8,0,0.025962665677070618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,12,128,0,1,float16,float16,0,0.017786666750907898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,12,128,0,1,float16,fp8,0,0.01842133328318596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,12,128,0,1,fp8,fp8,0,0.021850667893886566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,1,128,0,1,float16,float16,0,0.017557332913080852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,1,128,0,1,float16,fp8,0,0.017818666994571686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,1,128,0,1,fp8,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,4,128,0,1,float16,fp8,0,0.01828266680240631
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,2,128,0,1,float16,float16,0,0.01743999992807706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,2,128,0,1,float16,fp8,0,0.017845333864291508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,2,128,0,1,fp8,fp8,0,0.021935999393463135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,4,128,0,1,float16,float16,0,0.017973333597183228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,12,4,128,0,1,fp8,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,12,128,0,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,12,128,0,1,float16,fp8,0,0.01570133368174235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,12,128,0,1,fp8,fp8,0,0.017429333180189133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,1,128,0,1,float16,float16,0,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,1,128,0,1,float16,fp8,0,0.015957333147525787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,1,128,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,2,128,0,1,float16,float16,0,0.01434133326013883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,2,128,0,1,float16,fp8,0,0.015589332828919092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,2,128,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,4,128,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,4,128,0,1,float16,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,12,4,128,0,1,fp8,fp8,0,0.017711999515692394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,12,128,0,1,float16,float16,0,0.01452800010641416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,12,128,0,1,float16,fp8,0,0.0143306665122509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,12,128,0,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,1,128,0,1,float16,float16,0,0.014080000420411428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,1,128,0,1,float16,fp8,0,0.014479999740918478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,1,128,0,1,fp8,fp8,0,0.015791999797026317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,2,128,0,1,float16,float16,0,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,2,128,0,1,float16,fp8,0,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,2,128,0,1,fp8,fp8,0,0.01607999950647354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,4,128,0,1,float16,float16,0,0.01402666668097178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,4,128,0,1,float16,fp8,0,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,12,4,128,0,1,fp8,fp8,0,0.016010666886965435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,12,128,0,1,float16,float16,0,0.01360000049074491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,12,128,0,1,float16,fp8,0,0.01461333284775416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,12,128,0,1,fp8,fp8,0,0.016623999923467636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,1,128,0,1,float16,float16,0,0.013717333475748697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,1,128,0,1,float16,fp8,0,0.014138666292031607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,1,128,0,1,fp8,fp8,0,0.016565332810084026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,2,128,0,1,float16,float16,0,0.013370666652917862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,2,128,0,1,float16,fp8,0,0.014256000518798828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,2,128,0,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,4,128,0,1,float16,float16,0,0.01441066712141037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,4,128,0,1,float16,fp8,0,0.014282666146755219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,1,128,0,1,float16,fp8,0,0.014709333578745524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,12,4,128,0,1,fp8,fp8,0,0.016693333784739178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,12,128,0,1,float16,float16,0,0.014053333550691605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,12,128,0,1,float16,fp8,0,0.013834666460752487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,12,128,0,1,fp8,fp8,0,0.016250666230916977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,1,128,0,1,float16,float16,0,0.013717333475748697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,1,128,0,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,2,128,0,1,float16,float16,0,0.013610667238632837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,2,128,0,1,float16,fp8,0,0.01379199946920077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,2,128,0,1,fp8,fp8,0,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,4,128,0,1,float16,float16,0,0.014085333794355392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,4,128,0,1,float16,fp8,0,0.013733333597580591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,12,4,128,0,1,fp8,fp8,0,0.016106666376193363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,8,1,128,0,1,fp8,fp8,0,5.617727915445964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,8,2,128,0,1,fp8,fp8,0,5.6922562917073565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,8,1,128,0,1,float16,float16,0,8.379002888997396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,8,1,128,0,1,float16,fp8,0,8.379034678141275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,8,2,128,0,1,float16,float16,0,8.294431686401367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,8,2,128,0,1,float16,fp8,0,8.541237513224283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,8,4,128,0,1,float16,float16,0,8.443685531616211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,8,4,128,0,1,float16,fp8,0,8.490554809570312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,8,128,0,1,float16,float16,0,4.303658803304036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,8,4,128,0,1,fp8,fp8,0,5.678415934244792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,8,128,0,1,fp8,fp8,0,2.8710400263468423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,1,128,0,1,fp8,fp8,0,2.782522519429525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,8,128,0,1,float16,fp8,0,4.307749430338542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,1,128,0,1,float16,float16,0,4.098304112752278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,1,128,0,1,float16,fp8,0,4.3320051829020185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,2,128,0,1,float16,float16,0,4.291482607523601
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,2,128,0,1,fp8,fp8,0,2.8231894175211587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,8,128,0,1,float16,float16,0,2.1477333704630532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,8,128,0,1,fp8,fp8,0,1.5267252922058105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,2,128,0,1,float16,fp8,0,4.350037256876628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,4,128,0,1,fp8,fp8,0,2.8175840377807617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,8,128,0,1,float16,fp8,0,2.179621378580729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,4,128,0,1,float16,float16,0,4.32093874613444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,8,4,128,0,1,float16,fp8,0,4.280799865722656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,1,128,0,1,float16,float16,0,2.0803893407185874
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,1,128,0,1,fp8,fp8,0,1.4124266306559246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,1,128,0,1,float16,fp8,0,2.110383987426758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,2,128,0,1,float16,float16,0,2.188613255818685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,2,128,0,1,float16,fp8,0,2.093754609425863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,2,128,0,1,fp8,fp8,0,1.4867946306864421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,4,128,0,1,fp8,fp8,0,1.4499146143595378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,8,128,0,1,float16,float16,0,1.1190400123596191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,4,128,0,1,float16,float16,0,2.058682600657145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,8,128,0,1,float16,fp8,0,1.1287519931793213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,8,4,128,0,1,float16,fp8,0,2.1024853388468423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,8,128,0,1,fp8,fp8,0,0.7387680212656657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,1,128,0,1,float16,float16,0,1.1222346623738606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,1,128,0,1,float16,fp8,0,1.111397345860799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,1,128,0,1,fp8,fp8,0,0.7001173496246338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,2,128,0,1,fp8,fp8,0,0.7113973299662272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,2,128,0,1,float16,float16,0,1.117685317993164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,2,128,0,1,float16,fp8,0,1.1016213099161785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,4,128,0,1,float16,float16,0,1.122933308283488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,4,128,0,1,fp8,fp8,0,0.7064106464385986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,8,4,128,0,1,float16,fp8,0,1.1191199620564778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,8,1,128,0,1,fp8,fp8,0,3.27945613861084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,8,1,128,0,1,float16,float16,0,4.926218668619792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,8,2,128,0,1,fp8,fp8,0,3.321455955505371
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,8,1,128,0,1,float16,fp8,0,4.876559893290202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,8,2,128,0,1,float16,float16,0,4.894000053405762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,8,2,128,0,1,float16,fp8,0,4.916714668273926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,8,4,128,0,1,float16,float16,0,4.967813491821289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,8,128,0,1,fp8,fp8,0,1.672101338704427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,8,128,0,1,float16,float16,0,2.463215986887614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,8,128,0,1,float16,fp8,0,2.533573309580485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,8,4,128,0,1,fp8,fp8,0,3.3251307805379233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,1,128,0,1,fp8,fp8,0,1.6174079577128093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,1,128,0,1,float16,float16,0,2.3940693537394204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,1,128,0,1,float16,fp8,0,2.3895519574483237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,8,4,128,0,1,float16,fp8,0,4.956458727518718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,2,128,0,1,float16,float16,0,2.401024023691813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,2,128,0,1,fp8,fp8,0,1.6445706685384114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,2,128,0,1,float16,fp8,0,2.4240800539652505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,8,128,0,1,float16,float16,0,1.2388426462809246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,4,128,0,1,float16,float16,0,2.4595413208007812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,4,128,0,1,fp8,fp8,0,1.6492373148600261
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,8,128,0,1,float16,fp8,0,1.259663979212443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,8,128,0,1,fp8,fp8,0,0.8506080309549967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,8,4,128,0,1,float16,fp8,0,2.46342404683431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,1,128,0,1,fp8,fp8,0,0.7970506350199381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,1,128,0,1,float16,fp8,0,1.3071520328521729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,1,128,0,1,float16,float16,0,1.2134666442871094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,2,128,0,1,float16,float16,0,1.2740800380706787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,2,128,0,1,fp8,fp8,0,0.7978293100992838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,2,128,0,1,float16,fp8,0,1.2185813585917156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,4,128,0,1,float16,float16,0,1.2088106473286946
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,4,128,0,1,fp8,fp8,0,0.8040160338083903
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,8,128,0,1,float16,float16,0,0.5987146695454916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,8,4,128,0,1,float16,fp8,0,1.2251253128051758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,8,128,0,1,float16,fp8,0,0.5917173226674398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,8,128,0,1,fp8,fp8,0,0.4416853189468384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,1,128,0,1,float16,float16,0,0.5796106656392416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,2,128,0,1,float16,fp8,0,0.5815840164820353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,1,128,0,1,float16,fp8,0,0.5841386715571085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,1,128,0,1,fp8,fp8,0,0.4230293432871501
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,2,128,0,1,float16,float16,0,0.5788160165150961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,2,128,0,1,fp8,fp8,0,0.42580266793568927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,4,128,0,1,float16,float16,0,0.583733320236206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,4,128,0,1,float16,fp8,0,0.5872586568196615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,8,4,128,0,1,fp8,fp8,0,0.43294934431711835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,8,1,128,0,1,fp8,fp8,0,2.2577385902404785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,8,1,128,0,1,float16,float16,0,3.323589324951172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,8,1,128,0,1,float16,fp8,0,3.4519999821980796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,8,2,128,0,1,fp8,fp8,0,2.2969120343526206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,8,2,128,0,1,float16,float16,0,3.50056521097819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,8,2,128,0,1,float16,fp8,0,3.4744532903035483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,8,4,128,0,1,float16,float16,0,3.4103358586629233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,8,4,128,0,1,fp8,fp8,0,2.252229372660319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,8,4,128,0,1,float16,fp8,0,3.458762804667155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,8,128,0,1,float16,float16,0,1.7283093134562175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,8,128,0,1,fp8,fp8,0,1.2429920037587483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,8,128,0,1,float16,fp8,0,1.7233440081278484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,1,128,0,1,fp8,fp8,0,1.1951733430226643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,1,128,0,1,float16,float16,0,1.7033653259277344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,1,128,0,1,float16,fp8,0,1.6591040293375652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,2,128,0,1,float16,float16,0,1.6894346872965496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,2,128,0,1,fp8,fp8,0,1.1700426737467449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,2,128,0,1,float16,fp8,0,1.6611413955688477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,4,128,0,1,float16,float16,0,1.6328320503234863
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,4,128,0,1,fp8,fp8,0,1.1733226776123047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,8,128,0,1,fp8,fp8,0,0.6154773235321045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,8,128,0,1,float16,float16,0,0.895365317662557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,8,4,128,0,1,float16,fp8,0,1.661253293355306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,8,128,0,1,float16,fp8,0,0.8967786629994711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,1,128,0,1,float16,float16,0,0.8782453536987305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,2,128,0,1,float16,float16,0,0.878922700881958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,1,128,0,1,float16,fp8,0,0.8777013619740804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,1,128,0,1,fp8,fp8,0,0.6399360100428263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,2,128,0,1,float16,fp8,0,0.8791680335998535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,4,128,0,1,float16,float16,0,0.9036533037821451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,2,128,0,1,fp8,fp8,0,0.57860799630483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,8,128,0,1,float16,float16,0,0.4344853162765503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,4,128,0,1,fp8,fp8,0,0.5865600109100342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,8,4,128,0,1,float16,fp8,0,0.8886666297912598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,8,128,0,1,float16,fp8,0,0.4352960189183553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,8,128,0,1,fp8,fp8,0,0.32871466875076294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,1,128,0,1,float16,float16,0,0.4243573347727458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,1,128,0,1,float16,fp8,0,0.4303733507792155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,1,128,0,1,fp8,fp8,0,0.3153546651204427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,2,128,0,1,float16,float16,0,0.42763201395670575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,2,128,0,1,float16,fp8,0,0.43054401874542236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,2,128,0,1,fp8,fp8,0,0.3200640082359314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,4,128,0,1,float16,float16,0,0.4297226667404175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,4,128,0,1,fp8,fp8,0,0.3187573353449504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,8,4,128,0,1,float16,fp8,0,0.4319146474202474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,8,1,128,0,1,fp8,fp8,0,2.9401652018229165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,8,1,128,0,1,float16,float16,0,4.394021352132161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,8,2,128,0,1,fp8,fp8,0,3.0101760228474936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,8,1,128,0,1,float16,fp8,0,4.570650736490886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,8,2,128,0,1,float16,float16,0,4.487546602884929
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,8,2,128,0,1,float16,fp8,0,4.480928103129069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,8,4,128,0,1,float16,float16,0,4.410762786865234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,8,128,0,1,float16,float16,0,2.3217546145121255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,8,128,0,1,fp8,fp8,0,1.588479995727539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,8,128,0,1,float16,fp8,0,2.29695463180542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,8,4,128,0,1,fp8,fp8,0,3.1293067932128906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,8,4,128,0,1,float16,fp8,0,4.527935981750488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,1,128,0,1,float16,float16,0,2.1830612818400064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,1,128,0,1,float16,fp8,0,2.15283203125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,1,128,0,1,fp8,fp8,0,1.5217866897583008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,2,128,0,1,fp8,fp8,0,1.5100266138712566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,2,128,0,1,float16,float16,0,2.1561226844787598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,2,128,0,1,float16,fp8,0,2.212927977244059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,8,128,0,1,float16,float16,0,1.109984000523885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,4,128,0,1,float16,float16,0,2.2214506467183432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,4,128,0,1,fp8,fp8,0,1.5337920188903809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,8,4,128,0,1,float16,fp8,0,2.2454026540120444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,8,128,0,1,fp8,fp8,0,0.8570293585459391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,8,128,0,1,float16,fp8,0,1.1401173273722331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,1,128,0,1,float16,fp8,0,1.0901866753896077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,1,128,0,1,fp8,fp8,0,0.8235146999359131
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,1,128,0,1,float16,float16,0,1.1880799929300945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,2,128,0,1,fp8,fp8,0,0.7937866846720377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,2,128,0,1,float16,fp8,0,1.1109813054402669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,2,128,0,1,float16,float16,0,1.0929706891377766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,4,128,0,1,fp8,fp8,0,0.8022879759470621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,4,128,0,1,float16,float16,0,1.1119999885559082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,8,4,128,0,1,float16,fp8,0,1.1034986972808838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,8,128,0,1,float16,float16,0,0.6200906833012899
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,8,128,0,1,float16,fp8,0,0.616096019744873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,8,128,0,1,fp8,fp8,0,0.41201599438985187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,2,128,0,1,float16,float16,0,0.6019306580225626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,1,128,0,1,fp8,fp8,0,0.3934933344523112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,1,128,0,1,float16,fp8,0,0.6026133298873901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,1,128,0,1,float16,float16,0,0.597653349240621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,2,128,0,1,float16,fp8,0,0.6070133447647095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,2,128,0,1,fp8,fp8,0,0.3955093224843343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,4,128,0,1,float16,float16,0,0.6066773335138956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,4,128,0,1,float16,fp8,0,0.6080533266067505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,8,4,128,0,1,fp8,fp8,0,0.4024159908294678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,8,128,0,1,float16,float16,0,0.2911093235015869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,1,128,0,1,fp8,fp8,0,0.2179093360900879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,8,128,0,1,float16,fp8,0,0.29340267181396484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,2,128,0,1,float16,fp8,0,0.28862400849660236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,8,128,0,1,fp8,fp8,0,0.2254613240559896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,4,128,0,1,float16,float16,0,0.2881973385810852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,1,128,0,1,float16,float16,0,0.2842453320821126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,1,128,0,1,float16,fp8,0,0.2847573359807332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,2,128,0,1,float16,float16,0,0.28651199738184613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,2,128,0,1,fp8,fp8,0,0.21846399704615274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,4,128,0,1,float16,fp8,0,0.2903413375218709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,8,4,128,0,1,fp8,fp8,0,0.22206399838129678
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,8,1,128,0,1,fp8,fp8,0,1.7876159350077312
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,8,1,128,0,1,float16,float16,0,2.473994731903076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,8,1,128,0,1,float16,fp8,0,2.4425013860066733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,8,2,128,0,1,fp8,fp8,0,1.7804907162984211
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,8,2,128,0,1,float16,float16,0,2.58298126856486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,8,2,128,0,1,float16,fp8,0,2.461712042490641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,8,4,128,0,1,float16,float16,0,2.572634696960449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,8,4,128,0,1,float16,fp8,0,2.6514506340026855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,8,128,0,1,fp8,fp8,0,0.9637760321299235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,8,128,0,1,float16,float16,0,1.320314645767212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,8,128,0,1,float16,fp8,0,1.3254186312357585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,8,4,128,0,1,fp8,fp8,0,1.7919893264770508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,1,128,0,1,float16,float16,0,1.275498628616333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,1,128,0,1,float16,fp8,0,1.2573280334472656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,1,128,0,1,fp8,fp8,0,0.9401386578877767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,2,128,0,1,float16,fp8,0,1.2561493714650471
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,2,128,0,1,float16,float16,0,1.28766934076945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,2,128,0,1,fp8,fp8,0,0.9338026841481527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,4,128,0,1,float16,float16,0,1.2966986497243245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,4,128,0,1,float16,fp8,0,1.262613296508789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,8,128,0,1,float16,float16,0,0.6843626499176025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,8,4,128,0,1,fp8,fp8,0,0.9353226820627848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,8,128,0,1,float16,fp8,0,0.6879733403523763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,1,128,0,1,float16,fp8,0,0.6662773291269938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,1,128,0,1,fp8,fp8,0,0.45819199085235596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,8,128,0,1,fp8,fp8,0,0.4865226745605469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,1,128,0,1,float16,float16,0,0.6731359958648682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,2,128,0,1,float16,float16,0,0.6713226636250814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,2,128,0,1,float16,fp8,0,0.6723039944966634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,2,128,0,1,fp8,fp8,0,0.46084264914194745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,4,128,0,1,float16,float16,0,0.6710346539815267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,4,128,0,1,fp8,fp8,0,0.4721866846084595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,8,4,128,0,1,float16,fp8,0,0.6760213375091553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,1,128,0,1,float16,fp8,0,0.3219146728515625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,8,128,0,1,float16,float16,0,0.3317546645800273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,2,128,0,1,float16,float16,0,0.32313599189122516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,8,128,0,1,float16,fp8,0,0.33832534154256183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,8,128,0,1,fp8,fp8,0,0.2587786714235942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,1,128,0,1,float16,float16,0,0.32201600074768066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,1,128,0,1,fp8,fp8,0,0.24688533941904703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,2,128,0,1,float16,fp8,0,0.3235146601994832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,2,128,0,1,fp8,fp8,0,0.24814399083455405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,4,128,0,1,float16,float16,0,0.3254186709721883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,4,128,0,1,float16,fp8,0,0.3307039936383565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,8,4,128,0,1,fp8,fp8,0,0.25405333439509076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,8,128,0,1,float16,float16,0,0.19220266739527384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,8,128,0,1,float16,fp8,0,0.1945706605911255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,8,128,0,1,fp8,fp8,0,0.15293866395950317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,1,128,0,1,float16,float16,0,0.18837867180506387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,1,128,0,1,float16,fp8,0,0.18996800978978476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,2,128,0,1,fp8,fp8,0,0.14615466197331747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,1,128,0,1,fp8,fp8,0,0.14474133650461832
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,2,128,0,1,float16,float16,0,0.18954666455586752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,2,128,0,1,float16,fp8,0,0.18877333402633667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,4,128,0,1,float16,float16,0,0.19157334168752035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,4,128,0,1,float16,fp8,0,0.1902880072593689
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,8,4,128,0,1,fp8,fp8,0,0.1497119963169098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,8,1,128,0,1,fp8,fp8,0,1.7457493146260579
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,8,1,128,0,1,float16,float16,0,2.473877271016439
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,8,1,128,0,1,float16,fp8,0,2.3655306498209634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,8,2,128,0,1,float16,float16,0,2.486858685811361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,8,2,128,0,1,fp8,fp8,0,1.7605652809143066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,8,2,128,0,1,float16,fp8,0,2.3756799697875977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,8,4,128,0,1,float16,float16,0,2.4124107360839844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,8,128,0,1,fp8,fp8,0,0.9715200265248617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,8,128,0,1,float16,fp8,0,1.2889973322550456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,8,128,0,1,float16,float16,0,1.2674132982889812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,1,128,0,1,float16,float16,0,1.2083306312561035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,8,4,128,0,1,fp8,fp8,0,1.809541384379069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,8,4,128,0,1,float16,fp8,0,2.4487573305765786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,1,128,0,1,float16,fp8,0,1.2113280296325684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,1,128,0,1,fp8,fp8,0,0.9014773368835449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,2,128,0,1,float16,float16,0,1.2230933507283528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,2,128,0,1,fp8,fp8,0,0.9106132984161377
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,2,128,0,1,float16,fp8,0,1.2134239673614502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,4,128,0,1,float16,float16,0,1.2163039843241374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,4,128,0,1,fp8,fp8,0,0.9358719984690348
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,8,4,128,0,1,float16,fp8,0,1.2631999651590984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,8,128,0,1,float16,float16,0,0.652944008509318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,8,128,0,1,float16,fp8,0,0.6672159830729166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,8,128,0,1,fp8,fp8,0,0.5071359872817993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,1,128,0,1,float16,float16,0,0.6255893309911092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,1,128,0,1,fp8,fp8,0,0.4738773504892985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,1,128,0,1,float16,fp8,0,0.6248053312301636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,4,128,0,1,fp8,fp8,0,0.48978666464487713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,2,128,0,1,float16,float16,0,0.6257280111312866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,2,128,0,1,float16,fp8,0,0.6360106468200684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,2,128,0,1,fp8,fp8,0,0.47722665468851727
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,4,128,0,1,float16,float16,0,0.6278666655222574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,8,4,128,0,1,float16,fp8,0,0.6386826833089193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,8,128,0,1,float16,float16,0,0.3571946620941162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,8,128,0,1,float16,fp8,0,0.35844798882802326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,2,128,0,1,float16,float16,0,0.34274665514628094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,8,128,0,1,fp8,fp8,0,0.26151466369628906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,1,128,0,1,float16,float16,0,0.34277331829071045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,1,128,0,1,float16,fp8,0,0.34670400619506836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,1,128,0,1,fp8,fp8,0,0.2399946649869283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,2,128,0,1,float16,fp8,0,0.34969067573547363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,2,128,0,1,fp8,fp8,0,0.2432373364766439
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,4,128,0,1,float16,float16,0,0.3482186794281006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,4,128,0,1,float16,fp8,0,0.35117332140604657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,8,4,128,0,1,fp8,fp8,0,0.24996799230575562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,8,128,0,1,float16,float16,0,0.17323199907938638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,8,128,0,1,float16,fp8,0,0.17665600776672363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,8,128,0,1,fp8,fp8,0,0.14428800344467163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,1,128,0,1,float16,float16,0,0.16672533750534058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,1,128,0,1,float16,fp8,0,0.1664426624774933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,1,128,0,1,fp8,fp8,0,0.13382400075594583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,2,128,0,1,float16,float16,0,0.16577600439389548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,8,128,0,1,float16,float16,0,0.09782399733861287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,2,128,0,1,float16,fp8,0,0.16902399063110352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,2,128,0,1,fp8,fp8,0,0.1351040005683899
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,4,128,0,1,float16,float16,0,0.16978132724761963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,4,128,0,1,float16,fp8,0,0.1695573329925537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,8,4,128,0,1,fp8,fp8,0,0.1381333371003469
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,8,128,0,1,float16,fp8,0,0.09868799646695454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,8,128,0,1,fp8,fp8,0,0.08342400193214417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,1,128,0,1,float16,float16,0,0.09525866309801738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,1,128,0,1,float16,fp8,0,0.09610666831334432
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,1,128,0,1,fp8,fp8,0,0.07772266864776611
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,4,128,0,1,fp8,fp8,0,0.08032000064849854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,2,128,0,1,float16,float16,0,0.09597333272298177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,2,128,0,1,float16,fp8,0,0.09605333209037781
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,2,128,0,1,fp8,fp8,0,0.07851199805736542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,4,128,0,1,float16,float16,0,0.09635733564694722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,8,4,128,0,1,float16,fp8,0,0.09575466314951579
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,8,1,128,0,1,float16,float16,0,1.44650665918986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,8,1,128,0,1,float16,fp8,0,1.4495946566263835
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,8,1,128,0,1,fp8,fp8,0,1.110047976175944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,8,2,128,0,1,float16,float16,0,1.459210713704427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,8,2,128,0,1,float16,fp8,0,1.4637866020202637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,8,2,128,0,1,fp8,fp8,0,1.1248213450113933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,8,128,0,1,float16,float16,0,0.7932319641113281
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,8,4,128,0,1,float16,float16,0,1.475013256072998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,8,4,128,0,1,float16,fp8,0,1.4687573115030925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,8,4,128,0,1,fp8,fp8,0,1.166922648747762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,8,128,0,1,float16,fp8,0,0.7837493419647217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,8,128,0,1,fp8,fp8,0,0.6208959817886353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,1,128,0,1,float16,float16,0,0.7634826501210531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,1,128,0,1,fp8,fp8,0,0.5820373296737671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,1,128,0,1,float16,fp8,0,0.740511973698934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,2,128,0,1,float16,float16,0,0.7482240200042725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,2,128,0,1,float16,fp8,0,0.7513706684112549
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,2,128,0,1,fp8,fp8,0,0.5854613383611044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,4,128,0,1,float16,float16,0,0.7505866686503092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,4,128,0,1,float16,fp8,0,0.7570986747741699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,8,4,128,0,1,fp8,fp8,0,0.6012426614761353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,8,128,0,1,float16,float16,0,0.41595200697580975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,8,128,0,1,float16,fp8,0,0.4182560046513875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,1,128,0,1,fp8,fp8,0,0.29205866654713947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,8,128,0,1,fp8,fp8,0,0.3227199912071228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,1,128,0,1,float16,float16,0,0.39397335052490234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,1,128,0,1,float16,fp8,0,0.3933653434117635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,2,128,0,1,float16,float16,0,0.3957013289133708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,2,128,0,1,float16,fp8,0,0.39724798997243244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,2,128,0,1,fp8,fp8,0,0.29601067304611206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,4,128,0,1,float16,float16,0,0.4023413260777791
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,4,128,0,1,float16,fp8,0,0.4055200020472209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,8,4,128,0,1,fp8,fp8,0,0.30612800518671673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,8,128,0,1,float16,float16,0,0.20637333393096924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,8,128,0,1,float16,fp8,0,0.21009065707524618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,8,128,0,1,fp8,fp8,0,0.17191465695699057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,1,128,0,1,float16,float16,0,0.19474667310714722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,1,128,0,1,float16,fp8,0,0.19621866941452026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,1,128,0,1,fp8,fp8,0,0.15812800327936807
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,2,128,0,1,float16,float16,0,0.1964799960454305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,4,128,0,1,float16,fp8,0,0.20257065693537393
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,2,128,0,1,float16,fp8,0,0.1962933341662089
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,2,128,0,1,fp8,fp8,0,0.15970666209856668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,4,128,0,1,float16,float16,0,0.1978986660639445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,1,128,0,1,float16,float16,0,0.11008532842000325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,8,4,128,0,1,fp8,fp8,0,0.16431466738382974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,8,128,0,1,float16,float16,0,0.11713600158691406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,8,128,0,1,float16,fp8,0,0.12041067083676656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,8,128,0,1,fp8,fp8,0,0.09966400265693665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,1,128,0,1,float16,fp8,0,0.10994133353233337
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,1,128,0,1,fp8,fp8,0,0.08829866846402486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,2,128,0,1,float16,float16,0,0.11058132847150166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,2,128,0,1,float16,fp8,0,0.11309333642323811
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,2,128,0,1,fp8,fp8,0,0.09084266424179077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,4,128,0,1,float16,float16,0,0.11507200201352437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,4,128,0,1,float16,fp8,0,0.11508267124493916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,8,4,128,0,1,fp8,fp8,0,0.09424533446629842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,8,128,0,1,float16,fp8,0,0.07434666653474171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,8,128,0,1,float16,float16,0,0.07331199944019318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,8,128,0,1,fp8,fp8,0,0.0621013343334198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,1,128,0,1,float16,float16,0,0.0727040022611618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,1,128,0,1,float16,fp8,0,0.07239466905593872
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,1,128,0,1,fp8,fp8,0,0.05909866591294607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,2,128,0,1,float16,float16,0,0.07227199772993724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,2,128,0,1,float16,fp8,0,0.07210666437943776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,2,128,0,1,fp8,fp8,0,0.059605335195859276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,4,128,0,1,float16,float16,0,0.07221333185831706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,4,128,0,1,float16,fp8,0,0.07334400216738383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,8,4,128,0,1,fp8,fp8,0,0.060405333836873375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,8,1,128,0,1,float16,float16,0,1.5078345934549968
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,8,1,128,0,1,float16,fp8,0,1.511013348897298
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,8,1,128,0,1,fp8,fp8,0,1.2265973091125488
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,8,2,128,0,1,float16,float16,0,1.534224033355713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,8,2,128,0,1,float16,fp8,0,1.5241066614786785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,8,2,128,0,1,fp8,fp8,0,1.2284693717956543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,8,128,0,1,float16,float16,0,0.8261760075887045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,8,4,128,0,1,float16,float16,0,1.5547040303548176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,8,128,0,1,float16,fp8,0,0.8220799763997396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,8,4,128,0,1,fp8,fp8,0,1.256773312886556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,8,128,0,1,fp8,fp8,0,0.6896479924519857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,8,4,128,0,1,float16,fp8,0,1.567786693572998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,1,128,0,1,float16,float16,0,0.7786346276601156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,1,128,0,1,float16,fp8,0,0.7637973626454672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,1,128,0,1,fp8,fp8,0,0.6249333222707113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,2,128,0,1,float16,float16,0,0.7711413701375326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,2,128,0,1,float16,fp8,0,0.7707200050354004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,2,128,0,1,fp8,fp8,0,0.6255679925282797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,4,128,0,1,float16,float16,0,0.7890933354695638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,4,128,0,1,float16,fp8,0,0.78875199953715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,8,128,0,1,float16,float16,0,0.4233386516571045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,8,4,128,0,1,fp8,fp8,0,0.6473866701126099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,8,128,0,1,float16,fp8,0,0.4299039840698242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,8,128,0,1,fp8,fp8,0,0.35884801546732586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,1,128,0,1,float16,float16,0,0.38997332255045575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,1,128,0,1,float16,fp8,0,0.39239466190338135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,1,128,0,1,fp8,fp8,0,0.3246293266614278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,2,128,0,1,float16,float16,0,0.39606932799021405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,2,128,0,1,float16,fp8,0,0.39695998032887775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,2,128,0,1,fp8,fp8,0,0.329584002494812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,4,128,0,1,float16,float16,0,0.404639999071757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,4,128,0,1,float16,fp8,0,0.4068373441696167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,8,4,128,0,1,fp8,fp8,0,0.3404906590779622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,8,128,0,1,float16,float16,0,0.2293706734975179
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,8,128,0,1,float16,fp8,0,0.2326293389002482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,8,128,0,1,fp8,fp8,0,0.18705066045125326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,1,128,0,1,float16,float16,0,0.2127093275388082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,2,128,0,1,fp8,fp8,0,0.16697067022323608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,1,128,0,1,float16,fp8,0,0.2183039983113607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,1,128,0,1,fp8,fp8,0,0.16577066977818808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,2,128,0,1,float16,float16,0,0.21704532702763876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,2,128,0,1,float16,fp8,0,0.21889066696166992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,4,128,0,1,float16,float16,0,0.22022400299708048
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,4,128,0,1,float16,fp8,0,0.22418665885925293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,8,4,128,0,1,fp8,fp8,0,0.171615997950236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,8,128,0,1,float16,float16,0,0.11451199650764465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,8,128,0,1,float16,fp8,0,0.11778666575749715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,8,128,0,1,fp8,fp8,0,0.10408533612887065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,1,128,0,1,float16,float16,0,0.10593066612879436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,1,128,0,1,float16,fp8,0,0.10764799515406291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,1,128,0,1,fp8,fp8,0,0.09051733215649922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,2,128,0,1,float16,float16,0,0.10748266180356343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,2,128,0,1,float16,fp8,0,0.10893866419792175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,2,128,0,1,fp8,fp8,0,0.09381866455078125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,4,128,0,1,float16,float16,0,0.11089066664377849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,4,128,0,1,float16,fp8,0,0.1113759974638621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,8,4,128,0,1,fp8,fp8,0,0.09798399607340495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,8,128,0,1,float16,float16,0,0.06471466521422069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,8,128,0,1,float16,fp8,0,0.06613333523273468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,8,128,0,1,fp8,fp8,0,0.06021333237489065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,1,128,0,1,float16,float16,0,0.0618399977684021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,1,128,0,1,float16,fp8,0,0.062447999914487205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,1,128,0,1,fp8,fp8,0,0.053770666321118675
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,2,128,0,1,float16,float16,0,0.06185600161552429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,2,128,0,1,float16,fp8,0,0.062165334820747375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,2,128,0,1,fp8,fp8,0,0.05382933219273885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,4,128,0,1,float16,float16,0,0.06214933097362518
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,4,128,0,1,float16,fp8,0,0.06314133107662201
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,8,4,128,0,1,fp8,fp8,0,0.05668266614278158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,8,128,0,1,float16,float16,0,0.05310399830341339
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,8,128,0,1,float16,fp8,0,0.05392533540725708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,8,128,0,1,fp8,fp8,0,0.04507733384768168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,1,128,0,1,float16,float16,0,0.05307200054327647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,1,128,0,1,float16,fp8,0,0.052757332722345986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,1,128,0,1,fp8,fp8,0,0.043791999419530235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,2,128,0,1,float16,float16,0,0.053216000398000084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,2,128,0,1,float16,fp8,0,0.05283733208974203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,2,128,0,1,fp8,fp8,0,0.04452799757321676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,4,128,0,1,float16,float16,0,0.05178666611512502
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,4,128,0,1,float16,fp8,0,0.05309866865475973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,8,4,128,0,1,fp8,fp8,0,0.044677332043647766
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,8,1,128,0,1,float16,float16,0,0.9477333227793375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,8,1,128,0,1,float16,fp8,0,0.9521546363830566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,8,1,128,0,1,fp8,fp8,0,0.8164213498433431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,8,2,128,0,1,float16,float16,0,0.955893357594808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,8,2,128,0,1,float16,fp8,0,0.9576853116353353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,8,2,128,0,1,fp8,fp8,0,0.8255733648935953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,8,4,128,0,1,float16,float16,0,0.9668533007303873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,8,4,128,0,1,float16,fp8,0,0.9832533200581869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,8,128,0,1,float16,float16,0,0.5289280017217001
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,8,4,128,0,1,fp8,fp8,0,0.8532906373341879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,8,128,0,1,float16,fp8,0,0.532975991566976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,8,128,0,1,fp8,fp8,0,0.4739573399225871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,1,128,0,1,float16,float16,0,0.48581333955128986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,1,128,0,1,float16,fp8,0,0.48695464928944904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,2,128,0,1,float16,fp8,0,0.4944800138473511
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,1,128,0,1,fp8,fp8,0,0.42368535200754803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,2,128,0,1,float16,float16,0,0.4888480106989543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,2,128,0,1,fp8,fp8,0,0.4272640148798625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,4,128,0,1,float16,float16,0,0.49742400646209717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,4,128,0,1,float16,fp8,0,0.5042986472447714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,8,128,0,1,float16,float16,0,0.27898667256037396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,8,4,128,0,1,fp8,fp8,0,0.43744532267252606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,8,128,0,1,float16,fp8,0,0.28353599707285565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,8,128,0,1,fp8,fp8,0,0.24761066834131876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,2,128,0,1,float16,fp8,0,0.26155199607213336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,1,128,0,1,float16,float16,0,0.25779734055201214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,1,128,0,1,float16,fp8,0,0.2601813276608785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,1,128,0,1,fp8,fp8,0,0.21012266476949057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,2,128,0,1,float16,float16,0,0.26001065969467163
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,2,128,0,1,fp8,fp8,0,0.21357333660125732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,4,128,0,1,float16,float16,0,0.264629324277242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,4,128,0,1,float16,fp8,0,0.2693973382314046
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,8,4,128,0,1,fp8,fp8,0,0.22258667151133218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,8,128,0,1,float16,float16,0,0.14590400457382202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,8,128,0,1,float16,fp8,0,0.1488800048828125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,8,128,0,1,fp8,fp8,0,0.13036266962687174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,1,128,0,1,float16,float16,0,0.13121066490809122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,1,128,0,1,float16,fp8,0,0.13271466890970865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,1,128,0,1,fp8,fp8,0,0.1163146694501241
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,2,128,0,1,float16,float16,0,0.13356799880663553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,2,128,0,1,float16,fp8,0,0.134661336739858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,2,128,0,1,fp8,fp8,0,0.11680000027020772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,4,128,0,1,float16,float16,0,0.13547733426094055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,4,128,0,1,float16,fp8,0,0.1376479963461558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,8,4,128,0,1,fp8,fp8,0,0.12146666646003723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,8,128,0,1,float16,float16,0,0.0805920014778773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,8,128,0,1,float16,fp8,0,0.08239999910195668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,8,128,0,1,fp8,fp8,0,0.07529599964618683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,1,128,0,1,float16,float16,0,0.07364266614119212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,1,128,0,1,float16,fp8,0,0.07384000221888225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,1,128,0,1,fp8,fp8,0,0.06406933565934499
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,2,128,0,1,float16,float16,0,0.07380799949169159
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,2,128,0,1,float16,fp8,0,0.07487466434637706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,2,128,0,1,fp8,fp8,0,0.06590400139490764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,4,128,0,1,float16,float16,0,0.07609599828720093
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,4,128,0,1,float16,fp8,0,0.07889066636562347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,8,4,128,0,1,fp8,fp8,0,0.07026666899522145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,8,128,0,1,float16,float16,0,0.04800533254941305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,8,128,0,1,float16,fp8,0,0.04909333089987437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,8,128,0,1,fp8,fp8,0,0.04435733457406362
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,1,128,0,1,float16,float16,0,0.0458186666170756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,1,128,0,1,float16,fp8,0,0.04710933566093445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,1,128,0,1,fp8,fp8,0,0.041306667029857635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,2,128,0,1,float16,float16,0,0.04706666866938273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,2,128,0,1,float16,fp8,0,0.04763199885686239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,2,128,0,1,fp8,fp8,0,0.040752001106739044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,4,128,0,1,float16,float16,0,0.04669866462548574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,4,128,0,1,float16,fp8,0,0.048026666045188904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,8,4,128,0,1,fp8,fp8,0,0.041109333435694374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,8,128,0,1,float16,float16,0,0.04368533194065094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,8,128,0,1,float16,fp8,0,0.044480000933011375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,8,128,0,1,fp8,fp8,0,0.038378665844599404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,1,128,0,1,float16,float16,0,0.042821332812309265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,1,128,0,1,float16,fp8,0,0.04301866888999939
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,1,128,0,1,fp8,fp8,0,0.03707200040419897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,2,128,0,1,float16,float16,0,0.0429066667954127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,2,128,0,1,float16,fp8,0,0.04343999922275543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,2,128,0,1,fp8,fp8,0,0.036831999818483986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,4,128,0,1,float16,float16,0,0.04378666480382284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,4,128,0,1,float16,fp8,0,0.04377066592375437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,8,4,128,0,1,fp8,fp8,0,0.03721600025892258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,8,1,128,0,1,float16,float16,0,0.9753706455230713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,8,1,128,0,1,float16,fp8,0,0.975114663441976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,8,1,128,0,1,fp8,fp8,0,0.8798933029174805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,8,2,128,0,1,float16,float16,0,0.9886986414591471
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,8,2,128,0,1,float16,fp8,0,0.9848266442616781
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,8,2,128,0,1,fp8,fp8,0,0.9222293694814047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,8,4,128,0,1,float16,float16,0,1.0252959728240967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,8,128,0,1,float16,float16,0,0.5764000018437704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,8,4,128,0,1,fp8,fp8,0,0.9993546803792318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,8,4,128,0,1,float16,fp8,0,1.013493299484253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,8,128,0,1,float16,fp8,0,0.5658026536305746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,8,128,0,1,fp8,fp8,0,0.5200746854146322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,1,128,0,1,float16,float16,0,0.5010773340861002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,1,128,0,1,float16,fp8,0,0.4995146592458089
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,1,128,0,1,fp8,fp8,0,0.4461599985758464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,2,128,0,1,float16,float16,0,0.507482647895813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,2,128,0,1,float16,fp8,0,0.5065759817759196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,2,128,0,1,fp8,fp8,0,0.4635839859644572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,4,128,0,1,float16,float16,0,0.5213813384373983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,4,128,0,1,float16,fp8,0,0.5182986656824747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,8,128,0,1,float16,float16,0,0.3009919921557109
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,8,4,128,0,1,fp8,fp8,0,0.5183413426081339
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,8,128,0,1,float16,fp8,0,0.29410133759180707
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,8,128,0,1,fp8,fp8,0,0.271232008934021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,1,128,0,1,float16,float16,0,0.26285332441329956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,1,128,0,1,float16,fp8,0,0.2621333400408427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,1,128,0,1,fp8,fp8,0,0.2348746657371521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,2,128,0,1,float16,float16,0,0.266159991423289
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,2,128,0,1,float16,fp8,0,0.26693334182103473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,2,128,0,1,fp8,fp8,0,0.23640533288319907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,4,128,0,1,float16,float16,0,0.27541865905125934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,4,128,0,1,float16,fp8,0,0.2733599940935771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,8,4,128,0,1,fp8,fp8,0,0.2616426746050517
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,8,128,0,1,float16,float16,0,0.16517866651217142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,8,128,0,1,float16,fp8,0,0.16157333056131998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,8,128,0,1,fp8,fp8,0,0.13452266653378805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,1,128,0,1,float16,float16,0,0.14446933070818582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,1,128,0,1,float16,fp8,0,0.14387733737627664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,1,128,0,1,fp8,fp8,0,0.11572800079981486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,2,128,0,1,float16,float16,0,0.14600533246994019
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,2,128,0,1,float16,fp8,0,0.1462559998035431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,2,128,0,1,fp8,fp8,0,0.11739733815193176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,4,128,0,1,float16,float16,0,0.15165332953135172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,4,128,0,1,float16,fp8,0,0.15070399641990662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,8,4,128,0,1,fp8,fp8,0,0.12814933061599731
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,8,128,0,1,float16,float16,0,0.08582400282224019
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,8,128,0,1,float16,fp8,0,0.08342400193214417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,2,128,0,1,float16,float16,0,0.0759680022795995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,8,128,0,1,fp8,fp8,0,0.07518399755160014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,1,128,0,1,float16,float16,0,0.07393066585063934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,1,128,0,1,float16,fp8,0,0.07375999788443248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,1,128,0,1,fp8,fp8,0,0.06378133098284404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,2,128,0,1,float16,fp8,0,0.07695466776688893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,2,128,0,1,fp8,fp8,0,0.06609066824118297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,4,128,0,1,float16,float16,0,0.07935466865698497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,4,128,0,1,float16,fp8,0,0.07897066573301952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,8,4,128,0,1,fp8,fp8,0,0.07045866549015045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,8,128,0,1,float16,float16,0,0.04741866886615753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,8,128,0,1,float16,fp8,0,0.046629334489504494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,8,128,0,1,fp8,fp8,0,0.04348800083001455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,1,128,0,1,float16,float16,0,0.043280000487963356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,1,128,0,1,float16,fp8,0,0.04247466723124186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,1,128,0,1,fp8,fp8,0,0.03797333439191183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,2,128,0,1,float16,float16,0,0.04342400034268697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,2,128,0,1,float16,fp8,0,0.042992000778516136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,2,128,0,1,fp8,fp8,0,0.038005332152048744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,4,128,0,1,float16,float16,0,0.044400001565615334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,4,128,0,1,float16,fp8,0,0.044453332821528115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,8,4,128,0,1,fp8,fp8,0,0.04137066751718521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,1,128,0,1,fp8,fp8,0,0.02850666642189026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,8,128,0,1,float16,float16,0,0.03408000121514002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,8,128,0,1,float16,fp8,0,0.03497066597143809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,8,128,0,1,fp8,fp8,0,0.03012266755104065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,1,128,0,1,float16,float16,0,0.03265066693226496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,1,128,0,1,float16,fp8,0,0.032730666299661
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,2,128,0,1,float16,float16,0,0.032272001107533775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,2,128,0,1,float16,fp8,0,0.03286933402220408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,2,128,0,1,fp8,fp8,0,0.02887466549873352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,4,128,0,1,float16,float16,0,0.0337119996547699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,4,128,0,1,float16,fp8,0,0.03444266567627589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,8,4,128,0,1,fp8,fp8,0,0.029930666089057922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,8,128,0,1,float16,float16,0,0.030752000709374745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,8,128,0,1,float16,fp8,0,0.030661332110563915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,8,128,0,1,fp8,fp8,0,0.025994665920734406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,1,128,0,1,float16,float16,0,0.0301706666747729
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,1,128,0,1,float16,fp8,0,0.030266667405764263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,1,128,0,1,fp8,fp8,0,0.025578667720158894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,2,128,0,1,float16,float16,0,0.031061333914597828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,2,128,0,1,float16,fp8,0,0.030671998858451843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,2,128,0,1,fp8,fp8,0,0.025594666600227356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,4,128,0,1,float16,float16,0,0.02972800036271413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,4,128,0,1,float16,fp8,0,0.030821333328882854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,8,4,128,0,1,fp8,fp8,0,0.026149332523345947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,8,1,128,0,1,float16,float16,0,0.7642026742299398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,8,1,128,0,1,float16,fp8,0,0.7674400011698405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,8,1,128,0,1,fp8,fp8,0,0.7263893286387125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,8,2,128,0,1,float16,float16,0,0.7750453154246012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,8,2,128,0,1,float16,fp8,0,0.7725599606831869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,8,2,128,0,1,fp8,fp8,0,0.7584426403045654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,8,4,128,0,1,float16,float16,0,0.8123892943064371
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,8,128,0,1,float16,float16,0,0.4676373402277629
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,8,4,128,0,1,float16,fp8,0,0.8027093410491943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,8,128,0,1,float16,fp8,0,0.4568800131479899
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,8,128,0,1,fp8,fp8,0,0.43465598424275714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,8,4,128,0,1,fp8,fp8,0,0.8330880006154379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,1,128,0,1,float16,float16,0,0.3925173282623291
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,1,128,0,1,float16,fp8,0,0.3959306478500366
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,1,128,0,1,fp8,fp8,0,0.3636000156402588
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,2,128,0,1,float16,float16,0,0.40163731575012207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,2,128,0,1,float16,fp8,0,0.39935465653737384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,2,128,0,1,fp8,fp8,0,0.3765546480814616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,4,128,0,1,float16,float16,0,0.41389334201812744
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,4,128,0,1,float16,fp8,0,0.4136586586634318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,8,4,128,0,1,fp8,fp8,0,0.42869865894317627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,8,128,0,1,float16,float16,0,0.2461386720339457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,8,128,0,1,float16,fp8,0,0.2400746742884318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,8,128,0,1,fp8,fp8,0,0.2241706649462382
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,1,128,0,1,float16,float16,0,0.20590933163960776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,1,128,0,1,float16,fp8,0,0.2057759960492452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,1,128,0,1,fp8,fp8,0,0.19212265809377035
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,2,128,0,1,float16,float16,0,0.2104319930076599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,2,128,0,1,float16,fp8,0,0.2097653349240621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,2,128,0,1,fp8,fp8,0,0.19407999515533447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,4,128,0,1,float16,float16,0,0.21990933020909628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,4,128,0,1,float16,fp8,0,0.21682665745417276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,8,128,0,1,float16,float16,0,0.13542933265368143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,8,4,128,0,1,fp8,fp8,0,0.2182613412539164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,8,128,0,1,float16,fp8,0,0.1309279998143514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,8,128,0,1,fp8,fp8,0,0.11558399597803752
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,1,128,0,1,float16,float16,0,0.11499733726183574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,1,128,0,1,float16,fp8,0,0.11302399635314941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,1,128,0,1,fp8,fp8,0,0.09676800171534221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,2,128,0,1,float16,float16,0,0.11726400256156921
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,2,128,0,1,float16,fp8,0,0.11594667037328084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,2,128,0,1,fp8,fp8,0,0.09860799709955852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,4,128,0,1,float16,float16,0,0.1204853355884552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,4,128,0,1,float16,fp8,0,0.12130133310953777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,8,4,128,0,1,fp8,fp8,0,0.10751466949780782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,1,128,0,1,float16,fp8,0,0.06014933188756307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,8,128,0,1,float16,float16,0,0.07204799850781758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,8,128,0,1,float16,fp8,0,0.07090133428573608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,8,128,0,1,fp8,fp8,0,0.06591466565926869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,1,128,0,1,float16,float16,0,0.0609493354956309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,4,128,0,1,float16,float16,0,0.06628266473611195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,1,128,0,1,fp8,fp8,0,0.054378668467203774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,2,128,0,1,float16,float16,0,0.06258666515350342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,2,128,0,1,float16,fp8,0,0.06296533346176147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,2,128,0,1,fp8,fp8,0,0.056159997979799904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,4,128,0,1,float16,fp8,0,0.06574933230876923
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,8,4,128,0,1,fp8,fp8,0,0.060138667623202004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,8,128,0,1,float16,float16,0,0.04058666775623957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,8,128,0,1,float16,fp8,0,0.039647998909155525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,8,128,0,1,fp8,fp8,0,0.03803733239571253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,1,128,0,1,float16,float16,0,0.036517334481080375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,1,128,0,1,float16,fp8,0,0.035631999373435974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,1,128,0,1,fp8,fp8,0,0.03243733445803324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,2,128,0,1,float16,float16,0,0.03717333326737086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,2,128,0,1,float16,fp8,0,0.03699733316898346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,2,128,0,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,4,128,0,1,float16,float16,0,0.03835200021664301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,4,128,0,1,float16,fp8,0,0.03807466725508372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,8,4,128,0,1,fp8,fp8,0,0.03552533437808355
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,8,128,0,1,float16,float16,0,0.026698666314284008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,8,128,0,1,float16,fp8,0,0.02758399893840154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,8,128,0,1,fp8,fp8,0,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,1,128,0,1,float16,float16,0,0.02603733291228612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,1,128,0,1,float16,fp8,0,0.025834667185942333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,1,128,0,1,fp8,fp8,0,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,2,128,0,1,float16,float16,0,0.025989333788553875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,2,128,0,1,float16,fp8,0,0.026752000053723652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,2,128,0,1,fp8,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,4,128,0,1,float16,float16,0,0.027285332481066387
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,4,128,0,1,float16,fp8,0,0.02739733209212621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,8,4,128,0,1,fp8,fp8,0,0.02516266703605652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,8,128,0,1,float16,float16,0,0.024266667664051056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,8,128,0,1,float16,fp8,0,0.02481599897146225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,2,128,0,1,float16,fp8,0,0.023946667710940044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,8,128,0,1,fp8,fp8,0,0.021802666286627453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,1,128,0,1,float16,float16,0,0.02334933231274287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,1,128,0,1,float16,fp8,0,0.02298133323589961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,1,128,0,1,fp8,fp8,0,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,2,128,0,1,float16,float16,0,0.024362665911515553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,2,128,0,1,fp8,fp8,0,0.021551998953024547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,4,128,0,1,float16,float16,0,0.024101334313551586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,4,128,0,1,float16,fp8,0,0.02421333392461141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,8,4,128,0,1,fp8,fp8,0,0.02216000109910965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,8,128,0,1,float16,float16,0,0.02235200007756551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,8,128,0,1,float16,fp8,0,0.02292799949645996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,8,128,0,1,fp8,fp8,0,0.020810666183630627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,1,128,0,1,float16,float16,0,0.022863999009132385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,1,128,0,1,float16,fp8,0,0.021685334543387096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,1,128,0,1,fp8,fp8,0,0.02014933278163274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,2,128,0,1,float16,float16,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,2,128,0,1,float16,fp8,0,0.022863999009132385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,2,128,0,1,fp8,fp8,0,0.020224000016848247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,4,128,0,1,float16,float16,0,0.022783999641736347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,4,128,0,1,float16,fp8,0,0.022543999056021374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,8,4,128,0,1,fp8,fp8,0,0.02029866725206375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,8,1,128,0,1,float16,float16,0,0.33873601754506427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,8,1,128,0,1,float16,fp8,0,0.33734933535257977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,8,1,128,0,1,fp8,fp8,0,0.2950399915377299
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,8,2,128,0,1,float16,float16,0,0.34350399176279706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,8,2,128,0,1,float16,fp8,0,0.3421013355255127
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,8,2,128,0,1,fp8,fp8,0,0.3153439958890279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,8,4,128,0,1,float16,float16,0,0.3623146613438924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,8,4,128,0,1,float16,fp8,0,0.35762667655944824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,8,128,0,1,float16,float16,0,0.2193333307902018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,8,128,0,1,float16,fp8,0,0.21420266230901083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,8,4,128,0,1,fp8,fp8,0,0.357807993888855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,8,128,0,1,fp8,fp8,0,0.19362133741378784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,1,128,0,1,float16,float16,0,0.17987199624379477
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,1,128,0,1,float16,fp8,0,0.17762666940689087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,1,128,0,1,fp8,fp8,0,0.1556106706460317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,2,128,0,1,float16,float16,0,0.18159466981887817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,2,128,0,1,float16,fp8,0,0.180074671904246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,2,128,0,1,fp8,fp8,0,0.1602186659971873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,4,128,0,1,float16,float16,0,0.1934773325920105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,4,128,0,1,float16,fp8,0,0.1906986633936564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,8,4,128,0,1,fp8,fp8,0,0.18758400281270346
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,8,128,0,1,float16,float16,0,0.12100799878438313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,8,128,0,1,float16,fp8,0,0.1181813379128774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,8,128,0,1,fp8,fp8,0,0.1046453317006429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,1,128,0,1,float16,float16,0,0.09868266185124715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,1,128,0,1,float16,fp8,0,0.09869333108266194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,4,128,0,1,float16,float16,0,0.10803733269373576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,1,128,0,1,fp8,fp8,0,0.08733866612116496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,2,128,0,1,float16,float16,0,0.10230400164922078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,8,128,0,1,float16,float16,0,0.06890133519967397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,2,128,0,1,float16,fp8,0,0.10289600491523743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,2,128,0,1,fp8,fp8,0,0.09162132938702901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,4,128,0,1,float16,fp8,0,0.10780800382296245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,8,4,128,0,1,fp8,fp8,0,0.0978613297144572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,8,128,0,1,float16,fp8,0,0.06760533154010773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,8,128,0,1,fp8,fp8,0,0.06242666641871134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,1,128,0,1,float16,float16,0,0.05489066739877065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,1,128,0,1,float16,fp8,0,0.05465066432952881
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,1,128,0,1,fp8,fp8,0,0.05018133421738943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,2,128,0,1,float16,float16,0,0.05715199808279673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,2,128,0,1,float16,fp8,0,0.05621333420276642
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,2,128,0,1,fp8,fp8,0,0.05225066840648651
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,4,128,0,1,float16,float16,0,0.060720001657803856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,4,128,0,1,float16,fp8,0,0.058506667613983154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,8,4,128,0,1,fp8,fp8,0,0.05569600065549215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,8,128,0,1,float16,float16,0,0.03766933331886927
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,8,128,0,1,float16,fp8,0,0.03711999952793121
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,8,128,0,1,fp8,fp8,0,0.03632533301909765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,1,128,0,1,float16,float16,0,0.03384000062942505
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,1,128,0,1,float16,fp8,0,0.03436266630887985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,1,128,0,1,fp8,fp8,0,0.030581332743167877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,2,128,0,1,float16,float16,0,0.03346133232116699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,2,128,0,1,float16,fp8,0,0.034341332813103996
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,2,128,0,1,fp8,fp8,0,0.0313226655125618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,4,128,0,1,float16,float16,0,0.03590933233499527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,4,128,0,1,float16,fp8,0,0.035631999373435974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,8,4,128,0,1,fp8,fp8,0,0.03409066547950109
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,8,128,0,1,float16,float16,0,0.02491733431816101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,8,128,0,1,float16,fp8,0,0.024879999458789825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,8,128,0,1,fp8,fp8,0,0.022842665513356526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,1,128,0,1,float16,float16,0,0.023546665906906128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,1,128,0,1,float16,fp8,0,0.023434666295846302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,1,128,0,1,fp8,fp8,0,0.02165866643190384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,2,128,0,1,float16,float16,0,0.024010665714740753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,2,128,0,1,float16,fp8,0,0.023786666492621105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,2,128,0,1,fp8,fp8,0,0.02233600119749705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,4,128,0,1,float16,float16,0,0.02481599897146225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,4,128,0,1,float16,fp8,0,0.02421333392461141
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,8,4,128,0,1,fp8,fp8,0,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,8,128,0,1,float16,float16,0,0.021402666966120403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,8,128,0,1,float16,fp8,0,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,8,128,0,1,fp8,fp8,0,0.020197333147128422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,1,128,0,1,float16,float16,0,0.020058666666348774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,1,128,0,1,float16,fp8,0,0.020501332978407543
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,1,128,0,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,2,128,0,1,float16,float16,0,0.020266667008399963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,2,128,0,1,float16,fp8,0,0.020538666596015293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,2,128,0,1,fp8,fp8,0,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,4,128,0,1,float16,float16,0,0.02092266579469045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,4,128,0,1,float16,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,8,4,128,0,1,fp8,fp8,0,0.020106667031844456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,8,128,0,1,float16,float16,0,0.01971199984351794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,8,128,0,1,float16,fp8,0,0.0198186660806338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,8,128,0,1,fp8,fp8,0,0.01807466646035512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,1,128,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,1,128,0,1,float16,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,1,128,0,1,fp8,fp8,0,0.018272000054518383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,2,128,0,1,float16,float16,0,0.019141333798567455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,2,128,0,1,float16,fp8,0,0.01966933285196622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,2,128,0,1,fp8,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,4,128,0,1,float16,float16,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,4,128,0,1,float16,fp8,0,0.019653332730134327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,8,4,128,0,1,fp8,fp8,0,0.018511999398469925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,8,128,0,1,float16,float16,0,0.018618666877349217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,8,128,0,1,float16,fp8,0,0.0185759998857975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,8,128,0,1,fp8,fp8,0,0.017973333597183228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,1,128,0,1,float16,float16,0,0.01842133328318596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,1,128,0,1,float16,fp8,0,0.01830400029818217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,1,128,0,1,fp8,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,2,128,0,1,float16,float16,0,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,2,128,0,1,float16,fp8,0,0.018522666146357853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,2,128,0,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,4,128,0,1,float16,float16,0,0.018357332795858383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,4,128,0,1,float16,fp8,0,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,8,4,128,0,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,8,1,128,0,1,float16,float16,0,0.16450666387875876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,8,1,128,0,1,float16,fp8,0,0.16360533237457275
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,8,1,128,0,1,fp8,fp8,0,0.1541973352432251
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,8,2,128,0,1,float16,float16,0,0.16933866341908774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,8,2,128,0,1,float16,fp8,0,0.16972267627716064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,8,2,128,0,1,fp8,fp8,0,0.16044800480206808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,8,4,128,0,1,fp8,fp8,0,0.18185067176818848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,8,4,128,0,1,float16,float16,0,0.18118399381637573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,8,128,0,1,fp8,fp8,0,0.10195733110109965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,8,4,128,0,1,float16,fp8,0,0.17843733231226602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,8,128,0,1,float16,float16,0,0.11912000179290771
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,8,128,0,1,float16,fp8,0,0.11682132879892985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,1,128,0,1,float16,float16,0,0.09285866220792134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,1,128,0,1,float16,fp8,0,0.09291733304659526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,2,128,0,1,float16,float16,0,0.09614933530489604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,1,128,0,1,fp8,fp8,0,0.08690133690834045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,2,128,0,1,float16,fp8,0,0.09620799620946248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,2,128,0,1,fp8,fp8,0,0.0885653297106425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,4,128,0,1,float16,float16,0,0.10322133700052898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,4,128,0,1,float16,fp8,0,0.10266133149464925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,8,4,128,0,1,fp8,fp8,0,0.09818666179974873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,8,128,0,1,float16,float16,0,0.06753066678841908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,8,128,0,1,float16,fp8,0,0.06326933205127716
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,8,128,0,1,fp8,fp8,0,0.05945600072542826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,1,128,0,1,float16,float16,0,0.05286400020122528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,1,128,0,1,float16,fp8,0,0.052000001072883606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,1,128,0,1,fp8,fp8,0,0.04943466683228811
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,2,128,0,1,float16,float16,0,0.054325332244237266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,2,128,0,1,float16,fp8,0,0.05387733379999796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,2,128,0,1,fp8,fp8,0,0.05110399921735128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,4,128,0,1,float16,float16,0,0.05698133508364359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,4,128,0,1,float16,fp8,0,0.05702400207519531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,8,4,128,0,1,fp8,fp8,0,0.05522666871547699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,8,128,0,1,float16,float16,0,0.035989334185918175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,8,128,0,1,float16,fp8,0,0.03576533248027166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,8,128,0,1,fp8,fp8,0,0.03537066777547201
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,1,128,0,1,float16,float16,0,0.0322080006202062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,1,128,0,1,float16,fp8,0,0.03259733319282532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,1,128,0,1,fp8,fp8,0,0.030080000559488933
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,2,128,0,1,float16,float16,0,0.03313600023587545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,2,128,0,1,float16,fp8,0,0.033200000723203026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,2,128,0,1,fp8,fp8,0,0.030746666093667347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,4,128,0,1,float16,float16,0,0.03402666747570038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,4,128,0,1,float16,fp8,0,0.03452266752719879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,8,4,128,0,1,fp8,fp8,0,0.03386666625738144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,8,128,0,1,float16,float16,0,0.024080000817775726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,2,128,0,1,float16,fp8,0,0.022858666876951855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,8,128,0,1,float16,fp8,0,0.022757334013779957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,8,128,0,1,fp8,fp8,0,0.02319466571013133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,1,128,0,1,float16,float16,0,0.022448000808556873
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,1,128,0,1,float16,fp8,0,0.021941334009170532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,1,128,0,1,fp8,fp8,0,0.02146133283774058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,2,128,0,1,float16,float16,0,0.02239466706911723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,2,128,0,1,fp8,fp8,0,0.021920000513394673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,4,128,0,1,float16,float16,0,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,4,128,0,1,float16,fp8,0,0.02306666721900304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,8,4,128,0,1,fp8,fp8,0,0.022469334304332733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,8,128,0,1,float16,float16,0,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,8,128,0,1,float16,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,8,128,0,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,1,128,0,1,float16,float16,0,0.018309333672126133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,1,128,0,1,float16,fp8,0,0.018426666657129925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,1,128,0,1,fp8,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,2,128,0,1,float16,float16,0,0.01820266619324684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,2,128,0,1,float16,fp8,0,0.018245333184798557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,2,128,0,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,4,128,0,1,float16,float16,0,0.018800000349680584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,4,128,0,1,float16,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,8,4,128,0,1,fp8,fp8,0,0.019893333315849304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,8,128,0,1,float16,float16,0,0.01801066721479098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,8,128,0,1,float16,fp8,0,0.01854933301607768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,8,128,0,1,fp8,fp8,0,0.017994667092959087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,1,128,0,1,float16,float16,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,4,128,0,1,float16,float16,0,0.01770666614174843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,1,128,0,1,float16,fp8,0,0.01749333366751671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,1,128,0,1,fp8,fp8,0,0.018031999468803406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,2,128,0,1,float16,float16,0,0.01748266691962878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,2,128,0,1,float16,fp8,0,0.017557332913080852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,2,128,0,1,fp8,fp8,0,0.018581333259741466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,4,128,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,8,4,128,0,1,fp8,fp8,0,0.018245333184798557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,8,128,0,1,float16,float16,0,0.017514667163292568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,8,128,0,1,float16,fp8,0,0.017477333545684814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,8,128,0,1,fp8,fp8,0,0.017903999735911686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,1,128,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,1,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,1,128,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,2,128,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,2,128,0,1,float16,fp8,0,0.017898666361967724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,8,128,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,2,128,0,1,fp8,fp8,0,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,4,128,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,4,128,0,1,float16,fp8,0,0.017573333034912746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,8,4,128,0,1,fp8,fp8,0,0.017727999637524288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,8,128,0,1,float16,float16,0,0.015765332927306492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,8,128,0,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,1,128,0,1,float16,float16,0,0.016410666207472484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,1,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,4,128,0,1,float16,float16,0,0.016538667182127636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,4,128,0,1,fp8,fp8,0,0.01747200017174085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,1,128,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,2,128,0,1,float16,float16,0,0.016154666741689045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,2,128,0,1,float16,fp8,0,0.016682667036851246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,2,128,0,1,fp8,fp8,0,0.01643199970324834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,8,4,128,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,8,1,128,0,1,float16,float16,0,0.10190932949384053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,8,1,128,0,1,float16,fp8,0,0.10220799843470256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,8,1,128,0,1,fp8,fp8,0,0.11585066715876262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,8,2,128,0,1,float16,float16,0,0.10401599605878194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,8,2,128,0,1,float16,fp8,0,0.10531199971834819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,8,2,128,0,1,fp8,fp8,0,0.1188320020834605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,8,4,128,0,1,float16,float16,0,0.11175466577212016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,8,4,128,0,1,float16,fp8,0,0.11008532842000325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,8,4,128,0,1,fp8,fp8,0,0.1272266705830892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,8,128,0,1,float16,float16,0,0.06883200009663899
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,8,128,0,1,float16,fp8,0,0.06750933329264323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,8,128,0,1,fp8,fp8,0,0.07444266478220622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,1,128,0,1,float16,float16,0,0.05681600173314413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,1,128,0,1,float16,fp8,0,0.05738666653633118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,1,128,0,1,fp8,fp8,0,0.06565333406130473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,2,128,0,1,float16,float16,0,0.058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,2,128,0,1,float16,fp8,0,0.057775999108950295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,2,128,0,1,fp8,fp8,0,0.06754666566848755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,4,128,0,1,float16,float16,0,0.06282666822274525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,4,128,0,1,fp8,fp8,0,0.07127466797828674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,8,4,128,0,1,float16,fp8,0,0.06084266801675161
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,8,128,0,1,float16,float16,0,0.038362666964530945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,2,128,0,1,float16,float16,0,0.034586665530999504
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,8,128,0,1,float16,fp8,0,0.03811199963092804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,4,128,0,1,float16,float16,0,0.03612266729275385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,8,128,0,1,fp8,fp8,0,0.043578664461771645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,1,128,0,1,float16,float16,0,0.03416533271471659
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,1,128,0,1,float16,fp8,0,0.034527999659379326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,1,128,0,1,fp8,fp8,0,0.03868799904982249
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,2,128,0,1,float16,fp8,0,0.03465066601832708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,2,128,0,1,fp8,fp8,0,0.03893866638342539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,4,128,0,1,float16,fp8,0,0.036650667587916054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,8,4,128,0,1,fp8,fp8,0,0.04066666712363561
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,8,128,0,1,float16,float16,0,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,8,128,0,1,float16,fp8,0,0.025285333395004272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,8,128,0,1,fp8,fp8,0,0.026954665780067444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,1,128,0,1,float16,float16,0,0.023786666492621105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,1,128,0,1,float16,fp8,0,0.023647998770078022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,1,128,0,1,fp8,fp8,0,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,2,128,0,1,float16,float16,0,0.024773334463437397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,2,128,0,1,float16,fp8,0,0.02359466751416524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,2,128,0,1,fp8,fp8,0,0.02589866767326991
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,4,128,0,1,float16,float16,0,0.024485332270463307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,4,128,0,1,float16,fp8,0,0.025008000433444977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,8,4,128,0,1,fp8,fp8,0,0.026389333109060924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,8,128,0,1,float16,float16,0,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,8,128,0,1,float16,fp8,0,0.018677332748969395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,8,128,0,1,fp8,fp8,0,0.02014933278163274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,1,128,0,1,float16,float16,0,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,1,128,0,1,float16,fp8,0,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,1,128,0,1,fp8,fp8,0,0.018719999740521114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,2,128,0,1,float16,float16,0,0.018511999398469925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,2,128,0,1,float16,fp8,0,0.018165333817402523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,2,128,0,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,4,128,0,1,float16,float16,0,0.01844266677896182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,4,128,0,1,float16,fp8,0,0.018757333358128864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,8,4,128,0,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,8,128,0,1,float16,float16,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,8,128,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,8,128,0,1,fp8,fp8,0,0.017925333231687546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,1,128,0,1,float16,float16,0,0.016117333124081295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,1,128,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,1,128,0,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,2,128,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,2,128,0,1,float16,fp8,0,0.016330666840076447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,2,128,0,1,fp8,fp8,0,0.018266666680574417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,4,128,0,1,float16,float16,0,0.016437333077192307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,4,128,0,1,float16,fp8,0,0.016458666572968166
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,8,4,128,0,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,8,128,0,1,float16,float16,0,0.01621866722901662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,8,128,0,1,float16,fp8,0,0.016255999604860943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,8,128,0,1,fp8,fp8,0,0.017877332866191864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,1,128,0,1,float16,float16,0,0.015322666615247726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,1,128,0,1,float16,fp8,0,0.01639466608564059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,1,128,0,1,fp8,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,2,128,0,1,float16,float16,0,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,2,128,0,1,float16,fp8,0,0.01639466608564059
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,2,128,0,1,fp8,fp8,0,0.01749333366751671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,4,128,0,1,float16,float16,0,0.016037333756685257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,4,128,0,1,float16,fp8,0,0.01642666632930438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,8,4,128,0,1,fp8,fp8,0,0.01647466669480006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,8,128,0,1,float16,float16,0,0.015482666591803232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,8,128,0,1,float16,fp8,0,0.015669333438078564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,8,128,0,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,1,128,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,1,128,0,1,float16,fp8,0,0.015861333658297855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,1,128,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,2,128,0,1,float16,float16,0,0.01469333345691363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,2,128,0,1,float16,fp8,0,0.015386667102575302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,2,128,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,4,128,0,1,float16,float16,0,0.015365333606799444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,4,128,0,1,float16,fp8,0,0.015498666713635126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,8,4,128,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,8,128,0,1,float16,float16,0,0.015322666615247726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,8,128,0,1,float16,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,8,128,0,1,fp8,fp8,0,0.016255999604860943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,1,128,0,1,float16,float16,0,0.015573333948850632
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,1,128,0,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,1,128,0,1,fp8,fp8,0,0.016629333297411602
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,2,128,0,1,float16,float16,0,0.014570667097965876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,2,128,0,1,float16,fp8,0,0.016058667252461117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,2,128,0,1,fp8,fp8,0,0.01637866720557213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,4,128,0,1,float16,float16,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,4,128,0,1,float16,fp8,0,0.01581866666674614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,8,4,128,0,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,8,1,128,0,1,float16,float16,0,0.07271466652552287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,8,1,128,0,1,float16,fp8,0,0.07250666618347168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,8,1,128,0,1,fp8,fp8,0,0.09336533149083455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,8,2,128,0,1,float16,float16,0,0.07340266803900401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,8,2,128,0,1,float16,fp8,0,0.07302933434645335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,8,2,128,0,1,fp8,fp8,0,0.0960693359375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,8,4,128,0,1,float16,float16,0,0.0786186655362447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,8,4,128,0,1,float16,fp8,0,0.0767626663049062
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,8,4,128,0,1,fp8,fp8,0,0.09955199559529622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,8,128,0,1,float16,float16,0,0.046816001335779824
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,8,128,0,1,float16,fp8,0,0.04491200049718221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,8,128,0,1,fp8,fp8,0,0.058133333921432495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,1,128,0,1,float16,float16,0,0.041536000867684685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,1,128,0,1,float16,fp8,0,0.041663999358812966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,4,128,0,1,float16,float16,0,0.04387199878692627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,1,128,0,1,fp8,fp8,0,0.05309333403905233
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,2,128,0,1,float16,float16,0,0.04174399872620901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,2,128,0,1,float16,fp8,0,0.04265599946180979
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,2,128,0,1,fp8,fp8,0,0.05249600112438202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,8,128,0,1,fp8,fp8,0,0.03489600121974945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,4,128,0,1,float16,fp8,0,0.042949333786964417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,8,128,0,1,float16,float16,0,0.02844800055027008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,8,4,128,0,1,fp8,fp8,0,0.05603733162085215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,8,128,0,1,float16,fp8,0,0.02834133307139079
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,1,128,0,1,float16,float16,0,0.027701333165168762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,1,128,0,1,float16,fp8,0,0.027056001126766205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,1,128,0,1,fp8,fp8,0,0.032560000816980995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,2,128,0,1,float16,float16,0,0.027402666707833607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,2,128,0,1,float16,fp8,0,0.027301333844661713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,2,128,0,1,fp8,fp8,0,0.03336533407370249
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,8,128,0,1,fp8,fp8,0,0.02385066697994868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,4,128,0,1,float16,float16,0,0.028512001037597656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,4,128,0,1,float16,fp8,0,0.02773333340883255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,8,4,128,0,1,fp8,fp8,0,0.03461333364248276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,8,128,0,1,float16,float16,0,0.020442667106787365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,8,128,0,1,float16,fp8,0,0.019760000209013622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,1,128,0,1,float16,float16,0,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,1,128,0,1,float16,fp8,0,0.019567999988794327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,1,128,0,1,fp8,fp8,0,0.0227360005180041
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,2,128,0,1,float16,float16,0,0.0199946661790212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,2,128,0,1,float16,fp8,0,0.020090666910012562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,2,128,0,1,fp8,fp8,0,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,4,128,0,1,float16,float16,0,0.020725333442290623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,4,128,0,1,float16,fp8,0,0.01966933285196622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,8,4,128,0,1,fp8,fp8,0,0.023818666736284893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,8,128,0,1,float16,float16,0,0.016208000481128693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,8,128,0,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,8,128,0,1,fp8,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,1,128,0,1,float16,float16,0,0.016330666840076447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,1,128,0,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,1,128,0,1,fp8,fp8,0,0.018165333817402523
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,2,128,0,1,float16,float16,0,0.015749332805474598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,2,128,0,1,float16,fp8,0,0.01659199967980385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,2,128,0,1,fp8,fp8,0,0.01844266677896182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,4,128,0,1,float16,float16,0,0.016261332978804905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,4,128,0,1,float16,fp8,0,0.0164533331990242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,8,4,128,0,1,fp8,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,8,128,0,1,float16,float16,0,0.01569066693385442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,8,128,0,1,float16,fp8,0,0.015791999797026317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,8,128,0,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,1,128,0,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,1,128,0,1,float16,fp8,0,0.01573333392540614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,1,128,0,1,fp8,fp8,0,0.017423999806245167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,2,128,0,1,float16,float16,0,0.018101333330074947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,2,128,0,1,float16,fp8,0,0.015824000040690105
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,2,128,0,1,fp8,fp8,0,0.017711999515692394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,4,128,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,4,128,0,1,float16,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,1,128,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,8,4,128,0,1,fp8,fp8,0,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,8,128,0,1,float16,float16,0,0.014592000593741735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,8,128,0,1,float16,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,8,128,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,1,128,0,1,float16,float16,0,0.014650666465361914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,1,128,0,1,float16,fp8,0,0.01591466615597407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,2,128,0,1,float16,float16,0,0.014490666488806406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,2,128,0,1,float16,fp8,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,2,128,0,1,fp8,fp8,0,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,4,128,0,1,float16,float16,0,0.014533333480358124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,4,128,0,1,float16,fp8,0,0.015429332852363586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,8,4,128,0,1,fp8,fp8,0,0.016762666404247284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,8,128,0,1,float16,float16,0,0.014511999984582266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,8,128,0,1,float16,fp8,0,0.01443733274936676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,1,128,0,1,float16,float16,0,0.014106666048367819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,8,128,0,1,fp8,fp8,0,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,1,128,0,1,float16,fp8,0,0.014581333845853806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,1,128,0,1,fp8,fp8,0,0.01661866654952367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,2,128,0,1,float16,float16,0,0.014378666877746582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,2,128,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,2,128,0,1,fp8,fp8,0,0.016176000237464905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,4,128,0,1,float16,float16,0,0.014287999520699183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,4,128,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,8,4,128,0,1,fp8,fp8,0,0.016757333030303318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,8,128,0,1,float16,float16,0,0.014159999787807465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,8,128,0,1,float16,fp8,0,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,8,128,0,1,fp8,fp8,0,0.016330666840076447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,1,128,0,1,float16,float16,0,0.014021333307027817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,1,128,0,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,4,128,0,1,float16,float16,0,0.01402666668097178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,1,128,0,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,2,128,0,1,float16,float16,0,0.01394133393963178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,2,128,0,1,float16,fp8,0,0.014453332871198654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,2,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,4,128,0,1,float16,fp8,0,0.01443733274936676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,8,4,128,0,1,fp8,fp8,0,0.016688000410795212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,8,1,128,0,1,float16,float16,0,0.059802666306495667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,8,1,128,0,1,float16,fp8,0,0.06018133461475372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,8,1,128,0,1,fp8,fp8,0,0.08191466828187306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,8,2,128,0,1,float16,float16,0,0.06067200005054474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,8,2,128,0,1,float16,fp8,0,0.06117333471775055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,8,2,128,0,1,fp8,fp8,0,0.0817440003156662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,8,4,128,0,1,float16,float16,0,0.0628959983587265
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,8,4,128,0,1,float16,fp8,0,0.061343997716903687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,8,128,0,1,fp8,fp8,0,0.0495413343111674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,8,4,128,0,1,fp8,fp8,0,0.08499733606974284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,8,128,0,1,float16,float16,0,0.03798400113979975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,8,128,0,1,float16,fp8,0,0.03798400113979975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,1,128,0,1,float16,float16,0,0.03548266738653183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,1,128,0,1,float16,fp8,0,0.03664533297220866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,2,128,0,1,float16,float16,0,0.036602665980656944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,1,128,0,1,fp8,fp8,0,0.04760533571243286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,2,128,0,1,float16,fp8,0,0.03722133239110311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,2,128,0,1,fp8,fp8,0,0.04821333289146423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,4,128,0,1,float16,float16,0,0.03793066740036011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,4,128,0,1,fp8,fp8,0,0.04850133260091146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,8,4,128,0,1,float16,fp8,0,0.03782399992148081
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,8,128,0,1,float16,float16,0,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,8,128,0,1,float16,fp8,0,0.025029333929220837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,8,128,0,1,fp8,fp8,0,0.03154666721820831
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,1,128,0,1,float16,float16,0,0.024170666933059692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,1,128,0,1,float16,fp8,0,0.024341332415739696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,1,128,0,1,fp8,fp8,0,0.030858665704727173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,2,128,0,1,float16,float16,0,0.023760000864664715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,2,128,0,1,float16,fp8,0,0.025008000433444977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,2,128,0,1,fp8,fp8,0,0.030591999491055805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,4,128,0,1,float16,float16,0,0.024645333488782246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,4,128,0,1,float16,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,8,4,128,0,1,fp8,fp8,0,0.031311998764673867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,8,128,0,1,float16,float16,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,8,128,0,1,float16,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,8,128,0,1,fp8,fp8,0,0.022117334107557934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,1,128,0,1,float16,float16,0,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,1,128,0,1,float16,fp8,0,0.018453333526849747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,1,128,0,1,fp8,fp8,0,0.022426667312781017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,2,128,0,1,float16,float16,0,0.018661333868900936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,2,128,0,1,float16,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,2,128,0,1,fp8,fp8,0,0.022661333282788593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,4,128,0,1,float16,float16,0,0.018618666877349217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,4,128,0,1,float16,fp8,0,0.01854933301607768
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,8,4,128,0,1,fp8,fp8,0,0.022128000855445862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,8,128,0,1,float16,float16,0,0.015461333096027374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,8,128,0,1,float16,fp8,0,0.015743999431530636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,8,128,0,1,fp8,fp8,0,0.018746666610240936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,1,128,0,1,float16,float16,0,0.01586666703224182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,1,128,0,1,float16,fp8,0,0.016048000504573185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,1,128,0,1,fp8,fp8,0,0.01791999985774358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,2,128,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,2,128,0,1,float16,fp8,0,0.01591466615597407
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,2,128,0,1,fp8,fp8,0,0.018063999712467194
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,4,128,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,1,128,0,1,float16,float16,0,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,4,128,0,1,float16,fp8,0,0.01613333324591319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,8,4,128,0,1,fp8,fp8,0,0.01775466650724411
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,8,128,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,8,128,0,1,float16,fp8,0,0.015509333461523056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,8,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,1,128,0,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,1,128,0,1,fp8,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,2,128,0,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,2,128,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,2,128,0,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,4,128,0,1,float16,float16,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,4,128,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,8,4,128,0,1,fp8,fp8,0,0.0163680004576842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,8,128,0,1,float16,float16,0,0.014538666854302088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,8,128,0,1,float16,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,8,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,1,128,0,1,float16,float16,0,0.014416000495354334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,1,128,0,1,float16,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,1,128,0,1,fp8,fp8,0,0.016490666816631954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,2,128,0,1,float16,float16,0,0.013989333063364029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,2,128,0,1,float16,fp8,0,0.014698666830857595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,2,128,0,1,fp8,fp8,0,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,4,128,0,1,float16,float16,0,0.014576000471909841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,4,128,0,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,8,4,128,0,1,fp8,fp8,0,0.01670933390657107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,8,128,0,1,float16,float16,0,0.014282666146755219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,8,128,0,1,float16,fp8,0,0.014117332796255747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,8,128,0,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,1,128,0,1,float16,float16,0,0.014554666976133982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,1,128,0,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,1,128,0,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,2,128,0,1,float16,float16,0,0.014149333039919535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,2,128,0,1,float16,fp8,0,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,8,128,0,1,float16,float16,0,0.014549333602190018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,2,128,0,1,fp8,fp8,0,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,4,128,0,1,float16,float16,0,0.014544000228246054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,4,128,0,1,float16,fp8,0,0.014352000008026758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,8,4,128,0,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,8,128,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,8,128,0,1,fp8,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,2,128,0,1,float16,fp8,0,0.014501333236694336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,1,128,0,1,float16,float16,0,0.014490666488806406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,1,128,0,1,float16,fp8,0,0.014192000031471252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,1,128,0,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,2,128,0,1,float16,float16,0,0.014159999787807465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,2,128,0,1,fp8,fp8,0,0.01672533278663953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,4,128,0,1,float16,float16,0,0.014362666755914688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,4,128,0,1,float16,fp8,0,0.014752000570297241
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,8,4,128,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,4,1,128,0,1,fp8,fp8,0,2.69162654876709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,4,1,128,0,1,float16,float16,0,4.1536054611206055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,4,4,128,0,1,float16,float16,0,2.011845270792643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,4,1,128,0,1,float16,fp8,0,4.329333305358887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,4,2,128,0,1,fp8,fp8,0,2.772245407104492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,4,2,128,0,1,float16,float16,0,4.29150390625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,4,2,128,0,1,float16,fp8,0,4.332330703735352
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,4,4,128,0,1,float16,fp8,0,2.172383944193522
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,4,4,128,0,1,fp8,fp8,0,1.4367680549621582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,4,1,128,0,1,fp8,fp8,0,1.40993070602417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,4,1,128,0,1,float16,float16,0,2.132549285888672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,4,1,128,0,1,float16,fp8,0,2.088138739267985
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,4,4,128,0,1,float16,float16,0,1.0969226360321045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,4,2,128,0,1,fp8,fp8,0,1.4231732686360676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,4,2,128,0,1,float16,float16,0,2.0308639208475747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,4,2,128,0,1,float16,fp8,0,2.1108694076538086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,4,4,128,0,1,fp8,fp8,0,0.7192373275756836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,4,4,128,0,1,float16,fp8,0,1.105733315149943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,4,1,128,0,1,fp8,fp8,0,0.6829439798990885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,4,1,128,0,1,float16,float16,0,1.0932213465372722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,4,1,128,0,1,float16,fp8,0,1.0806879997253418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,4,2,128,0,1,float16,float16,0,1.0854933261871338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,4,2,128,0,1,fp8,fp8,0,0.6902933120727539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,4,4,128,0,1,float16,float16,0,0.5020373264948527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,4,2,128,0,1,float16,fp8,0,1.0980693499247234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,4,1,128,0,1,float16,fp8,0,0.49696532885233563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,4,4,128,0,1,fp8,fp8,0,0.3659733136494954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,4,4,128,0,1,float16,fp8,0,0.5084960063298544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,4,1,128,0,1,float16,float16,0,0.49694931507110596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,4,1,128,0,1,fp8,fp8,0,0.3599146604537964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,4,2,128,0,1,float16,float16,0,0.49853865305582684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,4,2,128,0,1,float16,fp8,0,0.501039981842041
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,4,2,128,0,1,fp8,fp8,0,0.36025599638621014
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,4,1,128,0,1,float16,float16,0,2.432981332143148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,4,1,128,0,1,fp8,fp8,0,1.5981547037760417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,4,1,128,0,1,float16,fp8,0,2.4232640266418457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,4,2,128,0,1,float16,float16,0,2.437328020731608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,4,4,128,0,1,float16,float16,0,1.2080000241597493
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,4,2,128,0,1,fp8,fp8,0,1.6051626205444336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,4,2,128,0,1,float16,fp8,0,2.3866186141967773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,4,4,128,0,1,float16,fp8,0,1.220474640528361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,4,4,128,0,1,fp8,fp8,0,0.8306453227996826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,4,1,128,0,1,float16,float16,0,1.1999893188476562
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,4,1,128,0,1,float16,fp8,0,1.205082654953003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,4,1,128,0,1,fp8,fp8,0,0.8139733473459879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,4,2,128,0,1,float16,float16,0,1.195024013519287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,4,2,128,0,1,fp8,fp8,0,0.7890559832255045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,4,1,128,0,1,float16,float16,0,0.5665173530578613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,4,4,128,0,1,float16,float16,0,0.5781813462575277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,4,2,128,0,1,float16,fp8,0,1.209829330444336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,4,4,128,0,1,float16,fp8,0,0.5755306482315063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,4,4,128,0,1,fp8,fp8,0,0.41707201798756915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,4,1,128,0,1,float16,fp8,0,0.5717120170593262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,4,1,128,0,1,fp8,fp8,0,0.4153439998626709
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,4,2,128,0,1,float16,float16,0,0.5613066752751669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,4,2,128,0,1,float16,fp8,0,0.5664000113805135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,4,2,128,0,1,fp8,fp8,0,0.4124106566111247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,4,4,128,0,1,float16,float16,0,0.3323733409245809
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,4,4,128,0,1,float16,fp8,0,0.3277013301849365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,4,4,128,0,1,fp8,fp8,0,0.2411200006802877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,4,1,128,0,1,float16,float16,0,0.3236853281656901
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,4,1,128,0,1,float16,fp8,0,0.3235306739807129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,4,1,128,0,1,fp8,fp8,0,0.23374400536219278
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,4,2,128,0,1,float16,float16,0,0.32232000430425006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,4,2,128,0,1,float16,fp8,0,0.32570133606592816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,4,2,128,0,1,fp8,fp8,0,0.2391306757926941
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,4,1,128,0,1,float16,float16,0,1.6327733993530273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,4,1,128,0,1,fp8,fp8,0,1.1559147040049236
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,4,1,128,0,1,float16,fp8,0,1.6883947054545085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,4,2,128,0,1,float16,float16,0,1.6248159408569336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,4,4,128,0,1,float16,float16,0,0.8838133017222086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,4,2,128,0,1,fp8,fp8,0,1.1670933564503987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,4,2,128,0,1,float16,fp8,0,1.653674602508545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,4,4,128,0,1,fp8,fp8,0,0.5895786682764689
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,4,4,128,0,1,float16,fp8,0,0.8917173544565836
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,4,1,128,0,1,float16,float16,0,0.8715733687082926
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,4,1,128,0,1,float16,fp8,0,0.8675093650817871
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,4,1,128,0,1,fp8,fp8,0,0.5673439900080363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,4,2,128,0,1,float16,float16,0,0.8693439960479736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,4,2,128,0,1,float16,fp8,0,0.8796586990356445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,4,2,128,0,1,fp8,fp8,0,0.5738293329874674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,4,4,128,0,1,float16,float16,0,0.4211733341217041
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,4,4,128,0,1,fp8,fp8,0,0.31270933151245117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,4,4,128,0,1,float16,fp8,0,0.4262080192565918
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,4,1,128,0,1,float16,float16,0,0.414522647857666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,4,1,128,0,1,float16,fp8,0,0.4155786832173665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,4,1,128,0,1,fp8,fp8,0,0.30323199431101483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,4,2,128,0,1,float16,float16,0,0.411845326423645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,4,2,128,0,1,float16,fp8,0,0.41808001200358075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,4,2,128,0,1,fp8,fp8,0,0.3063093423843384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,4,4,128,0,1,float16,float16,0,0.23785599072774252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,4,4,128,0,1,float16,fp8,0,0.23941866556803384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,4,4,128,0,1,fp8,fp8,0,0.18119466304779053
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,4,1,128,0,1,float16,float16,0,0.23430933554967245
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,4,1,128,0,1,float16,fp8,0,0.2327573299407959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,4,1,128,0,1,fp8,fp8,0,0.1744640072186788
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,4,2,128,0,1,float16,float16,0,0.23624000946680704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,4,2,128,0,1,float16,fp8,0,0.2379466692606608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,4,2,128,0,1,fp8,fp8,0,0.17505600055058798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,4,1,128,0,1,fp8,fp8,0,1.4876426060994465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,4,1,128,0,1,float16,float16,0,2.132751941680908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,4,1,128,0,1,float16,fp8,0,2.2390453020731607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,4,4,128,0,1,float16,float16,0,1.1193599700927734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,4,4,128,0,1,float16,fp8,0,1.1203786532084148
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,4,2,128,0,1,float16,float16,0,2.1591572761535645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,4,2,128,0,1,float16,fp8,0,2.188938617706299
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,4,2,128,0,1,fp8,fp8,0,1.5043892860412598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,4,4,128,0,1,fp8,fp8,0,0.8132266998291016
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,4,1,128,0,1,float16,float16,0,1.1019093195597331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,4,1,128,0,1,float16,fp8,0,1.1410826841990154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,4,1,128,0,1,fp8,fp8,0,0.784330685933431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,4,2,128,0,1,float16,float16,0,1.0996159712473552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,4,2,128,0,1,fp8,fp8,0,0.8114240169525146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,4,2,128,0,1,float16,fp8,0,1.1103839874267578
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,4,4,128,0,1,float16,float16,0,0.5981760025024414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,4,1,128,0,1,fp8,fp8,0,0.3932373523712158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,4,4,128,0,1,float16,fp8,0,0.6055146853129069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,4,4,128,0,1,fp8,fp8,0,0.40277334054311115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,4,1,128,0,1,float16,float16,0,0.5992213487625122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,4,1,128,0,1,float16,fp8,0,0.5931679805119833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,4,2,128,0,1,float16,float16,0,0.593231995900472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,4,2,128,0,1,fp8,fp8,0,0.3887466589609782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,4,2,128,0,1,float16,fp8,0,0.6025866667429606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,4,4,128,0,1,float16,float16,0,0.28008000055948895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,4,4,128,0,1,float16,fp8,0,0.2826293309529622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,4,2,128,0,1,float16,float16,0,0.2727893392244975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,4,4,128,0,1,fp8,fp8,0,0.21471999088923135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,4,1,128,0,1,float16,float16,0,0.27535466353098553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,4,1,128,0,1,float16,fp8,0,0.27450666824976605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,4,1,128,0,1,fp8,fp8,0,0.2048799991607666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,4,2,128,0,1,float16,fp8,0,0.2775839964548747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,4,2,128,0,1,fp8,fp8,0,0.20993600289026895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,4,4,128,0,1,float16,float16,0,0.15266666809717813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,4,4,128,0,1,float16,fp8,0,0.15266133348147073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,4,4,128,0,1,fp8,fp8,0,0.119759996732076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,4,1,128,0,1,float16,float16,0,0.151146670182546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,4,1,128,0,1,float16,fp8,0,0.15074666341145834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,4,1,128,0,1,fp8,fp8,0,0.11381866534550984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,4,2,128,0,1,float16,float16,0,0.15214932958285013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,4,2,128,0,1,float16,fp8,0,0.15215999881426492
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,4,2,128,0,1,fp8,fp8,0,0.115365336338679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,4,1,128,0,1,float16,float16,0,1.2505706946055095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,4,1,128,0,1,fp8,fp8,0,0.9208587010701498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,4,1,128,0,1,float16,fp8,0,1.2640906969706218
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,4,2,128,0,1,float16,float16,0,1.272490660349528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,4,2,128,0,1,float16,fp8,0,1.2723146279652913
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,4,2,128,0,1,fp8,fp8,0,0.9304640293121338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,4,4,128,0,1,float16,float16,0,0.6822400093078613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,4,4,128,0,1,float16,fp8,0,0.6774293581644694
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,4,4,128,0,1,fp8,fp8,0,0.4809279839197795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,4,1,128,0,1,float16,float16,0,0.659173329671224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,4,1,128,0,1,float16,fp8,0,0.6632213195164999
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,4,1,128,0,1,fp8,fp8,0,0.4604906638463338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,4,2,128,0,1,float16,float16,0,0.6658293406168619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,4,2,128,0,1,float16,fp8,0,0.6697173118591309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,4,2,128,0,1,fp8,fp8,0,0.46373867988586426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,4,4,128,0,1,float16,float16,0,0.3249280055363973
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,4,4,128,0,1,float16,fp8,0,0.328821341196696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,4,2,128,0,1,float16,float16,0,0.3182026743888855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,4,4,128,0,1,fp8,fp8,0,0.2529226740201314
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,4,1,128,0,1,float16,float16,0,0.31523199876149494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,4,1,128,0,1,float16,fp8,0,0.31667200724283856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,4,1,128,0,1,fp8,fp8,0,0.2416800061861674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,4,2,128,0,1,float16,fp8,0,0.3195466597874959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,4,2,128,0,1,fp8,fp8,0,0.24521599213282266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,4,4,128,0,1,float16,float16,0,0.18330132961273193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,4,4,128,0,1,float16,fp8,0,0.18577067057291666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,4,4,128,0,1,fp8,fp8,0,0.14404267072677612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,4,1,128,0,1,float16,float16,0,0.17841599384943643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,4,1,128,0,1,float16,fp8,0,0.18075199921925864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,4,1,128,0,1,fp8,fp8,0,0.1364959975083669
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,4,2,128,0,1,float16,float16,0,0.17846399545669556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,4,2,128,0,1,float16,fp8,0,0.18157867590586343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,4,2,128,0,1,fp8,fp8,0,0.13897599776585898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,4,4,128,0,1,float16,float16,0,0.11588799953460693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,4,4,128,0,1,float16,fp8,0,0.11592533191045125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,4,4,128,0,1,fp8,fp8,0,0.09054399530092876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,4,1,128,0,1,float16,float16,0,0.11436266700426738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,4,1,128,0,1,float16,fp8,0,0.11477333307266235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,4,1,128,0,1,fp8,fp8,0,0.08665600419044495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,4,2,128,0,1,float16,float16,0,0.11379200220108032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,4,2,128,0,1,float16,fp8,0,0.11462400356928508
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,4,2,128,0,1,fp8,fp8,0,0.08854933579762776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,4,1,128,0,1,float16,float16,0,1.2165546417236328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,4,1,128,0,1,float16,fp8,0,1.2302133242289226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,4,1,128,0,1,fp8,fp8,0,0.9180586338043213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,4,2,128,0,1,float16,float16,0,1.2422080039978027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,4,2,128,0,1,float16,fp8,0,1.2421653270721436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,4,2,128,0,1,fp8,fp8,0,0.9259839852650961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,4,4,128,0,1,float16,float16,0,0.6600053310394287
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,4,4,128,0,1,float16,fp8,0,0.6534773508707682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,4,4,128,0,1,fp8,fp8,0,0.5068639914194742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,4,1,128,0,1,float16,float16,0,0.6314560174942017
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,4,1,128,0,1,fp8,fp8,0,0.47733867168426514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,4,1,128,0,1,float16,fp8,0,0.6290080149968466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,4,4,128,0,1,float16,fp8,0,0.3555146853129069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,4,2,128,0,1,float16,float16,0,0.6413173278172811
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,4,1,128,0,1,float16,float16,0,0.3396159807840983
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,4,2,128,0,1,float16,fp8,0,0.6421759923299154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,4,2,128,0,1,fp8,fp8,0,0.48769601186116535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,4,4,128,0,1,float16,float16,0,0.3555999994277954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,4,4,128,0,1,fp8,fp8,0,0.25702399015426636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,4,1,128,0,1,float16,fp8,0,0.34139732519785565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,4,1,128,0,1,fp8,fp8,0,0.24040534098943075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,4,2,128,0,1,float16,float16,0,0.343173344930013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,4,2,128,0,1,float16,fp8,0,0.3444480101267497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,4,1,128,0,1,fp8,fp8,0,0.12971733013788858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,4,2,128,0,1,fp8,fp8,0,0.24496533473332724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,4,4,128,0,1,float16,float16,0,0.16851733128229776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,4,4,128,0,1,float16,fp8,0,0.17178134123484293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,4,4,128,0,1,fp8,fp8,0,0.13980799913406372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,4,1,128,0,1,float16,float16,0,0.16077867150306702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,4,1,128,0,1,float16,fp8,0,0.16241066654523215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,4,2,128,0,1,float16,float16,0,0.16247466206550598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,4,2,128,0,1,float16,fp8,0,0.1646986703077952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,4,2,128,0,1,fp8,fp8,0,0.1335040032863617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,4,4,128,0,1,float16,float16,0,0.09272533655166626
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,4,4,128,0,1,float16,fp8,0,0.0940106709798177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,4,4,128,0,1,fp8,fp8,0,0.0788213312625885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,4,1,128,0,1,float16,float16,0,0.08932266632715861
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,4,1,128,0,1,float16,fp8,0,0.09007466832796733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,4,1,128,0,1,fp8,fp8,0,0.07117866476376851
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,4,2,128,0,1,float16,float16,0,0.08954133590062459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,4,2,128,0,1,float16,fp8,0,0.09034132957458496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,4,2,128,0,1,fp8,fp8,0,0.07337066531181335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,4,1,128,0,1,fp8,fp8,0,0.06177600224812826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,4,4,128,0,1,float16,float16,0,0.08126933375994365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,4,4,128,0,1,float16,fp8,0,0.08171733220418294
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,4,4,128,0,1,fp8,fp8,0,0.06404800216356914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,4,1,128,0,1,float16,float16,0,0.08065066734949748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,4,1,128,0,1,float16,fp8,0,0.07931733131408691
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,4,2,128,0,1,float16,float16,0,0.08071466783682506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,4,2,128,0,1,float16,fp8,0,0.0802293320496877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,4,2,128,0,1,fp8,fp8,0,0.06275733311971028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,4,1,128,0,1,float16,float16,0,0.7550186316172282
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,4,1,128,0,1,float16,fp8,0,0.7534666856129965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,4,4,128,0,1,float16,fp8,0,0.4153493245442708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,4,1,128,0,1,fp8,fp8,0,0.590768019358317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,4,2,128,0,1,float16,float16,0,0.7630720138549805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,4,2,128,0,1,float16,fp8,0,0.7616693178812662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,4,2,128,0,1,fp8,fp8,0,0.6015146573384603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,4,4,128,0,1,float16,float16,0,0.40932265917460126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,4,4,128,0,1,fp8,fp8,0,0.3237599929173787
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,4,2,128,0,1,float16,fp8,0,0.4006506601969401
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,4,1,128,0,1,float16,float16,0,0.39374931653340656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,4,1,128,0,1,float16,fp8,0,0.39658665657043457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,4,1,128,0,1,fp8,fp8,0,0.2948426604270935
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,4,1,128,0,1,float16,float16,0,0.19148266315460205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,4,2,128,0,1,float16,float16,0,0.40143465995788574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,4,1,128,0,1,fp8,fp8,0,0.15760533014933267
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,4,2,128,0,1,fp8,fp8,0,0.304970661799113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,4,4,128,0,1,float16,float16,0,0.20305599768956503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,4,4,128,0,1,float16,fp8,0,0.208079993724823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,4,4,128,0,1,fp8,fp8,0,0.16915732622146606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,4,1,128,0,1,float16,fp8,0,0.1935466726620992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,4,2,128,0,1,float16,float16,0,0.19460266828536987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,4,2,128,0,1,float16,fp8,0,0.19781333208084106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,4,2,128,0,1,fp8,fp8,0,0.16178666551907858
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,4,4,128,0,1,float16,float16,0,0.11452266573905945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,4,4,128,0,1,float16,fp8,0,0.11478400230407715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,4,4,128,0,1,fp8,fp8,0,0.09512000282605489
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,4,1,128,0,1,float16,float16,0,0.10711999734242757
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,4,1,128,0,1,float16,fp8,0,0.10683733224868774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,4,1,128,0,1,fp8,fp8,0,0.08683733145395915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,4,2,128,0,1,float16,float16,0,0.11089066664377849
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,4,2,128,0,1,float16,fp8,0,0.11049600442250569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,4,2,128,0,1,fp8,fp8,0,0.0905013382434845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,4,1,128,0,1,float16,fp8,0,0.068122665087382
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,4,1,128,0,1,fp8,fp8,0,0.054655998945236206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,4,4,128,0,1,float16,float16,0,0.0684746652841568
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,4,4,128,0,1,float16,fp8,0,0.06804800033569336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,4,4,128,0,1,fp8,fp8,0,0.05755733450253805
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,4,1,128,0,1,float16,float16,0,0.0666293352842331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,4,2,128,0,1,float16,float16,0,0.06745066742102306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,4,2,128,0,1,float16,fp8,0,0.06865600248177846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,4,2,128,0,1,fp8,fp8,0,0.05599466462930044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,4,4,128,0,1,float16,float16,0,0.06420266628265381
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,4,2,128,0,1,float16,float16,0,0.06364266574382782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,4,4,128,0,1,float16,fp8,0,0.06366933385531108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,4,4,128,0,1,fp8,fp8,0,0.05063466727733612
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,4,1,128,0,1,float16,float16,0,0.0633653352657954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,4,1,128,0,1,float16,fp8,0,0.06326400240262349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,4,1,128,0,1,fp8,fp8,0,0.05089066425959269
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,4,2,128,0,1,float16,fp8,0,0.06438399851322174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,4,1,128,0,1,fp8,fp8,0,0.6348693370819092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,4,2,128,0,1,fp8,fp8,0,0.05049066742261251
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,4,1,128,0,1,float16,float16,0,0.7831040223439535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,4,1,128,0,1,float16,fp8,0,0.7847146987915039
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,4,2,128,0,1,float16,float16,0,0.8029706478118896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,4,4,128,0,1,float16,float16,0,0.42904531955718994
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,4,2,128,0,1,float16,fp8,0,0.7981653213500977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,4,2,128,0,1,fp8,fp8,0,0.6572213172912598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,4,4,128,0,1,float16,fp8,0,0.42897601922353107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,4,4,128,0,1,fp8,fp8,0,0.36027201016743976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,4,1,128,0,1,float16,float16,0,0.4073653221130371
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,4,2,128,0,1,float16,fp8,0,0.41490666071573895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,4,1,128,0,1,fp8,fp8,0,0.33162132898966473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,4,1,128,0,1,float16,fp8,0,0.4094613393147786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,4,2,128,0,1,float16,float16,0,0.41334935029347736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,4,2,128,0,1,fp8,fp8,0,0.34302934010823566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,4,4,128,0,1,float16,float16,0,0.23081600666046143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,4,4,128,0,1,float16,fp8,0,0.23161600033442178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,4,4,128,0,1,fp8,fp8,0,0.18493866920471191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,4,1,128,0,1,float16,float16,0,0.21757332483927408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,4,1,128,0,1,float16,fp8,0,0.2186773419380188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,4,1,128,0,1,fp8,fp8,0,0.16756266355514526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,4,2,128,0,1,float16,float16,0,0.22180799643198648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,4,2,128,0,1,float16,fp8,0,0.22430932521820068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,4,2,128,0,1,fp8,fp8,0,0.17336533466974893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,4,4,128,0,1,float16,float16,0,0.11297067006429036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,4,2,128,0,1,float16,float16,0,0.1090133289496104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,4,4,128,0,1,float16,fp8,0,0.11575466394424438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,4,2,128,0,1,fp8,fp8,0,0.09546666344006856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,4,4,128,0,1,fp8,fp8,0,0.10147733489672343
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,4,1,128,0,1,float16,float16,0,0.10586133599281311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,4,1,128,0,1,float16,fp8,0,0.10686399539311726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,4,1,128,0,1,fp8,fp8,0,0.09203733007113139
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,4,2,128,0,1,float16,fp8,0,0.11006933450698853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,4,4,128,0,1,float16,float16,0,0.06342933575312297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,4,4,128,0,1,fp8,fp8,0,0.05811200042565664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,4,4,128,0,1,float16,fp8,0,0.06357333560784657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,4,1,128,0,1,float16,float16,0,0.06006933252016703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,4,1,128,0,1,float16,fp8,0,0.06035733222961426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,4,1,128,0,1,fp8,fp8,0,0.05147199829419454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,4,2,128,0,1,float16,float16,0,0.0602453351020813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,4,2,128,0,1,float16,fp8,0,0.06098666787147522
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,4,2,128,0,1,fp8,fp8,0,0.052789335449536644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,4,4,128,0,1,float16,float16,0,0.049653331438700356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,4,4,128,0,1,float16,fp8,0,0.05036266644795736
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,4,4,128,0,1,fp8,fp8,0,0.04318400224049886
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,4,1,128,0,1,float16,float16,0,0.050069332122802734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,4,1,128,0,1,float16,fp8,0,0.05002133548259735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,4,1,128,0,1,fp8,fp8,0,0.04045333216587702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,4,2,128,0,1,float16,float16,0,0.05009600023428599
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,4,2,128,0,1,float16,fp8,0,0.05004799862702688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,4,2,128,0,1,fp8,fp8,0,0.04107200105985006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,4,4,128,0,1,float16,float16,0,0.04686399797598521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,4,4,128,0,1,float16,fp8,0,0.04795200129350027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,4,4,128,0,1,fp8,fp8,0,0.03937066594759623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,4,1,128,0,1,float16,float16,0,0.04693333307902018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,4,1,128,0,1,float16,fp8,0,0.04643199841181437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,4,1,128,0,1,fp8,fp8,0,0.03868266691764196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,4,2,128,0,1,float16,float16,0,0.04668800036112467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,4,2,128,0,1,float16,fp8,0,0.04630400240421295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,4,2,128,0,1,fp8,fp8,0,0.038058665891488395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,4,1,128,0,1,float16,float16,0,0.49329066276550293
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,4,1,128,0,1,float16,fp8,0,0.4943093458811442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,4,1,128,0,1,fp8,fp8,0,0.43138134479522705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,4,2,128,0,1,float16,float16,0,0.5001440048217773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,4,2,128,0,1,float16,fp8,0,0.5116533438364664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,4,1,128,0,1,float16,float16,0,0.2601120074590047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,4,1,128,0,1,fp8,fp8,0,0.21342400709788004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,4,4,128,0,1,float16,float16,0,0.27874133984247845
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,4,2,128,0,1,fp8,fp8,0,0.44409600893656415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,4,4,128,0,1,float16,fp8,0,0.2837066650390625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,4,4,128,0,1,fp8,fp8,0,0.25084267059961957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,4,1,128,0,1,float16,fp8,0,0.26395199696222943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,4,2,128,0,1,float16,float16,0,0.26637866099675495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,4,2,128,0,1,float16,fp8,0,0.26680533091227215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,4,2,128,0,1,fp8,fp8,0,0.22298133373260498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,4,4,128,0,1,float16,float16,0,0.1446613371372223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,4,4,128,0,1,float16,fp8,0,0.1490293343861898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,4,4,128,0,1,fp8,fp8,0,0.12961066762606302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,4,1,128,0,1,float16,float16,0,0.13133866588274637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,4,1,128,0,1,float16,fp8,0,0.13319466511408487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,4,1,128,0,1,fp8,fp8,0,0.1160640021165212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,4,2,128,0,1,float16,float16,0,0.1330826679865519
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,4,2,128,0,1,float16,fp8,0,0.13642133275667825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,4,2,128,0,1,fp8,fp8,0,0.12053866187731425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,4,4,128,0,1,float16,float16,0,0.07771199941635132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,4,1,128,0,1,fp8,fp8,0,0.0639626681804657
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,4,4,128,0,1,float16,fp8,0,0.08059733112653096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,4,4,128,0,1,fp8,fp8,0,0.07351466516653697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,4,1,128,0,1,float16,float16,0,0.07223999996980031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,4,1,128,0,1,float16,fp8,0,0.07241599758466084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,4,2,128,0,1,float16,float16,0,0.07459733386834462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,4,2,128,0,1,float16,fp8,0,0.0758240024248759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,4,2,128,0,1,fp8,fp8,0,0.06819200019041698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,4,4,128,0,1,float16,float16,0,0.045423999428749084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,4,4,128,0,1,float16,fp8,0,0.04669866462548574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,4,4,128,0,1,fp8,fp8,0,0.04320533573627472
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,4,1,128,0,1,float16,float16,0,0.04381866753101349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,4,1,128,0,1,float16,fp8,0,0.04548799991607666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,4,1,128,0,1,fp8,fp8,0,0.038218667109807335
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,4,4,128,0,1,float16,fp8,0,0.04116799930731455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,4,2,128,0,1,float16,float16,0,0.044453332821528115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,4,2,128,0,1,float16,fp8,0,0.04593066871166229
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,4,2,128,0,1,fp8,fp8,0,0.040218666195869446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,4,4,128,0,1,float16,float16,0,0.04133866727352142
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,4,4,128,0,1,fp8,fp8,0,0.03513066718975703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,4,1,128,0,1,float16,float16,0,0.040933333337306976
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,4,1,128,0,1,float16,fp8,0,0.04043200115362803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,4,1,128,0,1,fp8,fp8,0,0.03398933261632919
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,4,2,128,0,1,float16,fp8,0,0.04195733368396759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,4,2,128,0,1,float16,float16,0,0.0407679999868075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,4,2,128,0,1,fp8,fp8,0,0.03455466777086258
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,4,4,128,0,1,float16,float16,0,0.03879466652870178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,4,4,128,0,1,float16,fp8,0,0.039103999733924866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,4,4,128,0,1,fp8,fp8,0,0.032655999064445496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,4,1,128,0,1,float16,float16,0,0.03793599953254064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,4,1,128,0,1,float16,fp8,0,0.03802666564782461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,4,1,128,0,1,fp8,fp8,0,0.032261334359645844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,4,2,128,0,1,float16,float16,0,0.03902933249870936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,4,2,128,0,1,float16,fp8,0,0.03855466594298681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,4,2,128,0,1,fp8,fp8,0,0.03236799935499827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,4,1,128,0,1,float16,float16,0,0.5383466482162476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,4,1,128,0,1,fp8,fp8,0,0.4612106482187907
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,4,1,128,0,1,float16,fp8,0,0.5363146861394247
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,4,2,128,0,1,float16,float16,0,0.548960010210673
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,4,2,128,0,1,float16,fp8,0,0.5462933381398519
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,4,4,128,0,1,float16,float16,0,0.30407466491063434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,4,2,128,0,1,fp8,fp8,0,0.4832746585210164
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,4,4,128,0,1,float16,fp8,0,0.3003893295923869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,4,4,128,0,1,fp8,fp8,0,0.2698506712913513
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,4,1,128,0,1,float16,float16,0,0.27962666749954224
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,4,1,128,0,1,float16,fp8,0,0.2770613431930542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,4,1,128,0,1,fp8,fp8,0,0.23994133869806925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,4,2,128,0,1,float16,float16,0,0.2874133388201396
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,4,2,128,0,1,float16,fp8,0,0.284335990746816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,4,2,128,0,1,fp8,fp8,0,0.246453324953715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,4,4,128,0,1,float16,float16,0,0.16716265678405762
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,4,4,128,0,1,float16,fp8,0,0.1637493371963501
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,4,4,128,0,1,fp8,fp8,0,0.13155200084050497
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,4,1,128,0,1,float16,float16,0,0.15243200461069742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,4,1,128,0,1,float16,fp8,0,0.1514400045077006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,4,1,128,0,1,fp8,fp8,0,0.1202293336391449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,4,2,128,0,1,float16,float16,0,0.15641066431999207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,4,2,128,0,1,float16,fp8,0,0.15452266732851663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,4,2,128,0,1,fp8,fp8,0,0.12304000059763591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,4,4,128,0,1,float16,float16,0,0.08521599570910136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,4,4,128,0,1,float16,fp8,0,0.08339732885360718
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,4,4,128,0,1,fp8,fp8,0,0.07435200115044911
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,4,1,128,0,1,float16,float16,0,0.07482133309046428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,4,1,128,0,1,float16,fp8,0,0.075914666056633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,4,1,128,0,1,fp8,fp8,0,0.06551466882228851
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,4,2,128,0,1,float16,float16,0,0.07849066456158955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,4,2,128,0,1,float16,fp8,0,0.07894933223724365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,4,2,128,0,1,fp8,fp8,0,0.06774933139483134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,4,4,128,0,1,float16,float16,0,0.04664533336957296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,4,4,128,0,1,float16,fp8,0,0.04675200084845225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,4,4,128,0,1,fp8,fp8,0,0.042410666743914284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,4,1,128,0,1,float16,float16,0,0.04364799956480662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,4,1,128,0,1,float16,fp8,0,0.043247997760772705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,4,1,128,0,1,fp8,fp8,0,0.03793599953254064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,4,2,128,0,1,float16,float16,0,0.04374399781227112
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,4,2,128,0,1,float16,fp8,0,0.04367466767628988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,4,1,128,0,1,float16,fp8,0,0.03267733256022135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,4,2,128,0,1,fp8,fp8,0,0.0379573330283165
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,4,4,128,0,1,float16,float16,0,0.0337119996547699
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,4,4,128,0,1,float16,fp8,0,0.0335413341720899
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,4,4,128,0,1,fp8,fp8,0,0.029845332105954487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,4,1,128,0,1,float16,float16,0,0.0335413341720899
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,4,1,128,0,1,fp8,fp8,0,0.028437333802382152
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,4,2,128,0,1,float16,float16,0,0.033376000821590424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,4,2,128,0,1,float16,fp8,0,0.03293866664171219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,4,2,128,0,1,fp8,fp8,0,0.02807466685771942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,4,4,128,0,1,float16,float16,0,0.030805334448814392
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,4,4,128,0,1,float16,fp8,0,0.03065599997838338
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,4,4,128,0,1,fp8,fp8,0,0.026159999271233875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,4,1,128,0,1,float16,float16,0,0.029535998900731403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,4,1,128,0,1,float16,fp8,0,0.029824001093705494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,4,1,128,0,1,fp8,fp8,0,0.025941332181294758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,4,2,128,0,1,float16,float16,0,0.030666666726271313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,4,2,128,0,1,float16,fp8,0,0.02942933390537898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,4,2,128,0,1,fp8,fp8,0,0.025818665822347004
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,4,4,128,0,1,float16,float16,0,0.028304000695546467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,4,4,128,0,1,float16,fp8,0,0.02917333443959554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,4,4,128,0,1,fp8,fp8,0,0.024901332954565685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,4,1,128,0,1,float16,float16,0,0.028277332584063213
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,4,1,128,0,1,float16,fp8,0,0.028959999481836956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,4,1,128,0,1,fp8,fp8,0,0.02436800052722295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,4,2,128,0,1,float16,float16,0,0.029743999242782593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,4,2,128,0,1,float16,fp8,0,0.029317334294319153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,4,2,128,0,1,fp8,fp8,0,0.024432001014550526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,4,1,128,0,1,float16,float16,0,0.4318293333053589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,4,1,128,0,1,float16,fp8,0,0.4298773209253947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,4,1,128,0,1,fp8,fp8,0,0.37692264715830487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,4,2,128,0,1,float16,float16,0,0.4434826771418254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,4,2,128,0,1,fp8,fp8,0,0.396234671274821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,4,2,128,0,1,float16,fp8,0,0.4386133352915446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,4,4,128,0,1,float16,float16,0,0.2502773404121399
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,4,4,128,0,1,float16,fp8,0,0.24707732597986856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,4,4,128,0,1,fp8,fp8,0,0.2229386568069458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,4,1,128,0,1,float16,float16,0,0.22337599595387778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,4,1,128,0,1,float16,fp8,0,0.22309333086013794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,4,1,128,0,1,fp8,fp8,0,0.19637332359949747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,4,2,128,0,1,float16,float16,0,0.2300106684366862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,4,2,128,0,1,float16,fp8,0,0.22894400358200073
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,4,2,128,0,1,fp8,fp8,0,0.20441067218780518
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,4,4,128,0,1,float16,float16,0,0.13590400417645773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,4,4,128,0,1,float16,fp8,0,0.13330666224161783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,4,4,128,0,1,fp8,fp8,0,0.11095466216405232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,4,1,128,0,1,float16,float16,0,0.12069867054621379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,4,1,128,0,1,float16,fp8,0,0.1207413375377655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,4,1,128,0,1,fp8,fp8,0,0.10172266761461894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,4,2,128,0,1,float16,float16,0,0.1264533301194509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,4,2,128,0,1,float16,fp8,0,0.125109334786733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,4,2,128,0,1,fp8,fp8,0,0.10260799527168274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,4,1,128,0,1,fp8,fp8,0,0.05597866574923197
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,4,4,128,0,1,float16,float16,0,0.0728959987560908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,4,4,128,0,1,float16,fp8,0,0.07097066442171733
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,4,4,128,0,1,fp8,fp8,0,0.06520000100135803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,4,1,128,0,1,float16,float16,0,0.06289066871007283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,4,1,128,0,1,float16,fp8,0,0.062394668658574425
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,4,2,128,0,1,float16,float16,0,0.06623999774456024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,4,2,128,0,1,float16,fp8,0,0.06572799881299336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,4,2,128,0,1,fp8,fp8,0,0.05862399935722351
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,4,4,128,0,1,float16,float16,0,0.04040000090996424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,4,4,128,0,1,float16,fp8,0,0.03958933303753535
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,4,4,128,0,1,fp8,fp8,0,0.03805333375930786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,4,1,128,0,1,float16,float16,0,0.036730666955312095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,4,1,128,0,1,float16,fp8,0,0.03640533238649368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,4,4,128,0,1,float16,fp8,0,0.02752533306678136
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,4,1,128,0,1,fp8,fp8,0,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,4,2,128,0,1,float16,float16,0,0.03823466598987579
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,4,2,128,0,1,float16,fp8,0,0.037818667789300285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,4,2,128,0,1,fp8,fp8,0,0.033546666304270424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,4,2,128,0,1,float16,float16,0,0.026901334524154663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,4,2,128,0,1,float16,fp8,0,0.027045334378878277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,4,4,128,0,1,float16,float16,0,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,4,4,128,0,1,fp8,fp8,0,0.02491733431816101
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,4,1,128,0,1,float16,float16,0,0.026341333985328674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,4,1,128,0,1,float16,fp8,0,0.026320000489552815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,4,1,128,0,1,fp8,fp8,0,0.024069334069887798
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,4,2,128,0,1,fp8,fp8,0,0.024005333582560223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,4,4,128,0,1,float16,float16,0,0.024160000185171764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,4,4,128,0,1,float16,fp8,0,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,4,4,128,0,1,fp8,fp8,0,0.0216799999276797
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,4,1,128,0,1,float16,float16,0,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,4,1,128,0,1,float16,fp8,0,0.023647998770078022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,4,1,128,0,1,fp8,fp8,0,0.021525333325068157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,4,2,128,0,1,float16,float16,0,0.02385599911212921
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,4,1,128,0,1,float16,float16,0,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,4,2,128,0,1,float16,fp8,0,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,4,2,128,0,1,fp8,fp8,0,0.021850667893886566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,4,4,128,0,1,float16,float16,0,0.022645334402720135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,4,4,128,0,1,float16,fp8,0,0.022853332261244457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,4,4,128,0,1,fp8,fp8,0,0.02042666698495547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,4,1,128,0,1,float16,fp8,0,0.022437334060668945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,4,1,128,0,1,fp8,fp8,0,0.020442667106787365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,4,2,128,0,1,float16,float16,0,0.0229066660006841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,4,2,128,0,1,float16,fp8,0,0.02229333420594533
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,4,2,128,0,1,fp8,fp8,0,0.020725333442290623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,4,4,128,0,1,float16,float16,0,0.022154666483402252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,4,4,128,0,1,float16,fp8,0,0.02181333303451538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,4,4,128,0,1,fp8,fp8,0,0.019962667177120846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,4,1,128,0,1,float16,float16,0,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,4,1,128,0,1,float16,fp8,0,0.023034666975339253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,4,1,128,0,1,fp8,fp8,0,0.019621333728233974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,4,2,128,0,1,float16,float16,0,0.02048533285657565
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,4,2,128,0,1,float16,fp8,0,0.023200000325838726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,4,2,128,0,1,fp8,fp8,0,0.020047999918460846
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,4,1,128,0,1,float16,float16,0,0.19241599241892496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,4,1,128,0,1,float16,fp8,0,0.1898720065752665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,4,1,128,0,1,fp8,fp8,0,0.15891200304031372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,4,4,128,0,1,float16,fp8,0,0.12025599678357442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,4,2,128,0,1,float16,float16,0,0.20255466302235922
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,4,2,128,0,1,float16,fp8,0,0.19829867283503214
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,4,2,128,0,1,fp8,fp8,0,0.1685439944267273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,4,4,128,0,1,float16,float16,0,0.12504532933235168
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,4,4,128,0,1,fp8,fp8,0,0.10291733344395955
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,4,1,128,0,1,float16,float16,0,0.10558399558067322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,4,1,128,0,1,float16,fp8,0,0.10628267129262288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,4,1,128,0,1,fp8,fp8,0,0.09009599685668945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,4,2,128,0,1,float16,float16,0,0.11215466260910034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,4,2,128,0,1,float16,fp8,0,0.10924266775449117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,4,2,128,0,1,fp8,fp8,0,0.09444800019264221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,4,4,128,0,1,float16,float16,0,0.069541335105896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,4,4,128,0,1,float16,fp8,0,0.0677706648906072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,4,4,128,0,1,fp8,fp8,0,0.06015466650327047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,4,1,128,0,1,float16,float16,0,0.05579733351866404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,4,2,128,0,1,fp8,fp8,0,0.05481066803137461
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,4,1,128,0,1,float16,fp8,0,0.05588266750176748
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,4,1,128,0,1,fp8,fp8,0,0.050053333242734276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,4,2,128,0,1,float16,float16,0,0.061199997862180076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,4,2,128,0,1,float16,fp8,0,0.05845866600672404
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,4,4,128,0,1,float16,float16,0,0.03754133234421412
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,4,4,128,0,1,float16,fp8,0,0.03729599962631861
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,4,4,128,0,1,fp8,fp8,0,0.036101333796978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,4,1,128,0,1,float16,float16,0,0.034074666599432625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,4,1,128,0,1,float16,fp8,0,0.03377600014209747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,4,1,128,0,1,fp8,fp8,0,0.031184000273545582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,4,2,128,0,1,float16,float16,0,0.033786666889985405
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,4,2,128,0,1,float16,fp8,0,0.03417066733042399
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,4,2,128,0,1,fp8,fp8,0,0.032602667808532715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,4,4,128,0,1,float16,float16,0,0.024911999702453613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,4,4,128,0,1,float16,fp8,0,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,4,4,128,0,1,fp8,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,4,1,128,0,1,float16,float16,0,0.023685333629449207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,4,1,128,0,1,float16,fp8,0,0.022842665513356526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,4,1,128,0,1,fp8,fp8,0,0.021685334543387096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,4,2,128,0,1,float16,float16,0,0.02436800052722295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,4,2,128,0,1,float16,fp8,0,0.0239680012067159
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,4,2,128,0,1,fp8,fp8,0,0.021984001000722248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,4,4,128,0,1,float16,float16,0,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,4,4,128,0,1,float16,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,4,4,128,0,1,fp8,fp8,0,0.019546666493018467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,4,1,128,0,1,float16,float16,0,0.019440000255902607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,4,1,128,0,1,float16,fp8,0,0.01988799994190534
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,4,4,128,0,1,float16,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,4,1,128,0,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,4,2,128,0,1,float16,float16,0,0.02011200040578842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,4,2,128,0,1,float16,fp8,0,0.02013333390156428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,4,2,128,0,1,fp8,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,4,4,128,0,1,float16,float16,0,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,4,4,128,0,1,fp8,fp8,0,0.017717332889636356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,4,1,128,0,1,float16,float16,0,0.01877333347996076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,4,1,128,0,1,float16,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,4,1,128,0,1,fp8,fp8,0,0.017978666971127193
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,4,2,128,0,1,float16,float16,0,0.018645333747069042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,4,2,128,0,1,float16,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,4,2,128,0,1,fp8,fp8,0,0.017840000490347546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,4,4,128,0,1,float16,float16,0,0.017903999735911686
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,4,4,128,0,1,float16,fp8,0,0.01836266616980235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,4,4,128,0,1,fp8,fp8,0,0.017674667139848072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,4,4,128,0,1,float16,float16,0,0.017360000560681026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,4,1,128,0,1,float16,float16,0,0.018725333114465077
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,4,1,128,0,1,float16,fp8,0,0.018357332795858383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,4,1,128,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,4,2,128,0,1,float16,float16,0,0.018458666900793713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,4,2,128,0,1,float16,fp8,0,0.01800000046690305
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,4,2,128,0,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,4,4,128,0,1,float16,fp8,0,0.01802666609485944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,4,4,128,0,1,fp8,fp8,0,0.016496000190575916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,4,1,128,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,4,1,128,0,1,float16,fp8,0,0.017722666263580322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,4,1,128,0,1,fp8,fp8,0,0.01640533283352852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,4,2,128,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,4,2,128,0,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,4,2,128,0,1,fp8,fp8,0,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,4,1,128,0,1,float16,float16,0,0.10016533732414246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,4,1,128,0,1,float16,fp8,0,0.0974666674931844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,4,1,128,0,1,fp8,fp8,0,0.08864532907803853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,4,2,128,0,1,float16,float16,0,0.1085653305053711
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,4,2,128,0,1,float16,fp8,0,0.10291199882825215
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,4,2,128,0,1,fp8,fp8,0,0.0957493285338084
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,4,4,128,0,1,float16,float16,0,0.06548266609509786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,4,4,128,0,1,float16,fp8,0,0.06387199958165486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,4,4,128,0,1,fp8,fp8,0,0.059119999408721924
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,4,1,128,0,1,float16,float16,0,0.053317333261171974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,4,1,128,0,1,float16,fp8,0,0.05453866720199585
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,4,1,128,0,1,fp8,fp8,0,0.05101866523424784
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,4,2,128,0,1,float16,float16,0,0.057301332553227745
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,4,2,128,0,1,float16,fp8,0,0.056549335519472756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,4,2,128,0,1,fp8,fp8,0,0.05554133156935374
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,4,4,128,0,1,float16,float16,0,0.035946667194366455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,4,4,128,0,1,float16,fp8,0,0.035360001027584076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,4,4,128,0,1,fp8,fp8,0,0.03467733412981033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,4,1,128,0,1,float16,float16,0,0.032858667274316154
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,4,1,128,0,1,float16,fp8,0,0.03265066693226496
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,4,1,128,0,1,fp8,fp8,0,0.030069333811601002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,4,4,128,0,1,fp8,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,4,2,128,0,1,float16,float16,0,0.0340693344672521
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,4,2,128,0,1,float16,fp8,0,0.03364799916744232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,4,2,128,0,1,fp8,fp8,0,0.032101333141326904
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,4,4,128,0,1,float16,float16,0,0.023962666591008503
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,4,4,128,0,1,float16,fp8,0,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,4,4,128,0,1,float16,float16,0,0.01876266673207283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,4,1,128,0,1,float16,float16,0,0.022783999641736347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,4,1,128,0,1,float16,fp8,0,0.022800001005331676
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,4,1,128,0,1,fp8,fp8,0,0.022005334496498108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,4,2,128,0,1,float16,float16,0,0.023599999646345775
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,4,2,128,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,4,1,128,0,1,float16,fp8,0,0.01869333287080129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,4,2,128,0,1,fp8,fp8,0,0.022533332308133442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,4,4,128,0,1,float16,fp8,0,0.018640000373125076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,4,4,128,0,1,fp8,fp8,0,0.019637333850065868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,4,1,128,0,1,float16,float16,0,0.018239999810854595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,4,1,128,0,1,fp8,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,4,2,128,0,1,float16,float16,0,0.01869333287080129
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,4,2,128,0,1,float16,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,4,2,128,0,1,fp8,fp8,0,0.019434666881958645
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,4,4,128,0,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,4,4,128,0,1,float16,fp8,0,0.017898666361967724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,4,4,128,0,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,4,1,128,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,4,1,128,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,4,1,128,0,1,fp8,fp8,0,0.017562666287024815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,4,2,128,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,4,2,128,0,1,float16,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,4,2,128,0,1,fp8,fp8,0,0.01770666614174843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,4,4,128,0,1,float16,float16,0,0.016309333344300587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,4,4,128,0,1,float16,fp8,0,0.016762666404247284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,4,4,128,0,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,4,1,128,0,1,float16,float16,0,0.016501333564519882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,4,2,128,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,4,1,128,0,1,float16,fp8,0,0.0164533331990242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,4,1,128,0,1,fp8,fp8,0,0.016554666062196095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,4,2,128,0,1,float16,float16,0,0.016293333222468693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,4,2,128,0,1,float16,fp8,0,0.01621333385507266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,4,4,128,0,1,float16,float16,0,0.016362667083740234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,4,4,128,0,1,float16,fp8,0,0.016757333030303318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,4,4,128,0,1,fp8,fp8,0,0.016613333175579708
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,4,1,128,0,1,float16,float16,0,0.015962666521469753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,4,1,128,0,1,float16,fp8,0,0.01621866722901662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,4,1,128,0,1,fp8,fp8,0,0.016442666451136272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,4,2,128,0,1,float16,float16,0,0.016330666840076447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,4,2,128,0,1,float16,fp8,0,0.015743999431530636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,4,2,128,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,4,4,128,0,1,float16,float16,0,0.01609066625436147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,4,4,128,0,1,float16,fp8,0,0.01658133293191592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,4,4,128,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,4,1,128,0,1,float16,float16,0,0.01565333331624667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,4,1,128,0,1,float16,fp8,0,0.01613333324591319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,4,1,128,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,4,2,128,0,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,4,2,128,0,1,float16,fp8,0,0.016261332978804905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,4,2,128,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,4,1,128,0,1,float16,float16,0,0.05804799993832906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,4,1,128,0,1,float16,fp8,0,0.057562669118245445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,4,1,128,0,1,fp8,fp8,0,0.06618133187294006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,4,2,128,0,1,float16,float16,0,0.061386664708455406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,4,2,128,0,1,float16,fp8,0,0.06113600234190623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,4,2,128,0,1,fp8,fp8,0,0.06935999790827434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,4,4,128,0,1,float16,float16,0,0.037717332442601524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,4,4,128,0,1,float16,fp8,0,0.03729599962631861
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,4,4,128,0,1,fp8,fp8,0,0.04367466767628988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,4,1,128,0,1,float16,float16,0,0.03453866640726725
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,4,1,128,0,1,float16,fp8,0,0.03403199960788091
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,4,4,128,0,1,float16,fp8,0,0.024746666351954143
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,4,1,128,0,1,fp8,fp8,0,0.037871999045213066
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,4,2,128,0,1,float16,float16,0,0.035877334574858345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,4,2,128,0,1,float16,fp8,0,0.03482133398453394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,4,2,128,0,1,fp8,fp8,0,0.03998400022586187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,4,4,128,0,1,float16,float16,0,0.024613333245118458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,4,4,128,0,1,fp8,fp8,0,0.027082666754722595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,4,1,128,0,1,float16,float16,0,0.02369066576162974
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,4,1,128,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,4,1,128,0,1,fp8,fp8,0,0.02476799984773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,4,2,128,0,1,float16,float16,0,0.02384000023206075
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,4,2,128,0,1,float16,fp8,0,0.023717333873112995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,4,2,128,0,1,fp8,fp8,0,0.025754667818546295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,4,4,128,0,1,float16,float16,0,0.0184906671444575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,4,4,128,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,4,4,128,0,1,fp8,fp8,0,0.019674666225910187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,4,1,128,0,1,float16,float16,0,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,4,1,128,0,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,4,1,128,0,1,fp8,fp8,0,0.018458666900793713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,4,2,128,0,1,float16,float16,0,0.017909333109855652
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,4,2,128,0,1,float16,fp8,0,0.01826133330663045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,4,2,128,0,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,4,4,128,0,1,float16,float16,0,0.01624533285697301
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,4,4,128,0,1,float16,fp8,0,0.016309333344300587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,4,4,128,0,1,fp8,fp8,0,0.017551999539136887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,4,1,128,0,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,4,1,128,0,1,float16,fp8,0,0.016176000237464905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,4,1,128,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,4,2,128,0,1,float16,float16,0,0.016415999581416447
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,4,2,128,0,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,4,2,128,0,1,fp8,fp8,0,0.017642666896184284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,4,4,128,0,1,float16,float16,0,0.015370666980743408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,4,4,128,0,1,float16,fp8,0,0.015562667200962702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,4,4,128,0,1,fp8,fp8,0,0.01635733370979627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,4,1,128,0,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,4,1,128,0,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,4,1,128,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,4,2,128,0,1,float16,float16,0,0.015386667102575302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,4,2,128,0,1,float16,fp8,0,0.01587733378012975
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,4,2,128,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,4,4,128,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,4,4,128,0,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,4,4,128,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,4,1,128,0,1,float16,float16,0,0.014432000617186228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,4,1,128,0,1,float16,fp8,0,0.015530666957298914
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,4,1,128,0,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,4,2,128,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,4,2,128,0,1,float16,fp8,0,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,4,2,128,0,1,fp8,fp8,0,0.016410666207472484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,4,4,128,0,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,4,4,128,0,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,4,4,128,0,1,fp8,fp8,0,0.016122666498025257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,4,1,128,0,1,float16,float16,0,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,4,1,128,0,1,float16,fp8,0,0.015493333339691162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,4,1,128,0,1,fp8,fp8,0,0.016490666816631954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,4,2,128,0,1,float16,float16,0,0.014426667243242264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,4,2,128,0,1,float16,fp8,0,0.015482666591803232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,4,2,128,0,1,fp8,fp8,0,0.016016000260909397
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,4,4,128,0,1,float16,float16,0,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,4,4,128,0,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,4,4,128,0,1,fp8,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,4,1,128,0,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,4,1,128,0,1,float16,fp8,0,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,4,1,128,0,1,fp8,fp8,0,0.016496000190575916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,4,2,128,0,1,float16,float16,0,0.014218666901191076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,4,2,128,0,1,float16,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,4,2,128,0,1,fp8,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,4,2,128,0,1,float16,fp8,0,0.04178133110205332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,4,1,128,0,1,float16,float16,0,0.04206933577855428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,4,1,128,0,1,float16,fp8,0,0.04162666698296865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,4,1,128,0,1,fp8,fp8,0,0.05222400029500326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,4,2,128,0,1,float16,float16,0,0.04267199834187826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,4,1,128,0,1,float16,fp8,0,0.026719999810059864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,4,2,128,0,1,fp8,fp8,0,0.0547626664241155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,4,4,128,0,1,float16,float16,0,0.02829333394765854
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,4,4,128,0,1,fp8,fp8,0,0.03380800038576126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,4,4,128,0,1,float16,fp8,0,0.028602667152881622
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,4,1,128,0,1,float16,float16,0,0.027232001225153606
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,4,1,128,0,1,fp8,fp8,0,0.03259200106064478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,4,2,128,0,1,float16,float16,0,0.027349332968393963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,4,2,128,0,1,float16,fp8,0,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,4,2,128,0,1,fp8,fp8,0,0.033770665526390076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,4,4,128,0,1,float16,float16,0,0.019968000551064808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,4,4,128,0,1,float16,fp8,0,0.01956266661485036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,4,4,128,0,1,fp8,fp8,0,0.02372266600529353
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,4,1,128,0,1,float16,float16,0,0.019776000330845516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,4,1,128,0,1,float16,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,4,1,128,0,1,fp8,fp8,0,0.022554665803909302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,4,2,128,0,1,float16,float16,0,0.01970133309563001
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,4,2,128,0,1,float16,fp8,0,0.020010666300853092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,4,2,128,0,1,fp8,fp8,0,0.023002666731675465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,4,4,128,0,1,float16,float16,0,0.015962666521469753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,4,4,128,0,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,4,4,128,0,1,fp8,fp8,0,0.018506667266289394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,4,1,128,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,4,1,128,0,1,float16,fp8,0,0.01589866727590561
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,4,1,128,0,1,fp8,fp8,0,0.017866666118303936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,4,2,128,0,1,float16,float16,0,0.01595199977358182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,4,2,128,0,1,float16,fp8,0,0.016117333124081295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,4,2,128,0,1,fp8,fp8,0,0.017743999759356182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,4,4,128,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,4,4,128,0,1,float16,fp8,0,0.01579733317097028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,4,4,128,0,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,4,1,128,0,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,4,1,128,0,1,float16,fp8,0,0.015413332730531693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,4,1,128,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,4,2,128,0,1,float16,float16,0,0.01463466634353002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,4,2,128,0,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,4,2,128,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,4,4,128,0,1,float16,float16,0,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,4,4,128,0,1,float16,fp8,0,0.014746667196353277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,4,4,128,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,4,1,128,0,1,float16,float16,0,0.014576000471909841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,4,1,128,0,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,4,1,128,0,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,4,2,128,0,1,float16,float16,0,0.014256000518798828
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,4,2,128,0,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,4,2,128,0,1,fp8,fp8,0,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,4,4,128,0,1,float16,float16,0,0.014069333672523499
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,4,4,128,0,1,float16,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,4,4,128,0,1,fp8,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,4,1,128,0,1,float16,float16,0,0.014453332871198654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,4,1,128,0,1,float16,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,4,1,128,0,1,fp8,fp8,0,0.01605333387851715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,4,2,128,0,1,float16,float16,0,0.01434133326013883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,4,2,128,0,1,float16,fp8,0,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,4,2,128,0,1,fp8,fp8,0,0.016442666451136272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,4,4,128,0,1,float16,float16,0,0.014282666146755219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,4,4,128,0,1,float16,fp8,0,0.014544000228246054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,4,4,128,0,1,fp8,fp8,0,0.01653333380818367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,4,1,128,0,1,float16,float16,0,0.0145066666106383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,4,1,128,0,1,float16,fp8,0,0.013914667069911957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,4,1,128,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,4,2,128,0,1,float16,float16,0,0.014378666877746582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,4,2,128,0,1,float16,fp8,0,0.013973332941532135
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,4,1,128,0,1,fp8,fp8,0,0.01653333380818367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,4,2,128,0,1,fp8,fp8,0,0.016501333564519882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,4,4,128,0,1,float16,float16,0,0.014309333016475042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,4,4,128,0,1,float16,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,4,4,128,0,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,4,1,128,0,1,float16,float16,0,0.013584000368913015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,4,1,128,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,4,2,128,0,1,float16,float16,0,0.01360000049074491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,4,2,128,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,4,2,128,0,1,fp8,fp8,0,0.017477333545684814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,4,1,128,0,1,float16,float16,0,0.03734933336575826
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,4,1,128,0,1,float16,fp8,0,0.03719466676314672
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,4,1,128,0,1,fp8,fp8,0,0.047354668378829956
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,4,2,128,0,1,float16,float16,0,0.03745066622893015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,4,4,128,0,1,float16,fp8,0,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,4,2,128,0,1,float16,fp8,0,0.03664000084002813
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,4,2,128,0,1,fp8,fp8,0,0.0488319993019104
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,4,4,128,0,1,float16,float16,0,0.02497600018978119
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,4,4,128,0,1,fp8,fp8,0,0.0315733328461647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,4,1,128,0,1,float16,float16,0,0.024143998821576435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,4,1,128,0,1,float16,fp8,0,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,4,1,128,0,1,fp8,fp8,0,0.030458666384220123
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,4,2,128,0,1,float16,float16,0,0.02404266595840454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,4,2,128,0,1,float16,fp8,0,0.026922665536403656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,4,2,128,0,1,fp8,fp8,0,0.03088533381621043
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,4,4,128,0,1,float16,float16,0,0.0186666672428449
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,4,4,128,0,1,float16,fp8,0,0.018613333503405254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,4,4,128,0,1,fp8,fp8,0,0.022997332115968067
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,4,1,128,0,1,float16,float16,0,0.01815466706951459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,4,1,128,0,1,float16,fp8,0,0.018277333428462345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,4,1,128,0,1,fp8,fp8,0,0.022287999590237934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,4,4,128,0,1,float16,fp8,0,0.015664000064134598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,4,2,128,0,1,float16,float16,0,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,4,2,128,0,1,float16,fp8,0,0.018746666610240936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,4,1,128,0,1,float16,fp8,0,0.015552000453074774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,4,2,128,0,1,fp8,fp8,0,0.021712000171343487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,4,2,128,0,1,float16,float16,0,0.015685333559910457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,4,4,128,0,1,float16,float16,0,0.015935999651749928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,4,4,128,0,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,4,1,128,0,1,float16,float16,0,0.015487999965747198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,4,1,128,0,1,fp8,fp8,0,0.017690667261679966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,4,2,128,0,1,float16,fp8,0,0.015978666643301647
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,4,2,128,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,4,4,128,0,1,float16,float16,0,0.01441066712141037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,4,4,128,0,1,float16,fp8,0,0.015397333850463232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,4,4,128,0,1,fp8,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,4,1,128,0,1,float16,float16,0,0.014474666366974512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,4,1,128,0,1,float16,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,4,1,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,4,2,128,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,4,2,128,0,1,float16,fp8,0,0.014730667074521383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,4,2,128,0,1,fp8,fp8,0,0.016634666671355564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,4,4,128,0,1,float16,float16,0,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,4,4,128,0,1,float16,fp8,0,0.015439999600251516
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,4,4,128,0,1,fp8,fp8,0,0.01659199967980385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,4,1,128,0,1,float16,float16,0,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,4,1,128,0,1,float16,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,4,1,128,0,1,fp8,fp8,0,0.01674666628241539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,4,2,128,0,1,float16,float16,0,0.014192000031471252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,4,1,128,0,1,float16,fp8,0,0.014783999572197596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,4,2,128,0,1,float16,fp8,0,0.014565333724021912
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,4,2,128,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,4,4,128,0,1,float16,float16,0,0.014378666877746582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,4,4,128,0,1,float16,fp8,0,0.013936000565687815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,4,4,128,0,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,4,4,128,0,1,float16,float16,0,0.014458666245142618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,4,1,128,0,1,float16,float16,0,0.013989333063364029
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,4,1,128,0,1,fp8,fp8,0,0.01629866659641266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,4,2,128,0,1,float16,float16,0,0.013999999811251959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,4,2,128,0,1,float16,fp8,0,0.014442666123310724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,4,2,128,0,1,fp8,fp8,0,0.016634666671355564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,4,4,128,0,1,float16,fp8,0,0.014021333307027817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,4,4,128,0,1,fp8,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,4,1,128,0,1,float16,float16,0,0.01431999976436297
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,4,1,128,0,1,float16,fp8,0,0.014458666245142618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,4,1,128,0,1,fp8,fp8,0,0.016255999604860943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,4,2,128,0,1,float16,float16,0,0.014080000420411428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,4,2,128,0,1,float16,fp8,0,0.01458666721979777
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,4,2,128,0,1,fp8,fp8,0,0.01762666677435239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,4,4,128,0,1,float16,float16,0,0.013999999811251959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,4,4,128,0,1,float16,fp8,0,0.014485333114862442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,4,4,128,0,1,fp8,fp8,0,0.016677333662907284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,4,1,128,0,1,float16,float16,0,0.01451733335852623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,4,1,128,0,1,float16,fp8,0,0.013834666460752487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,4,1,128,0,1,fp8,fp8,0,0.016303999970356624
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,4,2,128,0,1,float16,float16,0,0.014266667266686758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,4,2,128,0,1,float16,fp8,0,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,4,2,128,0,1,fp8,fp8,0,0.016255999604860943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,2,2,128,0,1,float16,float16,0,1.08460267384847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,2,1,128,0,1,float16,float16,0,2.0897653897603354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,2,1,128,0,1,fp8,fp8,0,1.3969813982645671
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,2,2,128,0,1,float16,fp8,0,1.0887466271718342
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16384,2,1,128,0,1,float16,fp8,0,1.9790080388387044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,2,2,128,0,1,fp8,fp8,0,0.6806560357411703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,2,1,128,0,1,float16,float16,0,1.0840426286061604
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,2,1,128,0,1,float16,fp8,0,1.0790239969889324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,2,1,128,0,1,fp8,fp8,0,0.6701813538869222
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,2,2,128,0,1,float16,float16,0,0.5018826723098755
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,2,2,128,0,1,float16,fp8,0,0.4931466579437256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,2,2,128,0,1,fp8,fp8,0,0.35226134459177655
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,2,1,128,0,1,float16,float16,0,0.48874131838480633
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,2,1,128,0,1,float16,fp8,0,0.4870826800664266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,2,1,128,0,1,fp8,fp8,0,0.35172800223032635
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,2,2,128,0,1,float16,float16,0,0.25971200068791706
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,2,2,128,0,1,float16,fp8,0,0.2592639923095703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,2,2,128,0,1,fp8,fp8,0,0.19267199436823526
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,2,1,128,0,1,float16,float16,0,0.2577706575393677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,2,1,128,0,1,float16,fp8,0,0.2567946712176005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,2,1,128,0,1,fp8,fp8,0,0.18785599867502847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,2,1,128,0,1,float16,float16,0,1.2015893459320068
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,2,1,128,0,1,fp8,fp8,0,0.7836639881134033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,12288,2,1,128,0,1,float16,fp8,0,1.2029600143432617
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,2,2,128,0,1,float16,float16,0,0.5682026545206705
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,2,2,128,0,1,float16,fp8,0,0.5699306726455688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,2,2,128,0,1,fp8,fp8,0,0.40835734208424884
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,2,1,128,0,1,float16,float16,0,0.5609546502431234
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,2,1,128,0,1,float16,fp8,0,0.558079997698466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,2,1,128,0,1,fp8,fp8,0,0.4052799940109253
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,2,2,128,0,1,float16,float16,0,0.3168320059776306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,2,2,128,0,1,float16,fp8,0,0.3216533263524373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,2,2,128,0,1,fp8,fp8,0,0.2347360054651896
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,2,1,128,0,1,float16,float16,0,0.31143466631571454
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,2,1,128,0,1,float16,fp8,0,0.3170986572901408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,2,1,128,0,1,fp8,fp8,0,0.2281279961268107
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,2,2,128,0,1,float16,float16,0,0.19919466972351074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,2,2,128,0,1,float16,fp8,0,0.2002453406651815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,2,2,128,0,1,fp8,fp8,0,0.14604266484578451
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,2,1,128,0,1,float16,float16,0,0.198527991771698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,2,1,128,0,1,float16,fp8,0,0.19865065813064575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,2,2,128,0,1,float16,float16,0,0.411626656850179
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,2,1,128,0,1,fp8,fp8,0,0.14460800091425577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,2,1,128,0,1,float16,float16,0,0.8692106405893961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,2,1,128,0,1,float16,fp8,0,0.8711840311686198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,10240,2,1,128,0,1,fp8,fp8,0,0.5653546651204427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,2,2,128,0,1,fp8,fp8,0,0.3056853413581848
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,2,2,128,0,1,float16,fp8,0,0.4174453417460124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,2,1,128,0,1,float16,float16,0,0.40646934509277344
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,2,1,128,0,1,float16,fp8,0,0.40745067596435547
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,2,1,128,0,1,fp8,fp8,0,0.2985386649767558
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,2,2,128,0,1,float16,float16,0,0.22988800207773843
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,2,2,128,0,1,float16,fp8,0,0.23197333017985025
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,2,2,128,0,1,fp8,fp8,0,0.17359999815622965
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,2,2,128,0,1,float16,fp8,0,0.16826132933298746
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,2,1,128,0,1,float16,float16,0,0.22600533564885458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,2,1,128,0,1,float16,fp8,0,0.22878400484720865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,2,1,128,0,1,fp8,fp8,0,0.16791999340057373
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,2,2,128,0,1,float16,float16,0,0.16703999042510986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,2,2,128,0,1,fp8,fp8,0,0.1237440009911855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,2,1,128,0,1,float16,float16,0,0.16581333676973978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,2,1,128,0,1,float16,fp8,0,0.16817599534988403
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,2,1,128,0,1,fp8,fp8,0,0.12166399757067363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,2,1,128,0,1,float16,float16,0,1.0984906355539958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,2,1,128,0,1,fp8,fp8,0,0.7873439788818359
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,8192,2,1,128,0,1,float16,fp8,0,1.0978826681772869
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,2,2,128,0,1,float16,float16,0,0.5984959999720255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,2,2,128,0,1,fp8,fp8,0,0.40184001127878827
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,2,2,128,0,1,float16,fp8,0,0.6066720088322958
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,2,1,128,0,1,float16,float16,0,0.5967359940210978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,2,1,128,0,1,float16,fp8,0,0.5887413422266642
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,2,1,128,0,1,fp8,fp8,0,0.38891732692718506
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,2,2,128,0,1,float16,float16,0,0.27453333139419556
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,2,2,128,0,1,float16,fp8,0,0.2800053358078003
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,2,2,128,0,1,fp8,fp8,0,0.2095253268877665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,2,1,128,0,1,float16,float16,0,0.26898133754730225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,2,1,128,0,1,float16,fp8,0,0.2749279936154683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,2,1,128,0,1,fp8,fp8,0,0.20542399088541666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,2,2,128,0,1,float16,float16,0,0.14477333426475525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,2,2,128,0,1,float16,fp8,0,0.1479093333085378
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,2,2,128,0,1,fp8,fp8,0,0.11288000146547954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,2,1,128,0,1,float16,float16,0,0.14458666245142618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,2,1,128,0,1,float16,fp8,0,0.14532267053922018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,2,1,128,0,1,fp8,fp8,0,0.10854933659235637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,2,2,128,0,1,float16,float16,0,0.13576533397038779
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,2,2,128,0,1,float16,fp8,0,0.13709333539009094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,2,2,128,0,1,fp8,fp8,0,0.10033067067464192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,2,1,128,0,1,float16,float16,0,0.1351626714070638
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,2,1,128,0,1,float16,fp8,0,0.13564800222714743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,2,1,128,0,1,fp8,fp8,0,0.10001066327095032
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,2,1,128,0,1,float16,float16,0,0.6671199798583984
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,2,1,128,0,1,float16,fp8,0,0.6652746597925822
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,6144,2,1,128,0,1,fp8,fp8,0,0.4636960029602051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,2,2,128,0,1,float16,float16,0,0.3250719904899597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,2,1,128,0,1,fp8,fp8,0,0.24466667572657266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,2,2,128,0,1,float16,fp8,0,0.32420265674591064
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,2,2,128,0,1,fp8,fp8,0,0.2512960036595662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,2,1,128,0,1,float16,float16,0,0.31493866443634033
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,2,1,128,0,1,float16,fp8,0,0.320634663105011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,2,2,128,0,1,float16,float16,0,0.17917867501576742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,2,2,128,0,1,float16,fp8,0,0.18293333053588867
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,2,2,128,0,1,fp8,fp8,0,0.1418826679388682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,2,2,128,0,1,fp8,fp8,0,0.08662399649620056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,2,1,128,0,1,float16,float16,0,0.17602133750915527
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,2,1,128,0,1,float16,fp8,0,0.17819199959437051
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,2,1,128,0,1,fp8,fp8,0,0.1365066667397817
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,2,2,128,0,1,float16,float16,0,0.10883200168609619
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,2,2,128,0,1,float16,fp8,0,0.11157866319020589
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,2,1,128,0,1,float16,float16,0,0.10913599530855815
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,2,1,128,0,1,float16,fp8,0,0.11097066601117452
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,2,1,128,0,1,fp8,fp8,0,0.08231999973456065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,2,2,128,0,1,float16,float16,0,0.10512000322341919
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,2,2,128,0,1,float16,fp8,0,0.10501333077748616
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,2,2,128,0,1,fp8,fp8,0,0.07813866436481476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,2,1,128,0,1,float16,float16,0,0.10425600409507751
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,2,1,128,0,1,float16,fp8,0,0.10579199592272441
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,2,1,128,0,1,fp8,fp8,0,0.07787733276685078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,2,2,128,0,1,float16,float16,0,0.35319467385609943
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,2,1,128,0,1,float16,float16,0,0.6524213155110677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,2,1,128,0,1,float16,fp8,0,0.6469013293584188
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,4096,2,1,128,0,1,fp8,fp8,0,0.490389347076416
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,2,2,128,0,1,float16,fp8,0,0.35655999183654785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,2,2,128,0,1,fp8,fp8,0,0.25701866547266644
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,2,1,128,0,1,float16,float16,0,0.35019731521606445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,2,1,128,0,1,float16,fp8,0,0.3465760151545207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,2,1,128,0,1,fp8,fp8,0,0.24481600522994995
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,2,2,128,0,1,float16,float16,0,0.1669173240661621
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,2,2,128,0,1,float16,fp8,0,0.17029867569605509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,2,2,128,0,1,fp8,fp8,0,0.13660800457000732
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,2,1,128,0,1,float16,float16,0,0.16214932998021445
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,2,1,128,0,1,float16,fp8,0,0.16395733753840128
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,2,1,128,0,1,fp8,fp8,0,0.13159466783205667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,2,2,128,0,1,float16,float16,0,0.09097599983215332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,2,2,128,0,1,float16,fp8,0,0.09108266234397888
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,2,2,128,0,1,fp8,fp8,0,0.0774186650911967
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,2,1,128,0,1,float16,float16,0,0.08589866757392883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,2,1,128,0,1,float16,fp8,0,0.08746666709582011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,2,1,128,0,1,fp8,fp8,0,0.07190933326880138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,2,2,128,0,1,float16,float16,0,0.07685333490371704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,2,2,128,0,1,float16,fp8,0,0.07865066826343536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,2,2,128,0,1,fp8,fp8,0,0.06100266675154368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,2,1,128,0,1,float16,float16,0,0.07690133154392242
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,2,2,128,0,1,fp8,fp8,0,0.05741333464781443
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,2,1,128,0,1,float16,fp8,0,0.07678933441638947
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,2,1,128,0,1,fp8,fp8,0,0.05914133290449778
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,2,2,128,0,1,float16,float16,0,0.07461333274841309
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,2,2,128,0,1,float16,fp8,0,0.07448000212510426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,2,1,128,0,1,float16,float16,0,0.07364800075689952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,2,1,128,0,1,float16,fp8,0,0.07443733513355255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,2,1,128,0,1,float16,fp8,0,0.4054453372955322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,2,1,128,0,1,fp8,fp8,0,0.056789333621660866
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,2,1,128,0,1,float16,float16,0,0.403989315032959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,2,2,128,0,1,float16,float16,0,0.20466132958730063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,3072,2,1,128,0,1,fp8,fp8,0,0.30940266450246173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,2,2,128,0,1,float16,fp8,0,0.20653865734736124
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,2,2,128,0,1,fp8,fp8,0,0.1715466578801473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,2,1,128,0,1,float16,float16,0,0.19671465953191122
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,2,1,128,0,1,float16,fp8,0,0.20055466890335083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,2,2,128,0,1,float16,float16,0,0.11176000038782756
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,2,1,128,0,1,fp8,fp8,0,0.16134933630625406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,2,2,128,0,1,float16,fp8,0,0.11447466413180034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,2,2,128,0,1,fp8,fp8,0,0.09631466865539551
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,2,1,128,0,1,float16,float16,0,0.1079253355662028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,2,1,128,0,1,float16,fp8,0,0.1093386709690094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,2,1,128,0,1,fp8,fp8,0,0.08963732918103536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,2,2,128,0,1,float16,float16,0,0.06655466556549072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,2,2,128,0,1,float16,fp8,0,0.06729599833488464
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,2,2,128,0,1,fp8,fp8,0,0.0569706658522288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,2,1,128,0,1,float16,float16,0,0.06519466638565063
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,2,1,128,0,1,float16,fp8,0,0.06595199803511302
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,2,1,128,0,1,fp8,fp8,0,0.05299733579158783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,2,1,128,0,1,fp8,fp8,0,0.048154667019844055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,2,2,128,0,1,float16,float16,0,0.059338668982187905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,2,2,128,0,1,float16,float16,0,0.062224000692367554
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,2,2,128,0,1,float16,fp8,0,0.061887999375661217
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,2,2,128,0,1,fp8,fp8,0,0.049360002080599465
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,2,1,128,0,1,float16,float16,0,0.06063466767470042
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,2,1,128,0,1,float16,fp8,0,0.06162666777769724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,2,2,128,0,1,float16,fp8,0,0.058543999989827476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,2,2,128,0,1,fp8,fp8,0,0.046623999873797096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,2,1,128,0,1,float16,float16,0,0.05912533402442932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,2,1,128,0,1,float16,fp8,0,0.05957333246866862
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,2,1,128,0,1,fp8,fp8,0,0.04552533229192098
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,2,1,128,0,1,float16,float16,0,0.42335466543833417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,2,1,128,0,1,float16,fp8,0,0.4190666675567627
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,2,2,128,0,1,float16,float16,0,0.23191465934117636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,2,1,128,0,1,fp8,fp8,0,0.173418660958608
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,2048,2,1,128,0,1,fp8,fp8,0,0.34277331829071045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,2,2,128,0,1,float16,fp8,0,0.23257599274317423
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,2,2,128,0,1,fp8,fp8,0,0.18689066171646118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,2,1,128,0,1,float16,float16,0,0.22672533988952637
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,2,1,128,0,1,float16,fp8,0,0.224506676197052
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,2,1,128,0,1,fp8,fp8,0,0.09591466188430786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,2,2,128,0,1,float16,float16,0,0.11456533273061116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,2,2,128,0,1,float16,float16,0,0.06334400177001953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,2,2,128,0,1,float16,fp8,0,0.06444266438484192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,2,2,128,0,1,float16,fp8,0,0.11563199758529663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,2,2,128,0,1,fp8,fp8,0,0.10174399614334106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,2,1,128,0,1,float16,float16,0,0.10936533411343892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,2,1,128,0,1,float16,fp8,0,0.11045866211255391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,2,2,128,0,1,fp8,fp8,0,0.05949866771697998
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,2,1,128,0,1,float16,float16,0,0.05993066728115082
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,2,1,128,0,1,float16,fp8,0,0.061280002196629844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,2,1,128,0,1,fp8,fp8,0,0.05351999898751577
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,2,1,128,0,1,fp8,fp8,0,0.04031999905904134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,2,2,128,0,1,float16,float16,0,0.049039999643961586
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,2,2,128,0,1,float16,fp8,0,0.050623998045921326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,2,2,128,0,1,fp8,fp8,0,0.04162133236726125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,2,1,128,0,1,float16,float16,0,0.04916266600290934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,2,1,128,0,1,float16,fp8,0,0.050000001986821495
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,2,2,128,0,1,float16,float16,0,0.04706666866938273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,2,2,128,0,1,float16,fp8,0,0.04641599953174591
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,2,2,128,0,1,fp8,fp8,0,0.03734400123357773
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,2,1,128,0,1,float16,float16,0,0.04574400186538696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,2,1,128,0,1,float16,fp8,0,0.0466186652580897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,2,1,128,0,1,fp8,fp8,0,0.03698666642109553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,2,2,128,0,1,float16,float16,0,0.043951998154322304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,2,2,128,0,1,float16,fp8,0,0.044639999667803444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,2,2,128,0,1,fp8,fp8,0,0.036362667878468834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,2,1,128,0,1,float16,float16,0,0.04463466505209605
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,2,1,128,0,1,float16,fp8,0,0.0444160004456838
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,2,1,128,0,1,fp8,fp8,0,0.034847999612490334
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,2,1,128,0,1,float16,float16,0,0.2701546748479207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,2,1,128,0,1,float16,fp8,0,0.27132266759872437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1536,2,1,128,0,1,fp8,fp8,0,0.22466667493184408
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,2,2,128,0,1,float16,float16,0,0.14578133821487427
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,2,2,128,0,1,float16,fp8,0,0.14814933141072592
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,2,2,128,0,1,fp8,fp8,0,0.13033599654833475
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,2,1,128,0,1,float16,float16,0,0.13372266292572021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,2,1,128,0,1,float16,fp8,0,0.1378613313039144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,2,1,128,0,1,fp8,fp8,0,0.12146666646003723
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,2,2,128,0,1,float16,float16,0,0.07894399762153625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,2,2,128,0,1,float16,fp8,0,0.08077333370844524
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,2,2,128,0,1,fp8,fp8,0,0.07306133210659027
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,2,1,128,0,1,float16,float16,0,0.07453866799672444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,2,1,128,0,1,float16,fp8,0,0.07623466849327087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,2,1,128,0,1,fp8,fp8,0,0.06875733534495036
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,2,2,128,0,1,float16,float16,0,0.04560533165931702
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,2,2,128,0,1,float16,fp8,0,0.0469813346862793
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,2,2,128,0,1,fp8,fp8,0,0.042117332418759666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,2,1,128,0,1,float16,float16,0,0.04410133262475332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,2,1,128,0,1,float16,fp8,0,0.044826666514078774
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,2,1,128,0,1,fp8,fp8,0,0.039647998909155525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,2,2,128,0,1,float16,float16,0,0.039962666730086006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,2,2,128,0,1,float16,fp8,0,0.04073066761096319
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,2,2,128,0,1,fp8,fp8,0,0.0349386657277743
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,2,1,128,0,1,float16,float16,0,0.039674667020638786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,2,1,128,0,1,float16,fp8,0,0.039520000418027244
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,2,1,128,0,1,fp8,fp8,0,0.0329066663980484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,2,2,128,0,1,float16,float16,0,0.037018666664759316
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,2,2,128,0,1,float16,fp8,0,0.037946666280428566
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,2,2,128,0,1,fp8,fp8,0,0.031034665803114574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,2,1,128,0,1,float16,float16,0,0.037130666275819145
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,2,1,128,0,1,float16,fp8,0,0.037690666814645134
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,2,1,128,0,1,fp8,fp8,0,0.0310506671667099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,2,2,128,0,1,float16,fp8,0,0.03585066646337509
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,2,2,128,0,1,float16,float16,0,0.03646933287382126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,2,1,128,0,1,float16,fp8,0,0.28299200534820557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,2,2,128,0,1,fp8,fp8,0,0.029631999631722767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,2,1,128,0,1,float16,float16,0,0.03676799933115641
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,2,1,128,0,1,float16,fp8,0,0.036101333796978
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,2,1,128,0,1,fp8,fp8,0,0.030271999537944794
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,2,1,128,0,1,float16,float16,0,0.2866719961166382
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,1024,2,1,128,0,1,fp8,fp8,0,0.24355200926462808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,2,2,128,0,1,float16,float16,0,0.1639253298441569
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,2,2,128,0,1,float16,fp8,0,0.16193600495656332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,2,2,128,0,1,fp8,fp8,0,0.12969066699345908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,2,1,128,0,1,float16,float16,0,0.1553386648495992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,2,1,128,0,1,float16,fp8,0,0.1544426679611206
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,2,1,128,0,1,fp8,fp8,0,0.12108799815177917
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,2,2,128,0,1,float16,float16,0,0.0864586631457011
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,2,2,128,0,1,float16,fp8,0,0.08458667000134786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,2,2,128,0,1,fp8,fp8,0,0.07425066828727722
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,2,1,128,0,1,float16,float16,0,0.07884266475836436
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,2,1,128,0,1,float16,fp8,0,0.07857066889603932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,2,1,128,0,1,fp8,fp8,0,0.06803733110427856
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,2,2,128,0,1,float16,float16,0,0.047242666284243263
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,2,2,128,0,1,float16,fp8,0,0.046896000703175865
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,2,2,128,0,1,float16,fp8,0,0.03381866713364919
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,2,2,128,0,1,fp8,fp8,0,0.04279466470082601
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,2,1,128,0,1,float16,float16,0,0.044165333112080894
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,2,1,128,0,1,float16,fp8,0,0.04436799883842468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,2,1,128,0,1,fp8,fp8,0,0.03882666677236557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,2,2,128,0,1,float16,float16,0,0.03339733431736628
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,2,2,128,0,1,fp8,fp8,0,0.02957333376010259
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,2,1,128,0,1,float16,float16,0,0.03309866786003113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,2,1,128,0,1,float16,fp8,0,0.03359466542800268
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,2,1,128,0,1,fp8,fp8,0,0.029232000311215717
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,2,2,128,0,1,float16,float16,0,0.029968000948429108
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,2,2,128,0,1,float16,fp8,0,0.03070933371782303
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,2,2,128,0,1,fp8,fp8,0,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,2,1,128,0,1,float16,float16,0,0.03010133405526479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,2,1,128,0,1,float16,fp8,0,0.030250666042168934
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,2,1,128,0,1,fp8,fp8,0,0.02622933437426885
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,2,2,128,0,1,float16,float16,0,0.02956266701221466
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,2,2,128,0,1,float16,fp8,0,0.028954667349656422
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,2,2,128,0,1,fp8,fp8,0,0.024085332949956257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,2,1,128,0,1,float16,float16,0,0.029631999631722767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,2,1,128,0,1,float16,fp8,0,0.029669334491093952
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,2,1,128,0,1,fp8,fp8,0,0.024271999796231587
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,2,2,128,0,1,float16,float16,0,0.028229333460330963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,2,2,128,0,1,float16,fp8,0,0.029487999776999157
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,2,2,128,0,1,fp8,fp8,0,0.024501333634058636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,2,1,128,0,1,float16,float16,0,0.22819199164708456
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,2,1,128,0,1,float16,float16,0,0.02808533360560735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,2,1,128,0,1,float16,fp8,0,0.02756800005833308
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,2,1,128,0,1,fp8,fp8,0,0.024362665911515553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,2,1,128,0,1,float16,fp8,0,0.2259946664174398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,2,1,128,0,1,float16,fp8,0,0.1220853328704834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,2,2,128,0,1,float16,float16,0,0.13622400164604187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,512,2,1,128,0,1,fp8,fp8,0,0.20085332791010538
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,2,2,128,0,1,float16,fp8,0,0.13333866993586221
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,2,2,128,0,1,fp8,fp8,0,0.11090133587519328
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,2,1,128,0,1,float16,float16,0,0.1244586706161499
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,2,2,128,0,1,float16,float16,0,0.07351999978224437
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,2,1,128,0,1,fp8,fp8,0,0.10160000125567119
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,2,2,128,0,1,float16,fp8,0,0.07168533404668172
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,2,2,128,0,1,fp8,fp8,0,0.06387733419736226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,2,1,128,0,1,float16,float16,0,0.06736533343791962
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,2,1,128,0,1,float16,fp8,0,0.06600533425807953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,2,1,128,0,1,float16,float16,0,0.038191998998324074
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,2,1,128,0,1,fp8,fp8,0,0.058693334460258484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,2,2,128,0,1,float16,float16,0,0.04144000013669332
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,2,1,128,0,1,fp8,fp8,0,0.03430933256944021
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,2,2,128,0,1,float16,float16,0,0.027461332579453785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,2,2,128,0,1,float16,fp8,0,0.039333333571751915
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,2,2,128,0,1,fp8,fp8,0,0.03789333254098892
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,2,1,128,0,1,float16,fp8,0,0.037989333271980286
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,2,2,128,0,1,float16,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,2,2,128,0,1,fp8,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,2,1,128,0,1,float16,float16,0,0.02666666607062022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,2,2,128,0,1,fp8,fp8,0,0.021520001192887623
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,2,1,128,0,1,float16,fp8,0,0.026837334036827087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,2,1,128,0,1,float16,fp8,0,0.023823998868465424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,2,1,128,0,1,fp8,fp8,0,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,2,2,128,0,1,float16,float16,0,0.022143999735514324
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,2,2,128,0,1,float16,float16,0,0.024293333292007446
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,2,2,128,0,1,float16,fp8,0,0.023541333774725597
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,2,1,128,0,1,float16,float16,0,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,2,1,128,0,1,fp8,fp8,0,0.02143466720978419
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,2,2,128,0,1,float16,fp8,0,0.02274133265018463
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,2,2,128,0,1,float16,float16,0,0.022154666483402252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,2,2,128,0,1,fp8,fp8,0,0.01953599974513054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,2,1,128,0,1,float16,float16,0,0.021898667017618816
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,2,1,128,0,1,float16,fp8,0,0.02294933299223582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,2,1,128,0,1,fp8,fp8,0,0.019765333582957584
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,2,2,128,0,1,float16,fp8,0,0.021781332790851593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,2,2,128,0,1,fp8,fp8,0,0.019866666446129482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,2,1,128,0,1,float16,float16,0,0.021562665700912476
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,2,1,128,0,1,float16,fp8,0,0.022618666291236877
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,2,1,128,0,1,fp8,fp8,0,0.019413333386182785
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,2,2,128,0,1,float16,float16,0,0.02056533346573512
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,2,2,128,0,1,float16,fp8,0,0.02092266579469045
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,2,2,128,0,1,fp8,fp8,0,0.018661333868900936
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,2,1,128,0,1,float16,float16,0,0.020703999946514767
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,2,1,128,0,1,float16,fp8,0,0.020831999679406483
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,2,1,128,0,1,fp8,fp8,0,0.019679999599854153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,2,1,128,0,1,float16,float16,0,0.10851200421651204
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,2,2,128,0,1,fp8,fp8,0,0.06052800019582113
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,2,1,128,0,1,float16,fp8,0,0.10802666346232097
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,2,2,128,0,1,float16,float16,0,0.06967466572920482
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,256,2,1,128,0,1,fp8,fp8,0,0.09204266468683879
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,2,2,128,0,1,float16,fp8,0,0.06816000243028005
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,2,1,128,0,1,float16,float16,0,0.06083733340104421
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,2,1,128,0,1,float16,fp8,0,0.06014933188756307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,2,1,128,0,1,fp8,fp8,0,0.05283733208974203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,2,2,128,0,1,float16,float16,0,0.036837334434191384
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,2,2,128,0,1,float16,fp8,0,0.0371573343873024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,2,2,128,0,1,fp8,fp8,0,0.03522666543722153
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,2,1,128,0,1,float16,float16,0,0.03462400039037069
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,2,1,128,0,1,float16,fp8,0,0.03408000121514002
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,2,1,128,0,1,fp8,fp8,0,0.03199466566244761
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,2,2,128,0,1,float16,float16,0,0.025583999852339428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,2,2,128,0,1,float16,fp8,0,0.024538666009902954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,2,2,128,0,1,fp8,fp8,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,2,1,128,0,1,float16,float16,0,0.02470933397610982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,2,1,128,0,1,float16,fp8,0,0.025045332809289295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,2,1,128,0,1,fp8,fp8,0,0.02199999988079071
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,2,2,128,0,1,float16,float16,0,0.020026666422684986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,2,2,128,0,1,float16,fp8,0,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,2,2,128,0,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,2,1,128,0,1,float16,float16,0,0.01998399943113327
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,2,1,128,0,1,float16,fp8,0,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,2,1,128,0,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,2,2,128,0,1,float16,float16,0,0.018805333723624546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,2,2,128,0,1,float16,fp8,0,0.019823999454577763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,2,2,128,0,1,float16,fp8,0,0.019141333798567455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,2,2,128,0,1,fp8,fp8,0,0.017759999881188076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,2,1,128,0,1,float16,float16,0,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,2,1,128,0,1,float16,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,2,1,128,0,1,fp8,fp8,0,0.017450666675964992
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,2,2,128,0,1,float16,float16,0,0.01871466636657715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,2,2,128,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,2,1,128,0,1,float16,float16,0,0.018719999740521114
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,2,1,128,0,1,float16,fp8,0,0.01783466711640358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,2,1,128,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,2,2,128,0,1,float16,float16,0,0.017616000026464462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,2,2,128,0,1,float16,fp8,0,0.017727999637524288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,2,2,128,0,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,2,1,128,0,1,float16,float16,0,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,2,1,128,0,1,float16,fp8,0,0.017616000026464462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,2,1,128,0,1,fp8,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,2,2,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,2,2,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,2,2,128,0,1,fp8,fp8,0,0.016634666671355564
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,2,1,128,0,1,float16,float16,0,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,2,1,128,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,2,1,128,0,1,fp8,fp8,0,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,2,1,128,0,1,float16,float16,0,0.05807999769846598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,2,1,128,0,1,float16,fp8,0,0.05735999842484792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,2,2,128,0,1,float16,float16,0,0.03660800059636434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,128,2,1,128,0,1,fp8,fp8,0,0.05383466680844625
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,2,2,128,0,1,float16,fp8,0,0.035455999275048576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,2,2,128,0,1,fp8,fp8,0,0.03587199995915095
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,2,1,128,0,1,float16,float16,0,0.03323200096686681
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,2,1,128,0,1,float16,fp8,0,0.03377600014209747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,2,1,128,0,1,fp8,fp8,0,0.03204799940188726
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,2,2,128,0,1,float16,float16,0,0.024282666544119518
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,2,2,128,0,1,float16,fp8,0,0.023845332364241283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,2,2,128,0,1,fp8,fp8,0,0.02275199939807256
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,2,1,128,0,1,float16,float16,0,0.02380799998839696
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,2,1,128,0,1,float16,fp8,0,0.024170666933059692
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,2,1,128,0,1,fp8,fp8,0,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,2,2,128,0,1,float16,float16,0,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,2,2,128,0,1,float16,fp8,0,0.01951466624935468
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,2,2,128,0,1,fp8,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,2,1,128,0,1,float16,float16,0,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,2,2,128,0,1,fp8,fp8,0,0.01806933308641116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,2,1,128,0,1,float16,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,2,1,128,0,1,fp8,fp8,0,0.01959466685851415
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,2,2,128,0,1,float16,float16,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,2,2,128,0,1,float16,fp8,0,0.01793066660563151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,2,1,128,0,1,float16,float16,0,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,2,1,128,0,1,float16,fp8,0,0.01793066660563151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,2,1,128,0,1,fp8,fp8,0,0.017749333133300144
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,2,2,128,0,1,float16,float16,0,0.016762666404247284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,2,2,128,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,2,2,128,0,1,fp8,fp8,0,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,2,1,128,0,1,float16,float16,0,0.016688000410795212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,2,1,128,0,1,float16,fp8,0,0.016229332735141117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,2,1,128,0,1,fp8,fp8,0,0.01752000053723653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,2,2,128,0,1,float16,float16,0,0.01653333380818367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,2,2,128,0,1,float16,fp8,0,0.01655999943614006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,2,2,128,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,2,1,128,0,1,float16,float16,0,0.015791999797026317
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,2,1,128,0,1,float16,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,2,1,128,0,1,fp8,fp8,0,0.01648533344268799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,2,2,128,0,1,float16,float16,0,0.016058667252461117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,2,2,128,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,2,2,128,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,2,1,128,0,1,float16,float16,0,0.015919999529918034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,2,1,128,0,1,float16,fp8,0,0.016010666886965435
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,2,1,128,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,2,2,128,0,1,float16,float16,0,0.01553600033124288
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,2,2,128,0,1,float16,fp8,0,0.016437333077192307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,2,2,128,0,1,fp8,fp8,0,0.016528000434239704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,2,1,128,0,1,float16,float16,0,0.01632000009218852
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,2,1,128,0,1,float16,fp8,0,0.01642666632930438
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,2,1,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,2,1,128,0,1,float16,float16,0,0.0354720006386439
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,2,1,128,0,1,fp8,fp8,0,0.03939199944337209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,64,2,1,128,0,1,float16,fp8,0,0.03561066587766012
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,2,2,128,0,1,float16,float16,0,0.024885334074497223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,2,2,128,0,1,float16,fp8,0,0.0249439999461174
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,2,2,128,0,1,fp8,fp8,0,0.026954665780067444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,2,1,128,0,1,float16,float16,0,0.024405332903067272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,2,1,128,0,1,float16,fp8,0,0.024986666937669117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,2,1,128,0,1,fp8,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,2,2,128,0,1,float16,float16,0,0.018565333137909572
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,2,2,128,0,1,float16,fp8,0,0.01876266673207283
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,2,2,128,0,1,fp8,fp8,0,0.01939733326435089
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,2,1,128,0,1,float16,float16,0,0.0184906671444575
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,2,1,128,0,1,float16,fp8,0,0.018383999665578205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,2,1,128,0,1,fp8,fp8,0,0.018768000106016796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,2,2,128,0,1,float16,float16,0,0.016410666207472484
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,2,2,128,0,1,float16,float16,0,0.015365333606799444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,2,2,128,0,1,float16,fp8,0,0.015941333025693893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,2,2,128,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,2,1,128,0,1,float16,float16,0,0.016106666376193363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,2,1,128,0,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,2,1,128,0,1,float16,fp8,0,0.016714667280515034
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,2,1,128,0,1,fp8,fp8,0,0.017642666896184284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,2,2,128,0,1,float16,fp8,0,0.0161013330022494
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,2,2,128,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,2,1,128,0,1,float16,float16,0,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,2,1,128,0,1,float16,float16,0,0.015626666446526844
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,2,1,128,0,1,fp8,fp8,0,0.01629866659641266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,2,2,128,0,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,2,2,128,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,2,2,128,0,1,fp8,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,2,1,128,0,1,float16,fp8,0,0.015589332828919092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,2,1,128,0,1,fp8,fp8,0,0.016682667036851246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,2,2,128,0,1,float16,float16,0,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,2,2,128,0,1,float16,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,2,2,128,0,1,fp8,fp8,0,0.016122666498025257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,2,1,128,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,2,1,128,0,1,float16,fp8,0,0.014730667074521383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,2,1,128,0,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,2,2,128,0,1,float16,float16,0,0.01469333345691363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,2,2,128,0,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,2,2,128,0,1,fp8,fp8,0,0.016069332758585613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,2,1,128,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,2,1,128,0,1,float16,float16,0,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,2,1,128,0,1,float16,fp8,0,0.014736000448465347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,2,1,128,0,1,fp8,fp8,0,0.01648533344268799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,2,2,128,0,1,float16,float16,0,0.014277332772811254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,2,2,128,0,1,float16,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,2,2,128,0,1,fp8,fp8,0,0.01609066625436147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,2,1,128,0,1,float16,fp8,0,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,2,1,128,0,1,fp8,fp8,0,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,2,1,128,0,1,float16,fp8,0,0.027098665634791057
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,2,1,128,0,1,float16,float16,0,0.028197333216667175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,2,2,128,0,1,float16,float16,0,0.019626667102177937
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,32,2,1,128,0,1,fp8,fp8,0,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,2,2,128,0,1,float16,fp8,0,0.020026666422684986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,2,2,128,0,1,fp8,fp8,0,0.023631999890009563
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,2,1,128,0,1,float16,float16,0,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,2,1,128,0,1,float16,fp8,0,0.02021866664290428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,2,1,128,0,1,fp8,fp8,0,0.022522665560245514
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,2,2,128,0,1,float16,float16,0,0.01666133354107539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,2,1,128,0,1,fp8,fp8,0,0.0182239996890227
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,2,2,128,0,1,float16,float16,0,0.0145066666106383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,2,2,128,0,1,float16,fp8,0,0.015770666301250458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,2,2,128,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,2,2,128,0,1,fp8,fp8,0,0.017994667092959087
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,2,1,128,0,1,float16,float16,0,0.015664000064134598
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,2,1,128,0,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,2,1,128,0,1,float16,fp8,0,0.016165333489576977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,2,2,128,0,1,float16,fp8,0,0.01580799991885821
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,2,2,128,0,1,fp8,fp8,0,0.01661866654952367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,2,1,128,0,1,float16,float16,0,0.014303999642531076
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,2,1,128,0,1,float16,fp8,0,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,2,2,128,0,1,float16,float16,0,0.014741333822409311
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,2,2,128,0,1,float16,fp8,0,0.014629332969586054
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,2,1,128,0,1,float16,float16,0,0.014736000448465347
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,2,1,128,0,1,float16,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,2,1,128,0,1,fp8,fp8,0,0.016688000410795212
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,2,1,128,0,1,float16,fp8,0,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,2,1,128,0,1,fp8,fp8,0,0.016496000190575916
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,2,2,128,0,1,float16,float16,0,0.013967999567588171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,2,2,128,0,1,float16,fp8,0,0.014442666123310724
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,2,2,128,0,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,2,1,128,0,1,float16,float16,0,0.014032000054915747
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,2,2,128,0,1,float16,float16,0,0.0145066666106383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,2,2,128,0,1,float16,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,2,2,128,0,1,fp8,fp8,0,0.016517333686351776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,2,1,128,0,1,float16,float16,0,0.013717333475748697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,2,1,128,0,1,float16,fp8,0,0.014111999422311783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,2,1,128,0,1,fp8,fp8,0,0.016480000068744022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,2,2,128,0,1,float16,float16,0,0.014192000031471252
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,2,2,128,0,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,2,2,128,0,1,fp8,fp8,0,0.016437333077192307
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,2,1,128,0,1,float16,float16,0,0.014181333283583323
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,2,1,128,0,1,float16,fp8,0,0.014245333770910898
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,2,1,128,0,1,fp8,fp8,0,0.015941333025693893
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,2,2,128,0,1,float16,float16,0,0.014106666048367819
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,2,2,128,0,1,float16,fp8,0,0.014426667243242264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,2,2,128,0,1,fp8,fp8,0,0.016314666718244553
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,2,1,128,0,1,float16,float16,0,0.013925333817799887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,2,1,128,0,1,float16,fp8,0,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,2,1,128,0,1,fp8,fp8,0,0.015861333658297855
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,2,1,128,0,1,float16,float16,0,0.024442667762438457
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,2,1,128,0,1,float16,fp8,0,0.024714666108290356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,256,16,2,1,128,0,1,fp8,fp8,0,0.031189332405726116
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,2,2,128,0,1,float16,float16,0,0.01848000039656957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,2,2,128,0,1,float16,fp8,0,0.01871466636657715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,2,2,128,0,1,fp8,fp8,0,0.022416000564893086
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,2,1,128,0,1,float16,float16,0,0.018751999984184902
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,2,1,128,0,1,float16,fp8,0,0.01803733284274737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,2,1,128,0,1,fp8,fp8,0,0.021397332350413006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,2,2,128,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,2,2,128,0,1,float16,fp8,0,0.01573866605758667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,2,2,128,0,1,fp8,fp8,0,0.01793066660563151
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,2,1,128,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,2,1,128,0,1,float16,fp8,0,0.01573866605758667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,2,1,128,0,1,fp8,fp8,0,0.017738666385412216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,2,2,128,0,1,float16,float16,0,0.013957332819700241
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,2,2,128,0,1,float16,fp8,0,0.014277332772811254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,2,2,128,0,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,2,2,128,0,1,fp8,fp8,0,0.016677333662907284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,2,1,128,0,1,float16,float16,0,0.014405333747466406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,2,1,128,0,1,float16,fp8,0,0.015487999965747198
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,2,1,128,0,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,2,2,128,0,1,float16,float16,0,0.014501333236694336
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,2,2,128,0,1,fp8,fp8,0,0.01589866727590561
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,2,1,128,0,1,float16,float16,0,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,2,1,128,0,1,float16,fp8,0,0.014661333213249842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,2,1,128,0,1,fp8,fp8,0,0.016517333686351776
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,2,2,128,0,1,float16,float16,0,0.013807999591032663
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,2,2,128,0,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,2,2,128,0,1,fp8,fp8,0,0.016490666816631954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,2,2,128,0,1,fp8,fp8,0,0.016656000167131424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,2,1,128,0,1,float16,float16,0,0.013914667069911957
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,2,1,128,0,1,float16,fp8,0,0.013701333353916803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,2,1,128,0,1,fp8,fp8,0,0.015605332950750986
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,2,2,128,0,1,float16,float16,0,0.014127999544143677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,2,2,128,0,1,float16,fp8,0,0.014576000471909841
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,2,1,128,0,1,float16,float16,0,0.01441066712141037
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,2,1,128,0,1,float16,fp8,0,0.014378666877746582
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,2,1,128,0,1,fp8,fp8,0,0.0163680004576842
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,2,2,128,0,1,float16,float16,0,0.013845333208640417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,2,2,128,0,1,float16,fp8,0,0.013877333452304205
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,2,2,128,0,1,fp8,fp8,0,0.015935999651749928
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,2,1,128,0,1,float16,float16,0,0.013978666315476099
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,2,1,128,0,1,float16,fp8,0,0.014549333602190018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,2,1,128,0,1,fp8,fp8,0,0.01621333385507266
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,2,2,128,0,1,float16,float16,0,0.013637332866589228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,2,2,128,0,1,float16,fp8,0,0.014485333114862442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,2,2,128,0,1,fp8,fp8,0,0.016143999993801117
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,2,1,128,0,1,float16,float16,0,0.01351999988158544
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,2,1,128,0,1,float16,fp8,0,0.013925333817799887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,2,1,128,0,1,fp8,fp8,0,0.016021333634853363
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,1,1,128,0,1,float16,float16,0,0.4864586591720581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,1,1,128,0,1,float16,fp8,0,0.48719998200734455
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16384,1,1,128,0,1,fp8,fp8,0,0.34999998410542804
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,1,1,128,0,1,float16,float16,0,0.2534346580505371
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,1,1,128,0,1,float16,fp8,0,0.24766399463017783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,1,1,128,0,1,float16,fp8,0,0.2534559965133667
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16384,1,1,128,0,1,fp8,fp8,0,0.18580265839894614
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,1,1,128,0,1,float16,float16,0,0.24612800280253092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16384,1,1,128,0,1,fp8,fp8,0,0.17368000745773315
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,1,1,128,0,1,float16,float16,0,0.31404266754786175
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,1,1,128,0,1,float16,fp8,0,0.31572266419728595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,12288,1,1,128,0,1,fp8,fp8,0,0.23215999205907187
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,1,1,128,0,1,float16,float16,0,0.1972106695175171
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,1,1,128,0,1,float16,fp8,0,0.19641600052515665
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,12288,1,1,128,0,1,fp8,fp8,0,0.14079999923706055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,1,1,128,0,1,float16,float16,0,0.18592000007629395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,1,1,128,0,1,float16,fp8,0,0.1876586675643921
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,1,1,128,0,1,fp8,fp8,0,0.17008533080418906
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,12288,1,1,128,0,1,fp8,fp8,0,0.13362666964530945
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,1,1,128,0,1,float16,float16,0,0.22898666063944498
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,1,1,128,0,1,fp8,fp8,0,0.1197706659634908
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,10240,1,1,128,0,1,float16,fp8,0,0.2303253412246704
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,1,1,128,0,1,float16,float16,0,0.16380799810091654
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,10240,1,1,128,0,1,float16,fp8,0,0.1637493371963501
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,1,1,128,0,1,float16,float16,0,0.1569386621316274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,1,1,128,0,1,float16,fp8,0,0.15825600425402322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,10240,1,1,128,0,1,fp8,fp8,0,0.11396800478299458
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,1,1,128,0,1,float16,float16,0,0.2741706569989522
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,1,1,128,0,1,float16,fp8,0,0.2764959931373596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,8192,1,1,128,0,1,fp8,fp8,0,0.2102186679840088
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,1,1,128,0,1,float16,float16,0,0.14362133542696634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,1,1,128,0,1,float16,fp8,0,0.1456106702486674
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,8192,1,1,128,0,1,fp8,fp8,0,0.11243733763694763
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,1,1,128,0,1,float16,float16,0,0.13302933176358542
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,1,1,128,0,1,fp8,fp8,0,0.0935093363126119
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,1,1,128,0,1,float16,fp8,0,0.13387733697891235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,8192,1,1,128,0,1,fp8,fp8,0,0.09861866633097331
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,1,1,128,0,1,float16,float16,0,0.1279039978981018
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,8192,1,1,128,0,1,float16,fp8,0,0.1288320024808248
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,1,1,128,0,1,float16,float16,0,0.17969600359598795
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,1,1,128,0,1,float16,fp8,0,0.18363734086354574
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,6144,1,1,128,0,1,fp8,fp8,0,0.13964266578356424
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,1,1,128,0,1,float16,float16,0,0.1077280044555664
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,1,1,128,0,1,float16,fp8,0,0.10989333192507426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,6144,1,1,128,0,1,fp8,fp8,0,0.0839359958966573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,1,1,128,0,1,float16,float16,0,0.10200533270835876
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,1,1,128,0,1,float16,fp8,0,0.1027946670850118
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,6144,1,1,128,0,1,fp8,fp8,0,0.07712533573309581
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,1,1,128,0,1,float16,float16,0,0.09909333785374959
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,1,1,128,0,1,float16,fp8,0,0.10014933347702026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,6144,1,1,128,0,1,fp8,fp8,0,0.07396266857783
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,1,1,128,0,1,float16,float16,0,0.16938666502634683
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,1,1,128,0,1,float16,fp8,0,0.16870399316151938
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,4096,1,1,128,0,1,fp8,fp8,0,0.1376159985860189
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,1,1,128,0,1,float16,float16,0,0.0899786651134491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,1,1,128,0,1,float16,fp8,0,0.09156266848246257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,4096,1,1,128,0,1,fp8,fp8,0,0.07642666498819987
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,1,1,128,0,1,float16,float16,0,0.07656533519426982
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,1,1,128,0,1,float16,fp8,0,0.0777759999036789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,4096,1,1,128,0,1,fp8,fp8,0,0.06057066718737284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,1,1,128,0,1,float16,float16,0,0.07252266506354015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,1,1,128,0,1,float16,fp8,0,0.07301333546638489
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,4096,1,1,128,0,1,fp8,fp8,0,0.05566933254400889
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,1,1,128,0,1,float16,float16,0,0.07209066549936931
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,1,1,128,0,1,float16,fp8,0,0.07086400190989177
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,4096,1,1,128,0,1,fp8,fp8,0,0.054287999868392944
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,1,1,128,0,1,float16,float16,0,0.11353066563606262
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,1,1,128,0,1,float16,fp8,0,0.11501866579055786
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,3072,1,1,128,0,1,fp8,fp8,0,0.09556266665458679
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,1,1,128,0,1,float16,float16,0,0.06590400139490764
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,1,1,128,0,1,float16,fp8,0,0.06718933085600536
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,3072,1,1,128,0,1,fp8,fp8,0,0.05663466453552246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,1,1,128,0,1,float16,float16,0,0.059530665477116905
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,1,1,128,0,1,float16,float16,0,0.05653866628805796
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,1,1,128,0,1,float16,fp8,0,0.06113066772619883
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,3072,1,1,128,0,1,fp8,fp8,0,0.04870399832725525
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,1,1,128,0,1,float16,float16,0,0.05789333085219065
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,1,1,128,0,1,float16,fp8,0,0.05771199862162272
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,3072,1,1,128,0,1,fp8,fp8,0,0.04488533238569895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,1,1,128,0,1,float16,fp8,0,0.05714133381843567
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,3072,1,1,128,0,1,fp8,fp8,0,0.04330666859944662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,1,1,128,0,1,float16,float16,0,0.1127893328666687
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,1,1,128,0,1,float16,fp8,0,0.11547733346621196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,2048,1,1,128,0,1,fp8,fp8,0,0.10207999746004741
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,1,1,128,0,1,float16,float16,0,0.06321066617965698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,1,1,128,0,1,float16,fp8,0,0.06403733293215434
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,2048,1,1,128,0,1,fp8,fp8,0,0.05817066629727682
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,1,1,128,0,1,float16,float16,0,0.0497920016447703
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,1,1,128,0,1,float16,fp8,0,0.05009066561857859
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,2048,1,1,128,0,1,fp8,fp8,0,0.04232533276081085
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,1,1,128,0,1,float16,float16,0,0.04487466812133789
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,1,1,128,0,1,float16,fp8,0,0.04543999830881754
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,2048,1,1,128,0,1,fp8,fp8,0,0.037248000502586365
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,1,1,128,0,1,float16,float16,0,0.04266666869322459
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,1,1,128,0,1,float16,fp8,0,0.04331199824810028
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,2048,1,1,128,0,1,fp8,fp8,0,0.034789333740870156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,1,1,128,0,1,float16,float16,0,0.04177600145339966
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,1,1,128,0,1,float16,fp8,0,0.04228800038496653
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,1,1,128,0,1,float16,fp8,0,0.047082667549451195
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,2048,1,1,128,0,1,fp8,fp8,0,0.03405333310365677
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,1,1,128,0,1,float16,float16,0,0.08148266871770223
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,1,1,128,0,1,float16,fp8,0,0.08268799881140391
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1536,1,1,128,0,1,fp8,fp8,0,0.07489066819349925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,1,1,128,0,1,float16,float16,0,0.04689066608746847
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1536,1,1,128,0,1,fp8,fp8,0,0.04230933388074239
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,1,1,128,0,1,float16,float16,0,0.039162665605545044
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,1,1,128,0,1,float16,fp8,0,0.04061333338419596
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1536,1,1,128,0,1,fp8,fp8,0,0.03475199888149897
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,1,1,128,0,1,float16,float16,0,0.03774933268626531
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,1,1,128,0,1,float16,fp8,0,0.038165333370367684
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1536,1,1,128,0,1,fp8,fp8,0,0.031498665610949196
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,1,1,128,0,1,float16,float16,0,0.03578133384386698
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,1,1,128,0,1,fp8,fp8,0,0.029125332832336426
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,1,1,128,0,1,float16,fp8,0,0.03631466627120972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1536,1,1,128,0,1,fp8,fp8,0,0.030623999734719593
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,1,1,128,0,1,float16,float16,0,0.035429333647092186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1536,1,1,128,0,1,float16,fp8,0,0.03579200059175491
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,1,1,128,0,1,float16,float16,0,0.08530132969220479
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,1,1,128,0,1,float16,fp8,0,0.08497599760691325
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,1024,1,1,128,0,1,fp8,fp8,0,0.0749013324578603
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,1,1,128,0,1,float16,float16,0,0.04754666487375895
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,1,1,128,0,1,fp8,fp8,0,0.029146666328112285
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,1,1,128,0,1,float16,fp8,0,0.04673600196838379
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,1024,1,1,128,0,1,fp8,fp8,0,0.04330666859944662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,1,1,128,0,1,float16,float16,0,0.034128000338872276
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,1024,1,1,128,0,1,float16,fp8,0,0.03306666761636734
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,1,1,128,0,1,float16,float16,0,0.030597334106763203
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,1,1,128,0,1,float16,fp8,0,0.031040000418821972
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,1,1,128,0,1,float16,float16,0,0.028346667687098186
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,1024,1,1,128,0,1,fp8,fp8,0,0.026165333886941273
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,1,1,128,0,1,float16,float16,0,0.02997333308060964
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,1,1,128,0,1,float16,fp8,0,0.029717333614826202
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,1024,1,1,128,0,1,fp8,fp8,0,0.02475733309984207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,1,1,128,0,1,float16,fp8,0,0.02796799937884013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,1024,1,1,128,0,1,fp8,fp8,0,0.02441066751877467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,1,1,128,0,1,float16,float16,0,0.026911998788515728
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,1,1,128,0,1,float16,fp8,0,0.027888000011444092
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,1024,1,1,128,0,1,fp8,fp8,0,0.024010665714740753
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,1,1,128,0,1,float16,float16,0,0.07498666644096375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,1,1,128,0,1,fp8,fp8,0,0.06383466720581055
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,512,1,1,128,0,1,float16,fp8,0,0.07354133327802022
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,1,1,128,0,1,float16,float16,0,0.04085333396991094
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,1,1,128,0,1,float16,fp8,0,0.04030933231115341
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,512,1,1,128,0,1,fp8,fp8,0,0.0371573343873024
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,1,1,128,0,1,float16,float16,0,0.02809600035349528
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,1,1,128,0,1,float16,fp8,0,0.02807466685771942
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,512,1,1,128,0,1,fp8,fp8,0,0.02497600018978119
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,1,1,128,0,1,float16,float16,0,0.023242667317390442
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,1,1,128,0,1,float16,fp8,0,0.024527999262015026
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,512,1,1,128,0,1,fp8,fp8,0,0.022085333863894146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,1,1,128,0,1,float16,float16,0,0.021989333132902782
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,1,1,128,0,1,float16,fp8,0,0.022181332111358643
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,512,1,1,128,0,1,fp8,fp8,0,0.020373333245515823
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,1,1,128,0,1,float16,float16,0,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,1,1,128,0,1,float16,fp8,0,0.022240000466505688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,512,1,1,128,0,1,fp8,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,1,1,128,0,1,float16,float16,0,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,1,1,128,0,1,float16,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,512,1,1,128,0,1,fp8,fp8,0,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,1,1,128,0,1,float16,float16,0,0.019904000063737232
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,1,1,128,0,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,512,1,1,128,0,1,fp8,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,1,1,128,0,1,float16,float16,0,0.038202665746212006
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,1,1,128,0,1,float16,fp8,0,0.03731200098991394
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,256,1,1,128,0,1,fp8,fp8,0,0.03640533238649368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,1,1,128,0,1,float16,float16,0,0.02531733363866806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,1,1,128,0,1,float16,fp8,0,0.025744001070658367
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,256,1,1,128,0,1,fp8,fp8,0,0.022266666094462078
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,1,1,128,0,1,float16,float16,0,0.020666666328907013
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,1,1,128,0,1,float16,fp8,0,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,256,1,1,128,0,1,fp8,fp8,0,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,1,1,128,0,1,float16,float16,0,0.018735999862353008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,1,1,128,0,1,float16,fp8,0,0.019546666493018467
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,1,1,128,0,1,float16,fp8,0,0.018239999810854595
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,256,1,1,128,0,1,fp8,fp8,0,0.01791999985774358
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,1,1,128,0,1,float16,float16,0,0.018351999421914417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,1,1,128,0,1,float16,fp8,0,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,256,1,1,128,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,1,1,128,0,1,float16,float16,0,0.017477333545684814
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,256,1,1,128,0,1,fp8,fp8,0,0.01634666696190834
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,1,1,128,0,1,fp8,fp8,0,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,1,1,128,0,1,float16,float16,0,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,1,1,128,0,1,float16,fp8,0,0.017952000101407368
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,256,1,1,128,0,1,fp8,fp8,0,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,1,1,128,0,1,float16,float16,0,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,256,1,1,128,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,1,1,128,0,1,float16,float16,0,0.024453334510326385
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,1,1,128,0,1,float16,fp8,0,0.02386666586001714
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,1,1,128,0,1,float16,float16,0,0.01940800001223882
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,128,1,1,128,0,1,fp8,fp8,0,0.022965334355831146
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,1,1,128,0,1,float16,fp8,0,0.019786667078733444
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,1,1,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,128,1,1,128,0,1,fp8,fp8,0,0.019440000255902607
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,1,1,128,0,1,float16,float16,0,0.017637333522240322
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,1,1,128,0,1,float16,fp8,0,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,128,1,1,128,0,1,fp8,fp8,0,0.018309333672126133
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,1,1,128,0,1,float16,float16,0,0.015909332782030106
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,128,1,1,128,0,1,fp8,fp8,0,0.017621333400408428
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,1,1,128,0,1,float16,float16,0,0.016352000335852306
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,1,1,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,128,1,1,128,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,1,1,128,0,1,float16,float16,0,0.016165333489576977
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,1,1,128,0,1,float16,fp8,0,0.01605333387851715
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,128,1,1,128,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,1,1,128,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,1,1,128,0,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,128,1,1,128,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,1,1,128,0,1,float16,float16,0,0.016048000504573185
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,1,1,128,0,1,float16,fp8,0,0.01626666635274887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,1,1,128,0,1,float16,float16,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,128,1,1,128,0,1,fp8,fp8,0,0.01648533344268799
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,1,1,128,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,1,1,128,0,1,float16,float16,0,0.018415999909241993
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,1,1,128,0,1,float16,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,64,1,1,128,0,1,fp8,fp8,0,0.019930666933457058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,64,1,1,128,0,1,fp8,fp8,0,0.01803733284274737
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,1,1,128,0,1,float16,fp8,0,0.015509333461523056
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,1,1,128,0,1,float16,float16,0,0.016186666985352833
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,1,1,128,0,1,float16,fp8,0,0.01570133368174235
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,64,1,1,128,0,1,fp8,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,1,1,128,0,1,float16,float16,0,0.014282666146755219
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,1,1,128,0,1,float16,float16,0,0.014373333503802618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,64,1,1,128,0,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,1,1,128,0,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,1,1,128,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,1,1,128,0,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,64,1,1,128,0,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,1,1,128,0,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,64,1,1,128,0,1,fp8,fp8,0,0.015642666568358738
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,1,1,128,0,1,float16,float16,0,0.014479999740918478
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,64,1,1,128,0,1,fp8,fp8,0,0.016122666498025257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,1,1,128,0,1,float16,float16,0,0.014352000008026758
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,1,1,128,0,1,float16,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,1,1,128,0,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,64,1,1,128,0,1,fp8,fp8,0,0.016037333756685257
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,1,1,128,0,1,float16,float16,0,0.015546667079130808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,1,1,128,0,1,float16,fp8,0,0.016682667036851246
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,32,1,1,128,0,1,fp8,fp8,0,0.017653333644072216
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,1,1,128,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,32,1,1,128,0,1,fp8,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,1,1,128,0,1,float16,float16,0,0.014746667196353277
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,1,1,128,0,1,float16,fp8,0,0.01470400020480156
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,32,1,1,128,0,1,fp8,fp8,0,0.016735999534527462
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,1,1,128,0,1,float16,float16,0,0.013834666460752487
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,1,1,128,0,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,32,1,1,128,0,1,fp8,fp8,0,0.01609066625436147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,1,1,128,0,1,float16,float16,0,0.014373333503802618
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,1,1,128,0,1,float16,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,32,1,1,128,0,1,fp8,fp8,0,0.016341333587964375
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,1,1,128,0,1,float16,float16,0,0.013823999712864557
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,1,1,128,0,1,float16,fp8,0,0.014592000593741735
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,32,1,1,128,0,1,fp8,fp8,0,0.01600533351302147
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,1,1,128,0,1,float16,float16,0,0.014405333747466406
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,1,1,128,0,1,float16,fp8,0,0.014015999933083853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,1,1,128,0,1,float16,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,32,1,1,128,0,1,fp8,fp8,0,0.016490666816631954
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,1,1,128,0,1,float16,float16,0,0.013690666606028875
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,1,1,128,0,1,float16,fp8,0,0.014581333845853806
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,32,1,1,128,0,1,fp8,fp8,0,0.01626666635274887
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,1,1,128,0,1,float16,float16,0,0.015722667177518208
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,128,16,1,1,128,0,1,fp8,fp8,0,0.01773333301146825
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,1,1,128,0,1,float16,float16,0,0.014384000251690546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,1,1,128,0,1,float16,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,1,1,128,0,1,fp8,fp8,0,0.01607999950647354
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,64,16,1,1,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,1,1,128,0,1,float16,float16,0,0.014384000251690546
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,1,1,128,0,1,float16,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,32,16,1,1,128,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,1,1,128,0,1,float16,float16,0,0.014671999961137772
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,16,16,1,1,128,0,1,float16,fp8,0,0.014725333700577417
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,1,1,128,0,1,float16,float16,0,0.013557333499193192
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,1,1,128,0,1,float16,float16,0,0.014432000617186228
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,1,1,128,0,1,float16,fp8,0,0.013647999614477158
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,8,16,1,1,128,0,1,fp8,fp8,0,0.016197333733240765
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,1,1,128,0,1,float16,float16,0,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,1,1,128,0,1,float16,fp8,0,0.014277332772811254
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,4,16,1,1,128,0,1,fp8,fp8,0,0.016538667182127636
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,1,1,128,0,1,float16,fp8,0,0.014335999886194864
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,2,16,1,1,128,0,1,fp8,fp8,0,0.015594666202863058
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,1,1,128,0,1,float16,float16,0,0.014362666755914688
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,1,1,128,0,1,float16,fp8,0,0.013466666142145792
TRTLLM,1.2.0rc5,NVIDIA H200,context_attention,torch_flow,1,16,1,1,128,0,1,fp8,fp8,0,0.016085332880417507
