framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,1,16,1,0,0.017397332936525345
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,1,16,2,0,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,1,16,4,0,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,1,16,8,0,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,1,16,16,0,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,1,16,32,0,0.015498666713635126
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,1,16,64,0,0.01543466622630755
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,1,16,128,0,0.015461333096027374
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,1,16,1,0,0.019466667125622433
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,1,16,2,0,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,1,16,4,0,0.019391999890406925
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,1,16,8,0,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,1,16,16,0,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,1,16,32,0,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,1,16,64,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,1,16,128,0,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,1,32,1,0,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,1,32,2,0,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,1,32,4,0,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,1,32,8,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,1,32,16,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,1,32,32,0,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,1,32,64,0,0.015274666249752045
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,1,32,128,0,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,1,32,1,0,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,1,32,2,0,0.01945066700379054
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,1,32,4,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,1,32,8,0,0.01937599976857503
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,1,32,16,0,0.019589333484570186
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,1,32,32,0,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,1,32,64,0,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,1,32,128,0,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,1,64,1,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,1,64,2,0,0.015637333194414776
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,1,64,4,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,1,64,8,0,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,1,64,128,0,0.015791999797026317
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,1,64,1,0,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,1,64,2,0,0.021642667551835377
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,1,64,4,0,0.023344000180562336
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,1,64,64,0,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,1,64,32,0,0.01579733317097028
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,1,64,16,0,0.015573333948850632
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,1,64,8,0,0.02250133454799652
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,1,64,128,0,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,1,64,16,0,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,1,128,1,0,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,1,64,64,0,0.019567999988794327
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,1,64,32,0,0.019567999988794327
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,1,128,2,0,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,1,128,4,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,1,128,8,0,0.015498666713635126
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,1,128,16,0,0.01632000009218852
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,1,128,32,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,1,128,64,0,0.015791999797026317
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,1,128,128,0,0.017375999440749485
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,1,128,1,0,0.02571200082699458
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,1,128,2,0,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,1,128,4,0,0.021733333667119343
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,1,128,8,0,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,1,128,16,0,0.02327466756105423
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,1,128,64,0,0.021456000705560047
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,1,128,128,0,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,1,256,1,0,0.023578666150569916
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,1,256,2,0,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,1,256,4,0,0.019754666835069656
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,1,256,16,0,0.019466667125622433
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,1,256,32,0,0.019343999524911244
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,1,256,64,0,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,1,256,128,0,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,1,256,1,0,0.03173866619666418
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,1,256,2,0,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,1,256,4,0,0.02550933261712392
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,1,256,8,0,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,1,256,16,0,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,1,256,32,0,0.023402666052182514
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,1,256,64,0,0.02312533309062322
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,1,256,128,0,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,1,512,1,0,0.042223999897638954
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,1,512,2,0,0.027445333699385326
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,1,512,4,0,0.025589334468046825
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,1,512,8,0,0.023242667317390442
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,1,512,16,0,0.02346133440732956
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,1,512,32,0,0.02330133318901062
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,1,512,64,0,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,1,512,128,0,0.021386665602525074
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,1,512,1,0,0.062224000692367554
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,1,512,2,0,0.03350399931271871
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,1,512,4,0,0.02962133288383484
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,1,512,8,0,0.027482666075229645
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,1,512,16,0,0.027744000156720478
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,1,512,32,0,0.02739733209212621
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,1,512,64,0,0.027786667148272198
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,1,512,128,0,0.025573333104451496
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,1,1024,1,0,0.08686932921409607
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,1,1024,2,0,0.054117331902186074
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,1,1024,4,0,0.033557333052158356
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,1,1024,8,0,0.030981334547201794
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,1,1024,16,0,0.031258667508761086
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,1,1024,32,0,0.029482667644818623
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,1,1024,64,0,0.02943466603755951
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,1,1024,128,0,0.029338667790095013
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,1,1024,1,0,0.11570666233698527
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,1,1024,2,0,0.07108266651630402
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,1,1024,4,0,0.039546666045983635
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,1,1024,8,0,0.035690667728583016
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,1,1024,16,0,0.034688000877698265
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,1,1024,32,0,0.03380800038576126
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,1,1024,64,0,0.03257599969704946
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,1,1024,128,0,0.03156800071398417
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,1,1536,1,0,0.14100799957911173
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,1,1536,2,0,0.08777599533398946
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,1,1536,4,0,0.05403733253479004
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,1,1536,8,0,0.039605334401130676
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,1,1536,16,0,0.03790933390458425
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,1,1536,32,0,0.03626133253177007
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,1,1536,64,0,0.03602666656176249
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,1,1536,128,0,0.035317334036032356
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,1,1536,1,0,0.1799359917640686
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,1,1536,2,0,0.10780800382296245
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,1,1536,4,0,0.06465599934260051
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,1,1536,8,0,0.04151466737190882
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,1,1536,16,0,0.040405333042144775
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,1,1536,32,0,0.039706667264302574
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,1,1536,64,0,0.03779733429352442
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,1,1536,128,0,0.0376800000667572
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,1,2048,1,0,0.21074666579564413
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,1,2048,2,0,0.12494933605194092
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,1,2048,4,0,0.07667199770609538
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,1,2048,8,0,0.04693866769472758
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,1,2048,16,0,0.04409066836039225
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,1,2048,32,0,0.04254400233427683
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,1,2048,64,0,0.041893333196640015
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,1,2048,128,0,0.04191466669241587
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,1,2048,1,0,0.24849599599838257
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,1,2048,2,0,0.1439253290494283
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,1,2048,4,0,0.08935999870300293
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,1,2048,8,0,0.05040533343950907
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,1,2048,16,0,0.047594666481018066
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,1,2048,32,0,0.04351999859015147
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,1,2048,64,0,0.043562665581703186
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,1,2048,128,0,0.04351999859015147
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,1,3072,1,0,0.38661332925160724
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,1,3072,2,0,0.21634133656819662
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,1,3072,4,0,0.12628266215324402
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,1,3072,8,0,0.08085333307584126
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,1,3072,16,0,0.05834666887919108
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,1,3072,32,0,0.056202664971351624
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,1,3072,64,0,0.05638400216897329
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,1,3072,128,0,0.054645334680875145
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,1,3072,2,0,0.23512534300486246
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,1,3072,1,0,0.4169333378473918
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,1,3072,8,0,0.08341333270072937
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,1,3072,4,0,0.1399733324845632
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,1,3072,16,0,0.05845866600672404
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,1,3072,32,0,0.056048000852266945
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,1,3072,64,0,0.05555200080076853
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,1,3072,128,0,0.05383466680844625
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,1,4096,2,0,0.33553067843119305
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,1,4096,1,0,0.6100800037384033
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,1,4096,4,0,0.19638399283091226
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,1,4096,8,0,0.12463999787966411
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,1,4096,16,0,0.07308800021807353
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,1,4096,32,0,0.07083733379840851
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,1,4096,64,0,0.06824000179767609
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,1,4096,128,0,0.06821333368619283
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,1,4096,1,0,0.6251360177993774
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,1,4096,2,0,0.34042131900787354
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,1,4096,4,0,0.19495999813079834
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,1,4096,8,0,0.12281599640846252
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,1,4096,16,0,0.07450133562088013
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,1,4096,32,0,0.06884799897670746
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,1,4096,64,0,0.06646400193373363
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,1,4096,128,0,0.0665280024210612
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,1,6144,2,0,0.6420586506525675
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,1,6144,1,0,1.1689279874165852
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,1,6144,8,0,0.21925334135691324
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,1,6144,4,0,0.3468639850616455
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,1,6144,16,0,0.12949867049853006
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,1,6144,32,0,0.09717866778373718
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,1,6144,64,0,0.09781333804130554
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,1,6144,1,0,1.1305973529815674
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,1,6144,2,0,0.5976426601409912
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,1,6144,4,0,0.33461864789326984
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,1,6144,8,0,0.20177600781122842
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,1,6144,16,0,0.12386133273442586
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,1,6144,32,0,0.09100799759229024
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,1,6144,64,0,0.08917867143948872
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,1,6144,128,0,0.08699733018875122
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,1,8192,1,0,1.8819626172383626
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,1,8192,2,0,1.0267253716786702
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,1,8192,4,0,0.5525866746902466
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,1,8192,8,0,0.32054932912190753
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,1,8192,16,0,0.20404267311096191
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,1,8192,32,0,0.1255466639995575
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,1,8192,64,0,0.11987732847531636
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,1,8192,128,0,0.11954666177431743
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,1,8192,1,0,1.7596960067749023
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,1,8192,2,0,0.93941330909729
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,1,8192,4,0,0.5042986472447714
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,1,8192,8,0,0.294597327709198
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,1,8192,16,0,0.18881599108378092
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,1,8192,32,0,0.11660800377527873
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,1,8192,64,0,0.11195199688275655
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,1,8192,128,0,0.10916800300280254
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,1,10240,1,0,2.830693244934082
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,1,10240,2,0,1.6255040168762207
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,1,10240,4,0,0.7874560356140137
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,1,10240,8,0,0.5053439935048422
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,1,10240,16,0,0.2877066731452942
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,1,10240,32,0,0.170522669951121
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,1,10240,64,0,0.14614933729171753
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,1,10240,128,0,0.14430399735768637
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,1,10240,1,0,2.541269302368164
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,1,10240,2,0,1.3234399954477947
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,1,10240,4,0,0.726645310719808
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,1,10240,8,0,0.406389315923055
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,1,10240,16,0,0.25403199593226117
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,1,10240,32,0,0.1548799971739451
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,1,10240,64,0,0.13409599661827087
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,1,10240,128,0,0.13005866607030234
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,1,12288,1,0,3.9133707682291665
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,1,12288,2,0,2.0300000508626304
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,1,12288,4,0,1.0872106552124023
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,1,12288,8,0,0.6143893400828043
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,1,12288,16,0,0.3784853219985962
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,1,12288,32,0,0.23739200830459595
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,1,12288,64,0,0.18749332427978516
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,1,12288,128,0,0.17249600092569986
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,1,12288,1,0,3.4940481185913086
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,1,12288,2,0,1.7889599800109863
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,1,12288,4,0,0.9588800271352133
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,1,12288,8,0,0.5347626606623331
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,1,12288,16,0,0.3334133227666219
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,1,12288,32,0,0.20701332887013754
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,1,12288,64,0,0.15869866808255514
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,1,12288,128,0,0.15384533007939658
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,1,16384,1,0,7.493749618530273
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,1,16384,2,0,3.7536532084147134
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,1,16384,4,0,1.857642650604248
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,1,16384,8,0,1.0260852972666423
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,1,16384,16,0,0.5633973280588785
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,1,16384,32,0,0.35952532291412354
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,1,16384,64,0,0.25437333186467487
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,1,16384,128,0,0.22406399250030518
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,1,16384,1,0,5.827760060628255
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,1,16384,2,0,2.9437761306762695
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,1,16384,4,0,1.656709353129069
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,1,16384,8,0,0.8398986657460531
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,1,16384,16,0,0.4930613438288371
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,1,16384,32,0,0.31617067257563275
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,1,16384,64,0,0.20567999283472696
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,1,16384,128,0,0.1971893310546875
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,2,16,1,0,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,2,16,2,0,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,2,16,4,0,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,2,16,8,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,2,16,16,0,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,2,16,32,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,2,16,64,0,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,2,16,128,0,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,2,16,1,0,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,2,16,2,0,0.020101333657900494
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,2,16,4,0,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,2,16,8,0,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,2,16,16,0,0.018789333601792652
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,2,16,32,0,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,2,16,64,0,0.019402666638294857
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,2,16,128,0,0.019359999646743137
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,2,32,1,0,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,2,32,2,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,2,32,4,0,0.01552533358335495
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,2,32,8,0,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,2,32,16,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,2,32,32,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,2,32,64,0,0.01575999955336253
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,2,32,128,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,2,32,1,0,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,2,32,2,0,0.021381333470344543
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,2,32,4,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,2,32,8,0,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,2,32,16,0,0.020288000504175823
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,2,32,32,0,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,2,32,64,0,0.019424000134070713
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,2,32,128,0,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,2,64,1,0,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,2,64,2,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,2,64,4,0,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,2,64,8,0,0.01544533297419548
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,2,64,16,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,2,64,32,0,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,2,64,64,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,2,64,128,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,2,64,1,0,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,2,64,2,0,0.021381333470344543
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,2,64,4,0,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,2,64,8,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,2,64,16,0,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,2,64,32,0,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,2,64,64,0,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,2,64,128,0,0.02176533391078313
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,2,128,1,0,0.025888000925381977
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,2,128,2,0,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,2,128,4,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,2,128,8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,2,128,16,0,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,2,128,32,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,2,128,64,0,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,2,128,128,0,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,2,128,1,0,0.03128000100453695
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,2,128,2,0,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,2,128,4,0,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,2,128,8,0,0.021642667551835377
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,2,128,16,0,0.02292266736427943
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,2,128,32,0,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,2,128,64,0,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,2,128,128,0,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,2,256,1,0,0.034602666894594826
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,2,256,2,0,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,2,256,4,0,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,2,256,8,0,0.02075733368595441
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,2,256,16,0,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,2,256,32,0,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,2,256,64,0,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,2,256,128,0,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,2,256,1,0,0.05633600056171417
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,2,256,2,0,0.03127466638882955
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,2,256,4,0,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,2,256,8,0,0.025941332181294758
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,2,256,16,0,0.02571200082699458
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,2,256,32,0,0.025253333151340485
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,2,256,64,0,0.023311999936898548
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,2,256,128,0,0.02327466756105423
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,2,512,1,0,0.06845866640408833
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,2,512,2,0,0.04151466737190882
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,2,512,4,0,0.026869334280490875
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,2,512,8,0,0.023765332996845245
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,2,512,16,0,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,2,512,32,0,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,2,512,64,0,0.023472001155217487
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,2,512,128,0,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,2,512,1,0,0.10346666971842448
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,2,512,2,0,0.06205333272616068
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,2,512,4,0,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,2,512,8,0,0.029493334392706554
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,2,512,16,0,0.029189333319664
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,2,512,32,0,0.0276053324341774
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,2,512,64,0,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,2,512,128,0,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,2,1024,1,0,0.14622400204340616
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,2,1024,2,0,0.08718933661778767
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,2,1024,4,0,0.05426666637261709
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,2,1024,8,0,0.03373866776625315
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,2,1024,16,0,0.031248000760873158
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,2,1024,32,0,0.03035199890534083
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,2,1024,64,0,0.029813334345817566
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,2,1024,128,0,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,2,1024,1,0,0.19974400599797568
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,2,1024,2,0,0.11541333794593811
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,2,1024,4,0,0.07183466851711273
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,2,1024,8,0,0.03977066775163015
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,2,1024,16,0,0.03531199942032496
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,2,1024,32,0,0.033520000676314034
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,2,1024,64,0,0.03357866654793421
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,2,1024,128,0,0.033674667278925575
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,2,1536,1,0,0.2528746724128723
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,2,1536,2,0,0.1402186652024587
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,2,1536,4,0,0.0849226713180542
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,2,1536,8,0,0.05389333268006643
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,2,1536,16,0,0.039146666725476585
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,2,1536,32,0,0.03746666759252548
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,2,1536,64,0,0.03763733307520548
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,2,1536,128,0,0.03575466573238373
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,2,1536,1,0,0.3206186691919963
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,2,1536,2,0,0.17949867248535156
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,2,1536,4,0,0.10924266775449117
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,2,1536,8,0,0.06623466809590657
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,2,1536,16,0,0.041936000188191734
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,2,1536,32,0,0.03957866628964742
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,2,1536,64,0,0.03753600021203359
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,2,1536,128,0,0.037861332297325134
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,2,2048,1,0,0.38205333550771076
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,2,2048,2,0,0.2079413334528605
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,2,2048,4,0,0.12235732873280843
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,2,2048,8,0,0.07707733412583669
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,2,2048,16,0,0.04637866715590159
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,2,2048,32,0,0.043807998299598694
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,2,2048,64,0,0.04391466577847799
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,2,2048,128,0,0.04353066782156626
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,2,2048,1,0,0.45420801639556885
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,2,2048,2,0,0.24841066201527914
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,2,2048,4,0,0.14444266756375632
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,2,2048,8,0,0.09044266740481059
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,2,2048,16,0,0.05198400219281515
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,2,2048,32,0,0.04588800172011057
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,2,2048,64,0,0.04385066529115041
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,2,2048,128,0,0.04385599990685781
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,2,3072,1,0,0.7141706943511963
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,2,3072,2,0,0.37646933396657306
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,2,3072,4,0,0.21213332811991373
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,2,3072,8,0,0.1306986709435781
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,2,3072,16,0,0.0814933329820633
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,2,3072,32,0,0.058320000767707825
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,2,3072,64,0,0.05605333546797434
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,2,3072,128,0,0.056048000852266945
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,2,3072,1,0,0.7819573084513346
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,2,3072,2,0,0.41538135210673016
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,2,3072,4,0,0.23285333315531412
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,2,3072,8,0,0.14012799660364786
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,2,3072,16,0,0.08629866441090901
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,2,3072,32,0,0.060266668597857155
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,2,3072,64,0,0.05603733162085215
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,2,3072,128,0,0.05593066910902659
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,2,4096,1,0,1.1496213277180989
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,2,4096,2,0,0.5941706498463949
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,2,4096,4,0,0.3258026639620463
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,2,4096,8,0,0.1917333404223124
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,2,4096,16,0,0.12356799840927124
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,2,4096,32,0,0.07513066629568736
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,2,4096,64,0,0.07032000025113423
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,2,4096,128,0,0.06984533369541168
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,2,4096,1,0,1.1980640093485515
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,2,4096,2,0,0.6234986782073975
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,2,4096,4,0,0.33931199709574383
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,2,4096,8,0,0.19513066609700522
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,2,4096,16,0,0.12353600064913432
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,2,4096,32,0,0.07406933108965556
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,2,4096,64,0,0.06877866884072621
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,2,4096,128,0,0.06744533280531566
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,2,6144,1,0,2.226191997528076
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,2,6144,2,0,1.2227413654327393
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,2,6144,4,0,0.6715306440989176
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,2,6144,8,0,0.37236801783243817
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,2,6144,16,0,0.21609065930048624
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,2,6144,32,0,0.1349066694577535
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,2,6144,64,0,0.09883200128873189
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,2,6144,128,0,0.0950986643632253
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,2,6144,1,0,2.17905600865682
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,2,6144,2,0,1.1465013027191162
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,2,6144,4,0,0.5976639986038208
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,2,6144,8,0,0.3428639968236287
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,2,6144,16,0,0.20465600490570068
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,2,6144,32,0,0.130021333694458
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,2,6144,64,0,0.09302933017412822
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,2,6144,128,0,0.09091200431187947
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,2,8192,1,0,4.205930709838867
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,2,8192,2,0,1.9084587097167969
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,2,8192,4,0,0.9971360365549723
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,2,8192,8,0,0.5473866860071818
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,2,8192,16,0,0.34043200810750324
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,2,8192,32,0,0.2071146567662557
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,2,8192,64,0,0.14099733034769693
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,2,8192,128,0,0.12352533141771953
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,2,8192,1,0,3.483973185221354
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,2,8192,2,0,1.8088000615437825
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,2,8192,4,0,0.9265226523081461
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,2,8192,8,0,0.5085013310114542
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,2,8192,16,0,0.30481600761413574
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,2,8192,32,0,0.19284266233444214
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,2,8192,64,0,0.1204906702041626
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,2,8192,128,0,0.11367467045783997
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,2,10240,1,0,6.531786600748698
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,2,10240,2,0,2.8974666595458984
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,2,10240,4,0,1.6301013628641765
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,2,10240,8,0,0.8093760013580322
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,2,10240,16,0,0.4512853225072225
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,2,10240,32,0,0.2842506567637126
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,2,10240,64,0,0.18069332838058472
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,2,10240,128,0,0.16146133343378702
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,2,10240,1,0,5.102021217346191
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,2,10240,2,0,2.5521119435628257
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,2,10240,4,0,1.325984001159668
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,2,10240,8,0,0.7135413487752279
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,2,10240,16,0,0.40961066881815594
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,2,10240,32,0,0.2569119930267334
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,2,10240,64,0,0.16005333264668783
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,2,10240,128,0,0.14331199725468954
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,2,12288,1,0,9.516799926757812
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,2,12288,2,0,4.648181279500325
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,2,12288,4,0,2.406383991241455
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,2,12288,8,0,1.1094666322072346
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,2,12288,16,0,0.6104053258895874
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,2,12288,32,0,0.3677866856257121
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,2,12288,64,0,0.23450666666030884
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,2,12288,128,0,0.20153599977493286
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,2,12288,1,0,7.400512059529622
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,2,12288,2,0,3.4723733266194663
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,2,12288,4,0,1.8900960286458333
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,2,12288,8,0,0.9572266737620035
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,2,12288,16,0,0.5393333435058594
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,2,12288,32,0,0.3301066756248474
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,2,12288,64,0,0.2141866683959961
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,2,12288,128,0,0.16499732931454977
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,2,16384,1,0,16.301306406656902
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,2,16384,2,0,7.807423909505208
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,2,16384,4,0,3.6770025889078775
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,2,16384,8,0,1.7793280283610027
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,2,16384,16,0,1.152400016784668
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,2,16384,32,0,0.6395039955774943
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,2,16384,64,0,0.40455468495686847
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,2,16384,128,0,0.27341334025065106
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,2,16384,1,0,11.942474365234375
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,2,16384,2,0,6.011434555053711
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,2,16384,4,0,3.0571254094441733
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,2,16384,8,0,1.5975039800008137
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,2,16384,16,0,0.8503519694010416
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,2,16384,32,0,0.49677332242329914
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,2,16384,64,0,0.3213599920272827
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,2,16384,128,0,0.21619200706481934
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,4,16,1,0,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,4,16,2,0,0.019760000209013622
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,4,16,4,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,4,16,8,0,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,4,16,16,0,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,4,16,32,0,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,4,16,64,0,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,4,16,128,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,4,16,1,0,0.0317546675602595
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,4,16,2,0,0.025008000433444977
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,4,16,4,0,0.021482666333516438
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,4,16,8,0,0.02141333371400833
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,4,16,16,0,0.02123733361562093
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,4,16,32,0,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,4,16,64,0,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,4,16,128,0,0.019391999890406925
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,4,32,1,0,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,4,32,2,0,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,4,32,4,0,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,4,32,8,0,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,4,32,16,0,0.016629333297411602
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,4,32,32,0,0.01545599972208341
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,4,32,64,0,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,4,32,128,0,0.015386667102575302
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,4,32,1,0,0.03173866619666418
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,4,32,2,0,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,4,32,4,0,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,4,32,8,0,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,4,32,16,0,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,4,32,32,0,0.021104000508785248
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,4,32,64,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,4,32,128,0,0.019402666638294857
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,4,64,1,0,0.02942933390537898
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,4,64,2,0,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,4,64,4,0,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,4,64,8,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,4,64,16,0,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,4,64,32,0,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,4,64,64,0,0.01563199982047081
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,4,64,128,0,0.015370666980743408
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,4,64,1,0,0.037685332198937736
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,4,64,2,0,0.027503999571005504
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,4,64,4,0,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,4,64,8,0,0.020768000433842342
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,4,64,16,0,0.021429332594076794
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,4,64,32,0,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,4,64,64,0,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,4,64,128,0,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,4,128,1,0,0.0356480007370313
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,4,128,2,0,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,4,128,4,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,4,128,8,0,0.017450666675964992
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,4,128,16,0,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,4,128,32,0,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,4,128,64,0,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,4,128,128,0,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,4,128,1,0,0.0582239975531896
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,4,128,2,0,0.03160533308982849
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,4,128,4,0,0.025248001019159954
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,4,128,8,0,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,4,128,16,0,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,4,128,32,0,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,4,128,64,0,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,4,128,128,0,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,4,256,1,0,0.06122666597366333
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,4,256,2,0,0.033258666594823204
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,4,256,4,0,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,4,256,8,0,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,4,256,16,0,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,4,256,32,0,0.02088533341884613
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,4,256,64,0,0.02072000006834666
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,4,256,128,0,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,4,256,1,0,0.09529067079226176
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,4,256,2,0,0.05782400071620941
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,4,256,4,0,0.031397332747777305
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,4,256,8,0,0.026885333160559338
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,4,256,16,0,0.025055999557177227
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,4,256,32,0,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,4,256,64,0,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,4,256,128,0,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,4,512,1,0,0.11321066816647847
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,4,512,2,0,0.06637333333492279
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,4,512,4,0,0.04167466859022776
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,4,512,8,0,0.027317332724730175
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,4,512,16,0,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,4,512,32,0,0.02342933416366577
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,4,512,64,0,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,4,512,128,0,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,4,512,1,0,0.1757919987042745
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,4,512,2,0,0.10333333412806193
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,4,512,4,0,0.0641653339068095
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,4,512,8,0,0.03382399926582972
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,4,512,16,0,0.029296000798543293
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,4,512,32,0,0.02754133443037669
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,4,512,64,0,0.027258666853109997
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,4,512,128,0,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,4,1024,1,0,0.2632586757342021
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,4,1024,2,0,0.14415466785430908
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,4,1024,4,0,0.08495466907819112
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,4,1024,8,0,0.05468800167242686
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,4,1024,16,0,0.03374933451414108
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,4,1024,32,0,0.031632001201311745
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,4,1024,64,0,0.0313226655125618
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,4,1024,128,0,0.029663999875386555
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,4,1024,1,0,0.3677653471628825
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,4,1024,2,0,0.2013066609700521
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,4,1024,4,0,0.11692800124486287
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,4,1024,8,0,0.0726986676454544
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,4,1024,16,0,0.03982399900754293
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,4,1024,32,0,0.035536001125971474
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,4,1024,64,0,0.03355200091997782
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,4,1024,128,0,0.03366933266321818
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,4,1536,1,0,0.46350399653116864
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,4,1536,2,0,0.24895467360814413
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,4,1536,4,0,0.1395786702632904
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,4,1536,8,0,0.08539199829101562
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,4,1536,16,0,0.05402133365472158
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,4,1536,32,0,0.03949866692225138
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,4,1536,64,0,0.03782399992148081
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,4,1536,128,0,0.037632000943024956
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,4,1536,1,0,0.6013866662979126
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,4,1536,2,0,0.32052799065907794
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,4,1536,4,0,0.1792479952176412
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,4,1536,8,0,0.10937600334485371
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,4,1536,16,0,0.065610667069753
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,4,1536,32,0,0.04177600145339966
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,4,1536,64,0,0.03984533250331879
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,4,1536,128,0,0.03976533313592275
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,4,2048,1,0,0.7147093613942465
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,4,2048,2,0,0.3781013488769531
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,4,2048,4,0,0.20788800716400146
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,4,2048,8,0,0.12237866719563802
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,4,2048,16,0,0.07898133496443431
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,4,2048,32,0,0.04781866570313772
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,4,2048,64,0,0.04403733213742574
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,4,2048,128,0,0.04593066871166229
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,4,2048,1,0,0.8716213703155518
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,4,2048,2,0,0.45657066504160565
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,4,2048,4,0,0.24891199668248495
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,4,2048,8,0,0.14402666687965393
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,4,2048,16,0,0.09173333644866943
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,4,2048,32,0,0.05243733525276184
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,4,2048,64,0,0.046165332198143005
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,4,2048,128,0,0.044394666949907936
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,4,3072,1,0,1.3406079610188801
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,4,3072,2,0,0.7222826480865479
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,4,3072,4,0,0.3797973394393921
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,4,3072,8,0,0.21040000518163046
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,4,3072,16,0,0.13432000080744425
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,4,3072,32,0,0.08475733796755473
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,4,3072,64,0,0.06029333174228668
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,4,3072,128,0,0.058149332801500954
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,4,3072,1,0,1.5243412653605144
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,4,3072,2,0,0.7844213644663492
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,4,3072,4,0,0.4172319968541463
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,4,3072,8,0,0.23375467459360758
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,4,3072,16,0,0.14317867159843445
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,4,3072,32,0,0.09114133318265279
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,4,3072,64,0,0.06233599781990051
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,4,3072,128,0,0.058176000912984215
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,4,4096,1,0,2.1740266482035318
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,4,4096,2,0,1.1925013065338135
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,4,4096,4,0,0.5974826812744141
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,4,4096,8,0,0.3534453312555949
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,4,4096,16,0,0.19432532787322998
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,4,4096,32,0,0.1264959971110026
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,4,4096,64,0,0.0843946635723114
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,4,4096,128,0,0.07387733459472656
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,4,4096,1,0,2.359920024871826
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,4,4096,2,0,1.203210671742757
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,4,4096,4,0,0.6270879904429117
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,4,4096,8,0,0.34114134311676025
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,4,4096,16,0,0.2020533283551534
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,4,4096,32,0,0.127893328666687
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,4,4096,64,0,0.07838400204976399
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,4,4096,128,0,0.07105599840482076
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,4,6144,1,0,5.013536135355632
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,4,6144,2,0,2.6307679812113443
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,4,6144,4,0,1.3207626342773438
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,4,6144,8,0,0.6403946479161581
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,4,6144,16,0,0.3674986759821574
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,4,6144,32,0,0.22295467058817545
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,4,6144,64,0,0.13920000195503235
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,4,6144,128,0,0.11397866408030193
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,4,6144,1,0,4.324789365132649
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,4,6144,2,0,2.201456069946289
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,4,6144,4,0,1.1344799995422363
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,4,6144,8,0,0.6039520104726156
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,4,6144,16,0,0.34144532680511475
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,4,6144,32,0,0.21418132384618124
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,4,6144,64,0,0.14060266812642416
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,4,6144,128,0,0.09921066959698994
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,4,8192,1,0,8.496965408325195
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,4,8192,2,0,3.724085489908854
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,4,8192,4,0,1.9215413729349773
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,4,8192,8,0,1.1360586484273274
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,4,8192,16,0,0.5737919807434082
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,4,8192,32,0,0.33453865845998126
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,4,8192,64,0,0.21283199389775595
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,4,8192,128,0,0.1546986699104309
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,4,8192,1,0,7.000352223714192
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,4,8192,2,0,3.466298739115397
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,4,8192,4,0,1.7758132616678874
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,4,8192,8,0,0.9298186302185059
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,4,8192,16,0,0.5129439830780029
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,4,8192,32,0,0.30243732531865436
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,4,8192,64,0,0.19800533850987753
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,4,8192,128,0,0.13145599762598673
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,8,16,1,0,0.03565866748491923
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,8,16,2,0,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,8,16,4,0,0.02091199904680252
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,8,16,8,0,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,8,16,16,0,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,8,16,32,0,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,8,16,64,0,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,8,16,128,0,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,8,16,1,0,0.03977599988381068
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,8,16,2,0,0.031386665999889374
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,8,16,4,0,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,8,16,8,0,0.0229066660006841
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,8,16,16,0,0.021338666478792827
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,8,16,32,0,0.01950399950146675
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,8,16,64,0,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,8,16,128,0,0.020154666155576706
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,8,32,1,0,0.03782399992148081
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,8,32,2,0,0.027215999861558277
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,8,32,4,0,0.020448000480731327
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,8,32,8,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,8,32,16,0,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,8,32,32,0,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,8,32,64,0,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,8,32,128,0,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,8,32,1,0,0.04398400088151296
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,8,32,2,0,0.03186666717131933
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,8,32,4,0,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,8,32,8,0,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,8,32,16,0,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,8,32,32,0,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,8,32,64,0,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,8,32,128,0,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,8,64,1,0,0.045312002301216125
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,8,64,2,0,0.029765332738558452
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,8,64,4,0,0.021386665602525074
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,8,64,8,0,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,8,64,16,0,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,8,64,32,0,0.016016000260909397
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,8,64,64,0,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,8,64,128,0,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,8,64,1,0,0.06630933284759521
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,8,64,2,0,0.03736533224582672
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,8,64,4,0,0.0273333340883255
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,8,64,8,0,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,8,64,16,0,0.021695998807748158
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,8,64,32,0,0.023210667073726654
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,8,64,64,0,0.02298133323589961
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,8,64,128,0,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,8,128,1,0,0.05861866474151611
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,8,128,2,0,0.03579733272393545
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,8,128,4,0,0.024197332561016083
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,8,128,8,0,0.018794666975736618
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,8,128,16,0,0.01748266691962878
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,8,128,32,0,0.017360000560681026
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,8,128,64,0,0.017445333302021027
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,8,128,128,0,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,8,128,1,0,0.09735467036565144
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,8,128,2,0,0.06043200194835663
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,8,128,4,0,0.031583999594052635
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,8,128,8,0,0.0245919997493426
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,8,128,16,0,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,8,128,32,0,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,8,128,64,0,0.023120000958442688
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,8,128,128,0,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,8,256,1,0,0.1030613382657369
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,8,256,2,0,0.061978667974472046
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,8,256,4,0,0.03384000062942505
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,8,256,8,0,0.023434666295846302
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,8,256,16,0,0.02143466720978419
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,8,256,32,0,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,8,256,64,0,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,8,256,128,0,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,8,256,1,0,0.16381866733233133
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,8,256,2,0,0.09664533535639445
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,8,256,4,0,0.05625600119431814
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,8,256,8,0,0.031530665854612984
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,8,256,16,0,0.027082666754722595
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,8,256,32,0,0.025226667523384094
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,8,256,64,0,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,8,256,128,0,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,8,512,1,0,0.208186666170756
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,8,512,2,0,0.11544000109036763
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,8,512,4,0,0.06889066596825917
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,8,512,8,0,0.04208533465862274
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,8,512,16,0,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,8,512,32,0,0.02550933261712392
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,8,512,64,0,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,8,512,128,0,0.023200000325838726
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,8,512,1,0,0.32414400577545166
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,8,512,2,0,0.17714667320251465
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,8,512,4,0,0.10172800223032634
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,8,512,8,0,0.06410666803518932
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,8,512,16,0,0.033733333150545754
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,8,512,32,0,0.029616000751654308
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,8,512,64,0,0.02922133356332779
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,8,512,128,0,0.02902399996916453
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,8,1024,1,0,0.5006826718648275
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,8,1024,2,0,0.2657439907391866
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,8,1024,4,0,0.14685866236686707
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,8,1024,8,0,0.08777067065238953
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,8,1024,16,0,0.05749333401521047
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,8,1024,32,0,0.035674666364987694
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,8,1024,64,0,0.031632001201311745
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,8,1024,128,0,0.031445334355036415
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,8,1024,1,0,0.7111519972483317
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,8,1024,2,0,0.3732159932454427
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,8,1024,4,0,0.20388267437616983
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,8,1024,8,0,0.11814399560292561
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,8,1024,16,0,0.07473599910736084
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,8,1024,32,0,0.03984533250331879
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,8,1024,64,0,0.03562133262554804
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,8,1024,128,0,0.03558400024970373
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,8,1536,1,0,0.8981333573659261
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,8,1536,2,0,0.46857066949208576
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,8,1536,4,0,0.2520959973335266
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,8,1536,8,0,0.14225600163141885
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,8,1536,16,0,0.08889066179593404
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,8,1536,32,0,0.059877331058184304
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,8,1536,64,0,0.04180799921353658
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,8,1536,128,0,0.03937600056330363
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,8,1536,1,0,1.175978660583496
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,8,1536,2,0,0.6043999989827474
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,8,1536,4,0,0.32255999247233075
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,8,1536,8,0,0.1808906594912211
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,8,1536,16,0,0.11340799927711487
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,8,1536,32,0,0.07252266506354015
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,8,1536,64,0,0.0452159990866979
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,8,1536,128,0,0.041877334316571556
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,8,2048,1,0,1.3725333213806152
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,8,2048,2,0,0.7382826805114746
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,8,2048,4,0,0.3806026776631673
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,8,2048,8,0,0.21076265970865884
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,8,2048,16,0,0.1260533332824707
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,8,2048,32,0,0.08412800232569377
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,8,2048,64,0,0.05406400064627329
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,8,2048,128,0,0.04807466765244802
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,8,2048,1,0,1.7164427439371746
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,8,2048,2,0,0.8797120253245035
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,8,2048,4,0,0.46661333243052167
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,8,2048,8,0,0.2501866618792216
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,8,2048,16,0,0.14843733112017313
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,8,2048,32,0,0.09531199932098389
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,8,2048,64,0,0.05612266560395559
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,8,2048,128,0,0.0498933345079422
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,8,3072,1,0,2.8252960840861
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,8,3072,2,0,1.3762186368306477
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,8,3072,4,0,0.7069386641184489
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,8,3072,8,0,0.38050134976704914
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,8,3072,16,0,0.21895466248194376
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,8,3072,32,0,0.13886933525403342
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,8,3072,64,0,0.09227200349171956
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,8,3072,128,0,0.0721973329782486
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,8,3072,1,0,3.012981414794922
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,8,3072,2,0,1.5298506418863933
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,8,3072,4,0,0.7937493324279785
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,8,3072,8,0,0.42178134123484295
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,8,3072,16,0,0.23994133869806925
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,8,3072,32,0,0.15090133746465048
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,8,3072,64,0,0.09691733121871948
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,8,3072,128,0,0.06658133367697398
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,8,4096,1,0,4.558805465698242
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,8,4096,2,0,2.247999986012777
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,8,4096,4,0,1.249344031016032
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,8,4096,8,0,0.626421332359314
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,8,4096,16,0,0.33353598912556964
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,8,4096,32,0,0.19986667235692343
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,8,4096,64,0,0.13583466410636902
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,8,4096,128,0,0.09489599863688152
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,8,4096,1,0,4.682640075683594
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,8,4096,2,0,2.363114674886068
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,8,4096,4,0,1.2085973421732585
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,8,4096,8,0,0.6313386758168539
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,8,4096,16,0,0.34865065415700275
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,8,4096,32,0,0.2055893341700236
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,8,4096,64,0,0.13431466619173685
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,8,4096,128,0,0.09110933542251587
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,16,16,1,0,0.0576800008614858
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,16,16,2,0,0.03559466699759165
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,16,16,4,0,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,16,16,8,0,0.020938667158285778
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,16,16,16,0,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,16,16,32,0,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,16,16,64,0,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,16,16,128,0,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,16,16,1,0,0.0635040005048116
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,16,16,2,0,0.04158399999141693
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,16,16,4,0,0.031301334500312805
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,16,16,8,0,0.025146665672461193
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,16,16,16,0,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,16,16,32,0,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,16,16,64,0,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,16,16,128,0,0.021386665602525074
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,16,32,1,0,0.064560001095136
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,16,32,2,0,0.037615999579429626
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,16,32,4,0,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,16,32,8,0,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,16,32,16,0,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,16,32,32,0,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,16,32,64,0,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,16,32,128,0,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,16,32,1,0,0.08258133133252461
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,16,32,2,0,0.045968001087506614
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,16,32,4,0,0.03358400116364161
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,16,32,8,0,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,16,32,16,0,0.02149333308140437
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,16,32,32,0,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,16,32,64,0,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,16,32,128,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,16,64,1,0,0.07625600198904674
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,16,64,2,0,0.04518933097521464
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,16,64,4,0,0.02957333376010259
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,16,64,8,0,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,16,64,16,0,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,16,64,32,0,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,16,64,64,0,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,16,64,128,0,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,16,64,1,0,0.11573867003122966
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,16,64,2,0,0.06831466654936473
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,16,64,4,0,0.03736533224582672
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,16,64,8,0,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,16,64,16,0,0.022965334355831146
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,16,64,32,0,0.021642667551835377
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,16,64,64,0,0.022965334355831146
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,16,64,128,0,0.021642667551835377
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,16,128,1,0,0.10333866874376933
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,16,128,2,0,0.05851200222969055
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,16,128,4,0,0.03565866748491923
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,16,128,8,0,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,16,128,16,0,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,16,128,32,0,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,16,128,64,0,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,16,128,128,0,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,16,128,1,0,0.1723466714223226
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,16,128,2,0,0.09759466846783955
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,16,128,4,0,0.060175999999046326
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,16,128,8,0,0.033600000043710075
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,16,128,16,0,0.025018667181332905
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,16,128,32,0,0.022970666488011677
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,16,128,64,0,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,16,128,128,0,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,16,256,1,0,0.18618667125701904
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,16,256,2,0,0.10354666908582051
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,16,256,4,0,0.06235733131567637
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,16,256,8,0,0.03570133447647095
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,16,256,16,0,0.023984000086784363
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,16,256,32,0,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,16,256,64,0,0.021397332350413006
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,16,256,128,0,0.021162666380405426
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,16,256,1,0,0.3020266691843669
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,16,256,2,0,0.1646560033162435
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,16,256,4,0,0.09735467036565144
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,16,256,8,0,0.05835733314355215
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,16,256,16,0,0.03156266609827677
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,16,256,32,0,0.02739733209212621
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,16,256,64,0,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,16,256,128,0,0.025333332518736523
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,16,512,1,0,0.3937973181406657
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,16,512,2,0,0.20918933550516763
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,16,512,4,0,0.1157973309357961
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,16,512,8,0,0.07010133564472198
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,16,512,16,0,0.045114666223526
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,16,512,32,0,0.02756800005833308
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,16,512,64,0,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,16,512,128,0,0.025455998877684276
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,16,512,1,0,0.6274240016937256
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,16,512,2,0,0.3294293284416199
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,16,512,4,0,0.17864533265431723
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,16,512,8,0,0.10314133763313293
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,16,512,16,0,0.06648533542950948
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,16,512,32,0,0.03563733398914337
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,16,512,64,0,0.02997333308060964
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,16,512,128,0,0.029546665648619335
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,16,1024,1,0,0.9776159922281901
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,16,1024,2,0,0.5080480178197225
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,16,1024,4,0,0.2709653377532959
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,16,1024,8,0,0.15037332971890768
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,16,1024,16,0,0.09116266171137492
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,16,1024,32,0,0.06025599936644236
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,16,1024,64,0,0.039818666875362396
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,16,1024,128,0,0.035877334574858345
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,16,1024,1,0,1.3989493052164714
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,16,1024,2,0,0.7167466481526693
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,16,1024,4,0,0.37401068210601807
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,1,128,32,0,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,16,1024,8,0,0.20594133933385214
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,16,1024,16,0,0.12017599741617839
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,16,1024,32,0,0.07885866860548656
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,16,1024,64,0,0.04804266492525736
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,16,1024,128,0,0.03963200002908707
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,16,1536,1,0,1.741967995961507
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,16,1536,2,0,0.905669371287028
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,16,1536,4,0,0.5039680004119873
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,16,1536,8,0,0.2672160069147746
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,16,1536,16,0,0.17109866937001547
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,16,1536,32,0,0.09514666597048442
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,16,1536,64,0,0.07694399853547414
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,16,1536,128,0,0.049866666396458946
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,16,1536,2,0,1.1844853560129802
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,16,1536,1,0,2.339914639790853
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,16,1536,4,0,0.6120160023371378
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,16,1536,8,0,0.3280799984931946
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,16,1536,16,0,0.18572266896565756
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,16,1536,32,0,0.11767466862996419
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,16,1536,64,0,0.07870933413505554
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,16,1536,128,0,0.056618665655454
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,16,2048,2,0,1.3878560066223145
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,16,2048,1,0,2.7578932444254556
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,16,2048,4,0,0.7832000255584717
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,16,2048,8,0,0.40718400478363037
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,16,2048,16,0,0.23946134249369302
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,16,2048,32,0,0.13446399569511414
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,16,2048,64,0,0.09332266449928284
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,16,2048,128,0,0.06548266609509786
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,16,2048,2,0,1.7279094060262044
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,16,2048,1,0,3.420111974080404
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,16,2048,4,0,0.8846080303192139
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,16,2048,8,0,0.4631679852803548
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,16,2048,16,0,0.25833600759506226
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,16,2048,32,0,0.15262933572133383
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,16,2048,64,0,0.10127466917037964
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,16,2048,128,0,0.06828799843788147
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,32,16,1,0,0.10325866937637329
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,32,16,2,0,0.058431997895240784
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,32,16,4,0,0.035829332967599235
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,32,16,8,0,0.027376001079877216
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,32,16,16,0,0.021397332350413006
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,32,16,32,0,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,32,16,64,0,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,32,16,128,0,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,32,16,1,0,0.12153066198031108
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,32,16,2,0,0.06434133152167003
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,32,16,4,0,0.04005866746107737
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,32,16,8,0,0.03179733455181122
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,32,16,16,0,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,32,16,32,0,0.021183999876181286
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,32,16,64,0,0.021738665799299877
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,32,16,128,0,0.02146133283774058
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,32,32,1,0,0.1139359970887502
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,32,32,2,0,0.06629866858323415
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,32,32,4,0,0.03942399968703588
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,32,32,8,0,0.027674667537212372
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,32,32,16,0,0.02165333429972331
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,32,32,32,0,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,32,32,64,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,32,32,128,0,0.018725333114465077
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,32,32,1,0,0.14432000120480856
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,32,32,2,0,0.08476266264915466
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,32,32,4,0,0.043882668018341064
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,32,32,8,0,0.033759998778502144
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,32,32,16,0,0.025455998877684276
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,32,32,32,0,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,32,32,64,0,0.021226666867733
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,32,32,128,0,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,32,64,1,0,0.13522133231163025
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,32,64,2,0,0.07693866888682048
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,32,64,4,0,0.04398933549722036
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,32,64,8,0,0.03134933362404505
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,32,64,16,0,0.02163733293612798
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,32,64,32,0,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,32,64,64,0,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,1,6144,128,0,0.09301867087682088
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,32,64,128,0,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,32,64,1,0,0.2076746622721354
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,32,64,2,0,0.11556800206502278
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,32,64,4,0,0.06843733290831248
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,32,64,8,0,0.03788800040880839
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,32,64,32,0,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,32,64,16,0,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,32,64,64,0,0.021551998953024547
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,32,64,128,0,0.02165333429972331
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,32,128,1,0,0.18954133987426758
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,32,128,2,0,0.10422399640083313
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,32,128,4,0,0.06066133578618368
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,32,128,8,0,0.039706667264302574
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,32,128,16,0,0.025600001215934753
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,32,128,32,0,0.019541333119074505
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,32,128,64,0,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,32,128,128,0,0.01859733338157336
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,32,128,1,0,0.32102400064468384
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,32,128,2,0,0.17357333501180014
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,32,128,8,0,0.0620000014702479
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,32,128,16,0,0.032672000428040825
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,32,128,32,0,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,32,128,64,0,0.02327999969323476
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,32,128,128,0,0.023242667317390442
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,32,256,1,0,0.35753067334493
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,32,256,2,0,0.18953599532445273
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,32,256,4,0,0.10558933019638062
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,32,256,8,0,0.06548266609509786
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,32,256,16,0,0.039247999588648476
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,32,256,32,0,0.025386666258176167
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,32,256,64,0,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,32,256,128,0,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,32,256,1,0,0.5868426561355591
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,32,256,2,0,0.30801065762837726
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,32,256,4,0,0.16665599743525186
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,32,256,8,0,0.09850133458773296
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,32,256,16,0,0.06042666733264923
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,32,256,32,0,0.032458665470282234
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,32,256,64,0,0.027232001225153606
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,32,256,128,0,0.027376001079877216
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,32,512,1,0,0.7699093023935953
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,32,512,4,0,0.21409066518147787
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,32,512,8,0,0.12262399991353352
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,32,512,16,0,0.07532266775767009
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,32,512,32,0,0.05171200136343638
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,32,512,64,0,0.03359466542800268
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,32,512,128,0,0.02749866743882497
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,32,512,1,0,1.236565351486206
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,32,512,2,0,0.6355786720911661
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,32,512,4,0,0.3301493326822917
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,32,512,8,0,0.18538665771484375
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,32,512,16,0,0.10653332869211833
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,32,512,32,0,0.07037333150704701
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,32,512,64,0,0.04165866722663244
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,32,512,128,0,0.03538133452335993
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,32,1024,1,0,1.903264045715332
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,32,1024,2,0,0.9839946428934733
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,32,1024,4,0,0.5128639936447144
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,32,1024,8,0,0.27587733666102093
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,32,1024,16,0,0.15980266531308493
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,32,1024,32,0,0.10075733065605164
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,32,1024,64,0,0.07051200171311696
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,32,1024,128,0,0.05189866820971171
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,32,1024,1,0,2.7913812001546225
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,32,1024,2,0,1.4243839581807454
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,32,1024,4,0,0.7255520025889078
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,32,1024,8,0,0.37915201981862384
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,32,1024,16,0,0.21160000562667847
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,32,1024,32,0,0.1256480018297831
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,32,1024,64,0,0.084714670976003
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,32,1024,128,0,0.056101332108179726
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,64,16,1,0,0.19180800517400107
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,64,16,2,0,0.105295995871226
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,64,16,4,0,0.05858133236567179
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,64,16,8,0,0.03578133384386698
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,64,16,16,0,0.025674665967623394
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,64,16,32,0,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,64,16,64,0,0.016762666404247284
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,64,16,128,0,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,64,16,1,0,0.22035199403762817
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,64,16,2,0,0.12046933174133301
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,64,16,4,0,0.06558933357397716
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,64,16,8,0,0.04197333256403605
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,64,16,16,0,0.031983998914559685
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,64,16,32,0,0.025813333690166473
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,64,16,64,0,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,64,16,128,0,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,64,32,1,0,0.21306665738423666
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,64,32,2,0,0.11508267124493916
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,64,32,4,0,0.06659733255704244
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,64,32,8,0,0.039733332892258964
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,64,32,16,0,0.027471999327341717
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,64,32,32,0,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,64,32,64,0,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,64,32,128,0,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,64,32,1,0,0.2698026696840922
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,64,32,2,0,0.14619732896486917
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,64,32,4,0,0.08436800042788188
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,64,32,8,0,0.045663997530937195
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,64,32,16,0,0.03334933271010717
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,64,32,32,0,0.027215999861558277
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,64,32,64,0,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,64,32,128,0,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,64,64,1,0,0.26150933901468915
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,64,64,2,0,0.1371999979019165
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,64,64,4,0,0.07772799829641978
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,64,64,8,0,0.04775466521581014
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,64,64,16,0,0.031397332747777305
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,64,64,32,0,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,64,64,64,0,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,64,64,128,0,0.01740266631046931
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,64,64,1,0,0.3928266763687134
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,64,64,2,0,0.20752533276875815
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,64,64,4,0,0.11956800023714702
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,64,64,8,0,0.06865066786607106
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,64,64,16,0,0.03773866593837738
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,64,64,32,0,0.029258665939172108
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,64,64,64,0,0.023386667172114056
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,64,64,128,0,0.023237332701683044
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,64,128,1,0,0.36050665378570557
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,64,128,2,0,0.1916373372077942
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,64,128,4,0,0.10525332887967427
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,64,128,8,0,0.06225066880385081
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,64,128,16,0,0.04020266731580099
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,64,128,32,0,0.027024000883102417
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,64,128,64,0,0.019343999524911244
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,64,128,128,0,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,64,128,1,0,0.625930666923523
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,64,128,2,0,0.32505067189534503
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,64,128,4,0,0.17706666390101114
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,64,128,8,0,0.10136000315348308
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,64,128,16,0,0.06264000137646993
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,64,128,32,0,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,64,128,64,0,0.025290665527184803
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,64,128,128,0,0.02516266703605652
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,64,256,1,0,0.686469316482544
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,64,256,2,0,0.35753599802652997
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,64,256,4,0,0.19444799423217773
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,64,256,8,0,0.11079466342926025
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,64,256,16,0,0.0687306672334671
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,64,256,32,0,0.04595200220743815
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,64,256,64,0,0.02771199991305669
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,64,256,128,0,0.025040000677108765
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,64,256,1,0,1.1544853051503499
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,64,256,2,0,0.5948373476664225
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,64,256,4,0,0.31196266412734985
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,64,256,8,0,0.17190933227539062
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,64,256,16,0,0.10107733805974324
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,64,256,32,0,0.06478400031725566
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,64,256,64,0,0.037605332831541695
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,64,256,128,0,0.029663999875386555
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,64,512,1,0,1.5120320320129395
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,64,512,2,0,0.7840320269266764
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,64,512,4,0,0.4066026608149211
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,64,512,8,0,0.22233066956202188
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,64,512,16,0,0.12995200355847678
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,64,512,32,0,0.08291733264923096
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,64,512,64,0,0.0592853327592214
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,64,512,128,0,0.043824002146720886
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,64,512,1,0,2.4697653452555337
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,64,512,2,0,1.2510346571604412
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,64,512,4,0,0.640122652053833
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,64,512,8,0,0.33532265822092694
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,64,512,16,0,0.1885333259900411
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,64,512,32,0,0.11385066310564677
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,64,512,64,0,0.07663466533025105
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,64,512,128,0,0.04997866849104563
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,128,16,1,0,0.36790398756663006
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,128,16,2,0,0.1918720006942749
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,128,16,4,0,0.10472533106803894
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,128,16,8,0,0.060234665870666504
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,128,16,16,0,0.03758399933576584
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,128,16,32,0,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,128,16,64,0,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,128,16,128,0,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,128,16,1,0,0.4190719922383626
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,128,16,2,0,0.22107199827829996
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,128,16,4,0,0.12304000059763591
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,128,16,8,0,0.06621866424878438
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,128,16,16,0,0.041637333730856575
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,128,16,32,0,0.03173866619666418
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,128,16,64,0,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,128,16,128,0,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,128,32,1,0,0.4251573483149211
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,128,32,2,0,0.21343467632929483
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,128,32,4,0,0.11528000235557556
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,128,32,8,0,0.06593066453933716
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,128,32,16,0,0.04053333401679993
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,128,32,32,0,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,128,32,64,0,0.02292799949645996
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,128,32,128,0,0.018245333184798557
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,128,32,1,0,0.515226682027181
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,128,32,2,0,0.27060800790786743
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,128,32,4,0,0.14804266889890036
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,128,32,8,0,0.08870933453241985
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,128,32,16,0,0.045850664377212524
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,128,32,32,0,0.0335413341720899
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,128,32,64,0,0.025226667523384094
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,128,32,128,0,0.023269332945346832
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,128,64,1,0,0.5063946644465128
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,128,64,2,0,0.2633066574732463
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,128,64,4,0,0.13941867152849832
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,128,64,8,0,0.08105599880218506
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,128,64,16,0,0.04966933528582255
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,128,64,32,0,0.03326933334271113
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,128,64,64,0,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,128,64,128,0,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,128,64,1,0,0.7680426438649496
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,128,64,2,0,0.3952000141143799
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,128,64,4,0,0.21064533789952597
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,128,64,8,0,0.11988266309102376
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,128,64,16,0,0.07029866675535838
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,128,64,32,0,0.03987200061480204
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,128,64,64,0,0.029493334392706554
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,128,64,128,0,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,128,128,1,0,0.698527971903483
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,128,128,2,0,0.3622986475626628
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,128,128,4,0,0.1938506762186686
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,128,128,8,0,0.11006933450698853
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,128,128,16,0,0.0673333356777827
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,128,128,32,0,0.04560000201066335
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,128,128,64,0,0.030378667016824085
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,128,128,128,0,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,128,128,1,0,1.2300960222880046
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,128,128,2,0,0.6347946723302206
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,128,128,4,0,0.3287786642710368
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,128,128,8,0,0.17901867628097534
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,128,128,16,0,0.10683733224868774
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,128,128,32,0,0.06654933094978333
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,128,128,64,0,0.039349332451820374
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,128,128,128,0,0.029157333076000214
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,128,256,1,0,1.3528000513712566
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,128,256,2,0,0.691215991973877
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,128,256,4,0,0.37034666538238525
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,128,256,8,0,0.2017973264058431
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,128,256,16,0,0.1193386713663737
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,128,256,32,0,0.07815466821193695
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,128,256,64,0,0.05354666709899902
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,128,256,128,0,0.03967999915281931
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,128,256,1,0,2.2979520161946616
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,128,256,2,0,1.1710506280263264
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,128,256,4,0,0.5992746750513712
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,128,256,8,0,0.3158613244692485
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,128,256,16,0,0.17756799856821695
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,128,256,32,0,0.107205331325531
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,128,256,64,0,0.07037866612275441
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,128,256,128,0,0.04780800143877665
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,256,16,1,0,0.7138453324635824
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,256,16,2,0,0.36640000343322754
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,256,16,4,0,0.19270400206247965
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,256,16,8,0,0.10550399621327718
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,256,16,16,0,0.06003733476003011
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,256,16,32,0,0.03829866647720337
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,256,16,64,0,0.029391999046007793
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,256,16,128,0,0.02293333411216736
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,256,16,1,0,0.8101440270741781
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,256,16,2,0,0.41919465859731037
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,256,16,4,0,0.2219466765721639
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,256,16,8,0,0.12365866700808208
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,256,16,16,0,0.0650079995393753
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,256,16,32,0,0.041519999504089355
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,256,16,64,0,0.03350933392842611
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,256,16,128,0,0.027248000105222065
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,256,32,1,0,0.8012693723042806
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,256,32,2,0,0.4100533326466878
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,256,32,4,0,0.21424533923467
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,256,32,8,0,0.11788266897201538
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,256,32,16,0,0.06862933437029521
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,256,32,32,0,0.04178133110205332
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,256,32,64,0,0.0295413335164388
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,256,32,128,0,0.023397333920001984
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,256,32,1,0,1.0185173352559407
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,256,32,2,0,0.518181324005127
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,256,32,4,0,0.27318400144577026
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,256,32,8,0,0.14961066842079163
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,256,32,16,0,0.0888213316599528
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,256,32,32,0,0.04799466828505198
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,256,32,64,0,0.0352906659245491
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,256,32,128,0,0.029338667790095013
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,256,64,1,0,0.9918773174285889
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,256,64,2,0,0.505349318186442
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,256,64,4,0,0.2656000057856242
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,256,64,8,0,0.1442346672217051
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,256,64,16,0,0.08384533723195393
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,256,64,32,0,0.05384533107280731
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,256,64,64,0,0.03755199909210205
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,256,64,128,0,0.027503999571005504
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,256,64,1,0,1.5304053624471028
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,256,64,2,0,0.7731893062591553
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,256,64,4,0,0.40012800693511963
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,256,64,8,0,0.2137653430302938
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,256,64,16,0,0.12175466616948445
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,256,64,32,0,0.07457066575686137
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,256,64,64,0,0.04595200220743815
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,256,64,128,0,0.03358400116364161
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,128,256,128,1,0,1.3784373601277669
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,256,128,2,0,0.7101653416951498
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,32,256,128,4,0,0.36983466148376465
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,256,128,8,0,0.20189867417017618
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,8,256,128,16,0,0.11853333314259847
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,4,256,128,32,0,0.07441066702206929
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,2,256,128,64,0,0.054101333022117615
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,1,256,128,128,0,0.04178133110205332
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,128,256,128,1,0,2.4636693000793457
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,64,256,128,2,0,1.2607573668162029
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,256,128,4,0,0.6391839981079102
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,16,256,128,8,0,0.3354613383611043
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,8,256,128,16,0,0.18701332807540894
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,4,256,128,32,0,0.10916266838709514
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,2,256,128,64,0,0.0740586668252945
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,1,256,128,128,0,0.04977599779764811
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,16,1,256,8,0,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,fp8,32,32,128,4,0,0.09801600376764934
TRTLLM,1.2.0rc5,NVIDIA GB200,mla_context,default,float16,float16,64,32,512,2,0,0.4076746702194214
