framework,version,device,op_name,kernel_source,allreduce_dtype,num_gpus,message_size,latency,backend
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,8,128,0.004665600061416626,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,8,128,0.020799679756164553,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,8,256,0.004727360010147095,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,8,256,0.020625920295715333,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,8,512,0.004758400022983551,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,8,512,0.024578559398651122,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,8,1024,0.004785279929637909,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,8,1024,0.020673599243164063,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,8,2048,0.004832319915294647,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,8,2048,0.021856000423431395,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,8,4096,0.005129920244216919,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,8,4096,0.024779520034790038,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,8,8192,0.00522271990776062,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,8,8192,0.020899200439453126,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,8,16384,0.005618240237236023,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,8,16384,0.020731520652770997,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,8,32768,0.006453440189361573,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,8,32768,0.02092128038406372,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,8,65536,0.008083840012550355,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,8,65536,0.020737600326538087,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,8,131072,0.019628159999847412,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,8,131072,0.02363231897354126,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,8,262144,0.021698880195617675,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,8,262144,0.023709120750427245,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,8,524288,0.02753216028213501,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,8,524288,0.028602240085601804,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,8,1048576,0.04152448177337646,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,8,1048576,0.04346752166748047,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,8,2097152,0.05535840034484864,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,8,2097152,0.05481152057647705,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,8,4194304,0.08389087677001952,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,8,4194304,0.0844048023223877,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,8,8388608,0.12381600379943847,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,8,8388608,0.12942303657531737,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,8,16777216,0.19806047439575197,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,8,16777216,0.20175264358520506,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,8,33554432,0.319835205078125,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,8,33554432,0.3464790344238281,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,8,67108864,0.5646950531005859,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,8,67108864,0.6263593673706055,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,8,134217728,1.0534979248046876,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,8,134217728,1.1783920288085938,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,8,268435456,2.033415069580078,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,8,268435456,2.2890985107421877,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,8,536870912,4.012770385742188,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,8,536870912,4.515662536621094,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,2,128,0.0043043199181556704,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,2,128,0.02013887882232666,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,2,256,0.004431999921798706,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,2,256,0.02039455890655518,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,2,512,0.004444800019264221,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,2,512,0.03306047916412354,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,2,1024,0.004483520090579987,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,2,1024,0.019150400161743165,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,2,2048,0.004517439901828766,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,2,2048,0.01924448013305664,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,2,4096,0.004567680060863495,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,2,4096,0.019154560565948487,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,2,8192,0.004542079865932465,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,2,8192,0.019270720481872557,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,2,16384,0.004579200148582459,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,2,16384,0.01926144003868103,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,2,32768,0.004721600115299225,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,2,32768,0.019182720184326173,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,2,65536,0.004826560020446778,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,2,65536,0.019340159893035887,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,2,131072,0.005178560018539428,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,2,131072,0.02342463970184326,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,2,262144,0.0064035201072692866,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,2,262144,0.01923424005508423,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,2,524288,0.008477439880371094,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,2,524288,0.01924064040184021,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,2,1048576,0.012558079957962038,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,2,1048576,0.019199999570846556,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,2,2097152,0.020557758808135988,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,2,2097152,0.027512640953063966,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,2,4194304,0.05110559940338135,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,2,4194304,0.052994241714477544,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,2,8388608,0.07907008171081543,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,2,8388608,0.09075200080871582,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,2,16777216,0.14442079544067382,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,2,16777216,0.16136703491210938,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,2,33554432,0.25112960815429686,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,2,33554432,0.284843521118164,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,2,67108864,0.46755233764648435,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,2,67108864,0.530285758972168,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,2,134217728,0.8893049621582032,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,2,134217728,1.015497589111328,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,2,268435456,1.684875183105469,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,2,268435456,1.9520172119140624,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,2,536870912,3.3101806640625,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,2,536870912,3.7904031372070315,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,4,128,0.004608640074729919,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,4,128,0.01947648048400879,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,4,256,0.004536319971084594,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,4,256,0.019415999650955203,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,4,512,0.004519360065460205,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,4,512,0.019591039419174193,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,4,1024,0.004607680141925812,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,4,1024,0.019322240352630617,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,4,2048,0.004682239890098572,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,4,2048,0.01913856029510498,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,4,4096,0.0047654399275779726,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,4,4096,0.019263360500335693,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,4,8192,0.004798400104045868,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,4,8192,0.02085952043533325,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,4,16384,0.00494623988866806,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,4,16384,0.019338879585266113,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,4,32768,0.010084480047225952,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,4,32768,0.019169600009918214,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,4,65536,0.005852479934692382,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,4,65536,0.019296319484710694,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,4,131072,0.007010560035705567,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,4,131072,0.019268800020217896,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,4,262144,0.010450559854507446,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,4,262144,0.01939103960990906,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,4,524288,0.012138559818267822,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,4,524288,0.019263999462127684,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,4,1048576,0.017808959484100342,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,4,1048576,0.024536321163177492,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,4,2097152,0.029856319427490237,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,4,2097152,0.037348160743713374,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,4,4194304,0.06423935890197754,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,4,4194304,0.06655231952667237,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,4,8388608,0.1018220806121826,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,4,8388608,0.11030271530151367,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,4,16777216,0.17870847702026366,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,4,16777216,0.19490367889404298,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,4,33554432,0.3252124786376953,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,4,33554432,0.35955806732177736,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,4,67108864,0.6190617752075196,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,4,67108864,0.6830332946777344,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,4,134217728,1.1819132995605468,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,4,134217728,1.30953857421875,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,4,268435456,2.316376342773437,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,4,268435456,2.5697576904296873,vllm_eager
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_graph,float16,4,536870912,4.519442749023438,vllm_graph
vLLM,0.12.0,NVIDIA H200,all_reduce,vLLM_custom_eager,float16,4,536870912,5.052057189941406,vllm_eager
