Coverage for src / eclipse / care / copyright / copyright_headers.py: 86%
125 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-24 09:38 +0100
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-24 09:38 +0100
1# Copyright (c) 2026 The Eclipse Foundation
2#
3# This program and the accompanying materials are made available under the
4# terms of the Eclipse Public License 2.0 which is available at
5# http://www.eclipse.org/legal/epl-2.0.
6#
7# SPDX-License-Identifier: EPL-2.0
8#
9# Contributors:
10# asgomes - Initial implementation
12import json
13import os
14from importlib import resources
15from math import ceil
16from pathlib import Path
17from tempfile import TemporaryDirectory
18from typing import Optional
20from scancode.api import get_copyrights
21from typing_extensions import Any
23from care.utils.cli_utils import print_debug, print_error
24from care.utils.eclipse import Eclipse
25from care.utils.git_provider import GitProviderEMO
26from care.utils.github import GitHubEmo
27from care.utils.gitlab import GitLabEmo
def _load_known_extensions() -> list[dict[str, Any]] | None:
    """
    Load known source code file extensions.

    The data is read from the ``templates/comment-templates.json`` resource
    shipped inside the ``care`` package.

    Returns
    -------
    list[dict[str, Any]] | None
        The parsed JSON content, or None when the resource could not be read
        or parsed. In the failure case the reason is reported via print_error.
    """

    try:
        template = resources.files('care').joinpath('templates/comment-templates.json')
        # Decode explicitly as UTF-8 instead of relying on the platform's locale encoding
        with template.open('r', encoding='utf-8') as fp:
            return json.load(fp)
    except FileNotFoundError:
        print_error("No comment-templates.json found to load known source code file extensions.")
    except json.JSONDecodeError as e:
        print_error(f"The comment-templates.json could not be parsed to load known source code file extensions. "
                    f"Details: {e}")
    except OSError as e:  # Catches other I/O related errors like permissions issues
        print_error(f"An IO error occurred while accessing comment-templates.json to load known source code file "
                    f"extensions. Details: {e}")
    except Exception as e:  # Fallback for any other unexpected errors
        print_error(f"An unexpected error occurred while attempting to load known source code file extensions: {e}")
    # Every handled failure ends up here: make the "nothing loaded" result explicit
    return None
def _detect_copyright(file_path: Path) -> list[str] | None:
    """
    Detect copyright header in a single file and return copyright holders.

    Only holders reported at or before line 20 are kept, so that a notice
    appearing further down the file is not counted as a header.

    Parameters
    ----------
    file_path : Path
        The path to the file to process

    Returns
    -------
    list[str] | None
        The distinct holder names (whitespace-stripped, sorted), or None when
        no holder was found or when detection failed. Failures are reported
        via print_error.
    """

    # Last line (inclusive) at which a detected holder still counts as part of the header
    max_header_line = 20

    try:
        results = get_copyrights(str(file_path))
        holders = {
            holder["holder"].strip()
            for holder in results.get("holders", [])
            if holder.get("holder") and holder.get("start_line") <= max_header_line
        }
        return sorted(holders) if holders else None
    except Exception as e:
        print_error(f"An unexpected error occurred during copyright detection: {e}")
        return None
def _get_copyright_info(provider: GitProviderEMO, repo: str, branch: Optional[str] = None,
                        verbose: Optional[bool] = False) -> tuple[int, list]:
    """
    Get copyright headers percentage and copyright holders for all files in a repository.

    Only 'blob' items whose extension (or whole name, when there is no
    extension) is listed in the known source code extensions are considered.

    Parameters
    ----------
    provider : GitProviderEMO
        The Git provider to connect to fetch repo files
    repo : str
        The repository to retrieve info for, e.g. eclipse-dash/dash
    branch: Optional[str]
        A specific branch to check out
    verbose : Optional[bool]
        Should we print more information on stdout?

    Returns
    -------
    tuple[int, list]
        The percentage of considered files that have a copyright header
        (rounded up to the next integer, 0 when no file was considered) and
        the sorted list of distinct copyright holders. ``(-1, [])`` is
        returned when the file listing or the known extensions are unavailable.
    """

    # Get a list of files
    files = provider.get_content_recursive(repo, branch=branch)
    if files is None:
        return -1, []

    # Load known source code file extensions once for the whole repository
    # (the loader reads and parses a JSON resource on every call).
    templates = _load_known_extensions()
    if templates is None:
        # The loader has already reported why; nothing can be classified without it
        return -1, []
    known_extensions = {ext.lower() for template in templates for ext in template['extensions']}

    count = 0
    with_header = 0
    percentage_headers = 0
    copyright_holders = set()
    for file_count, file in enumerate(files, start=1):
        if verbose:
            print_debug(f"Checking repository item {file_count}/{len(files)}")
            print_debug(f"Item location: {file['path']}")
        if file['type'] != 'blob':
            continue
        # If the file has no extension, the name is the extension
        stem, extension = os.path.splitext(file['name'])
        if extension == "":
            extension = stem
        if extension.lower() not in known_extensions:
            continue

        count += 1
        # The file is fetched into a throw-away directory and analysed before it is removed
        with TemporaryDirectory() as tmpdir:
            local_file = provider.get_file(repo, tmpdir, file['path'], branch=branch)
            holders = _detect_copyright(local_file)
        if not holders:
            if verbose:
                print_debug(f"No header found for: {file['path']}")
            continue
        with_header += 1
        copyright_holders.update(holders)

    # Calculate the percentage of files with headers
    if count > 0:
        if verbose:
            print_debug(f"Number of files with headers: {with_header}")
            print_debug(f"Number of files with known extension: {count}")
        # Round to the next integer (avoid 0% when something is found).
        # Integer ceiling division: the float form (with_header / count) * 100 can
        # overshoot an exact value (7/100 -> 7.000000000000001) and round up to 8.
        percentage_headers = (with_header * 100 + count - 1) // count

    return percentage_headers, sorted(copyright_holders)
def gh_analyze(organization: str, credentials: Optional[dict] = None, verbose: Optional[bool] = False) -> dict | None:
    """
    Look for copyright headers in every repository of a GitHub organization.

    Parameters
    ----------
    organization : str
        GitHub organization to analyze.
    credentials: Optional[dict]
        A mapping of service providers (GitHub, GitLab) to their respective authentication tokens.
    verbose : Optional[bool]
        Should we print more information on stdout?

    Returns
    -------
    dict | None
        One entry per repository holding its 'copyright_headers_percentage'
        and 'copyright_holders', or None when the client is falsy or no
        repository was returned for the organization.
    """

    client = GitHubEmo(credentials=credentials, verbose=verbose)
    if not client:
        return None

    # Ask the provider for the repositories of the organization
    repos = client.get_repos(organization)
    if not repos:
        return None

    report = {}
    for repo in repos:
        if verbose:
            print(f"- Analysing GH repo {repo}.")
        percentage, holders = _get_copyright_info(client, repo, verbose=verbose)
        report[repo] = {
            'copyright_headers_percentage': percentage,
            'copyright_holders': sorted(holders),
        }
    return report
def gl_analyze(group: str, credentials: Optional[dict] = None, verbose: Optional[bool] = False) -> dict | None:
    """
    Look for copyright headers in every repository of a GitLab group.

    Parameters
    ----------
    group : str
        GitLab group to analyze.
    credentials: Optional[dict]
        A mapping of service providers (GitHub, GitLab) to their respective authentication tokens.
    verbose : Optional[bool]
        Should we print more information on stdout?

    Returns
    -------
    dict | None
        One entry per repository holding its 'copyright_headers_percentage'
        and 'copyright_holders', or None when the client is falsy or no
        repository was returned for the group.
    """

    client = GitLabEmo(credentials=credentials, verbose=verbose)
    if not client:
        return None

    # Ask the provider for the repositories of the group
    repos = client.get_repos(group)
    if not repos:
        return None

    report = {}
    for repo in repos:
        if verbose:
            print(f"- Analysing GL repo {repo}.")
        percentage, holders = _get_copyright_info(client, repo, verbose=verbose)
        report[repo] = {
            'copyright_headers_percentage': percentage,
            'copyright_holders': sorted(holders),
        }
    return report
def analyse_project(project_id: str, credentials: Optional[dict] = None, verbose: bool = False) -> dict | None:
    """
    Generic entrypoint to analyze a project. This function will identify the project
    repositories and their type (GitHub/GitLab) and execute the corresponding functions.

    Parameters
    ----------
    project_id : str
        Project ID of the Eclipse project to analyze, e.g. `technology.dash`.
    credentials: Optional[dict]
        A mapping of service providers (GitHub, GitLab) to their respective authentication tokens.
    verbose : bool
        Should we print more information on stdout?

    Returns
    -------
    dict | None
        The merged per-repository results of gh_analyze and gl_analyze (empty
        when neither produced anything), or None when the project API data
        could not be retrieved.
    """

    eclipse = Eclipse()
    project_api = eclipse.get_project_api(project_id)

    if project_api is None:
        return None

    results = {}

    if 'github' in project_api and 'org' in project_api['github'] and len(project_api['github']['org']) > 0:
        gh_org = project_api['github']['org']
        if verbose:
            print(f"  Looking for projects in GH org {gh_org}.")
        gh_results = gh_analyze(gh_org, credentials=credentials, verbose=verbose)
        # gh_analyze returns None when nothing could be analysed; dict.update(None) would raise
        if gh_results:
            results.update(gh_results)

    if 'gitlab' in project_api and 'project_group' in project_api['gitlab'] and len(project_api['gitlab']['project_group']) > 0:
        gl_org = project_api['gitlab']['project_group']
        if verbose:
            print(f"  Looking for projects in GL group {gl_org}.")
        gl_results = gl_analyze(gl_org, credentials=credentials, verbose=verbose)
        # Same guard as above: gl_analyze may return None
        if gl_results:
            results.update(gl_results)

    return results