Coverage for src / eclipse / care / legal / recommended_files.py: 90%
87 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-24 09:38 +0100
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-24 09:38 +0100
1# Copyright (c) 2025 The Eclipse Foundation
2#
3# This program and the accompanying materials are made available under the
4# terms of the Eclipse Public License 2.0 which is available at
5# http://www.eclipse.org/legal/epl-2.0.
6#
7# SPDX-License-Identifier: EPL-2.0
8#
9# Contributors:
10# Boris Baldassari - Initial implementation
12"""
13A module to find and identify a set of recommended files, like README, CONTRIBUTING, etc.
15It can query GitHub or GitLab projects, and tries to follow the recommendations of the
16Eclipse Project Handbook [1] and the former analyser [2].
18[1] https://www.eclipse.org/projects/handbook/ \
19[2] https://www.eclipse.org/projects/tools/documentation.php
21It looks for a set of files for each category of best practices, namely:
23* Readme - `readme` or `readme.md`
24* Conduct - `code_of_conduct.md`
25* Contributing - `contributing` or `contributing.md`
26* License - `license`
28It only looks for files at the root of the repository. Search is case-insensitive.
30"""
import re
from os.path import basename
from typing import Iterable, Optional, Union

from care.utils.eclipse import Eclipse
from care.utils.github import GitHubEmo
from care.utils.gitlab import GitLabEmo
# Lookup tables, one per category of recommended file. Each entry is used as a
# regular expression by `find_file_in_list`, which matches it case-insensitively
# against the start of the file names found at the root of a repository.
# All tables are sets so that they can be used interchangeably.

files_readme = {
    "readme",
    "readme.md",
    # "readme.txt"
}
""" The set of file names considered as readme's. """

files_conduct = {
    "code_of_conduct.md",
    # "codeofconduct"
}
""" The set of file names considered as codes of conduct. """

files_contributing = {
    "contributing",
    "contributing.md",
    # "copying",
}
""" The set of file names considered as contributing guides. """

files_license = {
    "license",
    # "license.txt",
}
""" The set of file names considered as license texts. """

# NOTE: `{}` is an empty dict, not an empty set; `set()` keeps these two
# placeholders the same type as the other lookup tables.
files_notice = set()
""" The set of file names considered as notices (currently empty). """

files_security = set()
""" The set of file names considered for security information (currently empty). """
def find_file_in_list(files_repo: list, files_lookup: Union[str, Iterable[str]],
                      verbose: Optional[bool] = False):
    """
    Return the first entry of `files_repo` matched by one of the patterns in `files_lookup`.

    Patterns are tried one after the other. For each pattern, `files_repo` is scanned
    in order and the first matching entry is returned immediately.

    Matching is case-insensitive and uses `re.match`, i.e. the pattern is anchored at the
    start of the file name only: the pattern `readme` also matches `README.txt`.

    Parameters
    ----------
    files_repo : list
        The file names to search through, and match to the regular expressions.
    files_lookup : str or iterable of str
        The regular expressions (either string or raw string) to match against the
        list of files. A single string is treated as one pattern. Sets and frozensets
        are tried in sorted order, since their iteration order is not stable between
        runs; any other iterable is tried in its own order.
    verbose : Optional[bool]
        Should we print more information on stdout?

    Returns
    -------
    Optional[str]
        The first matching entry of `files_repo`, or None if nothing matched.
    """
    if isinstance(files_lookup, str):
        # Iterating a bare string would try each character as a pattern.
        patterns = [files_lookup]
    elif isinstance(files_lookup, (set, frozenset)):
        # Make the "first match" reproducible whatever the hash seed is.
        patterns = sorted(files_lookup)
    else:
        patterns = files_lookup

    for file_lookup in patterns:
        p = re.compile(file_lookup, re.IGNORECASE)
        for file_repo in files_repo:
            if p.match(file_repo):
                if verbose:
                    print(f" Matched {file_lookup} in {file_repo}.")
                return file_repo

    return None
def gh_analyse(organisation: str, credentials: Optional[dict] = None, verbose: Optional[bool] = False):
    """
    Scan every repository of a GitHub organisation for the recommended files.

    For each repository, the entries found at the root of the repository are matched
    against the module-level lookup tables (readme, conduct, contributing, license).

    Parameters
    ----------
    organisation : str
        GitHub organisation to analyse.
    credentials : Optional[dict]
        Credentials, handed over as-is to `GitHubEmo`.
    verbose : Optional[bool]
        Should we print more information on stdout?

    Returns
    -------
    Optional[dict]
        One entry per repository, each being a dict with the keys `readme`, `conduct`,
        `contributing` and `license`, holding the matched file name or None.
        None is returned as soon as `get_repos` or `get_content_root` returns None.
    """
    client = GitHubEmo(credentials=credentials, verbose=verbose)

    # Get list of repositories from the organisation.
    org_repos = client.get_repos(organisation=organisation)
    if org_repos is None:
        return None

    if verbose:
        print(f" Found {len(org_repos)} GH repos to check.")

    # Result key -> lookup table, in the order the categories are checked.
    categories = {
        'readme': files_readme,
        'conduct': files_conduct,
        'contributing': files_contributing,
        'license': files_license,
    }

    findings = {}
    for name in org_repos:
        if verbose:
            print(f"- Analysing GH repo {name}.")

        # List files at the root of the directory.
        root_entries = client.get_content_root(repo=name)
        if root_entries is None:
            # Something went wrong with the GH request: give up on the whole analysis.
            return None

        findings[name] = {
            category: find_file_in_list(files_repo=root_entries,
                                        files_lookup=lookups,
                                        verbose=verbose)
            for category, lookups in categories.items()
        }

    return findings
def gl_analyse(organisation: str, credentials: Optional[dict] = None, verbose: Optional[bool] = False):
    """
    Scan every repository of a GitLab group for the recommended files.

    This function runs the full analysis, from the creation of the GitLab client to the
    actual queries. The readme and the license are taken from the dedicated client calls
    (`get_readme`, `get_licence`) and reduced to their base name; the code of conduct
    and the contributing guide are looked up in the entries found at the root of the
    repository.

    Parameters
    ----------
    organisation : str
        GitLab group to analyse.
    credentials : Optional[dict]
        Credentials, handed over as-is to `GitLabEmo`.
    verbose : Optional[bool]
        Should we print more information on stdout?

    Returns
    -------
    Optional[dict]
        One entry per repository, each being a dict with the keys `readme`, `conduct`,
        `contributing` and `license`, holding the matched file name or None.
        None is returned as soon as `get_repos` or `get_content_root` returns None.
    """
    client = GitLabEmo(credentials=credentials, verbose=verbose)

    group_repos = client.get_repos(group=organisation)
    if group_repos is None:
        return None

    if verbose:
        print(f" Found {len(group_repos)} GL repos to check.")

    findings = {}
    for name in group_repos:
        if verbose:
            print(f"- Analysing GL repo {name}.")

        # List files at the root of the directory.
        root_entries = client.get_content_root(repo=name)
        if root_entries is None:
            # Something went wrong with the GL request: give up on the whole analysis.
            return None

        # The calls are kept in the order readme, conduct, contributing, license.
        readme_path = client.get_readme(repo=name)
        conduct_file = find_file_in_list(files_repo=root_entries,
                                         files_lookup=files_conduct,
                                         verbose=verbose)
        contributing_file = find_file_in_list(files_repo=root_entries,
                                              files_lookup=files_contributing,
                                              verbose=verbose)
        licence_path = client.get_licence(repo=name)

        findings[name] = {
            'readme': basename(readme_path) if readme_path else None,
            'conduct': conduct_file,
            'contributing': contributing_file,
            'license': basename(licence_path) if licence_path else None,
        }

    return findings
def analyse_project(project_id: str, credentials: Optional[dict] = None, verbose: bool = False):
    """
    Generic entrypoint to analyse a project. This function will identify the project
    repositories and their type (github/gitlab) and execute the corresponding functions.

    The GitHub organisation is read from `project_api['github']['org']` and the GitLab
    group from `project_api['gitlab']['project_group']`; a missing or empty value means
    that the corresponding forge is skipped.

    Parameters
    ----------
    project_id : str
        Project ID of the Eclipse project to analyse, e.g. `technology.dash`.
    credentials : Optional[dict]
        Credentials, handed over as-is to `gh_analyse` and `gl_analyse`.
    verbose : bool
        Should we print more information on stdout?

    Returns
    -------
    Optional[dict]
        The merged results of `gh_analyse` and `gl_analyse`, one entry per repository.
        The dict is empty if the project has neither a GitHub organisation nor a
        GitLab group. None is returned if the project could not be retrieved, or if
        one of the analyses failed (i.e. returned None).
    """
    eclipse = Eclipse()
    project_api = eclipse.get_project_api(project_id)

    if project_api is None:
        return None

    results = {}

    # Truthiness checks skip missing, null and empty values alike.
    github = project_api['github'] if 'github' in project_api else None
    gh_org = github['org'] if github and 'org' in github else None
    if gh_org:
        if verbose:
            print(f" Looking for projects in GH org {gh_org}.")
        gh_results = gh_analyse(organisation=gh_org, credentials=credentials, verbose=verbose)
        # gh_analyse returns None on failure; dict.update(None) would raise a TypeError.
        if gh_results is None:
            return None
        results.update(gh_results)

    gitlab = project_api['gitlab'] if 'gitlab' in project_api else None
    gl_org = gitlab['project_group'] if gitlab and 'project_group' in gitlab else None
    if gl_org:
        if verbose:
            print(f" Looking for projects in GL group {gl_org}.")
        gl_results = gl_analyse(organisation=gl_org, credentials=credentials, verbose=verbose)
        # Same failure convention as for GitHub.
        if gl_results is None:
            return None
        results.update(gl_results)

    return results