Coverage for src / eclipse / care / utils / gitlab.py: 58%
137 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-24 09:38 +0100
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-24 09:38 +0100
1# Copyright (c) 2025 The Eclipse Foundation
2#
3# This program and the accompanying materials are made available under the
4# terms of the Eclipse Public License 2.0 which is available at
5# http://www.eclipse.org/legal/epl-2.0.
6#
7# SPDX-License-Identifier: EPL-2.0
8#
9# Contributors:
10# asgomes - Additional methods
11# Boris Baldassari - Initial implementation
13"""
14A module to provide GitLab functionalities, in the context of the EMO assessment
15checks. It manages authentication (based on environment variables or a configuration
16file), and several utilities to interact with the Eclipse GitLab instance.
17"""
19import os
20from pathlib import Path
21from time import sleep
22from typing import Optional, Any, List
24from gitlab import Gitlab, exceptions as gl_exceptions, GitlabGetError
25from requests import exceptions
27from care.utils.cli_utils import print_error, print_warning
28from care.utils.git_provider import GitProviderEMO
# Maximum number of times a single GitLab API call is attempted before the
# caller gives up and reports the failure.
MAX_API_ATTEMPTS = 3
class GitLabEmo(GitProviderEMO):
    """Retrieve GitLab-related information in the context of the EMO needs.

    The class takes care of the authentication against the Eclipse GitLab
    instance when it is instantiated, and provides several utility methods
    (repository lookup, tree listing, file download) built on top of the
    python-gitlab client.
    """

    # The python-gitlab client handle; set by __init__.
    gl = None

    # Class-level fallback for the repository cache. Every instance built
    # through __init__ gets its own dict, so project objects fetched by one
    # client are never handed out by another one using other credentials.
    repos = {}

    def __init__(self, credentials: Optional[dict] = None, verbose: bool = False):
        """Initialise a client to interact with GitLab-based data sources.

        Errors raised by the authentication call are not handled here and
        propagate to the caller.

        Parameters
        ----------
        credentials : dict
            An optional dict of tokens to use for GitLab/GitHub auth. Only
            the 'GL_TOKEN' entry is read by this class.
        verbose : bool
            Boolean to display more information on stdout (optional).
        """
        super().__init__()

        # Per-instance cache: a mutable class attribute would be shared by
        # every instance of the class.
        self.repos = {}

        # A credentials dict without a 'GL_TOKEN' entry is treated exactly
        # like missing credentials instead of raising KeyError.
        gl_token = credentials.get('GL_TOKEN') if credentials else None

        # If we got credentials, use them. Otherwise, just proceed without
        # authentication, hoping we won't be throttled.
        if gl_token:
            # Using an access token.
            if verbose:
                print(" Connecting to GitLab with auth.")
            gl = Gitlab(url='https://gitlab.eclipse.org', private_token=gl_token)
            gl.auth()
        else:
            if verbose:
                print(" Connecting to GitLab without authentication.")
            gl = Gitlab(url='https://gitlab.eclipse.org')

        self.gl = gl

    def get_gl(self):
        """Retrieve the GitLab handle for this connection."""
        return self.gl

    def get_repo(self, repo: str) -> Any:
        """Return the repository object, either from the local cache if it has
        already been requested, or from the remote otherwise.

        Parameters
        ----------
        repo : str
            The repository to get a handle on.

        Returns
        -------
        The project object returned by the GitLab client, or None when the
        lookup failed (a message is printed in that case).
        """
        if repo in self.repos:
            return self.repos[repo]

        try:
            # Get repository details.
            gl_repo = self.gl.projects.get(repo, license=True)
        except gl_exceptions.GitlabAuthenticationError:
            print("Invalid GitLab Authentication Token provided. Exiting...")
            return None
        except gl_exceptions.GitlabGetError:
            print(f"Invalid GitLab Repository found ({repo}). Exiting...")
            return None
        except Exception as e:
            # Exception rather than BaseException, so that KeyboardInterrupt
            # and SystemExit are not swallowed.
            print(f"GitLab connection error: {e}. Exiting...")
            return None

        # Store repo in cache; failed lookups are never cached.
        self.repos[repo] = gl_repo
        return gl_repo

    def get_repos(self, group: str) -> Optional[List[str]]:
        """Return the list of repository names (as strings) belonging to the
        group. The list is always fetched from the remote.

        Archived projects are excluded and projects of subgroups are included.

        Parameters
        ----------
        group : str
            The group to get the repositories for.

        Returns
        -------
        The `path_with_namespace` of every matching project, or None when the
        group could not be read (a message is printed in that case).
        """
        try:
            gl_group = self.gl.groups.get(id=group)
            # Listing is a remote call as well, so it is covered by the same
            # error handling as the group lookup.
            projects = gl_group.projects.list(get_all=True, archived=False, include_subgroups=True)
        except gl_exceptions.GitlabAuthenticationError:
            print("Invalid GitLab Authentication Token provided. Exiting...")
            return None
        except gl_exceptions.GitlabGetError:
            print(f"Invalid GitLab Group found ({group}). Exiting...")
            return None
        except Exception as e:
            # Exception rather than BaseException, so that KeyboardInterrupt
            # and SystemExit are not swallowed.
            print(f"GitLab connection error: {e}. Exiting...")
            return None

        return [project.path_with_namespace for project in projects]

    def get_content_root(self, repo: str) -> Optional[List[str]]:
        """List files and directories at the root of the repository.

        Parameters
        ----------
        repo : str
            The repository to retrieve content for, e.g. eclipse-dash/dash

        Returns
        -------
        The paths found at the root of the default branch, or None when the
        repository could not be retrieved.
        """
        gl_repo = self.get_repo(repo=repo)
        if not gl_repo:
            # get_repo already reported the problem.
            return None

        # List files at the root of the directory.
        contents = gl_repo.repository_tree(ref=gl_repo.default_branch, get_all=True)

        # Keep the path of every entry that provides one.
        return [entry['path'] for entry in contents if 'path' in entry]

    @staticmethod
    def _classify_api_error(error: Exception) -> tuple:
        """Tell whether a failed GitLab API call is worth retrying.

        Parameters
        ----------
        error : Exception
            The exception raised by the API call.

        Returns
        -------
        A `(is_retryable, error_message)` tuple.
        """
        if isinstance(error, exceptions.ConnectionError):
            # Connection errors are always treated as retryable network issues.
            return True, str(error)
        if isinstance(error, GitlabGetError):
            # GitlabGetErrors are only retryable if they are 5xx server errors.
            # The code may be missing, in which case the error is not retried.
            code = error.response_code
            return (code is not None and 500 <= code < 600), error.error_message
        return False, ""

    def get_content_recursive(self, repo: str, branch: Optional[str] = None) -> Optional[List[dict]]:
        """List files and directories of the repository recursively.

        The call is attempted up to MAX_API_ATTEMPTS times, waiting 2, 4, ...
        seconds between attempts, as long as the failure is retryable.

        Parameters
        ----------
        repo : str
            The repository to retrieve content for, e.g. eclipse-dash/dash
        branch: Optional[str]
            A specific branch to check out; defaults to the default branch
            of the repository.

        Returns
        -------
        The entries of the repository tree, or None when the repository or
        its tree could not be retrieved.
        """
        gl_repo = self.get_repo(repo=repo)
        if not gl_repo:
            return None
        if branch is None:
            branch = gl_repo.default_branch

        for attempt in range(1, MAX_API_ATTEMPTS + 1):
            try:
                return gl_repo.repository_tree(ref=branch, recursive=True, get_all=True)
            except (exceptions.ConnectionError, GitlabGetError) as e:
                is_retryable, error_message = self._classify_api_error(e)

                # Not retryable (e.g. 404): give up immediately.
                if not is_retryable:
                    print_error(f"Got {error_message} attempting to get repository tree "
                                f"({gl_repo.path_with_namespace}). Ignoring copyright headers for the project...")
                    return None

                # Max attempts reached.
                if attempt == MAX_API_ATTEMPTS:
                    print_error(f"Got {error_message} after {MAX_API_ATTEMPTS} attempts to get repository tree "
                                f"({gl_repo.path_with_namespace}). Ignoring copyright headers for the project...")
                    return None

                # Exponential back-off before the next attempt.
                wait_time = 2 ** attempt
                print_warning(f"Got {error_message} attempting to get repository tree "
                              f"({gl_repo.path_with_namespace}). Retrying in {wait_time} seconds "
                              f"(Attempt {attempt}/{MAX_API_ATTEMPTS}).")
                sleep(wait_time)
        return None

    def get_readme(self, repo: str) -> Optional[str]:
        """Return the README location of the repo, as reported by GitLab
        through the `readme_url` attribute of the project.

        Parameters
        ----------
        repo : str
            The repository to retrieve the readme for, e.g. eclipse-dash/dash

        Returns
        -------
        The value of `readme_url`, or None when the repository could not be
        retrieved.
        """
        gl_repo = self.get_repo(repo=repo)
        if not gl_repo:
            return None
        return gl_repo.readme_url

    def get_licence(self, repo: str) -> Optional[str]:
        """Return the LICENSE location of the repo, as reported by GitLab
        through the `license_url` attribute of the project.

        Parameters
        ----------
        repo : str
            The repository to retrieve the license for, e.g. eclipse-dash/dash

        Returns
        -------
        The value of `license_url`, or None when the repository could not be
        retrieved.
        """
        gl_repo = self.get_repo(repo=repo)
        if not gl_repo:
            return None
        return gl_repo.license_url

    def get_file(self, repo: str, tmpdir: str, fpath: str, branch: Optional[str] = None,
                 bytes_to_read: Optional[int] = 2048) -> Optional[Path]:
        """Download a file of the repo into `tmpdir` and return its local path.

        The file is stored under its base name, so two files with the same
        name in different directories of the repository end up at the same
        local path. The download is attempted up to MAX_API_ATTEMPTS times,
        waiting 2, 4, ... seconds between attempts, as long as the failure is
        retryable.

        Parameters
        ----------
        repo : str
            The repository to retrieve the file from, e.g. eclipse-dash/dash
        tmpdir : str
            The temporary directory where to store the file
        fpath : str
            The path to the file relative to the root of the repository
        branch: Optional[str]
            A specific branch to check out; defaults to the default branch
            of the repository.
        bytes_to_read: Optional[int]
            The number of bytes to fetch (ignored in this implementation for compatibility)

        Returns
        -------
        The path of the downloaded file, or None when the repository or the
        file could not be retrieved.
        """
        gl_repo = self.get_repo(repo=repo)
        if not gl_repo:
            return None
        if branch is None:
            branch = gl_repo.default_branch

        # The destination does not depend on the attempt, compute it once.
        wpath = Path(os.path.join(tmpdir, os.path.basename(fpath)))

        for attempt in range(1, MAX_API_ATTEMPTS + 1):
            try:
                with open(wpath, 'w+b') as f:
                    gl_repo.files.raw(file_path=fpath, ref=branch, streamed=True, action=f.write, timeout=30)
                return wpath
            except (exceptions.ConnectionError, GitlabGetError) as e:
                # The destination was created before the request was sent:
                # drop the empty or partial file so that a failed download
                # does not look like an existing file. Best effort only.
                try:
                    wpath.unlink()
                except OSError:
                    pass

                is_retryable, error_message = self._classify_api_error(e)

                # Not retryable (e.g. 404): give up immediately.
                if not is_retryable:
                    print_error(f"Got {error_message} attempting to obtain file "
                                f"({gl_repo.path_with_namespace}/{fpath}) from GitLab. Skipping...")
                    return None

                # Max attempts reached.
                if attempt == MAX_API_ATTEMPTS:
                    print_error(f"Got {error_message} after {MAX_API_ATTEMPTS} "
                                f"attempts to obtain file {gl_repo.path_with_namespace}/{fpath}. Skipping...")
                    return None

                # Exponential back-off before the next attempt.
                wait_time = 2 ** attempt
                print_warning(f"Got {error_message} attempting to obtain file "
                              f"{gl_repo.path_with_namespace}/{fpath}. Retrying in {wait_time} seconds "
                              f"(Attempt {attempt}/{MAX_API_ATTEMPTS}).")
                sleep(wait_time)
        return None