Coverage for src / eclipse / care / utils / gitlab.py: 58%

137 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-24 09:38 +0100

1# Copyright (c) 2025 The Eclipse Foundation 

2# 

3# This program and the accompanying materials are made available under the 

4# terms of the Eclipse Public License 2.0 which is available at 

5# http://www.eclipse.org/legal/epl-2.0. 

6# 

7# SPDX-License-Identifier: EPL-2.0 

8# 

9# Contributors: 

10# asgomes - Additional methods 

11# Boris Baldassari - Initial implementation 

12 

13""" 

14A module to provide GitLab functionalities, in the context of the EMO assessment 

15checks. It manages authentication (based on environment variables or a configuration 

16file), and several utilities to interact with the Eclipse GitLab instance. 

17""" 

18 

19import os 

20from pathlib import Path 

21from time import sleep 

22from typing import Optional, Any, List 

23 

24from gitlab import Gitlab, exceptions as gl_exceptions, GitlabGetError 

25from requests import exceptions 

26 

27from care.utils.cli_utils import print_error, print_warning 

28from care.utils.git_provider import GitProviderEMO 

29 

30# Max connection attempts to GitLab API 

31MAX_API_ATTEMPTS = 3 

32 

33 

class GitLabEmo(GitProviderEMO):
    """A class to retrieve all GitLab-related information, in the context
    of the EMO needs. It notably takes care of the authentication during
    the initialisation of the module, and provides several utility functions.
    """

    gl = None
    """ The gitlab object. """

    repos = {}
    """ Caching repos when working on objects. """

    def __init__(self, credentials: Optional[dict] = None, verbose: bool = False):
        """Initialises a class to interact with GitLab-based data sources.

        Parameters
        ----------
        credentials : dict
            An optional dict of tokens to use for GitLab/GitHub auth. Only the
            'GL_TOKEN' entry is read here; a missing or empty entry means an
            unauthenticated connection.
        verbose : bool
            Boolean to display more information on stdout (optional).
        """
        super().__init__()

        # Give every instance its own cache: the class-level dict would
        # otherwise be shared by all instances, including ones created
        # with different credentials.
        self.repos = {}

        # .get() so that a credentials dict without a GitLab token falls
        # back to anonymous access instead of raising KeyError.
        gl_token = credentials.get('GL_TOKEN') if credentials else None

        # If we got credentials, use them. Otherwise, just proceed without
        # authentication, hoping we won't be throttled.
        if gl_token:
            # using an access token
            if verbose:
                print(" Connecting to GitLab with auth.")
            gl = Gitlab(url='https://gitlab.eclipse.org', private_token=gl_token)
            gl.auth()
        else:
            if verbose:
                print(" Connecting to GitLab without authentication.")
            gl = Gitlab(url='https://gitlab.eclipse.org')

        self.gl = gl

    def get_gl(self):
        """Retrieve the GitLab handle for this connection.
        """
        return self.gl

    @staticmethod
    def _classify_api_error(error: Exception) -> tuple:
        """Decide whether a failed API call is worth retrying.

        Parameters
        ----------
        error : Exception
            Either a requests ConnectionError or a GitlabGetError.

        Returns
        -------
        tuple
            (is_retryable, error_message). Connection errors are always
            retryable; GitLab errors only when they carry a 5xx status.
        """
        if isinstance(error, exceptions.ConnectionError):
            # Connection errors are always treated as retryable network issues
            return True, str(error)

        # GitlabGetErrors are only retryable if they are 5xx server errors.
        # response_code may be None, which must not be compared to an int.
        response_code = error.response_code
        is_retryable = response_code is not None and 500 <= response_code < 600
        return is_retryable, error.error_message

    @staticmethod
    def _remove_partial_file(path: Path) -> None:
        """Best-effort removal of a file left behind by a failed download."""
        try:
            path.unlink()
        except OSError:
            # Nothing to clean up (or not removable): the caller is already
            # on an error path, so do not mask it with a second failure.
            pass

    def get_repo(self, repo: str) -> Any:
        """Return the repository object, either from local cache if it has been
        already requested, or from the remote otherwise.

        Parameters
        ----------
        repo : str
            The repository to get a handle on.

        Returns
        -------
        The project object, or None if it could not be retrieved (a message
        is printed in that case).
        """
        if repo in self.repos:
            return self.repos[repo]

        try:
            # Get repository details.
            gl_repo = self.gl.projects.get(repo, license=True)
        except gl_exceptions.GitlabAuthenticationError:
            print("Invalid GitLab Authentication Token provided. Exiting...")
            return None
        except gl_exceptions.GitlabGetError:
            print(f"Invalid GitLab Repository found ({repo}). Exiting...")
            return None
        except Exception as e:
            # Exception rather than BaseException, so that Ctrl-C and
            # SystemExit are not silently turned into a None result.
            print(f"GitLab connection error: {e}. Exiting...")
            return None

        # Store repo in cache (successful lookups only).
        self.repos[repo] = gl_repo
        return gl_repo

    def get_repos(self, group: str) -> Optional[List[str]]:
        """Return a list of repositories names (as strings) belonging to the
        group, subgroups included and archived projects excluded.

        Parameters
        ----------
        group : str
            The group to get the repositories.

        Returns
        -------
        The list of 'path_with_namespace' strings, or None if the group could
        not be retrieved (a message is printed in that case).
        """
        try:
            gl_group = self.gl.groups.get(id=group)
        except gl_exceptions.GitlabAuthenticationError:
            print("Invalid GitLab Authentication Token provided. Exiting...")
            return None
        except gl_exceptions.GitlabGetError:
            print(f"Invalid GitLab Group found ({group}). Exiting...")
            return None
        except Exception as e:
            # Exception rather than BaseException, so that Ctrl-C and
            # SystemExit are not silently turned into a None result.
            print(f"GitLab connection error: {e}. Exiting...")
            return None

        projects = gl_group.projects.list(get_all=True, archived=False, include_subgroups=True)
        return [project.path_with_namespace for project in projects]

    def get_content_root(self, repo: str) -> Optional[List[str]]:
        """List files and directories at the root of the repository.

        Parameters
        ----------
        repo : str
            The repository to retrieve content for, e.g. eclipse-dash/dash

        Returns
        -------
        The list of paths found at the root of the default branch, or None
        if the repository could not be retrieved.
        """
        gl_repo = self.get_repo(repo=repo)
        if gl_repo is None:
            return None

        # List files at the root of the directory. get_all=True (as already
        # used in get_repos) requests every page of results.
        contents = gl_repo.repository_tree(ref=gl_repo.default_branch, get_all=True)

        # Have all files at the root of the repository in an array.
        return [entry['path'] for entry in contents if 'path' in entry]

    def get_content_recursive(self, repo: str, branch: Optional[str] = None) -> Optional[List[dict]]:
        """List files and directories of the repository recursively.

        The call is attempted up to MAX_API_ATTEMPTS times, waiting
        2, 4, 8... seconds between attempts, when the failure is a connection
        error or a 5xx answer. Any other GitLab error aborts immediately.

        Parameters
        ----------
        repo : str
            The repository to retrieve content for, e.g. eclipse-dash/dash
        branch: Optional[str]
            A specific branch to check out (defaults to the default branch)

        Returns
        -------
        The entries returned by the repository tree API, or None if the
        repository or its tree could not be retrieved.
        """
        gl_repo = self.get_repo(repo=repo)
        if not gl_repo:
            return None
        if branch is None:
            branch = gl_repo.default_branch

        for attempt in range(1, MAX_API_ATTEMPTS + 1):
            try:
                return gl_repo.repository_tree(ref=branch, recursive=True, get_all=True)
            except (exceptions.ConnectionError, GitlabGetError) as e:
                is_retryable, error_message = self._classify_api_error(e)

                # Not retryable (e.g. 404): give up right away.
                if not is_retryable:
                    print_error(f"Got {error_message} attempting to get repository tree "
                                f"({gl_repo.path_with_namespace}). Ignoring copyright headers for the project...")
                    return None

                # Max attempts reached.
                if attempt == MAX_API_ATTEMPTS:
                    print_error(f"Got {error_message} after {MAX_API_ATTEMPTS} attempts to get repository tree "
                                f"({gl_repo.path_with_namespace}). Ignoring copyright headers for the project...")
                    return None

                # Exponential back-off before the next attempt.
                wait_time = 2 ** attempt
                print_warning(f"Got {error_message} attempting to get repository tree "
                              f"({gl_repo.path_with_namespace}). Retrying in {wait_time} seconds "
                              f"(Attempt {attempt}/{MAX_API_ATTEMPTS}).")
                sleep(wait_time)
        return None

    def get_readme(self, repo: str) -> Optional[str]:
        """Return the path to the README file of the repo, according to GL standards.

        Parameters
        ----------
        repo : str
            The repository to retrieve the readme for, e.g. eclipse-dash/dash

        Returns
        -------
        The 'readme_url' attribute of the project, or None if the repository
        could not be retrieved.
        """
        gl_repo = self.get_repo(repo=repo)
        if gl_repo is None:
            return None

        return gl_repo.readme_url

    def get_licence(self, repo: str) -> Optional[str]:
        """Return the path to the LICENSE file of the repo, according to GL standards.

        Parameters
        ----------
        repo : str
            The repository to retrieve the license for, e.g. eclipse-dash/dash

        Returns
        -------
        The 'license_url' attribute of the project, or None if the repository
        could not be retrieved.
        """
        gl_repo = self.get_repo(repo=repo)
        if gl_repo is None:
            return None

        return gl_repo.license_url

    def get_file(self, repo: str, tmpdir: str, fpath: str, branch: Optional[str] = None,
                 bytes_to_read: Optional[int] = 2048) -> Optional[Path]:
        """Download a file of the repo into tmpdir and return its local path.

        The download is attempted up to MAX_API_ATTEMPTS times, waiting
        2, 4, 8... seconds between attempts, when the failure is a connection
        error or a 5xx answer. Any other GitLab error aborts immediately.

        Parameters
        ----------
        repo : str
            The repository to retrieve the file from, e.g. eclipse-dash/dash
        tmpdir : str
            The temporary directory where to store the file
        fpath : str
            The path to the file relative to the root of the repository
        branch: Optional[str]
            A specific branch to check out (defaults to the default branch)
        bytes_to_read: Optional[int]
            The number of bytes to fetch (ignored in this implementation for compatibility)

        Returns
        -------
        The path of the downloaded file (tmpdir joined with the base name of
        fpath), or None if the repository or the file could not be retrieved.
        """
        gl_repo = self.get_repo(repo=repo)
        if not gl_repo:
            return None
        if branch is None:
            branch = gl_repo.default_branch

        # Only the base name is kept: the file lands directly in tmpdir.
        wpath = Path(os.path.join(tmpdir, os.path.basename(fpath)))

        for attempt in range(1, MAX_API_ATTEMPTS + 1):
            try:
                # 'w+b' truncates, so a retry starts again from an empty file.
                with open(wpath, 'w+b') as f:
                    gl_repo.files.raw(file_path=fpath, ref=branch, streamed=True, action=f.write, timeout=30)
                return wpath
            except (exceptions.ConnectionError, GitlabGetError) as e:
                is_retryable, error_message = self._classify_api_error(e)

                # Not retryable (e.g. 404): give up right away.
                if not is_retryable:
                    self._remove_partial_file(wpath)
                    print_error(f"Got {error_message} attempting to obtain file "
                                f"({gl_repo.path_with_namespace}/{fpath}) from GitLab. Skipping...")
                    return None

                # Max attempts reached.
                if attempt == MAX_API_ATTEMPTS:
                    self._remove_partial_file(wpath)
                    print_error(f"Got {error_message} after {MAX_API_ATTEMPTS} "
                                f"attempts to obtain file {gl_repo.path_with_namespace}/{fpath}. Skipping...")
                    return None

                # Exponential back-off before the next attempt.
                wait_time = 2 ** attempt
                print_warning(f"Got {error_message} attempting to obtain file "
                              f"{gl_repo.path_with_namespace}/{fpath}. Retrying in {wait_time} seconds "
                              f"(Attempt {attempt}/{MAX_API_ATTEMPTS}).")
                sleep(wait_time)
        return None