Coverage for src / eclipse / care / utils / github.py: 78%

97 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-24 09:38 +0100

1# Copyright (c) 2025 The Eclipse Foundation 

2# 

3# This program and the accompanying materials are made available under the 

4# terms of the Eclipse Public License 2.0 which is available at 

5# http://www.eclipse.org/legal/epl-2.0. 

6# 

7# SPDX-License-Identifier: EPL-2.0 

8# 

9# Contributors: 

10# asgomes - Additional methods 

11# Boris Baldassari - Initial implementation 

12 

13import os 

14from pathlib import Path 

15from typing import Optional, Any, List 

16 

17from github import Github, Auth, UnknownObjectException, GithubException 

18from requests import get 

19 

20from care import print_error 

21from care.utils.git_provider import GitProviderEMO 

22 

23# Maximum number of connection attempts to the GitHub API. NOTE(review): nothing in the visible part of this module reads this constant - confirm it is still used elsewhere.

24MAX_API_ATTEMPTS = 3 

25 

26 

class GitHubEmo(GitProviderEMO):
    """A class to retrieve all GitHub-related information, in the context
    of the EMO needs.

    It wraps a PyGithub ``Github`` client and keeps a cache of the
    repository handles that have already been requested.
    """

    gh = None
    """ The github object. """

    repos = {}
    """ Caching repos when working on objects. """

    def __init__(self, credentials: Optional[dict] = None, verbose: bool = False):
        """Initialises a class to interact with GitHub-based data sources.

        Parameters
        ----------
        credentials : Optional[dict]
            An optional mapping holding the token to use for the GitHub
            connection under the ``GH_TOKEN`` key. When the mapping is
            missing, empty, or has no usable ``GH_TOKEN`` entry, the
            connection is made without authentication.
        verbose : bool
            Boolean to display more information on stdout (optional).
        """
        super().__init__()

        # Per-instance cache: a dict defined only at class level would be
        # shared by every instance, mixing repository handles obtained
        # through differently-authenticated connections.
        self.repos = {}

        # `.get` so that a credentials dict without a GH_TOKEN entry falls
        # back to anonymous access instead of raising KeyError.
        gh_token = credentials.get('GH_TOKEN') if credentials else None

        # If we got credentials, use them. Otherwise, just proceed without
        # authentication, hoping we won't be throttled.
        if gh_token:
            # using an access token
            if verbose:
                print(" Connecting to GitHub with auth.")
            self.gh = Github(auth=Auth.Token(gh_token))
        else:
            if verbose:
                print(" Connecting to GitHub without authentication.")
            self.gh = Github()

    def get_gh(self):
        """Retrieve the GitHub handle for this connection.
        """
        return self.gh

    def get_repo(self, repo: str) -> Any:
        """Return the repository object, either from local cache if it has been
        already requested, or from the remote otherwise.

        Parameters
        ----------
        repo : str
            The repository to get a handle on.

        Returns
        -------
        The repository object, or None if the repository cannot be found
        or if the query failed (a message is printed in both cases).
        """
        if repo in self.repos:
            return self.repos[repo]

        try:
            # Get repository details.
            gh_repo = self.gh.get_repo(repo)
        except UnknownObjectException:
            print(f"Github: cannot find {repo}.")
            return None
        except Exception as e:
            print(f"Unknown exception when querying Github: {e}")
            return None

        # Store repo in cache; failed lookups are deliberately not cached.
        self.repos[repo] = gh_repo
        return gh_repo

    def get_repos(self, organisation: str) -> Optional[List[str]]:
        """List all repositories from a given organisation.

        Parameters
        ----------
        organisation : str
            The organisation name to query (e.g. eclipse-dash).

        Returns
        -------
        The list of repository identifiers (the repository API url with
        the ``https://api.github.com/repos/`` prefix removed), or None if
        the organisation cannot be found.
        """
        try:
            gh_org = self.gh.get_organization(org=organisation)
        except UnknownObjectException:
            print(f"Github: cannot find {organisation}.")
            return None

        return [
            r.url.removeprefix('https://api.github.com/repos/')
            for r in gh_org.get_repos()
        ]

    def get_content_root(self, repo: str) -> Optional[List[str]]:
        """List files and directories at the root of the repository.

        Parameters
        ----------
        repo : str
            The repository to retrieve content for, e.g. eclipse-dash/dash

        Returns
        -------
        The paths of the entries found at the root of the repository, or
        None if the repository or its content cannot be retrieved.
        """
        gh_repo = self.get_repo(repo=repo)
        # get_repo() has already reported the problem; without this guard
        # the call below would fail on None with a misleading message.
        if not gh_repo:
            return None

        try:
            # List files at the root of the directory.
            contents = gh_repo.get_contents("")
        except Exception as e:
            print(f"Unknown exception when querying Github: {e}")
            return None

        # Have all files at the root of the repository in an array.
        return [content_file.path for content_file in contents]

    def get_content_recursive(self, repo: str, branch: Optional[str] = None) -> Optional[List[dict]]:
        """List files of the repository recursively.

        Parameters
        ----------
        repo : str
            The repository to retrieve content for, e.g. eclipse-dash/dash
        branch: Optional[str]
            A specific branch to check out. Defaults to the default branch
            of the repository.

        Returns
        -------
        One dict per file: the raw data of the git tree element plus a
        ``name`` key holding the base name of its ``path``. None if the
        repository or its tree cannot be retrieved.
        """
        gh_repo = self.get_repo(repo=repo)
        if not gh_repo:
            return None
        if branch is None:
            branch = gh_repo.default_branch

        # Get a list of files in the project repository
        files_repo = []
        try:
            tree = gh_repo.get_git_tree(sha=branch, recursive=True)
            for element in tree.tree:
                # 'blob' corresponds to a file; 'tree' corresponds to a directory
                if element.type != "blob":
                    continue
                # Work on a copy so that the extra 'name' key does not leak
                # into the raw data held by the tree element itself.
                file_data = dict(element.raw_data)
                file_data['name'] = os.path.basename(file_data['path'])
                files_repo.append(file_data)
        except GithubException as ghe:
            print_error(f"Got {ghe.message} trying to get repository contents for {gh_repo.full_name}.")
            return None

        return files_repo

    def get_file(self, repo: str, tmpdir: str, fpath: str, branch: Optional[str] = None,
                 bytes_to_read: Optional[int] = 2048) -> Optional[Path]:
        """Download (the beginning of) a file of the repo into a local directory.

        Parameters
        ----------
        repo : str
            The repository to retrieve the file from, e.g. eclipse-dash/dash
        tmpdir : str
            The temporary directory where to store the file
        fpath : str
            The path to the file relative to the root of the repository
        branch: Optional[str]
            A specific branch to check out. Defaults to the default branch
            of the repository.
        bytes_to_read: Optional[int]
            The number of bytes to fetch from the start of the file (defaults
            to 2048). None or a non-positive value fetches the whole file.

        Returns
        -------
        The path of the local copy (``tmpdir`` joined with the base name of
        ``fpath``), or None if the file could not be retrieved or written.
        """
        gh_repo = self.get_repo(repo=repo)
        if not gh_repo:
            return None
        if branch is None:
            branch = gh_repo.default_branch

        headers = {}
        if bytes_to_read is not None and bytes_to_read > 0:
            # HTTP byte ranges are inclusive on both ends, so the first
            # N bytes are 0..N-1.
            headers["Range"] = f"bytes=0-{bytes_to_read - 1}"
        else:
            # No usable limit: request and keep the whole file.
            bytes_to_read = None

        try:
            wpath = Path(tmpdir) / os.path.basename(fpath)
            # Using the raw CDN to allow partial byte-range requests
            url = f"https://raw.githubusercontent.com/{gh_repo.full_name}/{branch}/{fpath}"

            # Request only the first few KB
            response = get(url, headers=headers, timeout=10)

            # 200 (Full) or 206 (Partial) are both valid successes
            if response.status_code not in (200, 206):
                print_error(f"Got status {response.status_code} trying to get file {gh_repo.full_name}/{fpath}.")
                return None

            content = response.content
            if bytes_to_read is not None:
                # A server may ignore the Range header and answer 200 with
                # the full body; never store more than what was asked for.
                content = content[:bytes_to_read]

            with open(wpath, 'w+b') as f:
                f.write(content)
            return wpath

        except Exception as e:
            # Best effort: network and filesystem failures are reported,
            # not propagated.
            print_error(f"Got {str(e)} trying to get file {gh_repo.full_name}/{fpath}.")
            return None