Coverage for src / eclipse / care / utils / github.py: 78%
97 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-24 09:38 +0100
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-24 09:38 +0100
1# Copyright (c) 2025 The Eclipse Foundation
2#
3# This program and the accompanying materials are made available under the
4# terms of the Eclipse Public License 2.0 which is available at
5# http://www.eclipse.org/legal/epl-2.0.
6#
7# SPDX-License-Identifier: EPL-2.0
8#
9# Contributors:
10# asgomes - Additional methods
11# Boris Baldassari - Initial implementation
13import os
14from pathlib import Path
15from typing import Optional, Any, List
17from github import Github, Auth, UnknownObjectException, GithubException
18from requests import get
20from care import print_error
21from care.utils.git_provider import GitProviderEMO
# Max connection attempts to GitHub API.
# NOTE(review): not referenced anywhere in the visible part of this module --
# presumably consumed by callers or reserved for retry logic; confirm.
MAX_API_ATTEMPTS = 3
class GitHubEmo(GitProviderEMO):
    """A Class to retrieve all GitHub-related information, in the context
    of the EMO needs.

    Every query method reports failures on stdout/stderr and returns ``None``
    instead of raising, so callers must check the result before using it.
    """

    gh = None
    """ The github object. """

    repos = {}
    """ Class-level fallback for the repo cache. ``__init__`` gives every
    instance its own dict, so handles are not shared between connections. """

    def __init__(self, credentials: Optional[dict] = None, verbose: bool = False):
        """Initialises a class to interact with GitHub-based data sources.

        Parameters
        ----------
        credentials : Optional[dict]
            An optional mapping holding the GitHub token under the key
            ``GH_TOKEN``. When it is missing, empty, or has no such key, the
            connection is made without authentication.
        verbose : bool
            Boolean to display more information on stdout (optional).
        """
        super().__init__()

        # Per-instance cache: a repository handle is bound to the connection
        # that fetched it, so it must not leak to instances created with
        # other credentials.
        self.repos = {}

        # .get() so that a credentials dict without GH_TOKEN falls back to an
        # anonymous connection instead of raising KeyError.
        gh_token = credentials.get('GH_TOKEN') if credentials else None

        # If we got credentials, use them. Otherwise, just proceed without
        # authentication, hoping we won't be throttled.
        if gh_token:
            if verbose:
                print(" Connecting to GitHub with auth.")
            self.gh = Github(auth=Auth.Token(gh_token))
        else:
            if verbose:
                print(" Connecting to GitHub without authentication.")
            self.gh = Github()

    def get_gh(self):
        """Retrieve the GitHub handle for this connection.
        """
        return self.gh

    def get_repo(self, repo: str) -> Any:
        """Return the repository object, either from local cache if it has been
        already requested, or from the remote otherwise.

        Parameters
        ----------
        repo : str
            The repository to get a handle on.

        Returns
        -------
        The repository object, or None if the lookup failed (a message is
        printed on stdout in that case). Failed lookups are not cached.
        """
        if repo in self.repos:
            return self.repos[repo]

        try:
            # Get repository details.
            gh_repo = self.gh.get_repo(repo)
        except UnknownObjectException:
            print(f"Github: cannot find {repo}.")
            return None
        except Exception as e:
            # Deliberately broad: this method never raises, it reports and
            # returns None.
            print(f"Unknown exception when querying Github: {e}")
            return None

        # Store repo in cache.
        self.repos[repo] = gh_repo
        return gh_repo

    def get_repos(self, organisation: str) -> Optional[List[str]]:
        """List all repositories from a given organisation.

        Parameters
        ----------
        organisation : str
            The organisation name to query (e.g. eclipse-dash).

        Returns
        -------
        The API URL of each repository with the
        ``https://api.github.com/repos/`` prefix stripped, or None if the
        organisation cannot be found.
        """
        try:
            gh_org = self.gh.get_organization(org=organisation)
        except UnknownObjectException:
            print(f"Github: cannot find {organisation}.")
            return None

        api_prefix = 'https://api.github.com/repos/'
        return [r.url.removeprefix(api_prefix) for r in gh_org.get_repos()]

    def get_content_root(self, repo: str) -> Optional[List[str]]:
        """List files and directories at the root of the repository.

        Parameters
        ----------
        repo : str
            The repository to retrieve content for, e.g. eclipse-dash/dash

        Returns
        -------
        The paths of the entries at the root of the repository, or None if
        the repository or its content could not be retrieved.
        """
        gh_repo = self.get_repo(repo=repo)
        if gh_repo is None:
            # get_repo() has already reported the failure; bail out rather
            # than hitting an AttributeError on None below.
            return None

        try:
            # List files at the root of the directory.
            contents = gh_repo.get_contents("")
        except Exception as e:
            print(f"Unknown exception when querying Github: {e}")
            return None

        # Have all files at the root of the repository in an array.
        return [content_file.path for content_file in contents]

    def get_content_recursive(self, repo: str, branch: Optional[str] = None) -> Optional[List[dict]]:
        """List files and directories of the repository recursively.

        Parameters
        ----------
        repo : str
            The repository to retrieve content for, e.g. eclipse-dash/dash
        branch: Optional[str]
            A specific branch to check out. Defaults to the default branch of
            the repository.

        Returns
        -------
        One dict per file (tree entries of type ``blob``), made of the raw
        data of the tree entry plus a ``name`` key holding the base name of
        its ``path``. None if the repository or its tree could not be
        retrieved.
        """
        gh_repo = self.get_repo(repo=repo)
        if gh_repo is None:
            return None
        if branch is None:
            branch = gh_repo.default_branch

        # Get a list of files in the project repository
        files_repo = []
        try:
            tree = gh_repo.get_git_tree(sha=branch, recursive=True)
            for element in tree.tree:
                # 'blob' corresponds to a file; 'tree' corresponds to a directory
                if element.type == "blob":
                    # Work on a copy so that adding 'name' does not modify the
                    # raw data held by the tree element itself.
                    file_data = dict(element.raw_data)
                    file_data['name'] = os.path.basename(file_data['path'])
                    files_repo.append(file_data)
            return files_repo
        except GithubException as ghe:
            print_error(f"Got {ghe.message} trying to get repository contents for {gh_repo.full_name}.")
            return None

    def get_file(self, repo: str, tmpdir: str, fpath: str, branch: Optional[str] = None,
                 bytes_to_read: Optional[int] = 2048) -> Optional[Path]:
        """Download (the start of) a file of the repo into a local directory.

        Parameters
        ----------
        repo : str
            The repository to retrieve the file from, e.g. eclipse-dash/dash
        tmpdir : str
            The temporary directory where to store the file
        fpath : str
            The path to the file relative to the root of the repository
        branch: Optional[str]
            A specific branch to check out. Defaults to the default branch of
            the repository.
        bytes_to_read: Optional[int]
            The number of bytes to fetch from the start of the file (defaults
            to 2048). When None or not positive, no byte range is requested
            and the whole file is downloaded.

        Returns
        -------
        The path of the local copy, named after the base name of ``fpath``
        inside ``tmpdir``, or None if the repository could not be found or
        the download failed.
        """
        gh_repo = self.get_repo(repo=repo)
        if gh_repo is None:
            return None
        if branch is None:
            branch = gh_repo.default_branch

        try:
            wpath = Path(tmpdir) / os.path.basename(fpath)
            # Using the raw CDN to allow partial byte-range requests
            url = f"https://raw.githubusercontent.com/{gh_repo.full_name}/{branch}/{fpath}"

            # Request only the first few KB. HTTP byte ranges are inclusive
            # on both ends, hence the "- 1" to get exactly bytes_to_read bytes.
            headers = {}
            if bytes_to_read is not None and bytes_to_read > 0:
                headers["Range"] = f"bytes=0-{bytes_to_read - 1}"

            response = get(url, headers=headers, timeout=10)

            # 200 (Full) or 206 (Partial) are both valid successes
            if response.status_code not in (200, 206):
                print_error(f"Got status {response.status_code} trying to get file {gh_repo.full_name}/{fpath}.")
                return None

            with open(wpath, 'wb') as f:
                f.write(response.content)
            return wpath

        except Exception as e:
            print_error(f"Got {str(e)} trying to get file {gh_repo.full_name}/{fpath}.")
            return None