Coverage for src / eclipse / care / legal / recommended_files.py: 90%

87 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-24 09:38 +0100

1# Copyright (c) 2025 The Eclipse Foundation 

2# 

3# This program and the accompanying materials are made available under the 

4# terms of the Eclipse Public License 2.0 which is available at 

5# http://www.eclipse.org/legal/epl-2.0. 

6# 

7# SPDX-License-Identifier: EPL-2.0 

8# 

9# Contributors: 

10# Boris Baldassari - Initial implementation 

11 

12""" 

13A module to find and identify a set of recommended files, like README, CONTRIBUTING, etc. 

14 

15It can query GitHub or GitLab projects, and tries to follow the recommendations of the  

16Eclipse Project Handbook [1] and the former analyser [2].  

17 

18[1] https://www.eclipse.org/projects/handbook/ \ 

19[2] https://www.eclipse.org/projects/tools/documentation.php 

20 

21It looks for a set of files for each category of best practices, namely: 

22 

23* Readme - `readme` or `readme.md` 

24* Conduct - `code_of_conduct.md` 

25* Contributing - `contributing` or `contributing.md` 

26* License - `license` 

27 

28It only looks for files at the root of the repository. Search is case-insensitive. 

29 

30""" 

31 

import re
from os.path import basename
from typing import Iterable, Optional, Union

from care.utils.eclipse import Eclipse
from care.utils.github import GitHubEmo
from care.utils.gitlab import GitLabEmo

39 

40 

# Lookup tables used by the analysers below. Each entry is compiled as a
# case-insensitive regular expression and matched against the names of the
# files found at the root of a repository.

files_readme = {
    "readme",
    "readme.md",
    # "readme.txt"
}
""" The set of file names considered as readme's. """

files_conduct = {
    "code_of_conduct.md",
    # "codeofconduct"
}
""" The set of file names considered as codes of conduct. """

files_contributing = {
    "contributing",
    "contributing.md",
    # "copying",
}
""" The set of file names considered as contributing guides. """

files_license = {
    "license",
    # "license.txt",
}
""" The set of file names considered as license texts. """

# NOTE: `{}` is an empty *dict* literal, not an empty set; use `set()` so that
# these two tables have the same type as the ones above.
files_notice = set()
""" The set of file names considered as notices. """

files_security = set()
""" The set of file names considered for security information. """

72 

73 

def find_file_in_list(files_repo: list, files_lookup: Union[str, Iterable[str]],
                      verbose: Optional[bool] = False):
    """
    Go through the regular expressions from `files_lookup`, matching strings from
    `files_repo`, and stop and return the first corresponding occurrence.

    Patterns are compiled with `re.IGNORECASE` and applied with `re.match`, i.e.
    they are anchored at the start of the file name only: the pattern `license`
    also matches `LICENSE.txt`.

    Parameters
    ----------
    files_repo : list
        An array of file names to search through, and match to the regular expressions.
    files_lookup : str or iterable of str
        The regular expressions (either string or raw string) to match against the
        list of files. A single string is treated as one pattern. Ordered
        collections (list, tuple) are tried in the given order; sets are tried in
        sorted order so that the result does not depend on hash ordering.
    verbose : Optional[bool]
        Should we print more information on stdout?

    Returns
    -------
    The first entry of `files_repo` matched by the first matching pattern, or
    `None` if nothing matches.
    """
    if isinstance(files_lookup, str):
        # Iterating a bare string would try each character as its own pattern.
        files_lookup = [files_lookup]
    elif isinstance(files_lookup, (set, frozenset)):
        # Set iteration order for strings changes between interpreter runs
        # (hash randomisation); sort to make the "first match" reproducible.
        files_lookup = sorted(files_lookup)

    for file_lookup in files_lookup:
        p = re.compile(file_lookup, re.IGNORECASE)
        for file_repo in files_repo:
            if p.match(file_repo):
                if verbose:
                    print(f" Matched {file_lookup} in {file_repo}.")
                return file_repo

    return None

101 

102 

def gh_analyse(organisation: str, credentials: Optional[dict] = None, verbose: Optional[bool] = False):
    """
    Analyse the repositories of a GitHub organisation, looking for the recommended
    files at the root of each of them.

    Parameters
    ----------
    organisation: str
        GitHub organisation to analyse.
    credentials: Optional[dict]
        Credentials dict handed over as-is to `GitHubEmo`.
    verbose : Optional[bool]
        Should we print more information on stdout?

    Returns
    -------
    A dict keyed by repository; each value is a dict with the keys `readme`,
    `conduct`, `contributing` and `license`, holding the matched file name or
    `None`. Returns `None` when the repository list, or the root listing of any
    repository, comes back as `None`.
    """
    ghe = GitHubEmo(credentials=credentials, verbose=verbose)

    # Get list of repositories from the organisation.
    repos = ghe.get_repos(organisation=organisation)
    if repos is None:
        return None

    if verbose:
        print(f" Found {len(repos)} GH repos to check.")

    # Result key -> lookup table, in the order the categories are reported.
    categories = (
        ('readme', files_readme),
        ('conduct', files_conduct),
        ('contributing', files_contributing),
        ('license', files_license),
    )

    results = {}
    for repo in repos:
        if verbose:
            print(f"- Analysing GH repo {repo}.")

        # List files at the root of the directory.
        root_files = ghe.get_content_root(repo=repo)

        # If anything went wrong with the GH request, give up on the whole run.
        if root_files is None:
            return None

        results[repo] = {
            category: find_file_in_list(files_repo=root_files,
                                        files_lookup=lookup,
                                        verbose=verbose)
            for category, lookup in categories
        }

    return results

164 

165 

def gl_analyse(organisation: str, credentials: Optional[dict] = None, verbose: Optional[bool] = False):
    """
    Analyse all repositories from a single GitLab group, looking for the
    recommended files.

    This function in turn calls everything needed to execute the full analysis,
    from the `GitLabEmo` initialisation to the actual queries.

    Parameters
    ----------
    organisation : str
        The group to analyse.
    credentials: Optional[dict]
        Credentials dict handed over as-is to `GitLabEmo`.
    verbose : Optional[bool]
        Should we print more information on stdout?

    Returns
    -------
    A dict keyed by repository; each value is a dict with the keys `readme`,
    `conduct`, `contributing` and `license`, holding a file name or `None`.
    Returns `None` when the repository list, or the root listing of any
    repository, comes back as `None`.
    """
    gle = GitLabEmo(credentials=credentials, verbose=verbose)

    repos = gle.get_repos(group=organisation)
    if repos is None:
        return None

    if verbose:
        print(f" Found {len(repos)} GL repos to check.")

    # Categories resolved by pattern matching on the root listing; readme and
    # license are asked directly to `GitLabEmo` instead.
    matched_categories = (
        ('conduct', files_conduct),
        ('contributing', files_contributing),
    )

    results = {}
    for repo in repos:
        if verbose:
            print(f"- Analysing GL repo {repo}.")

        # List files at the root of the directory.
        root_files = gle.get_content_root(repo=repo)

        # If anything went wrong with the GL request, give up on the whole run.
        if root_files is None:
            return None

        entry = {}
        results[repo] = entry

        # Readme: keep only the file name of whatever get_readme reports.
        readme_path = gle.get_readme(repo=repo)
        entry['readme'] = None if not readme_path else basename(readme_path)

        # Conduct and contributing files.
        for category, lookup in matched_categories:
            entry[category] = find_file_in_list(files_repo=root_files,
                                                files_lookup=lookup,
                                                verbose=verbose)

        # License: keep only the file name of whatever get_licence reports.
        licence_path = gle.get_licence(repo=repo)
        entry['license'] = None if not licence_path else basename(licence_path)

    return results

227 

228 

def analyse_project(project_id: str, credentials: Optional[dict] = None, verbose: bool = False):
    """
    Generic entrypoint to analyse a project. This function will identify the project
    repositories and their type (github/gitlab) and execute the corresponding functions.

    Parameters
    ----------
    project_id : str
        Project ID of the Eclipse project to analyse, e.g. `technology.dash`.
    credentials: Optional[dict]
        Credentials dict passed through to `gh_analyse` and `gl_analyse`.
    verbose : bool
        Should we print more information on stdout?

    Returns
    -------
    A dict merging the results of `gh_analyse` and `gl_analyse` (GitLab entries
    override GitHub ones on identical keys), possibly empty when the project
    declares neither a GitHub org nor a GitLab group. Returns `None` when the
    project cannot be retrieved or when one of the analysers reports a failure
    by returning `None`.
    """

    eclipse = Eclipse()
    project_api = eclipse.get_project_api(project_id)

    if project_api is None:
        return None

    results = {}

    if 'github' in project_api and 'org' in project_api['github'] and len(project_api['github']['org']) > 0:
        gh_org = project_api['github']['org']
        if verbose:
            print(f" Looking for projects in GH org {gh_org}.")
        gh_results = gh_analyse(organisation=gh_org, credentials=credentials, verbose=verbose)
        # gh_analyse signals a failed request with None; dict.update(None)
        # would raise TypeError, so propagate the failure instead.
        if gh_results is None:
            return None
        results.update(gh_results)

    if 'gitlab' in project_api and 'project_group' in project_api['gitlab'] and len(project_api['gitlab']['project_group']) > 0:
        gl_org = project_api['gitlab']['project_group']
        if verbose:
            print(f" Looking for projects in GL group {gl_org}.")
        gl_results = gl_analyse(organisation=gl_org, credentials=credentials, verbose=verbose)
        # Same convention as above: None means the GitLab analysis failed.
        if gl_results is None:
            return None
        results.update(gl_results)

    return results