Coverage for src / codeaudit / reporting.py: 13%

399 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-09 09:33 +0200

1""" 

2License GPLv3 or higher. 

3 

4(C) 2025 Created by Maikel Mardjan - https://nocomplexity.com/ 

5 

6This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. 

7 

8This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 

9 

10You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>. 

11 

12 

13Reporting functions for codeaudit 

14""" 

15 

16import re 

17import os 

18from pathlib import Path 

19import sys 

20 

21import pandas as pd 

22import html 

23import datetime 

24 

25from codeaudit.security_checks import perform_validations, ast_security_checks 

26from codeaudit.filehelpfunctions import ( 

27 get_filename_from_path, 

28 collect_python_source_files, 

29 read_in_source_file, 

30 has_python_files, 

31 is_ast_parsable, 

32) 

33from codeaudit.altairplots import multi_bar_chart 

34from codeaudit.totals import ( 

35 get_statistics, 

36 overview_count, 

37 overview_per_file, 

38 total_modules, 

39) 

40from codeaudit.checkmodules import ( 

41 get_imported_modules, 

42 check_module_vulnerability, 

43 get_all_modules, 

44 get_imported_modules_by_file, 

45) 

46from codeaudit.htmlhelpfunctions import json_to_html, dict_list_to_html_table 

47from codeaudit import __version__ 

48from codeaudit.pypi_package_scan import get_pypi_download_info, get_package_source 

49from codeaudit.privacy_lint import data_egress_scan, has_privacy_findings 

50from codeaudit.suppression import filter_sast_results 

51from codeaudit.api_helpers import _collect_issue_lines 

52 

53from importlib.resources import files 

54 

# Branded hyperlink to the project; reused in the disclaimer and the report footer.
PYTHON_CODE_AUDIT_TEXT = (
    '<a href="https://github.com/nocomplexity/codeaudit" target="_blank">'
    "<b>Python Code Audit</b></a>"
)

# Disclaimer paragraph appended to the generated reports.
DISCLAIMER_TEXT = (
    f"<p><b>Disclaimer:</b> <i>This SAST tool {PYTHON_CODE_AUDIT_TEXT}"
    " provides a powerful, automatic security analysis for Python source code."
    " However, it's not a substitute for human review in combination with business knowledge."
    " Undetected vulnerabilities may still exist.</i></p>"
)

# Informational banner added to a report when in-line suppressions are honoured.
NOSEC_WARNING = (
    "<p><b>INFO</b>: The --nosec flag is active. "
    "Security findings with in-line suppressions will be excluded from the report.</p>"
)

63 

# Stylesheet shipped inside the codeaudit package; its content is embedded in each report.
SIMPLE_CSS_FILE = files("codeaudit").joinpath("simple.css")

# Report file name used when the caller does not supply one.
DEFAULT_OUTPUT_FILE = "codeaudit-report.html"

67 

68 

def overview_report(directory, filename=DEFAULT_OUTPUT_FILE):
    """Generates an overview report of code complexity and security indicators.

    This function analyzes a Python project to produce a high-level overview of
    complexity and security-related metrics. The input may be either:

    - A local directory containing Python source files
    - The name of a package hosted on PyPI.org

    So:
        codeaudit overview <package-name|directory> [reportname.html]

    For PyPI packages, the source distribution (sdist) is downloaded,
    extracted to a temporary directory, scanned, and removed again as soon as
    the analysis is finished (also when the analysis fails).

    The report includes summary statistics, security risk indicators based on
    complexity and total lines of code, a list of discovered modules, per-file
    metrics, and a visual overview. Results are written to a static HTML file.

    Examples:
        Generate an overview report for a local project directory::

            codeaudit overview /projects/mycolleaguesproject

        Generate an overview report for a PyPI package::

            codeaudit overview linkaudit #A nice project on PyPI.org

            codeaudit overview pydantic #A complex project on PyPI.org from a security perspective?

    Args:
        directory (str): Path to a local directory containing Python source files
            or the name of a package available on PyPI.org.
        filename (str, optional): Name (and optional path) of the HTML file to
            write the overview report to. The filename should use the ``.html``
            extension. Defaults to ``DEFAULT_OUTPUT_FILE``.

    Returns:
        None. The function writes a static HTML overview report to disk.

    Raises:
        SystemExit: If the provided path is not a directory, contains no Python
            files, is neither a valid local directory nor a valid PyPI
            package name, or the PyPI package offers no source distribution.
    """
    package_name = None  # stays None when a local directory is analysed
    release = None
    tmp_handle = None  # handle of the temporary sdist extraction, if any

    if os.path.exists(directory):
        # Check if the path is actually a directory
        if not os.path.isdir(directory):
            print(f"ERROR: '{directory}' is not a directory.")
            print(
                "This function only works for directories containing Python files (*.py)."
            )
            sys.exit(1)
        # Check if the directory contains any .py files
        if not has_python_files(directory):
            print(f"ERROR: Directory '{directory}' contains no Python files.")
            sys.exit(1)
    else:
        # No local path: treat the input as a PyPI package name. The lookup is
        # done once and its result reused.
        pypi_data = get_pypi_download_info(directory)
        if not pypi_data:
            # Neither a local directory nor a valid PyPI package
            print(f"ERROR: '{directory}' is not a local directory or a valid PyPI package.")
            sys.exit(1)
        print(
            f"No local directory with name:{directory} found locally. Checking if package exist on PyPI..."
        )
        package_name = directory
        print(f"Package: {package_name} exist on PyPI.org!")
        url = pypi_data["download_url"]
        release = pypi_data["release"]
        if url is None:
            # Without a download URL there is nothing to analyse; continuing
            # would run the analysis on the bare package name as if it were a path.
            print(
                f"ERROR: No source distribution (sdist) for package '{package_name}' found on PyPI.org."
            )
            sys.exit(1)
        print(f"Creating Python Code Audit overview for package:\n{url}")
        directory, tmp_handle = get_package_source(url)

    # Everything that reads the source tree happens inside the try block so the
    # temporary directory of a PyPI download is removed even when analysis fails.
    try:
        result = get_statistics(directory)
        modules = total_modules(directory)
        modules_discovered = get_all_modules(directory)
    finally:
        if tmp_handle is not None:
            tmp_handle.cleanup()

    df = pd.DataFrame(result)
    df["Std-Modules"] = modules["Std-Modules"]
    df["External-Modules"] = modules["External-Modules"]
    overview_df = overview_count(df)

    output = "<h1>Python Code Audit overview report</h1><br>"
    if package_name is not None:
        output += f"<p>Codeaudit overview scan of package:<b> {package_name}</b></p>"
        output += f"<p>Version:<b>{release}</b></p>"
    else:
        output += f"<p>Overview for the directory:<b> {directory}</b></p>"
    output += "<h2>Summary</h2>"
    output += overview_df.to_html(escape=True, index=False)
    output += "<br><br>"

    # Risk indicators derived from the first row of the summary table
    security_based_on_max_complexity = overview_df.loc[0, "Maximum_Complexity"]
    if security_based_on_max_complexity > 40:
        output += "<p>Based on the maximum found complexity in a source file: Security concern rate is <b>&#10060; HIGH</b>."
    else:
        output += "<p>Based on the maximum found complexity in a source file: Security concern rate is <b>&#x2705; LOW</b>."
    security_based_on_loc = overview_df.loc[0, "Number_Of_Lines"]
    if security_based_on_loc > 2000:
        output += "<p>Based on the total Lines of Code (LoC) : Security concern rate is <b>&#10060; HIGH</b>."
    else:
        output += "<p>Based on the total Lines of Code (LoC) : Security concern rate is <b>&#x2705; LOW</b>."
    output += "<br>"

    # Module overview
    output += "<details>"
    output += "<summary>View all discovered modules.</summary>"
    output += display_found_modules(modules_discovered)
    output += "</details>"

    # Per-file table; built from the raw result so the module columns added to
    # the summary frame above are not shown here.
    per_file_df = pd.DataFrame(result)
    output += "<h2>Detailed overview per source file</h2>"
    output += "<details>"
    output += "<summary>View the report details.</summary>"
    output += per_file_df.to_html(escape=True, index=False)
    output += "</details>"

    # The plot works on the same data without the FilePath column
    plot = multi_bar_chart(per_file_df.drop(columns=["FilePath"]))
    plot_html = plot.to_html()
    output += "<br><br>"
    output += "<h2>Visual Overview</h2>"
    output += extract_altair_html(plot_html)

    # Advice names the package for PyPI scans and the directory otherwise
    scan_target = package_name if package_name is not None else directory
    output += "<p><b>&#128172; Advice:</b></p>"
    output += f'<p>&#128073; To perform a SAST scan on the source code, run:<pre><code class="language-python">codeaudit filescan {scan_target}</code></pre></p>'
    create_htmlfile(output, filename)

205 

206 

def display_found_modules(modules_discovered):
    """Render the discovered modules as two HTML bullet lists.

    Args:
        modules_discovered (dict): Mapping with the keys 'core_modules' and
            'imported_modules'; each value is an iterable of module names.

    Returns:
        str: HTML fragment with one headed list for the standard library
            modules followed by one for the imported external packages.
    """

    def bullet_list(names):
        # One <li> per name, wrapped in a <ul>; an empty iterable yields an empty list.
        items = "\n".join(f" <li>{name}</li>" for name in names)
        return "<ul>\n" + items + "\n</ul>"

    sections = (
        ("<p><b>Used Python Standard libraries:</b></p>", modules_discovered["core_modules"]),
        ("<p><b>Imported libraries (packages):</b></p>", modules_discovered["imported_modules"]),
    )
    return "".join(heading + bullet_list(names) for heading, names in sections)

234 

235 

def scan_report(input_path, filename=DEFAULT_OUTPUT_FILE, nosec=False):
    """Scans Python source code or PyPI packages for security weaknesses.

    This function performs static application security testing (SAST) on a
    specified input. The input can be one of the following:

    * A local directory containing Python source code
    * A single local Python file
    * The name of a package hosted on PyPI

    codeaudit filescan <pythonfile|package-name|directory> [reportname.html] [--nosec]

    Based on the input type, the function analyzes the source code for potential
    security issues, generates an HTML report summarizing the findings, and
    writes the report to disk.

    If a PyPI package name is provided, the function downloads the source
    distribution (sdist), extracts it to a temporary directory, scans the
    extracted source code, and cleans up all temporary files after the scan
    completes, also when the scan itself fails.

    Examples:

        Scan a local directory and write the report to ``report.html``::

            codeaudit filescan /path/to/custompythonmodule report.html

        Scan a local directory::

            codeaudit filescan /path/to/project

        Scan a single Python file::

            codeaudit filescan myexample.py

        Scan a package hosted on PyPI::

            codeaudit filescan linkaudit

            codeaudit filescan requests

        Enable filtering of issues marked with ``#nosec`` or another marker on
        potential code weaknesses that are mitigated or known::

            codeaudit filescan myexample.py --nosec

    Args:
        input_path (str): Path to a local Python file or directory, or the name
            of a package available on PyPI.
        filename (str, optional): Name (and optional path) of the HTML file to
            write the scan report to. The filename should use the ``.html``
            extension. Defaults to ``DEFAULT_OUTPUT_FILE``.
        nosec (bool, optional): Whether to filter out issues marked as reviewed
            or ignored in the source code. Defaults to ``False``, no filtering.

    Returns:
        None: The function writes a static HTML security report to disk.

    Raises:
        None: Errors and invalid inputs are reported to stdout.
    """
    file_path = Path(input_path)

    # Case 1: a local directory
    if file_path.is_dir():
        directory_scan_report(input_path, nosec_flag=nosec, filename=filename)
        return

    # Case 2: a single, parsable Python file
    if (
        file_path.suffix == ".py"
        and file_path.is_file()
        and is_ast_parsable(input_path)
    ):
        scan_output = perform_validations(input_path)  # scans for weaknesses in the file
        if nosec:
            # drop findings that carry an in-line suppression marker
            scan_output = filter_sast_results(scan_output)
        spy_output = data_egress_scan(input_path)  # scans for secrets in the file
        file_report_html = single_file_report(input_path, scan_output)
        secrets_report_html = secrets_report(spy_output)
        name_of_file = get_filename_from_path(input_path)
        html_output = "<h1>Python Code Audit Report</h1>"
        html_output += f"<h2>Security scan: {name_of_file}</h2>"
        html_output += f"<p>Location of the file: {input_path} </p>"
        if nosec:
            html_output += NOSEC_WARNING
        html_output += file_report_html
        html_output += secrets_report_html
        html_output += "<br>"
        html_output += DISCLAIMER_TEXT
        create_htmlfile(html_output, filename)
        return

    # Case 3: not found locally, so try it as a PyPI package name. The lookup
    # is done once and its result reused.
    pypi_data = get_pypi_download_info(input_path)
    if not pypi_data:
        # File is NOT a valid Python file, can not be parsed or directory is invalid.
        print(
            f"Error: '{input_path}' isn't a valid Python file, directory path to a package or a package on PyPI.org."
        )
        return

    package_name = input_path
    print(f"Package: {package_name} exist on PyPI.org!")
    print(
        f"Now SAST scanning package from the remote location: https://pypi.org/pypi/{package_name}"
    )
    url = pypi_data["download_url"]
    release = pypi_data["release"]
    if url is None:
        print(
            f"Error:A source distribution (sdist in .tar.gz format) for package: {package_name} can not be found or does not exist on PyPi.org.\n"
        )
        print(
            f"Make a local git clone of the {package_name} using `git clone` and run `codeaudit filescan <directory-with-src-cloned-of-{package_name}>` to check for weaknesses."
        )
        return

    print(url)
    print(release)
    src_dir, tmp_handle = get_package_source(url)
    try:
        directory_scan_report(
            src_dir,
            nosec_flag=nosec,
            filename=filename,
            package_name=package_name,
            release=release,
        )
    finally:
        # Remove the temporary extraction even when the scan raises or exits
        tmp_handle.cleanup()

392 

393 

def secrets_report(spy_output):
    """Build the HTML section on external egress risk for one scanned file.

    When ``has_privacy_findings`` reports findings for ``spy_output``, the
    section contains a warning line plus a collapsible table produced by
    ``pylint_reporting``. Otherwise a single confirmation line is returned.

    Args:
        spy_output: Result of the data egress scan for the file.

    Returns:
        str: HTML fragment for the egress-risk section of the report.
    """
    if not has_privacy_findings(spy_output):
        return "<br><p>&#x2705; No Logic for connecting to remote services found. Risk of data exfiltration to external systems is <b>low</b>.</p>"

    # The code column already contains escaped HTML, hence escape=False
    findings_table = pylint_reporting(spy_output).to_html(escape=False, index=False)
    fragments = [
        "<br><p>&#9888;&#65039; <b>External Egress Risk</b>: Detected outbound connection logic or API keys that may facilitate data egress.</p>",
        "<details>",
        "<summary>View detailed analysis of possible data egress logic or external service usage.</summary>",
        findings_table,
        "</details>",
        "<br>",
    ]
    return "".join(fragments)

426 

427 

def pylint_reporting(result):
    """Turn privacy findings into a DataFrame with 'line', 'found' and 'code'.

    Every entry below ``result["file_privacy_check"][*]["privacy_check_result"]``
    becomes one row. The code snippet is HTML-escaped, its newlines are turned
    into ``<br>`` and it is wrapped in a ``<pre><code>`` block.

    Args:
        result (dict): Privacy scan output; a missing or empty
            'file_privacy_check' entry yields an empty DataFrame.

    Returns:
        pandas.DataFrame: One row per finding, columns 'line', 'found', 'code'.
    """

    def as_code_block(snippet):
        # Escape first so the inserted <br> tags are not escaped themselves
        safe_snippet = html.escape(snippet).replace("\n", "<br>")
        return f'<pre><code class="language-python">{safe_snippet}</code></pre>'

    per_file = result.get("file_privacy_check") or {}
    records = [
        {
            "line": finding.get("lineno"),
            "found": finding.get("matched"),
            "code": as_code_block(finding.get("code", "")),
        }
        for file_result in per_file.values()
        for finding in file_result.get("privacy_check_result", [])
    ]
    return pd.DataFrame(records, columns=["line", "found", "code"])

469 

470 

def single_file_report(filename, scan_output):
    """Build the HTML report body for one scanned file.

    Shared by the single-file scan and the directory scan.

    Args:
        filename: Path of the scanned file; used for the per-file overview,
            the module listing and the suggested modulescan command.
        scan_output (dict): Scan result with the keys 'result' (mapping of
            check name to line numbers) and 'file_location'.

    Returns:
        str: HTML fragment with the findings table (only when findings exist),
            the file overview and the modules used by the file.
    """
    findings = scan_output["result"]

    # One row per (check, line) pair
    issues = pd.DataFrame(
        [(check, lineno) for check, linenos in findings.items() for lineno in linenos],
        columns=["validation", "line"],
    )
    issue_count = len(issues)

    # Attach severity and explanation to every row of the same check
    issues["severity"] = None
    issues["info"] = None
    for check in findings:
        severity, info_text = get_info_on_test(check)
        hits = issues["validation"] == check
        if hits.any():
            issues.loc[hits, ["severity", "info"]] = [severity, info_text]

    # Attach the source lines belonging to each finding
    issues["code"] = None
    source_location = scan_output["file_location"]
    for row_label, line_number in issues["line"].items():
        issues.at[row_label, "code"] = _collect_issue_lines(source_location, line_number)

    # Newlines become <br> so the snippet keeps its line breaks in the HTML table
    issues["code"] = issues["code"].str.replace(r"\n", "<br>", regex=True)
    # Shorten the validation column via replace_second_dot
    issues["validation"] = issues["validation"].apply(replace_second_dot)
    # Column order for the HTML table, rows ordered by line number
    issues = issues[["line", "validation", "severity", "info", "code"]]
    issues = issues.sort_values(by="line")

    parts = []
    if issue_count > 0:
        plural = "s" if issue_count != 1 else ""
        parts.append(
            f"<p>&#9888;&#65039; <b>{issue_count}</b> potential <b>security issue{plural}</b> found!</p>"
        )
        parts.append("<details>")
        parts.append("<summary>View identified security weaknesses.</summary>")
        parts.append(issues.to_html(escape=False, index=False))
        parts.append("</details>")
        parts.append("<br>")
    # No message when nothing was found, since privacy breaches may be present.

    overview_table = pd.DataFrame([overview_per_file(filename)])
    parts.append("<details>")
    parts.append(
        "<summary>View detailed analysis of security relevant file details.</summary>"
    )
    parts.append(overview_table.to_html(escape=True, index=False))
    parts.append("</details>")
    parts.append("<br>")

    parts.append("<details>")
    parts.append("<summary>View used modules in this file.</summary>")
    parts.append(display_found_modules(get_imported_modules_by_file(filename)))
    parts.append(
        f'<p>To check for <b>reported vulnerabilities</b> in external modules used by this file, use the command:<br><div class="code-box">codeaudit modulescan {filename}</div><br></p>'
    )
    parts.append("</details>")
    return "".join(parts)

529 

530 

def directory_scan_report(
    directory_to_scan,
    nosec_flag,
    filename=DEFAULT_OUTPUT_FILE,
    package_name=None,
    release=None,
):
    """Reports potential security issues for all Python files found in a directory.

    This function performs security validations on all files found in a specified
    directory. The result is written to a HTML report. Progress is printed to
    standard output while the files are scanned.

    Parameters:
        directory_to_scan (str): The full path to the Python source files to be
            scanned. Can be present in a temp directory.
        nosec_flag (bool): When true, findings are passed through
            ``filter_sast_results`` and the report carries the nosec notice.
        filename (str, optional): The name of the HTML file to save the report to.
            Defaults to `DEFAULT_OUTPUT_FILE`.
        package_name (str, optional): Name shown in the report header instead of
            the directory name. When given, per-file locations are omitted and a
            note about the deleted temporary sources is added.
        release (str, optional): Release shown next to ``package_name``.

    Returns:
        None - A HTML report is written as output

    Raises:
        SystemExit: If ``directory_to_scan`` is not a directory.
    """
    # Check if the provided path is a valid directory
    if not os.path.isdir(directory_to_scan):
        print(f"Error: '{directory_to_scan}' is not a valid directory.")
        sys.exit(1)

    collection_ok_files = []  # files for which nothing was found
    files_to_check = collect_python_source_files(directory_to_scan)
    number_of_files = len(files_to_check)

    output = "<h1>Python Code Audit Report</h1>"
    output += "<h2>Directory scan report</h2>"
    if package_name is not None:
        # Use real package name and retrieved release info
        output += "<p>Below the result of the Codeaudit scan of (Package name - Release):</p>"
        output += f"<p><b> {package_name} - {release} </b></p>"
    else:
        name_of_package = get_filename_from_path(directory_to_scan)
        output += f"<p>Below the result of the Codeaudit scan of the directory:<b> {name_of_package}</b></p>"
    output += f"<p>Total Python files found: <b>{number_of_files}</b></p>"
    if nosec_flag:
        output += NOSEC_WARNING

    print(f"Number of files that are checked for security issues:{number_of_files}")
    printProgressBar(
        0, number_of_files, prefix="Progress:", suffix="Complete", length=50
    )
    for i, file_to_scan in enumerate(files_to_check):
        printProgressBar(
            i + 1, number_of_files, prefix="Progress:", suffix="Complete", length=50
        )
        scan_output = perform_validations(file_to_scan)  # scans for weaknesses in the file
        if nosec_flag:
            # drop findings that carry an in-line suppression marker
            scan_output = filter_sast_results(scan_output)
        spy_output = data_egress_scan(file_to_scan)  # scans for secrets in the file

        if scan_output["result"] or has_privacy_findings(spy_output):
            file_report_html = single_file_report(file_to_scan, scan_output)
            name_of_file = get_filename_from_path(file_to_scan)
            output += f"<h3>Security scan: {name_of_file}</h3>"
            if package_name is None:
                # Paths inside a temporary package extraction are not shown
                output += f"<p>Location of the file: {file_to_scan} </p>"
            output += file_report_html
            output += secrets_report(spy_output)
        else:
            collection_ok_files.append(
                {
                    "filename": get_filename_from_path(file_to_scan),
                    "directory": file_to_scan,
                }
            )

    output += "<h2>Files in directory with no security issues</h2>"
    output += f"<p>&#x2705; Total Python files <b>without</b> detected security issues: {len(collection_ok_files)}</p>"
    # Paragraph is closed properly here; the stray second opening tag is gone
    output += "<p>The Python files with no security issues <b>detected</b> by codeaudit are:</p>"
    output += dict_list_to_html_table(collection_ok_files)
    output += "<br>"
    if package_name is not None:
        output += "<p><b>Note:</b><i>Since this check is done on a package on PyPI.org, the temporary local directories are deleted. To examine the package in detail, you should download the sources locally and run the command:<code>codeaudit filescan</code> again.</i></p>"
    output += "<p><b>Disclaimer:</b><i>This scan only evaluates Python files. Please note that security vulnerabilities may also exist in other files associated with the Python module.</i></p>"
    output += DISCLAIMER_TEXT
    create_htmlfile(output, filename)

616 

617 

def report_module_information(inputfile, reportname=DEFAULT_OUTPUT_FILE):
    """
    Generate a report on known vulnerabilities in Python modules and packages.

    This function analyzes a single Python file to identify imported
    external modules and checks those modules for known vulnerabilities.
    The collected results are written to a static HTML report.

    If the input refers to a valid PyPI package name instead of a local Python
    file, the function generates a vulnerability report directly for that
    package.

    While processing modules, progress information is printed to standard
    output.

    Example:
        Generate a module vulnerability report for a Python file::

            codeaudit modulescan <pythonfile>|<package> [yourreportname.html]

            codeaudit modulescan mypythonfile.py

    Args:
        inputfile (str): Path to a Python source file (*.py) to analyze, or the
            name of a package available on PyPI.
        reportname (str, optional): Name (and optional path) of the HTML file to
            write the vulnerability report to. The filename should use the
            ``.html`` extension. Defaults to ``DEFAULT_OUTPUT_FILE``.

    Returns:
        None: The function writes a static HTML report to disk.

    Raises:
        SystemExit: If the input is a directory, or is neither a valid Python
            file nor a valid PyPI package. The reason is printed to standard
            output before exiting.
    """
    html_output = "<h1>Python Code Audit Report</h1>"
    file_path = Path(inputfile)

    if file_path.is_dir():
        print(
            "codeaudit modulescan only works on single python files (*.py) or packages present on PyPI.org"
        )
        print(
            "See codeaudit modulescan -h or check the manual https://codeaudit.nocomplexity.com"
        )
        sys.exit(1)

    if file_path.suffix == ".py" and file_path.is_file() and is_ast_parsable(inputfile):
        source = read_in_source_file(inputfile)
        used_modules = get_imported_modules(source)
        external_modules = used_modules["imported_modules"]
        module_count = len(external_modules)
        # Initial call to print 0% progress
        printProgressBar(0, module_count, prefix="Progress:", suffix="Complete", length=50)
        html_output += "<h2>Module scan report</h2>"
        html_output += f"<p>Security information for file: <b>{inputfile}</b></p>"
        html_output += f"<p>Total Dependencies Scanned: {module_count} </p>"
        if external_modules:
            html_output += "<details>"
            html_output += "<summary>View scanned module dependencies(imported packages).</summary>"
            html_output += (
                "<ul>\n"
                + "\n".join(f" <li>{module}</li>" for module in external_modules)
                + "\n</ul>"
            )
            html_output += "</details>"
            # Vulnerability information per external module, in the order the
            # modules were returned by get_imported_modules
            html_output += "<h3>Vulnerability information for detected modules</h3>"
            for i, module in enumerate(external_modules):
                printProgressBar(
                    i + 1, module_count, prefix="Progress:", suffix="Complete", length=50
                )
                html_output += module_vulnerability_check(module) + "<br>"
        else:
            html_output += "<p>&#x2705; No external modules found!"
        html_output += f'<br><p>&#128161; To check for <b>security weaknesses</b> in this package, use the command:<div class="code-box">codeaudit filescan {inputfile}</div><br></p>'
        html_output += "<br>" + DISCLAIMER_TEXT
        create_htmlfile(html_output, reportname)
    elif get_pypi_download_info(inputfile):
        package_name = inputfile  # The input variable is now equal to the package name
        html_output += "<h2>Package scan report for known vulnerabilities</h2>"
        html_output += module_vulnerability_check(package_name)
        html_output += f'<br><p>&#128161; To check for <b>security weaknesses</b> in this package, use the command:<div class="code-box">codeaudit filescan {package_name}</div><br></p>'
        html_output += "<br>" + DISCLAIMER_TEXT
        create_htmlfile(html_output, reportname)
    else:
        # File is NOT a valid Python file, or package does not exist on PyPI.
        print(
            f"Error: '{inputfile}' isn't a valid Python file(*.py), or a valid package on PyPI.org."
        )
        sys.exit(1)

710 

711 

def module_vulnerability_check(module):
    """
    Return the HTML fragment describing known vulnerabilities of one package.

    The vulnerability data comes from ``check_module_vulnerability``:
    - Without data, a single success paragraph is returned.
    - With data, a collapsible <details> section is returned whose content is
      produced by ``json_to_html``.

    Args:
        module (str): Name of the Python package/module to check.

    Returns:
        str: HTML string representing the vulnerability scan result for the module.
    """
    vuln_info = check_module_vulnerability(module)
    if vuln_info:
        return "".join(
            (
                "<details>",
                f"<summary>&#9888;&#65039; View vulnerability information for package <b>{module}</b>.</summary>",
                json_to_html(vuln_info),
                "</details>",
            )
        )
    # No SAST scan of the package is started from here; run a filescan on the
    # package manually when that is wanted.
    return f"<p>&#x2705; No known vulnerabilities found for package: <b>{module}</b>.</p>"

740 

741 

def create_htmlfile(html_input, outputfile):
    """
    Wrap an HTML fragment in the standard report page and write it to disk.

    The page inlines the stylesheet read from ``SIMPLE_CSS_FILE``, references
    the vega / vega-lite / vega-embed scripts, places ``html_input`` inside
    the main container and appends a creation timestamp, the tool version and
    the footer links. After writing, a ``file://`` URI for the report is
    printed to the terminal.

    Args:
        html_input (str): HTML fragment that forms the body of the report.
        outputfile (str | Path): Destination of the report; ``~`` is expanded
            and the path is resolved to an absolute one.

    Raises:
        SystemExit: With status 1 when the parent directory of the resolved
            output path does not exist (an error is printed to stderr first).
    """
    target = Path(outputfile).expanduser().resolve()

    # Validate the output directory up front so a CLI user gets a clear
    # message instead of a traceback from the write further down.
    if not target.parent.is_dir():
        print(
            f"Error: output directory does not exist:\n {target.parent}",
            file=sys.stderr,
        )
        sys.exit(1)

    # The stylesheet is inlined so the report is a single self-contained file.
    stylesheet = Path(SIMPLE_CSS_FILE).read_text(encoding="utf-8")

    page_head = (
        '<!DOCTYPE html><html lang="en-US"><head>'
        '<meta charset="UTF-8"/>'
        "<title>Python_Code_Audit_SecurityReport</title>"
        f"<style>\n{stylesheet}\n</style>"
        '<script src="https://cdn.jsdelivr.net/npm/vega@5"></script>'
        '<script src="https://cdn.jsdelivr.net/npm/vega-lite@5"></script>'
        '<script src="https://cdn.jsdelivr.net/npm/vega-embed@6"></script>'
        "</head><body>"
    )

    created_on = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
    creation_note = (
        f"<p>This Python security report was created on: <b>{created_on}</b> with "
        + PYTHON_CODE_AUDIT_TEXT
        + f" version <b>{__version__}</b></p>"
    )

    footer_links = (
        '<div class="footer-links">'
        'Check the <a href="https://nocomplexity.com/documents/codeaudit/intro.html" '
        'target="_blank">documentation</a> for help on found issues.<br>'
        'Codeaudit is made with <span class="heart">&#10084;</span> by cyber security '
        'professionals who advocate for <a href="https://nocomplexity.com/simplify-security/" '
        'target="_blank">open simple security solutions</a>.<br>'
        '<a href="https://nocomplexity.com/documents/codeaudit/CONTRIBUTE.html" '
        'target="_blank">Join the community</a> and contribute to make this tool better!'
        "</div>"
    )

    page = "".join(
        [
            page_head,
            '<div class="container">',
            html_input,
            creation_note,
            "<hr>",
            "<footer>",
            footer_links,
            "</footer>",
            "</div>",  # closes the base container
            "</body></html>",
        ]
    )

    target.write_text(page, encoding="utf-8")

    # Tell the user where the report is, as a URI that can be pasted into a browser.
    rule = "====================================================================="
    print("\n" + rule)
    print(
        "Code Audit report file created!\n"
        "Paste the line below directly into your browser bar:\n"
        f"\t{target.as_uri()}\n"
    )
    print(rule + "\n")

807 

808 

809# def create_htmlfile(html_input,outputfile): 

810# """ Creates a clean html file based on html input given """ 

811# # Read CSS from the file - So it is included in the reporting HTML file 

812 

813# with open(SIMPLE_CSS_FILE, 'r') as css_file: 

814# css_content = css_file.read() 

815# # Start building the HTML 

816# output = '<!DOCTYPE html><html lang="en-US"><head>' 

817# output += '<meta charset="UTF-8"/>' 

818# output += '<title>Python_Code_Audit_SecurityReport</title>' 

819# # Inline CSS inside <style> block 

820# output += f'<style>\n{css_content}\n</style>' 

821# output += '<script src="https://cdn.jsdelivr.net/npm/vega@5"></script>' # needed for altair plots 

822# output += '<script src="https://cdn.jsdelivr.net/npm/vega-lite@5"></script>' # needed for altair plots 

823# output += '<script src="https://cdn.jsdelivr.net/npm/vega-embed@6"></script>' # needed for altair plots 

824# output += '</head><body>' 

825# output += '<div class="container">' 

826# output += html_input 

827# now = datetime.datetime.now() 

828# timestamp_str = now.strftime("%Y-%m-%d %H:%M") 

829# code_audit_version = __version__ 

830# output += ( 

831# f"<p>This Python security report was created on: <b>{timestamp_str}</b> with " 

832# + PYTHON_CODE_AUDIT_TEXT 

833# + f" version <b>{code_audit_version}</b></p>" 

834# ) 

835# output += '<hr>' 

836# output += '<footer>' 

837# output += ( 

838# '<div class="footer-links">' 

839# 'Check the <a href="https://nocomplexity.com/documents/codeaudit/intro.html" ' 

840# 'target="_blank">documentation</a> for help on found issues.<br>' 

841# 'Codeaudit is made with <span class="heart">&#10084;</span> by cyber security ' 

842# 'professionals who advocate for <a href="https://nocomplexity.com/simplify-security/" target="_blank">open simple security solutions</a>.<br>' 

843# '<a href="https://nocomplexity.com/documents/codeaudit/CONTRIBUTE.html" target="_blank">Join the community</a> and contribute to make this tool better!' 

844# "</div>" 

845# ) 

846# output += "</footer>" 

847# output += '</div>' #base container 

848# output += '</body></html>' 

849# # Now create the HTML output file 

850# with open(outputfile, 'w') as f: 

851# f.write(output) 

852# current_directory = os.getcwd() 

853# # Get the directory of the output file (if any) 

854# directory_for_output = os.path.dirname(os.path.abspath(outputfile)) 

855# filename_only = os.path.basename(outputfile) 

856# # Determine the effective directory to use in the file URL 

857# if not directory_for_output or directory_for_output == current_directory: 

858# file_url = f'file://{current_directory}/{filename_only}' 

859# else: 

860# file_url = f'file://{directory_for_output}/{filename_only}' 

861# # Print the result 

862# print("\n=====================================================================") 

863# print(f'Code Audit report file created!\nPaste the line below directly into your browser bar:\n\t{file_url}\n') 

864# print("=====================================================================\n") 

865 

866 

def extract_altair_html(plot_html):
    """
    Return the contents of the ``<body>`` element of an Altair HTML export.

    The search is case-insensitive and spans newlines; only the first
    ``<body>...</body>`` pair is used.

    Args:
        plot_html (str): Complete HTML document produced for a plot.

    Returns:
        str: The stripped body content followed by a newline, or a fallback
        paragraph with an error notice when no ``<body>`` element is found.
    """
    body = re.search(
        r"<body[^>]*>(.*?)</body>", plot_html, re.DOTALL | re.IGNORECASE
    )
    if body is None:
        # No <body> element: return a visible notice instead of nothing.
        return "<p>Altair plot was supposed to be here: But something went wrong! Fix needed."
    return body.group(1).strip() + "\n"

875 

876 

877# Replace the second dot with <br> 

def replace_second_dot(s):
    """
    Replace the second dot in ``s`` with an HTML line break.

    Strings containing fewer than two dots are returned unchanged; dots after
    the second one are kept as they are.

    Args:
        s (str): Dotted name, e.g. ``"a.b.c.d"``.

    Returns:
        str: ``s`` with its second ``.`` replaced by ``<br>``,
        e.g. ``"a.b<br>c.d"``.
    """
    # Splitting at most twice leaves everything after the second dot intact.
    pieces = s.split(".", 2)
    if len(pieces) < 3:
        return s
    first, second, remainder = pieces
    return f"{first}.{second}<br>{remainder}"

883 

884 

def get_info_on_test(error):
    """
    Look up the severity and help text for a detected construct.

    The checks DataFrame returned by ``ast_security_checks()`` is searched for
    a row whose ``construct`` column equals ``error``. When there is no exact
    match but ``error`` contains ``"extractall"``, the row for
    ``"tarfile.TarFile"`` is used instead.

    Args:
        error (str): A string to search for in the ['construct'] column.

    Returns:
        tuple: (severity, info_text) taken from the first matching row.

    Raises:
        SystemExit: With status 1 when neither an exact match nor the
            ``extractall`` fallback row is found (a message is printed to
            stderr first).
    """
    checks = ast_security_checks()

    # Try to find an exact match in 'construct' first.
    matching_rows = checks[checks["construct"] == error]

    if matching_rows.empty:
        if "extractall" not in error:
            print(f"\nERROR: No row found for '{error}'", file=sys.stderr)
            print(f"No rows found exactly matching '{error}'.", file=sys.stderr)
            # sys.exit instead of the site-provided exit(): the latter is an
            # interactive helper and is not guaranteed to exist in programs.
            sys.exit(1)

        # Fallback when extractall is mentioned. See also the open issues:
        # when both tarfile and zipfile are used with aliases, detection works
        # but static AST resolution is not 100% possible, so human data-flow
        # analysis is needed. The tarfile.TarFile row is used as a shortcut.
        matching_rows = checks[checks["construct"] == "tarfile.TarFile"]
        if matching_rows.empty:
            print(
                "\nERROR: No fallback row found for 'tarfile.extractall'",
                file=sys.stderr,
            )
            sys.exit(1)

    row = matching_rows.iloc[0]  # first matching row wins
    return row["severity"], row["info"]

922 

923 

def report_implemented_tests(filename=DEFAULT_OUTPUT_FILE):
    """
    Creates an HTML report of all implemented security checks.

    This report provides a user-friendly overview of the static security checks
    currently supported by Python Code Audit. It is intended to make it easier
    to review the available validations without digging through the codebase.

    The generated HTML includes:
    - A table of all implemented checks, sorted by construct
    - The number of validations
    - The version of Python Code Audit (codeaudit) used
    - A disclaimer about version-specific reporting

    The report is written with ``create_htmlfile`` to the given filename.

    Contributions to Python Code Audit are welcome, see:
    https://github.com/nocomplexity/codeaudit#contributing

    Parameters:
        filename (str): The output HTML filename. Defaults to
            ``DEFAULT_OUTPUT_FILE``.
    """
    df_checks = ast_security_checks()

    # Insert a <br> after the second dotted component so the construct column
    # stays narrow - the simplest way without using pandas styling options.
    df_checks["construct"] = df_checks["construct"].apply(replace_second_dot)
    df_checks_sorted = df_checks.sort_values(by="construct")
    number_of_test = len(df_checks)

    output = "<h1>Python Code Audit Implemented validations</h1>"
    # escape=False keeps the <br> inserted above as markup in the table.
    output += df_checks_sorted.to_html(escape=False, index=False)
    output += "<br>"
    output += (
        f"<p>Number of implemented security validations:<b>{number_of_test}</b></p>"
    )
    # The closing </p> was missing here, leaving the paragraph unbalanced.
    output += f"<p>Version of codeaudit: <b>{__version__}</b></p>"
    output += "<p>Because Python and cybersecurity are constantly changing, issue reports <b>SHOULD</b> specify the codeaudit version used.</p>"
    output += DISCLAIMER_TEXT
    create_htmlfile(output, filename)

967 

968 

def printProgressBar(
    iteration,
    total,
    prefix="",
    suffix="",
    decimals=1,
    length=100,
    fill="█",
    printEnd="\r",
):
    """
    Draw a single-line terminal progress bar; call it once per loop iteration.

    Args:
        iteration (int): Current iteration.
        total (int): Total number of iterations. When 0, an empty bar is shown
            with the percentage fixed at "100".
        prefix (str): Text printed before the bar.
        suffix (str): Text printed after the percentage.
        decimals (int): Number of decimals shown in the percentage.
        length (int): Width of the bar in characters.
        fill (str): Character used for the completed part of the bar.
        printEnd (str): ``end`` argument passed to ``print`` (a carriage
            return by default, so the next call redraws the same line).
    """
    if total != 0:
        percent = f"{100 * (iteration / float(total)):.{decimals}f}"
        done_cells = int(length * iteration // total)
        bar = fill * done_cells + "-" * (length - done_cells)
    else:
        # Nothing to iterate over: avoid dividing by zero and show an empty bar.
        percent = "100"
        bar = "-" * length

    print(f"\r{prefix} |{bar}| {percent}% {suffix}", end=printEnd)

    # Move to a fresh line once the last iteration has been drawn.
    finished = total != 0 and iteration >= total
    if finished:
        print()