Coverage for src / codeaudit / reporting.py: 13%
399 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-09 09:33 +0200
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-09 09:33 +0200
1"""
2License GPLv3 or higher.
4(C) 2025 Created by Maikel Mardjan - https://nocomplexity.com/
6This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
8This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
10You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
13Reporting functions for codeaudit
14"""
16import re
17import os
18from pathlib import Path
19import sys
21import pandas as pd
22import html
23import datetime
25from codeaudit.security_checks import perform_validations, ast_security_checks
26from codeaudit.filehelpfunctions import (
27 get_filename_from_path,
28 collect_python_source_files,
29 read_in_source_file,
30 has_python_files,
31 is_ast_parsable,
32)
33from codeaudit.altairplots import multi_bar_chart
34from codeaudit.totals import (
35 get_statistics,
36 overview_count,
37 overview_per_file,
38 total_modules,
39)
40from codeaudit.checkmodules import (
41 get_imported_modules,
42 check_module_vulnerability,
43 get_all_modules,
44 get_imported_modules_by_file,
45)
46from codeaudit.htmlhelpfunctions import json_to_html, dict_list_to_html_table
47from codeaudit import __version__
48from codeaudit.pypi_package_scan import get_pypi_download_info, get_package_source
49from codeaudit.privacy_lint import data_egress_scan, has_privacy_findings
50from codeaudit.suppression import filter_sast_results
51from codeaudit.api_helpers import _collect_issue_lines
53from importlib.resources import files
# HTML link to the project; reused in the disclaimer and in the report footer.
PYTHON_CODE_AUDIT_TEXT = '<a href="https://github.com/nocomplexity/codeaudit" target="_blank"><b>Python Code Audit</b></a>'

# Disclaimer paragraph appended to the generated reports.
DISCLAIMER_TEXT = "".join(
    [
        "<p><b>Disclaimer:</b> <i>This SAST tool ",
        PYTHON_CODE_AUDIT_TEXT,
        " provides a powerful, automatic security analysis for Python source code. However, it's not a substitute for human review in combination with business knowledge. Undetected vulnerabilities may still exist.</i></p>",
    ]
)

# Notice added to a report when in-line suppressions are honoured (--nosec).
NOSEC_WARNING = "<p><b>INFO</b>: The --nosec flag is active. Security findings with in-line suppressions will be excluded from the report.</p>"

# Stylesheet shipped inside the codeaudit package; inlined into every report.
SIMPLE_CSS_FILE = files("codeaudit").joinpath("simple.css")

# Report file name used when the caller does not supply one.
DEFAULT_OUTPUT_FILE = "codeaudit-report.html"
def overview_report(directory, filename=DEFAULT_OUTPUT_FILE):
    """Generates an overview report of code complexity and security indicators.

    This function analyzes a Python project to produce a high-level overview of
    complexity and security-related metrics. The input may be either:

    - A local directory containing Python source files
    - The name of a package hosted on PyPI.org

    So:
        codeaudit overview <package-name|directory> [reportname.html]

    For PyPI packages, the source distribution (sdist) is downloaded,
    extracted to a temporary directory, scanned, and removed after the
    analysis (also when the analysis fails).

    The report includes summary statistics, security risk indicators based on
    complexity and total lines of code, a list of discovered modules, per-file
    metrics, and a visual overview. Results are written to a static HTML file.

    Examples:
        Generate an overview report for a local project directory::

            codeaudit overview /projects/mycolleaguesproject

        Generate an overview report for a PyPI package::

            codeaudit overview linkaudit #A nice project on PyPI.org

            codeaudit overview pydantic #A complex project on PyPI.org from a security perspective?

    Args:
        directory (str): Path to a local directory containing Python source files
            or the name of a package available on PyPI.org.
        filename (str, optional): Name (and optional path) of the HTML file to
            write the overview report to. The filename should use the ``.html``
            extension. Defaults to ``DEFAULT_OUTPUT_FILE``.

    Returns:
        None. The function writes a static HTML overview report to disk.

    Raises:
        SystemExit: If the provided path is not a directory, contains no Python
            files, is neither a valid local directory nor a valid PyPI
            package name, or the PyPI package offers no source distribution.
    """
    package_name = None
    release = None
    tmp_handle = None  # only set when the sources were fetched from PyPI
    advice = None
    scan_dir = directory

    if os.path.exists(directory):
        # Check if the path is actually a directory
        if not os.path.isdir(directory):
            print(f"ERROR: '{directory}' is not a directory.")
            print(
                "This function only works for directories containing Python files (*.py)."
            )
            sys.exit(1)
        # Check if the directory contains any .py files
        if not has_python_files(directory):
            print(f"ERROR: Directory '{directory}' contains no Python files.")
            sys.exit(1)
    else:
        # No local path: treat the input as a PyPI package name.
        # The lookup is done once and its result reused.
        pypi_data = get_pypi_download_info(directory)
        if not pypi_data:
            # Neither a local directory nor a valid PyPI package
            print(
                f"ERROR: '{directory}' is not a local directory or a valid PyPI package."
            )
            sys.exit(1)
        print(
            f"No local directory with name:{directory} found locally. Checking if package exist on PyPI..."
        )
        package_name = directory
        print(f"Package: {package_name} exist on PyPI.org!")
        url = pypi_data["download_url"]
        release = pypi_data["release"]
        if url is None:
            # Without an sdist there is nothing to analyse; previously the code
            # fell through and tried to scan the package name as a directory.
            print(
                f"Error:A source distribution (sdist in .tar.gz format) for package: {package_name} can not be found or does not exist on PyPi.org.\n"
            )
            sys.exit(1)
        advice = f'<p>👉 To perform a SAST scan on the source code, run:<pre><code class="language-python">codeaudit filescan {package_name}</code></pre></p>'
        print(f"Creating Python Code Audit overview for package:\n{url}")
        scan_dir, tmp_handle = get_package_source(url)

    # Collect everything that needs the files on disk, then always remove the
    # temporary download directory - even when one of the collectors raises.
    try:
        result = get_statistics(scan_dir)
        modules = total_modules(scan_dir)
        modules_discovered = get_all_modules(scan_dir)
    finally:
        if tmp_handle is not None:
            tmp_handle.cleanup()

    df = pd.DataFrame(result)
    df["Std-Modules"] = modules["Std-Modules"]
    df["External-Modules"] = modules["External-Modules"]
    overview_df = overview_count(df)

    output = "<h1>Python Code Audit overview report</h1><br>"
    if package_name is not None:
        output += f"<p>Codeaudit overview scan of package:<b> {package_name}</b></p>"
        output += f"<p>Version:<b>{release}</b></p>"
    else:
        output += f"<p>Overview for the directory:<b> {directory}</b></p>"
    output += "<h2>Summary</h2>"
    output += overview_df.to_html(escape=True, index=False)
    output += "<br><br>"

    # Coarse risk indicators: thresholds are 40 for complexity and 2000 for LoC.
    security_based_on_max_complexity = overview_df.loc[0, "Maximum_Complexity"]
    if security_based_on_max_complexity > 40:
        output += "<p>Based on the maximum found complexity in a source file: Security concern rate is <b>❌ HIGH</b>."
    else:
        output += "<p>Based on the maximum found complexity in a source file: Security concern rate is <b>✅ LOW</b>."
    security_based_on_loc = overview_df.loc[0, "Number_Of_Lines"]
    if security_based_on_loc > 2000:
        output += "<p>Based on the total Lines of Code (LoC) : Security concern rate is <b>❌ HIGH</b>."
    else:
        output += "<p>Based on the total Lines of Code (LoC) : Security concern rate is <b>✅ LOW</b>."
    output += "<br>"

    # Module overview
    output += "<details>"
    output += "<summary>View all discovered modules.</summary>"
    output += display_found_modules(modules_discovered)
    output += "</details>"

    # Per-file details: built from the raw statistics, i.e. without the
    # module-count columns that were added to `df` for the summary.
    details_df = pd.DataFrame(result)
    output += "<h2>Detailed overview per source file</h2>"
    output += "<details>"
    output += "<summary>View the report details.</summary>"
    output += details_df.to_html(escape=True, index=False)
    output += "</details>"

    # The chart does not need the file path column.
    plot = multi_bar_chart(details_df.drop(columns=["FilePath"]))
    plot_html = plot.to_html()
    output += "<br><br>"
    output += "<h2>Visual Overview</h2>"
    output += extract_altair_html(plot_html)

    output += "<p><b>💬 Advice:</b></p>"
    if advice:
        output += advice
    else:
        output += f'<p>👉 To perform a SAST scan on the source code, run:<pre><code class="language-python">codeaudit filescan {directory}</code></pre></p>'
    create_htmlfile(output, filename)
def display_found_modules(modules_discovered):
    """Render the discovered modules as two HTML bullet lists.

    Args:
        modules_discovered (dict): Mapping with the keys ``'core_modules'``
            and ``'imported_modules'``; each value is an iterable of module
            names.

    Returns:
        str: HTML with a heading and ``<ul>`` list for the standard library
        modules, followed by a heading and ``<ul>`` list for the imported
        external packages.
    """

    def _bullet_list(names):
        # One <li> per module, each on its own line inside the <ul>.
        items = "\n".join(f" <li>{name}</li>" for name in names)
        return "<ul>\n" + items + "\n</ul>"

    sections = (
        (
            "<p><b>Used Python Standard libraries:</b></p>",
            modules_discovered["core_modules"],
        ),
        (
            "<p><b>Imported libraries (packages):</b></p>",
            modules_discovered["imported_modules"],
        ),
    )
    return "".join(heading + _bullet_list(names) for heading, names in sections)
def scan_report(input_path, filename=DEFAULT_OUTPUT_FILE, nosec=False):
    """Scans Python source code or PyPI packages for security weaknesses.

    This function performs static application security testing (SAST) on a
    specified input. The input can be one of the following:

    * A local directory containing Python source code
    * A single local Python file
    * The name of a package hosted on PyPI

    codeaudit filescan <pythonfile|package-name|directory> [reportname.html] [--nosec]

    Based on the input type, the function analyzes the source code for potential
    security issues, generates an HTML report summarizing the findings, and
    writes the report to disk.

    If a PyPI package name is provided, the function downloads the source
    distribution (sdist), extracts it to a temporary directory, scans the
    extracted source code, and cleans up all temporary files after the scan
    completes (also when the scan fails).

    Examples:

        Scan a local directory and write the report to ``report.html``::

            codeaudit filescan /path/to/custompythonmodule report.html

        Scan a local directory::

            codeaudit filescan /path/to/project

        Scan a single Python file::

            codeaudit filescan myexample.py

        Scan a package hosted on PyPI::

            codeaudit filescan linkaudit

            codeaudit filescan requests

        Specify an output report file::

            codeaudit filescan /path/to/project report.html

        Enable filtering of issues marked with ``#nosec`` or another marker on
        potential code weaknesses that are mitigated or known::

            codeaudit filescan myexample.py --nosec

    Args:
        input_path (str): Path to a local Python file or directory, or the name
            of a package available on PyPI.
        filename (str, optional): Name (and optional path) of the HTML file to
            write the scan report to (CLI flag ``-f, --filename``). The filename
            should use the ``.html`` extension. Defaults to
            ``DEFAULT_OUTPUT_FILE``.
        nosec (bool, optional): Whether to filter out issues marked as reviewed
            or ignored in the source code (CLI flag ``-n, --nosec``). Defaults
            to ``False``, no filtering.

    Returns:
        None: The function writes a static HTML security report to disk.

    Raises:
        None: Invalid inputs are reported to stdout. Errors raised by the
            helpers that are called are not caught here.
    """
    file_path = Path(input_path)

    # Case 1: a local directory (package) scan.
    if file_path.is_dir():
        directory_scan_report(input_path, nosec_flag=nosec, filename=filename)
        return

    # Case 2: a single, parsable local Python file.
    if (
        file_path.suffix == ".py"
        and file_path.is_file()
        and is_ast_parsable(input_path)
    ):
        scan_output = perform_validations(input_path)  # scans for weaknesses
        if nosec:
            # drop findings that carry an in-line suppression marker
            scan_output = filter_sast_results(scan_output)
        spy_output = data_egress_scan(input_path)  # scans for secrets / egress logic
        file_report_html = single_file_report(input_path, scan_output)
        secrets_report_html = secrets_report(spy_output)
        name_of_file = get_filename_from_path(input_path)
        html_output = "<h1>Python Code Audit Report</h1>"
        html_output += f"<h2>Security scan: {name_of_file}</h2>"
        html_output += "<p>" + f"Location of the file: {input_path} </p>"
        if nosec:
            html_output += NOSEC_WARNING
        html_output += file_report_html
        html_output += secrets_report_html
        html_output += "<br>"
        html_output += DISCLAIMER_TEXT
        create_htmlfile(html_output, filename)
        return

    # Case 3: not found locally - check whether it is a package on PyPI.
    # The lookup result is reused instead of querying PyPI a second time.
    pypi_data = get_pypi_download_info(input_path)
    if not pypi_data:
        # Not a valid Python file, directory, or PyPI package.
        print(
            f"Error: '{input_path}' isn't a valid Python file, directory path to a package or a package on PyPI.org."
        )
        return

    package_name = input_path
    print(f"Package: {package_name} exist on PyPI.org!")
    print(
        f"Now SAST scanning package from the remote location: https://pypi.org/pypi/{package_name}"
    )
    url = pypi_data["download_url"]
    release = pypi_data["release"]
    if url is None:
        print(
            f"Error:A source distribution (sdist in .tar.gz format) for package: {package_name} can not be found or does not exist on PyPi.org.\n"
        )
        print(
            f"Make a local git clone of the {package_name} using `git clone` and run `codeaudit filescan <directory-with-src-cloned-of-{package_name}>` to check for weaknesses."
        )
        return

    print(url)
    print(release)
    src_dir, tmp_handle = get_package_source(url)
    try:
        directory_scan_report(
            src_dir,
            nosec_flag=nosec,
            filename=filename,
            package_name=package_name,
            release=release,
        )
    finally:
        # Always delete the temporary download, also when the scan raises
        # or terminates through SystemExit.
        tmp_handle.cleanup()
def secrets_report(spy_output):
    """Build the HTML report section about external egress risk.

    When ``has_privacy_findings`` reports findings for ``spy_output``, the
    section contains a warning plus a collapsible table (built with
    ``pylint_reporting``) listing the findings. Otherwise a short message
    stating that the exfiltration risk is low is returned.

    Args:
        spy_output (object): Result of the data egress scan for one file.

    Returns:
        str: HTML fragment for the egress-risk part of the report.
    """
    if not has_privacy_findings(spy_output):
        return "<br><p>✅ No Logic for connecting to remote services found. Risk of data exfiltration to external systems is <b>low</b>.</p>"

    findings_table = pylint_reporting(spy_output).to_html(escape=False, index=False)
    parts = [
        "<br><p>⚠️ <b>External Egress Risk</b>: Detected outbound connection logic or API keys that may facilitate data egress.</p>",
        "<details>",
        "<summary>View detailed analysis of possible data egress logic or external service usage.</summary>",
        findings_table,
        "</details>",
        "<br>",
    ]
    return "".join(parts)
def pylint_reporting(result):
    """Turn privacy findings into a DataFrame ready for HTML rendering.

    Every entry found under
    ``result["file_privacy_check"][<file>]["privacy_check_result"]`` becomes
    one row. The code snippet is HTML-escaped, its newlines are replaced by
    ``<br>`` and it is wrapped in a ``<pre><code>`` block.

    Args:
        result (dict): Scan output; a missing or empty ``file_privacy_check``
            entry yields an empty DataFrame.

    Returns:
        pandas.DataFrame: Columns ``line``, ``found`` and ``code``.
    """
    code_block = '<pre><code class="language-python">{}</code></pre>'
    per_file = result.get("file_privacy_check") or {}

    findings = [
        {
            "line": hit.get("lineno"),
            "found": hit.get("matched"),
            "code": code_block.format(
                html.escape(hit.get("code", "")).replace("\n", "<br>")
            ),
        }
        for file_result in per_file.values()
        for hit in file_result.get("privacy_check_result", [])
    ]
    return pd.DataFrame(findings, columns=["line", "found", "code"])
def single_file_report(filename, scan_output):
    """Build the HTML report body for one scanned file.

    Shared by the single-file scan and the directory scan so both render a
    file the same way.

    Args:
        filename (str): Path of the scanned file; used for the per-file
            overview, the module listing and the ``modulescan`` hint.
        scan_output (dict): Scan result; ``scan_output["result"]`` maps a
            validation name to the line numbers it was found on and
            ``scan_output["file_location"]`` is the file to read code from.

    Returns:
        str: HTML fragment with (when present) the findings table, followed
        by the file overview and the modules used by the file.
    """
    findings = scan_output["result"]
    source_location = scan_output["file_location"]

    # One row per (validation, line) pair.
    df = pd.DataFrame(
        [(check, lineno) for check, linenos in findings.items() for lineno in linenos],
        columns=["validation", "line"],
    )
    number_of_issues = len(df)

    # Attach severity and explanation to every row of the same validation.
    df["severity"] = None
    df["info"] = None
    for check in findings:
        severity, info_text = get_info_on_test(check)
        hits = df["validation"] == check
        if hits.any():
            df.loc[hits, ["severity", "info"]] = [severity, info_text]

    # Fetch the code belonging to each reported line.
    df["code"] = None
    for row_label, lineno in df["line"].items():
        df.at[row_label, "code"] = _collect_issue_lines(source_location, lineno)

    # Newlines become <br> so the code displays on multiple lines in the table.
    df["code"] = df["code"].str.replace(r"\n", "<br>", regex=True)
    # Shorten the validation column (simplest way, no pandas styling needed).
    df["validation"] = df["validation"].apply(replace_second_dot)

    # Fix column order and sort by line number before rendering.
    df = df[["line", "validation", "severity", "info", "code"]].sort_values(by="line")

    parts = []
    if number_of_issues > 0:
        plural = "s" if number_of_issues != 1 else ""
        parts.append(
            f"<p>⚠️ <b>{number_of_issues}</b> potential <b>security issue{plural}</b> found!</p>"
        )
        parts.append("<details>")
        parts.append("<summary>View identified security weaknesses.</summary>")
        parts.append(df.to_html(escape=False, index=False))
        parts.append("</details>")
        parts.append("<br>")
    # No message when nothing was found: privacy findings may still be present.

    overview_table = pd.DataFrame([overview_per_file(filename)])
    parts.append("<details>")
    parts.append(
        "<summary>View detailed analysis of security relevant file details.</summary>"
    )
    parts.append(overview_table.to_html(escape=True, index=False))
    parts.append("</details>")
    parts.append("<br>")

    parts.append("<details>")
    parts.append("<summary>View used modules in this file.</summary>")
    parts.append(display_found_modules(get_imported_modules_by_file(filename)))
    parts.append(
        f'<p>To check for <b>reported vulnerabilities</b> in external modules used by this file, use the command:<br><div class="code-box">codeaudit modulescan {filename}</div><br></p>'
    )
    parts.append("</details>")
    return "".join(parts)
def directory_scan_report(
    directory_to_scan,
    nosec_flag,
    filename=DEFAULT_OUTPUT_FILE,
    package_name=None,
    release=None,
):
    """Reports potential security issues for all Python files found in a directory.

    This function performs security validations on all files found in a specified directory.
    The result is written to a HTML report.

    You can specify the name and directory for the generated HTML report.

    Parameters:
        directory_to_scan (str): The full path to the Python source files to be scanned. Can be present in temp directory.
        nosec_flag (bool): When true, findings are passed through
            ``filter_sast_results`` and a notice is added to the report.
        filename (str, optional): The name of the HTML file to save the report to.
            Defaults to `DEFAULT_OUTPUT_FILE`.
        package_name (str, optional): Name of the PyPI package the sources belong
            to. When given, the report shows package name and release instead of
            the local directory and per-file locations.
        release (str, optional): Release shown next to ``package_name``.

    Returns:
        None - A HTML report is written as output

    Raises:
        SystemExit: If ``directory_to_scan`` is not a directory.
    """
    # Check if the provided path is a valid directory
    if not os.path.isdir(directory_to_scan):
        print(f"Error: '{directory_to_scan}' is not a valid directory.")
        # sys.exit instead of the `site` helper exit(), which is meant for
        # interactive use and is not guaranteed to be defined.
        sys.exit(1)

    collection_ok_files = []  # files for which nothing was found
    output = "<h1>Python Code Audit Report</h1>"
    files_to_check = collect_python_source_files(directory_to_scan)
    output += "<h2>Directory scan report</h2>"
    name_of_package = get_filename_from_path(directory_to_scan)
    if package_name is not None:
        # Use real package name and retrieved release info
        output += "<p>Below the result of the Codeaudit scan of (Package name - Release):</p>"
        output += f"<p><b> {package_name} - {release} </b></p>"
    else:
        output += f"<p>Below the result of the Codeaudit scan of the directory:<b> {name_of_package}</b></p>"
    # NOTE(review): the file total is assumed to be reported for both cases above.
    output += f"<p>Total Python files found: <b>{len(files_to_check)}</b></p>"
    if nosec_flag:
        output += NOSEC_WARNING

    number_of_files = len(files_to_check)
    print(f"Number of files that are checked for security issues:{number_of_files}")
    printProgressBar(
        0, number_of_files, prefix="Progress:", suffix="Complete", length=50
    )
    for i, file_to_scan in enumerate(files_to_check):
        printProgressBar(
            i + 1, number_of_files, prefix="Progress:", suffix="Complete", length=50
        )
        scan_output = perform_validations(file_to_scan)  # scans for weaknesses
        if nosec_flag:
            # drop findings that carry an in-line suppression marker
            scan_output = filter_sast_results(scan_output)
        spy_output = data_egress_scan(file_to_scan)  # scans for secrets / egress logic
        data = scan_output["result"]
        if data or has_privacy_findings(spy_output):
            file_report_html = single_file_report(file_to_scan, scan_output)
            name_of_file = get_filename_from_path(file_to_scan)
            output += f"<h3>Security scan: {name_of_file}</h3>"
            if package_name is None:
                # Only shown for local scans: for PyPI packages the files live
                # in a temporary directory that is removed afterwards.
                output += "<p>" + f"Location of the file: {file_to_scan} </p>"
            output += file_report_html
            output += secrets_report(spy_output)
        else:
            collection_ok_files.append(
                {
                    "filename": get_filename_from_path(file_to_scan),
                    "directory": file_to_scan,
                }
            )

    output += "<h2>Files in directory with no security issues</h2>"
    output += f"<p>✅ Total Python files <b>without</b> detected security issues: {len(collection_ok_files)}</p>"
    output += "<p>The Python files with no security issues <b>detected</b> by codeaudit are:<p>"
    output += dict_list_to_html_table(collection_ok_files)
    output += "<br>"
    if package_name is not None:
        output += "<p><b>Note:</b><i>Since this check is done on a package on PyPI.org, the temporary local directories are deleted. To examine the package in detail, you should download the sources locally and run the command:<code>codeaudit filescan</code> again.</i></p>"
    output += "<p><b>Disclaimer:</b><i>This scan only evaluates Python files. Please note that security vulnerabilities may also exist in other files associated with the Python module.</i></p>"
    output += DISCLAIMER_TEXT
    create_htmlfile(output, filename)
def report_module_information(inputfile, reportname=DEFAULT_OUTPUT_FILE):
    """
    Generate a report on known vulnerabilities in Python modules and packages.

    This function analyzes a single Python file to identify imported
    external modules and checks those modules against the OSV vulnerability
    database. The collected results are written to a static HTML report.

    If the input refers to a valid PyPI package name instead of a local Python
    file, the function generates a vulnerability report directly for that
    package.

    While processing modules, progress information is printed to standard
    output.

    Example:
        Generate a module vulnerability report for a Python file::

            codeaudit modulescan <pythonfile>|<package> [yourreportname.html]

            codeaudit modulescan mypythonfile.py

    Args:
        inputfile (str): Path to a Python source file (*.py) to analyze, or the
            name of a package available on PyPI.
        reportname (str, optional): Name (and optional path) of the HTML file to
            write the vulnerability report to. The filename should use the
            ``.html`` extension. Defaults to ``DEFAULT_OUTPUT_FILE``.

    Returns:
        None: The function writes a static HTML report to disk.

    Raises:
        SystemExit: If the input is a directory, or is neither a valid Python
            file nor a valid PyPI package. The reason is printed to standard
            output before exiting.
    """
    html_output = "<h1>Python Code Audit Report</h1>"
    file_path = Path(inputfile)
    if file_path.is_dir():
        print(
            "codeaudit modulescan only works on single python files (*.py) or packages present on PyPI.org"
        )
        print(
            "See codeaudit modulescan -h or check the manual https://codeaudit.nocomplexity.com"
        )
        # sys.exit instead of the `site` helper exit(), which is meant for
        # interactive use and is not guaranteed to be defined.
        sys.exit(1)
    elif (
        file_path.suffix == ".py" and file_path.is_file() and is_ast_parsable(inputfile)
    ):
        source = read_in_source_file(inputfile)
        used_modules = get_imported_modules(source)
        external_modules = used_modules["imported_modules"]
        module_count = len(external_modules)
        # Initial call to print 0% progress
        printProgressBar(
            0, module_count, prefix="Progress:", suffix="Complete", length=50
        )
        html_output += "<h2>Module scan report</h2>"
        html_output += f"<p>Security information for file: <b>{inputfile}</b></p>"
        html_output += f"<p>Total Dependencies Scanned: {module_count} </p>"
        if external_modules:
            html_output += "<details>"
            html_output += "<summary>View scanned module dependencies(imported packages).</summary>"
            html_output += (
                "<ul>\n"
                + "\n".join(f" <li>{module}</li>" for module in external_modules)
                + "\n</ul>"
            )
            html_output += "</details>"
            # Vulnerability info per external module, in discovery order.
            html_output += "<h3>Vulnerability information for detected modules</h3>"
            for i, module in enumerate(external_modules):
                printProgressBar(
                    i + 1,
                    module_count,
                    prefix="Progress:",
                    suffix="Complete",
                    length=50,
                )
                html_output += module_vulnerability_check(module) + "<br>"
        else:
            html_output += "<p>✅ No external modules found!"
        # NOTE(review): the filescan hint is assumed to be shown regardless of
        # whether external modules were found.
        html_output += f'<br><p>💡 To check for <b>security weaknesses</b> in this package, use the command:<div class="code-box">codeaudit filescan {inputfile}</div><br></p>'
        html_output += "<br>" + DISCLAIMER_TEXT
        create_htmlfile(html_output, reportname)
    elif get_pypi_download_info(inputfile):
        package_name = inputfile  # the input is a PyPI package name
        html_output += "<h2>Package scan report for known vulnerabilities</h2>"
        html_output += module_vulnerability_check(package_name)
        html_output += f'<br><p>💡 To check for <b>security weaknesses</b> in this package, use the command:<div class="code-box">codeaudit filescan {package_name}</div><br></p>'
        html_output += "<br>" + DISCLAIMER_TEXT
        create_htmlfile(html_output, reportname)
    else:
        # File is NOT a valid Python file, or package does not exist on PyPI.
        print(
            f"Error: '{inputfile}' isn't a valid Python file(*.py), or a valid package on PyPI.org."
        )
        sys.exit(1)
def module_vulnerability_check(module):
    """
    Build the HTML fragment with the vulnerability result for one package.

    The package is looked up with ``check_module_vulnerability``:
    - If nothing is returned, a success message is rendered.
    - Otherwise a collapsible ``<details>`` section is generated that holds
      the vulnerability data rendered by ``json_to_html``.

    Args:
        module (str): Name of the Python package/module to check.

    Returns:
        str: HTML string representing the vulnerability scan result for the module.
    """
    vuln_info = check_module_vulnerability(module)
    if not vuln_info:
        # No SAST scan of the package here: run `filescan` on it manually;
        # dependency trees can be deep and are rarely Python only.
        return f"<p>✅ No known vulnerabilities found for package: <b>{module}</b>.</p>"

    return "".join(
        (
            "<details>",
            f"<summary>⚠️ View vulnerability information for package <b>{module}</b>.</summary>",
            json_to_html(vuln_info),
            "</details>",
        )
    )
def create_htmlfile(html_input, outputfile):
    """Wrap a report body in a complete HTML page and write it to disk.

    The page inlines the packaged stylesheet, loads the Vega scripts from the
    jsDelivr CDN and appends a creation timestamp, the tool version and a
    footer.

    Args:
        html_input (str): HTML fragment that forms the body of the report.
        outputfile (str): Name (and optional path) of the file to write.

    Raises:
        SystemExit: If the directory of ``outputfile`` does not exist; the
            error is printed to stderr first.
    """
    target = Path(outputfile).expanduser().resolve()

    # Validate output directory (CLI-friendly)
    if not target.parent.is_dir():
        print(
            f"Error: output directory does not exist:\n {target.parent}",
            file=sys.stderr,
        )
        sys.exit(1)

    # The stylesheet is inlined so the report is a single self-contained file.
    css_content = Path(SIMPLE_CSS_FILE).read_text(encoding="utf-8")
    timestamp_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
    code_audit_version = __version__

    footer_links = (
        '<div class="footer-links">'
        'Check the <a href="https://nocomplexity.com/documents/codeaudit/intro.html" '
        'target="_blank">documentation</a> for help on found issues.<br>'
        'Codeaudit is made with <span class="heart">❤</span> by cyber security '
        'professionals who advocate for <a href="https://nocomplexity.com/simplify-security/" '
        'target="_blank">open simple security solutions</a>.<br>'
        '<a href="https://nocomplexity.com/documents/codeaudit/CONTRIBUTE.html" '
        'target="_blank">Join the community</a> and contribute to make this tool better!'
        "</div>"
    )

    page_parts = [
        '<!DOCTYPE html><html lang="en-US"><head>',
        '<meta charset="UTF-8"/>',
        "<title>Python_Code_Audit_SecurityReport</title>",
        f"<style>\n{css_content}\n</style>",
        # The three scripts below are needed to render the altair plots.
        '<script src="https://cdn.jsdelivr.net/npm/vega@5"></script>',
        '<script src="https://cdn.jsdelivr.net/npm/vega-lite@5"></script>',
        '<script src="https://cdn.jsdelivr.net/npm/vega-embed@6"></script>',
        "</head><body>",
        '<div class="container">',
        html_input,
        f"<p>This Python security report was created on: <b>{timestamp_str}</b> with ",
        PYTHON_CODE_AUDIT_TEXT,
        f" version <b>{code_audit_version}</b></p>",
        "<hr>",
        "<footer>",
        footer_links,
        "</footer>",
        "</div>",
        "</body></html>",
    ]

    # Write the HTML file
    target.write_text("".join(page_parts), encoding="utf-8")

    separator = "====================================================================="
    print("\n" + separator)
    print(
        "Code Audit report file created!\n"
        "Paste the line below directly into your browser bar:\n"
        f"\t{target.as_uri()}\n"
    )
    print(separator + "\n")
809# def create_htmlfile(html_input,outputfile):
810# """ Creates a clean html file based on html input given """
811# # Read CSS from the file - So it is included in the reporting HTML file
813# with open(SIMPLE_CSS_FILE, 'r') as css_file:
814# css_content = css_file.read()
815# # Start building the HTML
816# output = '<!DOCTYPE html><html lang="en-US"><head>'
817# output += '<meta charset="UTF-8"/>'
818# output += '<title>Python_Code_Audit_SecurityReport</title>'
819# # Inline CSS inside <style> block
820# output += f'<style>\n{css_content}\n</style>'
821# output += '<script src="https://cdn.jsdelivr.net/npm/vega@5"></script>' # needed for altair plots
822# output += '<script src="https://cdn.jsdelivr.net/npm/vega-lite@5"></script>' # needed for altair plots
823# output += '<script src="https://cdn.jsdelivr.net/npm/vega-embed@6"></script>' # needed for altair plots
824# output += '</head><body>'
825# output += '<div class="container">'
826# output += html_input
827# now = datetime.datetime.now()
828# timestamp_str = now.strftime("%Y-%m-%d %H:%M")
829# code_audit_version = __version__
830# output += (
831# f"<p>This Python security report was created on: <b>{timestamp_str}</b> with "
832# + PYTHON_CODE_AUDIT_TEXT
833# + f" version <b>{code_audit_version}</b></p>"
834# )
835# output += '<hr>'
836# output += '<footer>'
837# output += (
838# '<div class="footer-links">'
839# 'Check the <a href="https://nocomplexity.com/documents/codeaudit/intro.html" '
840# 'target="_blank">documentation</a> for help on found issues.<br>'
841# 'Codeaudit is made with <span class="heart">❤</span> by cyber security '
842# 'professionals who advocate for <a href="https://nocomplexity.com/simplify-security/" target="_blank">open simple security solutions</a>.<br>'
843# '<a href="https://nocomplexity.com/documents/codeaudit/CONTRIBUTE.html" target="_blank">Join the community</a> and contribute to make this tool better!'
844# "</div>"
845# )
846# output += "</footer>"
847# output += '</div>' #base container
848# output += '</body></html>'
849# # Now create the HTML output file
850# with open(outputfile, 'w') as f:
851# f.write(output)
852# current_directory = os.getcwd()
853# # Get the directory of the output file (if any)
854# directory_for_output = os.path.dirname(os.path.abspath(outputfile))
855# filename_only = os.path.basename(outputfile)
856# # Determine the effective directory to use in the file URL
857# if not directory_for_output or directory_for_output == current_directory:
858# file_url = f'file://{current_directory}/{filename_only}'
859# else:
860# file_url = f'file://{directory_for_output}/{filename_only}'
861# # Print the result
862# print("\n=====================================================================")
863# print(f'Code Audit report file created!\nPaste the line below directly into your browser bar:\n\t{file_url}\n')
864# print("=====================================================================\n")
def extract_altair_html(plot_html):
    """Return the content of the first ``<body>`` element of an HTML document.

    Args:
        plot_html (str): Full HTML text (e.g. the page generated for a plot).

    Returns:
        str: The text between ``<body ...>`` and ``</body>`` (matched
        case-insensitively, across newlines), stripped of surrounding
        whitespace and terminated with a newline. When no body element is
        found, a placeholder paragraph reporting the problem is returned.
    """
    found = re.search(
        r"<body[^>]*>(.*?)</body>", plot_html, re.DOTALL | re.IGNORECASE
    )
    if found is None:
        # No <body> present: emit a visible placeholder rather than nothing.
        return "<p>Altair plot was supposed to be here: But something went wrong! Fix needed."
    return found.group(1).strip() + "\n"
877# Replace the second dot with <br>
def replace_second_dot(s):
    """Return ``s`` with its second ``.`` replaced by ``<br>``.

    Strings containing fewer than two dots are returned unchanged; dots
    after the second one are kept as they are.
    """
    # Splitting at most twice leaves everything after the second dot intact.
    pieces = s.split(".", 2)
    if len(pieces) < 3:
        return s
    first, second, remainder = pieces
    return f"{first}.{second}<br>{remainder}"
def get_info_on_test(error):
    """
    Look up severity and help text for a construct in the checks DataFrame.

    The DataFrame returned by ``ast_security_checks()`` is searched for a row
    whose ``construct`` column equals ``error`` exactly; the first match wins.
    If there is no exact match but ``error`` contains ``"extractall"``, the
    row for ``"tarfile.TarFile"`` is used instead.

    Args:
        error (str): A string to search for in the ['construct'] column.

    Returns:
        tuple: (severity, info_text) taken from the ``severity`` and ``info``
        columns of the selected row.

    Raises:
        SystemExit: With status 1 (after printing an error message) when
        neither an exact match nor the ``extractall`` fallback row exists.
    """
    checks = ast_security_checks()

    exact_rows = checks[checks["construct"] == error]
    if not exact_rows.empty:
        row = exact_rows.iloc[0]  # first matching row
        return row["severity"], row["info"]

    if "extractall" in error:
        # Shortcut: when tarfile/zipfile are used through aliases, static AST
        # resolution cannot always name the exact construct, so any
        # "extractall" finding is reported with the tarfile.TarFile entry.
        fallback_rows = checks[checks["construct"] == "tarfile.TarFile"]
        if fallback_rows.empty:
            print("\nERROR: No fallback row found for 'tarfile.extractall'")
            # sys.exit instead of the site-provided exit(): the latter is only
            # meant for the interactive shell and is not always available.
            sys.exit(1)
        row = fallback_rows.iloc[0]
        return row["severity"], row["info"]

    print(f"\nERROR: No row found for '{error}'")
    print(f"No rows found exactly matching '{error}'.")
    sys.exit(1)
def report_implemented_tests(filename=DEFAULT_OUTPUT_FILE):
    """
    Write an HTML report listing every implemented security check.

    The report gives a readable overview of the static security validations
    returned by ``ast_security_checks()`` so they can be reviewed without
    reading the code base. It contains:
    - a table of all checks, sorted by construct name
    - the number of validations
    - the version of Python Code Audit (codeaudit) used
    - a note that issue reports should mention that version
    - the disclaimer text

    The page itself is produced and saved by ``create_htmlfile``.

    Python Code Audit is free and open-source software; contributions are
    welcome, see: https://github.com/nocomplexity/codeaudit#contributing

    Parameters:
        filename (str): The output HTML filename. Defaults to
            ``DEFAULT_OUTPUT_FILE``.
    """
    checks_table = ast_security_checks()

    # Insert a line break at the second dot of each construct name: the
    # simplest way to keep that column narrow without pandas styling options.
    checks_table["construct"] = checks_table["construct"].apply(replace_second_dot)

    validation_count = len(checks_table)
    # escape=False so the inserted <br> tags are rendered rather than shown.
    table_html = checks_table.sort_values(by="construct").to_html(
        escape=False, index=False
    )

    report = (
        "<h1>Python Code Audit Implemented validations</h1>"
        + table_html
        + "<br>"
        + f"<p>Number of implemented security validations:<b>{validation_count}</b></p>"
        + f"<p>Version of codeaudit: <b>{__version__}</b>"
        + "<p>Because Python and cybersecurity are constantly changing, issue reports <b>SHOULD</b> specify the codeaudit version used.</p>"
        + DISCLAIMER_TEXT
    )
    create_htmlfile(report, filename)
def printProgressBar(
    iteration,
    total,
    prefix="",
    suffix="",
    decimals=1,
    length=100,
    fill="█",
    printEnd="\r",
):
    """
    Print one frame of a terminal progress bar; call it repeatedly in a loop.

    @params:
        iteration   - Required  : current iteration (Int)
        total       - Required  : total iterations (Int)
        prefix      - Optional  : prefix string (Str)
        suffix      - Optional  : suffix string (Str)
        decimals    - Optional  : positive number of decimals in percent complete (Int)
        length      - Optional  : character length of bar (Int)
        fill        - Optional  : bar fill character (Str)
        printEnd    - Optional  : end character, a carriage return by default (Str)

    When total is 0 an empty bar labelled 100% is printed and no final
    newline is emitted.
    """
    if total != 0:
        ratio_percent = 100 * (iteration / float(total))
        percent = f"{ratio_percent:.{decimals}f}"
        done_cells = int(length * iteration // total)
        bar = fill * done_cells + "-" * (length - done_cells)
    else:
        # Nothing to count: avoid dividing by zero and show an empty bar.
        percent = "100"
        bar = "-" * length

    # The leading carriage return redraws the bar on the same terminal line.
    print(f"\r{prefix} |{bar}| {percent}% {suffix}", end=printEnd)

    finished = total != 0 and iteration >= total
    if finished:
        print()  # move to a fresh line once the bar is complete