Coverage for src / codeaudit / suppression.py: 68%
100 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-09 09:33 +0200
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-09 09:33 +0200
1"""
2License GPLv3 or higher.
4(C) 2025 Created by Maikel Mardjan - https://nocomplexity.com/
6This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
8This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
10You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
12Suppression logic - see documentation
14"""
16import ast
17import re
18import sys
19import tokenize
20from collections import defaultdict
def get_all_comments_by_line(filename):
    """
    Collect every non-empty ``#`` comment in a source file, keyed by line.

    The file is tokenized a single time. For each COMMENT token the leading
    ``#`` characters, spaces and tabs are stripped, as is trailing
    whitespace; comments that are empty after stripping are dropped.

    Args:
        filename: Path of the Python source file to scan.

    Returns:
        dict[int, str] mapping the 1-based line number on which a comment
        starts to its stripped text (texts sharing a line are joined
        with a newline).

    Raises:
        RuntimeError: if the file cannot be opened, decoded or tokenized
            (chained from the underlying OSError, UnicodeDecodeError or
            tokenize.TokenError).
    """
    collected = {}

    try:
        with tokenize.open(filename) as handle:
            for tok in tokenize.generate_tokens(handle.readline):
                if tok.type != tokenize.COMMENT:
                    continue
                body = tok.string.lstrip("# \t").rstrip()
                if not body:
                    continue
                # tok.start is a (row, col) pair; only the row is needed.
                collected.setdefault(tok.start[0], []).append(body)
    except (OSError, UnicodeDecodeError, tokenize.TokenError) as exc:
        # Fail loudly with context instead of silently ignoring
        raise RuntimeError(f"Failed to extract comments from {filename}") from exc

    merged = {}
    for lineno, parts in collected.items():
        merged[lineno] = "\n".join(parts)
    return merged
def get_start_to_end_lines(filename):
    """
    Parse the file once using AST and build a mapping:
    start_line → highest end_lineno found for any node starting on that line.

    The source is read with ``tokenize.open`` so the encoding is detected
    from a UTF-8 BOM or a PEP 263 coding declaration, exactly as
    ``get_all_comments_by_line`` reads it. Reading with a hard-coded UTF-8
    codec would make files in another declared encoding fail to decode and
    silently yield an empty mapping.

    All failures are reported on stderr; nothing is raised to the caller.

    NOTE(review): every AST node is considered, so a compound statement
    (def/class/if/with/...) maps its first line to the last line of its
    whole body. Confirm that this is the span callers really want.

    Args:
        filename: Path of the Python source file to analyse.

    Returns:
        dict[int, int] — line numbers are 1-based
        Returns empty dict if the file cannot be read or parsed.
    """
    # --- Step 1: read the source text -------------------------------------
    try:
        with tokenize.open(filename) as f:
            source = f.read()
    except FileNotFoundError:
        print(f"File not found: {filename}", file=sys.stderr)
        return {}
    except PermissionError:
        print(f"Permission denied: {filename}", file=sys.stderr)
        return {}
    except IsADirectoryError:
        print(f"Is a directory, not a file: {filename}", file=sys.stderr)
        return {}
    except OSError as e:
        print(f"OS error opening {filename}: {e}", file=sys.stderr)
        return {}
    except (SyntaxError, UnicodeDecodeError) as e:
        # tokenize.open raises SyntaxError for an invalid/unknown coding
        # declaration; read() raises UnicodeDecodeError when the bytes do
        # not match the detected encoding.
        print(
            f"Cannot read {filename} properly: {type(e).__name__}: {e}",
            file=sys.stderr,
        )
        return {}
    except Exception as e:
        print(
            f"Critical error while accessing {filename}: " f"{type(e).__name__}: {e}",
            file=sys.stderr,
        )
        return {}

    # --- Step 2: parse and record the widest span per start line ----------
    end_lines = {}
    try:
        tree = ast.parse(source, filename=filename)

        for node in ast.walk(tree):
            # Most nodes have lineno, but some (like comprehension ifs) might not
            start = getattr(node, "lineno", None)
            if start is None:
                continue

            # end_lineno may be absent or None → fall back to the start line
            end = getattr(node, "end_lineno", None) or start

            # Keep the maximum span for nodes starting on the same line
            end_lines[start] = max(end, end_lines.get(start, end))

    except SyntaxError as e:
        print(
            f"Syntax error in {filename} (line {e.lineno}): {e.msg}",
            file=sys.stderr,
        )
        return {}
    except ValueError as e:  # also covers UnicodeDecodeError
        print(
            f"Cannot read {filename} properly: {type(e).__name__}: {e}",
            file=sys.stderr,
        )
        return {}
    except MemoryError:
        print(f"Out of memory while parsing {filename}", file=sys.stderr)
        return {}
    except Exception as e:
        print(
            f"Unexpected error parsing AST of {filename}: "
            f"{type(e).__name__}: {e}",
            file=sys.stderr,
        )
        return {}

    return end_lines
121# def get_start_to_end_lines(filename):
122# """
123# Parse AST once and build mapping: start_line → highest end_line found for nodes
124# starting on that line.
125# """
126# end_lines = {}
128# try:
129# with open(filename, 'r', encoding='utf-8') as f:
130# source = f.read()
131# tree = ast.parse(source)
133# for node in ast.walk(tree):
134# if not hasattr(node, 'lineno'):
135# continue
136# start = node.lineno
137# end = getattr(node, 'end_lineno', start)
138# # Take the maximum end line if multiple nodes start on same line
139# if start not in end_lines or end > end_lines[start]:
140# end_lines[start] = end
141# except Exception:
142# pass
144# return end_lines
def is_suppressed(line, comments_by_line, start_to_end, match_func):
    """
    Tell whether the statement that starts at `line` carries a suppression.

    Every line from `line` up to and including the statement's recorded end
    line is examined, in order; the scan stops at the first comment that
    `match_func` accepts.

    Args:
        line: Line number on which the statement starts.
        comments_by_line: Mapping of line number → comment text; lines
            without an entry are treated as having an empty comment.
        start_to_end: Mapping of start line → end line. When `line` is not
            present, the statement is treated as a single line.
        match_func: Callable taking a comment string and returning a truthy
            value when that comment suppresses the finding.

    Returns:
        True if any comment in the span matches, otherwise False.
    """
    last_line = start_to_end.get(line, line)
    span = range(line, last_line + 1)
    return any(match_func(comments_by_line.get(n, "")) for n in span)
def filter_sast_results(sast_dict):
    """
    Build a copy of a SAST report with suppressed findings stripped out.

    The scanned file is tokenized and parsed a single time. A finding is
    dropped when a suppression marker appears in a comment anywhere between
    the start and the end line of the statement it was reported on
    (multi-line statements are resolved through the AST end line).

    Args:
        sast_dict: Report dictionary. Must contain ``"file_location"``; the
            optional ``"result"`` entry maps a key to a list of line numbers
            (non-list values are passed through untouched).

    Returns:
        A shallow copy of `sast_dict`. When there is nothing to filter the
        copy is returned unchanged; otherwise its ``"result"`` is replaced by
        a new dict in which list entries that became empty are omitted.
    """
    source_path = sast_dict["file_location"]
    findings = sast_dict.get("result", {})

    # Gather every distinct line number reported by a list-valued entry.
    reported = set()
    if findings:
        for entry in findings.values():
            if isinstance(entry, list):
                reported.update(entry)

    # Nothing reported at all → no need to touch the file.
    if not reported:
        return sast_dict.copy()

    # Parse and tokenize **once**
    comment_map = get_all_comments_by_line(source_path)
    span_map = get_start_to_end_lines(source_path)

    # Lines whose findings survive suppression.
    surviving = {
        lineno
        for lineno in sorted(reported)
        if not is_suppressed(lineno, comment_map, span_map, match_suppression_keyword)
    }

    pruned = {}
    for name, entry in findings.items():
        if not isinstance(entry, list):
            pruned[name] = entry
            continue
        remaining = [ln for ln in entry if ln in surviving]
        if remaining:
            pruned[name] = remaining

    report = sast_dict.copy()
    report["result"] = pruned
    return report
def match_suppression_keyword(comment_line):
    """
    Report whether a comment contains one of the known SAST suppression markers.

    Matching is case-insensitive and works on whole tokens: the comment is
    lower-cased and split on every run of characters that are neither word
    characters nor hyphens, so ``nosec`` matches but ``nosecurity`` does not.

    Args:
        comment_line: Comment text to inspect (may be empty or None).

    Returns:
        True if at least one marker is present as a token, otherwise False.
    """
    if not comment_line:
        return False

    markers = frozenset(
        name.lower()
        for name in (
            "nosec",
            "nosemgrep",
            "sast-ignore",
            "ignore-sast",
            "security-ignore",
            "ignore-security",
            "NOSONAR",
            "noqa",
            # False positive / risk handling
            "false-positive",
            "falsepositive",
            "risk-accepted",
            "security-accepted",
            "security-reviewed",
            "security-exception",
        )
    )

    # Whitespace and '#' are separators for the pattern below, so splitting
    # the lower-cased text directly yields the same non-empty tokens as
    # stripping hashes word by word first.
    words = re.split(r"[^\w\-]+", comment_line.lower())
    return not markers.isdisjoint(words)