Coverage for src / codeaudit / suppression.py: 68%

100 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-09 09:33 +0200

1""" 

2License GPLv3 or higher. 

3 

4(C) 2025 Created by Maikel Mardjan - https://nocomplexity.com/ 

5 

6This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. 

7 

8This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 

9 

10You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>. 

11 

12Suppression logic - see documentation 

13 

14""" 

15 

16import ast 

17import re 

18import sys 

19import tokenize 

20from collections import defaultdict 

21 

22 

def get_all_comments_by_line(filename):
    """
    Tokenize the file once and collect all real ``#`` comments,
    keyed by the line number on which the comment token starts.

    The file is opened with ``tokenize.open``, so its encoding is detected
    from a BOM or a PEP 263 coding cookie. Leading ``#`` characters, spaces
    and tabs, and trailing whitespace, are stripped from every comment;
    comments that are empty after stripping are skipped.

    Args:
        filename: Path of the Python source file to scan.

    Returns:
        dict[int, str]: line number (as reported by ``tokenize``) mapped to
        the stripped comment text found on that line.

    Raises:
        RuntimeError: If the file cannot be opened, decoded or tokenized.
            The underlying exception is chained as ``__cause__``.
    """
    comments_by_line = defaultdict(list)

    try:
        with tokenize.open(filename) as f:
            for token in tokenize.generate_tokens(f.readline):
                if token.type != tokenize.COMMENT:
                    continue
                text = token.string.lstrip("# \t").rstrip()
                if text:
                    comments_by_line[token.start[0]].append(text)

    except (OSError, UnicodeDecodeError, SyntaxError, tokenize.TokenError) as exc:
        # SyntaxError is included on purpose: tokenize.open() raises it for an
        # invalid coding cookie and the tokenizer raises IndentationError (a
        # SyntaxError subclass) for inconsistent dedents. Without it those
        # failures would escape without the file name attached.
        # Fail loudly with context instead of silently ignoring.
        raise RuntimeError(f"Failed to extract comments from {filename}") from exc

    return {line: "\n".join(texts) for line, texts in comments_by_line.items()}

43 

44 

def get_start_to_end_lines(filename):
    """
    Parse the file once using AST and build a mapping:
    start_line → highest end_lineno found for any node starting on that line.

    The file is read as raw bytes and handed to ``ast.parse`` so that Python's
    own source-encoding rules (UTF-8 BOM, PEP 263 coding cookie) are applied,
    instead of assuming UTF-8.

    Args:
        filename: Path of the Python source file to parse.

    Returns:
        dict[int, int] — line numbers are 1-based.
        Returns an empty dict if the file cannot be read or parsed; in that
        case a diagnostic message is printed to stderr. This function does
        not raise for read or parse failures.
    """
    # --- Step 1: read the raw source -------------------------------------
    try:
        with open(filename, "rb") as f:
            source = f.read()
    except FileNotFoundError:
        print(f"File not found: {filename}", file=sys.stderr)
        return {}
    except PermissionError:
        print(f"Permission denied: {filename}", file=sys.stderr)
        return {}
    except IsADirectoryError:
        print(f"Is a directory, not a file: {filename}", file=sys.stderr)
        return {}
    except OSError as e:
        print(f"OS error opening {filename}: {e}", file=sys.stderr)
        return {}
    except Exception as e:
        # Best-effort contract: anything else (e.g. a filename of the wrong
        # type) is reported and treated as "no span information".
        print(
            f"Critical error while accessing {filename}: "
            f"{type(e).__name__}: {e}",
            file=sys.stderr,
        )
        return {}

    # --- Step 2: parse into an AST ---------------------------------------
    try:
        tree = ast.parse(source, filename=filename)
    except SyntaxError as e:
        print(
            f"Syntax error in {filename} (line {e.lineno}): {e.msg}",
            file=sys.stderr,
        )
        return {}
    except ValueError as e:
        # Covers UnicodeDecodeError (a ValueError subclass) as well.
        print(
            f"Cannot read {filename} properly: {type(e).__name__}: {e}",
            file=sys.stderr,
        )
        return {}
    except MemoryError:
        print(f"Out of memory while parsing {filename}", file=sys.stderr)
        return {}
    except Exception as e:
        print(
            f"Unexpected error parsing AST of {filename}: "
            f"{type(e).__name__}: {e}",
            file=sys.stderr,
        )
        return {}

    # --- Step 3: record the widest span per starting line ------------------
    end_lines = {}
    for node in ast.walk(tree):
        # Not every node carries position info (e.g. Module, expression contexts).
        start = getattr(node, "lineno", None)
        if start is None:
            continue

        # end_lineno may be absent or None → fall back to the start line.
        end = getattr(node, "end_lineno", None) or start

        # Keep the maximum span for nodes starting on the same line.
        if start not in end_lines or end > end_lines[start]:
            end_lines[start] = end

    return end_lines

119 

120 

121# def get_start_to_end_lines(filename): 

122# """ 

123# Parse AST once and build mapping: start_line → highest end_line found for nodes 

124# starting on that line. 

125# """ 

126# end_lines = {} 

127 

128# try: 

129# with open(filename, 'r', encoding='utf-8') as f: 

130# source = f.read() 

131# tree = ast.parse(source) 

132 

133# for node in ast.walk(tree): 

134# if not hasattr(node, 'lineno'): 

135# continue 

136# start = node.lineno 

137# end = getattr(node, 'end_lineno', start) 

138# # Take the maximum end line if multiple nodes start on same line 

139# if start not in end_lines or end > end_lines[start]: 

140# end_lines[start] = end 

141# except Exception: 

142# pass 

143 

144# return end_lines 

145 

146 

def is_suppressed(line, comments_by_line, start_to_end, match_func):
    """
    Tell whether the statement that starts at `line` carries a suppression.

    Every line from `line` up to and including the statement's end line
    (looked up in `start_to_end`, defaulting to `line` itself) is inspected.
    The comment text for each line — or an empty string when the line has no
    entry in `comments_by_line` — is passed to `match_func`; the first truthy
    answer makes the statement suppressed.

    Returns:
        bool: True if any inspected line matches, otherwise False.
    """
    last_line = start_to_end.get(line, line)
    return any(
        match_func(comments_by_line.get(current, ""))
        for current in range(line, last_line + 1)
    )

158 

159 

def filter_sast_results(sast_dict):
    """
    Build a filtered copy of `sast_dict` with suppressed findings dropped.

    `sast_dict` must contain "file_location"; its optional "result" entry
    maps keys to lists of line numbers. The source file is tokenized and
    parsed only once, and multi-line statements are honoured through the
    AST end line of the statement starting at each finding.

    In the returned dictionary:
      * list values keep only the line numbers that are not suppressed,
      * keys whose list ends up empty are left out,
      * non-list values are carried over unchanged.

    When there is no result, or no list value contains a line number, a
    shallow copy of `sast_dict` is returned without touching the file.
    """
    source_path = sast_dict["file_location"]
    findings = sast_dict.get("result", {})

    if not findings:
        return sast_dict.copy()

    # Every distinct line number that has at least one finding.
    flagged_lines = {
        line_no
        for entry in findings.values()
        if isinstance(entry, list)
        for line_no in entry
    }

    if not flagged_lines:
        return sast_dict.copy()

    # Tokenize and parse the file exactly once.
    comments = get_all_comments_by_line(source_path)
    spans = get_start_to_end_lines(source_path)

    # Lines whose findings survive (i.e. are not suppressed).
    surviving = {
        line_no
        for line_no in sorted(flagged_lines)
        if not is_suppressed(line_no, comments, spans, match_suppression_keyword)
    }

    # Rebuild the result mapping from the surviving lines.
    pruned = {}
    for name, entry in findings.items():
        if not isinstance(entry, list):
            pruned[name] = entry
            continue
        remaining = [line_no for line_no in entry if line_no in surviving]
        if remaining:
            pruned[name] = remaining

    output = sast_dict.copy()
    output["result"] = pruned
    return output

208 

209 

def match_suppression_keyword(comment_line):
    """
    Report whether the comment text contains a SAST suppression marker.

    The comment is split on whitespace, leading ``#`` characters are removed
    from each word and everything is lower-cased. The result is then broken
    into tokens on any run of characters that is neither a word character
    nor a hyphen, and a marker matches only when it equals a whole token
    (case-insensitively).

    Returns:
        bool: True if at least one marker is present; False otherwise,
        including when `comment_line` is empty or None.
    """
    if not comment_line:
        return False

    known_markers = (
        "nosec",
        "nosemgrep",
        "sast-ignore",
        "ignore-sast",
        "security-ignore",
        "ignore-security",
        "NOSONAR",
        "noqa",
        # False positive / risk handling
        "false-positive",
        "falsepositive",
        "risk-accepted",
        "security-accepted",
        "security-reviewed",
        "security-exception",
    )
    wanted = {marker.lower() for marker in known_markers}

    cleaned_words = (word.lstrip("#").lower() for word in comment_line.split())
    tokens = re.split(r"[^\w\-]+", " ".join(cleaned_words))

    # A match exists as soon as the two collections share one element.
    return not wanted.isdisjoint(tokens)

237 tokens = re.split(r"[^\w\-]+", normalized) 

238 return any(marker.lower() in tokens for marker in MARKER_LIST)