Coverage for src / codeaudit / altairplots.py: 7%

229 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-09 09:33 +0200

1""" 

2License GPLv3 or higher. 

3 

4(C) 2025 - 2026 Created by Maikel Mardjan - https://nocomplexity.com/ 

5 

6This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. 

7 

8This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 

9 

10You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>. 

11 

12Altair Plotting functions for Python Code Audit (aka codeaudit) 

13""" 

14 

15from collections import Counter 

16from pathlib import Path 

17 

18import altair as alt 

19import pandas as pd 

20 

21 

def module_count_barchart(scanresult):
    """Create a bar chart showing module counts by category.

    Compares the number of Python standard library modules with the number
    of third-party modules listed in the scan result.

    Args:
        scanresult (dict): Scan result data containing a "module_overview"
            key with "core_modules" and "imported_modules" entries. A
            missing or empty entry is counted as zero modules.

    Returns:
        altair.Chart | str: An Altair bar chart visualizing module counts.
            Returns a warning message string if the scan result is missing,
            is not a dict, or has no dict under "module_overview".
    """
    if not scanresult or not isinstance(scanresult, dict):
        return "⚠️ No scan result available.\n\nPlease run a scan first."

    # Use .get() so an incomplete scan result yields the documented warning
    # string instead of an uncaught KeyError.
    data = scanresult.get("module_overview")
    if not isinstance(data, dict):
        return "⚠️ No module overview found in scan result."

    categories = ["Python Standard Libraries", "Third-party modules"]
    counts_df = pd.DataFrame(
        {
            "Category": categories,
            "Count": [
                len(data.get("core_modules") or ()),
                len(data.get("imported_modules") or ()),
            ],
        }
    )

    bar_chart = (
        alt.Chart(counts_df)
        .mark_bar(size=60, cornerRadius=8)
        .encode(
            x=alt.X("Category:N", title=None, axis=alt.Axis(labelFontSize=12)),
            y=alt.Y("Count:Q", title="Number of Modules"),
            color=alt.Color(
                "Category:N",
                # Explicit domain pins each category to a fixed colour.
                scale=alt.Scale(domain=categories, range=["#4C78A8", "#F58518"]),
            ),
            tooltip=["Category", "Count"],
        )
        .properties(title="Package Modules Overview", width=400, height=300)
        .configure_title(fontSize=16, anchor="start")
    )

    return bar_chart

69 

70 

def module_distribution_view(scanresult):
    """Create a donut chart showing module distribution.

    Each slice is labelled with its module count, drawn in white on top of
    the slice.

    Args:
        scanresult (dict): Scan result containing "module_overview" with
            "core_modules" and "imported_modules". A missing or empty entry
            is counted as zero modules.

    Returns:
        altair.Chart | str: Donut chart of module distribution, or a warning
            message if the scan result is missing, is not a dict, or has no
            dict under "module_overview".
    """
    if not scanresult or not isinstance(scanresult, dict):
        return "⚠️ No scan result available.\n\nPlease run a scan first."

    # Use .get() so an incomplete scan result yields the documented warning
    # string instead of an uncaught KeyError.
    data = scanresult.get("module_overview")
    if not isinstance(data, dict):
        return "⚠️ No module overview found in scan result."

    categories = ["Python Standard Library modules", "Imported Libraries"]
    pie_df = pd.DataFrame(
        {
            "Category": categories,
            "Count": [
                len(data.get("core_modules") or ()),
                len(data.get("imported_modules") or ()),
            ],
        }
    )

    inner_radius, outer_radius = 80, 140

    # Both layers share the same stacked theta encoding so every label lands
    # on the slice it describes.
    base = alt.Chart(pie_df).encode(theta=alt.Theta("Count:Q", stack=True))

    pie_chart = (
        base.mark_arc(innerRadius=inner_radius, outerRadius=outer_radius)
        .encode(
            color=alt.Color(
                "Category:N",
                # Explicit domain: without it the colours are assigned in
                # alphabetical category order rather than the listed order.
                scale=alt.Scale(domain=categories, range=["#4C78A8", "#F58518"]),
                legend=alt.Legend(title="Category"),
            ),
            tooltip=["Category", "Count"],
        )
        .properties(title="Module Composition", width=380, height=380)
    )

    # Count labels, placed mid-way through the ring. White text is only
    # readable on top of a coloured slice, hence the explicit radius.
    text = base.mark_text(
        radius=(inner_radius + outer_radius) / 2, size=16, fontWeight="bold"
    ).encode(
        text=alt.Text("Count:Q"),
        # Group by category so this layer stacks in the same order as the arcs.
        detail="Category:N",
        color=alt.value("white"),
    )

    donut = (pie_chart + text).configure_title(fontSize=16)

    return donut

120 

121 

def make_chart(y_field, df):
    """Build one bar chart for ``y_field``, colouring bars relative to the median.

    Bars whose value is strictly greater than the column median are red;
    all other bars are grey. The input frame is copied, not modified.

    Args:
        y_field (str): Name of the column plotted on the y-axis.
        df (pandas.DataFrame): Data with a "FileName" column and ``y_field``.

    Returns:
        altair.Chart: Bar chart with one bar per row.
    """
    cutoff = df[y_field].median()

    def bucket(value):
        # Strictly above the median -> highlighted.
        if value > cutoff:
            return "red"
        return "grey"

    plot_df = df.copy()
    plot_df["color"] = plot_df[y_field].apply(bucket)

    bar_colour = alt.Color(
        "color:N",
        scale=alt.Scale(domain=["red", "grey"], range=["#d62728", "#7f7f7f"]),
        legend=None,
    )
    bars = alt.Chart(plot_df).mark_bar()
    bars = bars.encode(
        x=alt.X("FileName:N", sort=None, title="File Name"),
        y=alt.Y(f"{y_field}:Q", title=y_field),
        color=bar_colour,
        tooltip=["FileName", y_field],
    )
    return bars.properties(width=400, height=400, title=y_field)

148 

149 

def multi_bar_chart(df):
    """Lay out one bar chart per metric column in a grid two charts wide.

    Args:
        df (pandas.DataFrame): Data passed unchanged to ``make_chart`` for
            each metric.

    Returns:
        altair.VConcatChart: Rows of horizontally concatenated charts,
            stacked vertically.
    """
    metric_names = (
        "Number_Of_Lines",
        "AST_Nodes",
        "External-Modules",
        "Functions",
        "Comment_Lines",
        "Complexity_Score",
    )
    charts_per_row = 2

    grid_rows = []
    for start in range(0, len(metric_names), charts_per_row):
        row_charts = [
            make_chart(name, df)
            for name in metric_names[start : start + charts_per_row]
        ]
        grid_rows.append(alt.hconcat(*row_charts))

    return alt.vconcat(*grid_rows)

170 

171 

def issue_plot(input_dict):
    """
    Create a radial (polar area) chart using Altair.

    Each entry becomes one slice whose angular width is its share of the
    total count and whose radius is scaled by the count itself.

    Parameters
    ----------
    input_dict : dict
        Dictionary where keys are 'construct' and values are 'count'.

    Returns
    -------
    alt.Chart
        Altair chart object.

    Raises
    ------
    ValueError
        If ``input_dict`` does not provide an ``items()`` method.
    """
    # Validate before building the frame. The original check ran after the
    # frame was created with exactly these columns, so it could never fail.
    try:
        pairs = list(input_dict.items())
    except AttributeError as err:
        raise ValueError(
            "input_dict must be a dict mapping 'construct' to 'count'."
        ) from err

    df = pd.DataFrame(pairs, columns=["construct", "count"])

    # Combined label for the legend; astype(str) keeps non-string keys usable.
    df["legend_label"] = (
        df["construct"].astype(str) + " (" + df["count"].astype(str) + ")"
    )

    # Fraction of the total gives each slice its angular width. Guard the
    # all-zero case so the angles are 0 rather than NaN.
    total = df["count"].sum()
    df["fraction"] = (df["count"] / total) if total else 0.0

    # Cumulative fractions give the start and end angle of each slice.
    slice_end = df["fraction"].cumsum()
    df["theta0"] = slice_end - df["fraction"]
    df["theta1"] = slice_end

    chart = (
        alt.Chart(df)
        .mark_arc(innerRadius=20)
        .encode(
            theta=alt.Theta("theta1:Q", stack=None, title=None),
            theta2="theta0:Q",  # start angle of the slice
            radius=alt.Radius("count:Q", scale=alt.Scale(type="sqrt")),
            color=alt.Color(
                "legend_label:N",
                scale=alt.Scale(scheme="category20"),
                legend=alt.Legend(title="Weaknesses (Count)"),
            ),
            tooltip=["construct", "count"],
        )
        .properties(title="Overview of Security Weaknesses", width=600, height=600)
    )

    return chart

223 

224 

def issue_overview(df):
    """Build a donut chart of counts per call, with counts shown in the legend.

    The input frame is copied, not modified.

    Args:
        df (pandas.DataFrame): Data with 'call' and 'count' columns.

    Returns:
        altair.Chart: Arc chart with one slice per row.
    """
    labelled = df.copy()
    # Legend entry reads "<call> (<count>)".
    labelled["label"] = labelled["call"] + " (" + labelled["count"].astype(str) + ")"

    encodings = {
        "theta": alt.Theta("count:Q", title="Count"),
        "color": alt.Color("label:N", title="Calls (Count)"),
        "tooltip": ["call", "count"],
    }
    donut = alt.Chart(labelled).mark_arc(innerRadius=50, outerRadius=120)
    donut = donut.encode(**encodings)
    return donut.properties(
        title="Overview of Security Weaknesses", width=600, height=600
    )

245 

246 

def complexity_heatmap(scanresult):
    """Create an interactive heatmap of file complexity and size.

    Shows the union of the 30 most complex and the 30 longest files, one row
    per file and one column per metric. Two sliders set the complexity and
    line-count thresholds above which a cell is highlighted, and a checkbox
    restricts the view to highlighted cells.

    Args:
        scanresult (dict): Scan result containing "file_security_info",
            a dict of per-file dicts with "file_name", "Number_Of_Lines"
            and "Complexity_Score". Missing metrics default to 0; entries
            that are not dicts are skipped.

    Returns:
        altair.Chart | str: Interactive heatmap chart, or a warning
            message if the scan result is missing, is not a dict, or
            contains no usable file data.
    """
    if not scanresult or not isinstance(scanresult, dict):
        return "⚠️ No scan result available.\n\nPlease run a scan first."

    # Return the documented warning instead of raising KeyError on a missing
    # key, or failing further down on an empty frame.
    data = scanresult.get("file_security_info")
    if not isinstance(data, dict) or not data:
        return "⚠️ No file data found in scan result."

    records = [
        {
            "File": f.get("file_name", "unknown"),
            "Lines": f.get("Number_Of_Lines", 0),
            "Complexity": f.get("Complexity_Score", 0),
        }
        for f in data.values()
        if isinstance(f, dict)
    ]
    if not records:
        return "⚠️ No file info available."

    df = pd.DataFrame(records)

    total_files = len(df)  # Total number of files (for subtitle)
    # Risk score: complexity and size, each divided by a fixed constant.
    df["RiskScore"] = (df["Complexity"] / 80) + (df["Lines"] / 2000)
    top_complexity = df.nlargest(30, "Complexity")  # Top 30 by Complexity
    top_lines = df.nlargest(30, "Lines")  # Top 30 by Lines

    # --- Combine + deduplicate ---
    # NOTE(review): de-duplication is by "File" alone, so two files with the
    # same file_name are collapsed into one row — confirm that file_name is
    # unique per scanned file.
    df_filtered = (
        pd.concat([top_complexity, top_lines])
        .drop_duplicates(subset="File")
        .sort_values("RiskScore", ascending=False)
        .reset_index(drop=True)
    )

    # --- Melt AFTER filtering --- (one row per file/metric pair)
    df_melted = df_filtered.melt(
        id_vars=["File", "RiskScore"],
        value_vars=["Lines", "Complexity"],
        var_name="Metric",
        value_name="Value",
    )

    # Dynamic slider ranges
    max_complexity = int(df_filtered["Complexity"].max())
    max_lines = int(df_filtered["Lines"].max())

    complexity_slider = alt.param(
        name="ComplexityThreshold",
        value=int(max_complexity * 0.7),
        bind=alt.binding_range(
            min=0, max=max_complexity, step=max(1, max_complexity // 100)
        ),
    )

    lines_slider = alt.param(
        name="LinesThreshold",
        value=int(max_lines * 0.7),
        bind=alt.binding_range(min=0, max=max_lines, step=max(10, max_lines // 100)),
    )

    show_highrisk_only = alt.param(
        name="ShowHighRiskOnly",
        value=False,
        bind=alt.binding_checkbox(name="Show only high-risk files"),
    )

    # --- High-risk condition: the cell's value exceeds its metric's slider ---
    highrisk_expr = (
        (alt.datum.Metric == "Complexity") & (alt.datum.Value > complexity_slider)
    ) | ((alt.datum.Metric == "Lines") & (alt.datum.Value > lines_slider))

    # --- Filter expression: keep everything unless the checkbox is ticked ---
    filter_expr = (~show_highrisk_only) | highrisk_expr

    base = (
        alt.Chart(df_melted)
        .add_params(complexity_slider, lines_slider, show_highrisk_only)
        .transform_filter(filter_expr)
    )

    # Colour logic: fixed highlight colour for high-risk cells, otherwise a
    # sequential scale on the value.
    color_scale = alt.condition(
        highrisk_expr,
        alt.value("#ff6b6b"),
        alt.Color(
            "Value:Q",
            scale=alt.Scale(scheme="yellowgreenblue"),
            legend=alt.Legend(title="Value"),
        ),
    )

    # Heatmap
    heatmap = (
        base.mark_rect()
        .encode(
            x=alt.X("Metric:N", title="Metric"),
            y=alt.Y(
                "File:N",
                sort=alt.SortField(field="RiskScore", order="descending"),
                title=f"Filtered Files ({len(df_filtered)})",
            ),
            color=color_scale,
            tooltip=[
                "File",
                "Metric",
                "Value",
                alt.Tooltip("RiskScore:Q", format=".2f"),
            ],
        )
        .properties(
            width=500,
            height=450,
            title=alt.TitleParams(
                text="🔥 Code Risk Heatmap",
                subtitle=["Risk heatmap", f"Based on {total_files} files"],
            ),
        )
    )

    # Text overlay: the numeric value inside each cell.
    text = base.mark_text(size=11).encode(
        x="Metric:N",
        y=alt.Y("File:N", sort=alt.SortField(field="RiskScore", order="descending")),
        text=alt.Text("Value:Q", format=".0f"),
        color=alt.condition(highrisk_expr, alt.value("white"), alt.value("black")),
    )

    return heatmap + text

385 

386 

def lines_of_code_overview(scanresult, width=800, height=400):
    """Create a bar chart of top files by lines of code.

    Displays the top 30 files ranked by lines of code, with disambiguated
    filenames and tooltips showing full path and complexity.

    Args:
        scanresult (dict): Scan result containing "file_security_info",
            a dict of per-file dicts. Entries that are not dicts are skipped.
        width (int, optional): Chart width in pixels. Defaults to 800.
        height (int, optional): Chart height in pixels. Defaults to 400.

    Returns:
        altair.Chart | str: Bar chart visualization, or a warning
            message if no valid data is available.
    """
    # Guard against None / non-dict input, which would otherwise raise
    # AttributeError on .get().
    if not isinstance(scanresult, dict):
        return "⚠️ No scan result available.\n\nPlease run a scan first."

    # --- 1. Data Extraction ---
    files_dict = scanresult.get("file_security_info", {})
    if not isinstance(files_dict, dict) or not files_dict:
        return "⚠️ No file data found."

    data = []
    for f in files_dict.values():
        if not isinstance(f, dict):
            continue
        full_path = str(f.get("FilePath", f.get("file_name", "")))
        p = Path(full_path)
        data.append(
            {
                "full_path": full_path,
                "base_name": p.name,
                "parent_folder": p.parent.name if len(p.parts) > 1 else "",
                "lines": f.get("Number_Of_Lines", 0),
                "complexity": f.get("Complexity_Score", 0),
            }
        )

    df = pd.DataFrame(data)

    if df.empty:
        return "⚠️ No file info available."

    total_files = len(df)

    # --- 2. Top 30 filter based on lines ---
    df = (
        df.nlargest(30, "lines")
        .sort_values("lines", ascending=False)
        .reset_index(drop=True)
    )

    # --- 3. Smart Labeling: filename + parent folder only if needed ---
    # A base name that occurs more than once among the selected rows is
    # prefixed with its parent folder, when one is known.
    counts = df.groupby("base_name")["base_name"].transform("count")
    df["display_name"] = [
        f"{parent}/{name}" if occurrences > 1 and parent else name
        for parent, name, occurrences in zip(
            df["parent_folder"], df["base_name"], counts
        )
    ]

    # --- 4. Color scale ---
    color_scale = alt.Scale(scheme="reds", domain=[0, df["lines"].max()])

    # --- 5. Chart ---
    chart = alt.Chart(df).encode(
        y=alt.Y(
            "display_name:N",
            sort=alt.EncodingSortField(field="lines", order="descending"),
            title=f"Top Files ({len(df)})",
        ),
        x=alt.X("lines:Q", title="Lines of Code"),
        tooltip=[
            alt.Tooltip("full_path:N", title="Full Path"),
            alt.Tooltip("lines:Q", title="Lines"),
            alt.Tooltip("complexity:Q", title="Complexity"),
        ],
    )

    bars = chart.mark_bar().encode(
        color=alt.Color("lines:Q", scale=color_scale, title="LoC")
    )

    # Value label drawn just past the end of each bar.
    text = chart.mark_text(align="left", baseline="middle", dx=5).encode(
        text=alt.Text("lines:Q", format=",")
    )

    return (
        (bars + text)
        .properties(
            width=width,
            height=height,
            title=alt.TitleParams(
                text="📊 Lines of Code per File",
                subtitle=[f"Top {len(df)} of {total_files} files"],
            ),
        )
        .configure_view(strokeWidth=0)
    )

484 

485 

def ast_nodes_overview(scanresult, width=800, height=400):
    """Create a bar chart of top files by AST node count.

    Displays the top 30 files ranked by AST nodes, with disambiguated
    filenames, a derived density metric (AST nodes per line), and tooltips
    showing file details. Files with a non-zero "warnings" value are drawn
    in crimson, and a dashed rule marks the 75th percentile of the AST node
    counts shown.

    Args:
        scanresult (dict): Scan result containing "file_security_info",
            a dict of per-file dicts. Entries that are not dicts are skipped.
        width (int, optional): Chart width in pixels. Defaults to 800.
        height (int, optional): Chart height in pixels. Defaults to 400.

    Returns:
        altair.Chart | str: Bar chart visualization, or a warning
            message if no valid data is available.
    """
    if not scanresult or not isinstance(scanresult, dict):
        return "⚠️ No scan result available.\n\nPlease run a scan first."

    files = scanresult.get("file_security_info", {})
    if not isinstance(files, dict) or not files:
        return "⚠️ No file data found in scan result."

    # Extract data ---
    data = []
    for f in files.values():
        if not isinstance(f, dict):
            continue
        full_path = str(f.get("FilePath", f.get("file_name", "unknown")))
        p = Path(full_path)
        data.append(
            {
                "full_path": full_path,
                "base_name": p.name,
                "parent_folder": p.parent.name if len(p.parts) > 1 else "",
                "ast_nodes": f.get("AST_Nodes", 0),
                "lines": f.get("Number_Of_Lines", 1),
                "complexity": f.get("Complexity_Score", 0),
                "warnings": f.get("warnings", 0),
            }
        )

    df = pd.DataFrame(data)
    if df.empty:
        return "⚠️ No file info available."

    total_files = len(df)

    # Top 30 filter by AST nodes ---
    df = (
        df.nlargest(30, "ast_nodes")
        .sort_values("ast_nodes", ascending=False)
        .reset_index(drop=True)
    )

    # Derived metric ---
    # Clamp the divisor to 1: the default above only covers a missing key,
    # while a reported line count of 0 would still divide by zero.
    df["ast_density"] = df["ast_nodes"] / df["lines"].clip(lower=1)

    # Smart Y-axis labels ---
    # A base name that occurs more than once among the selected rows is
    # prefixed with its parent folder, when one is known.
    counts = df.groupby("base_name")["base_name"].transform("count")
    df["display_name"] = [
        f"{parent}/{name}" if occurrences > 1 and parent else name
        for parent, name, occurrences in zip(
            df["parent_folder"], df["base_name"], counts
        )
    ]

    color_scale = alt.Scale(scheme="reds", domain=[0, df["ast_nodes"].max()])

    # Dashed vertical rule at the 75th percentile of the displayed files.
    threshold = df["ast_nodes"].quantile(0.75)
    rule = (
        alt.Chart(pd.DataFrame({"threshold": [threshold]}))
        .mark_rule(color="black", strokeDash=[6, 4])
        .encode(x="threshold:Q")
    )

    chart = alt.Chart(df).encode(
        y=alt.Y(
            "display_name:N",
            sort=alt.EncodingSortField(field="ast_nodes", order="descending"),
            title=f"Top Files ({len(df)})",
        ),
        x=alt.X("ast_nodes:Q", title="AST Nodes"),
        tooltip=[
            alt.Tooltip("full_path:N", title="Full Path"),
            alt.Tooltip("ast_nodes:Q", title="AST Nodes"),
            alt.Tooltip("lines:Q", title="Lines"),
            alt.Tooltip("complexity:Q", title="Complexity"),
            alt.Tooltip("ast_density:Q", title="AST Density", format=".2f"),
            alt.Tooltip("warnings:Q", title="Warnings"),
        ],
    )

    bars = chart.mark_bar().encode(
        color=alt.condition(
            "datum.warnings > 0",
            alt.value("crimson"),
            alt.Color("ast_nodes:Q", scale=color_scale, title="AST Nodes"),
        )
    )

    # Value label drawn just past the end of each bar.
    text = chart.mark_text(align="left", baseline="middle", dx=5, color="black").encode(
        text=alt.Text("ast_nodes:Q", format=",")
    )

    return (
        (bars + text + rule)
        .properties(
            width=width,
            height=height,
            title=alt.TitleParams(
                text="📊 AST Nodes per File",
                subtitle=[f"Top {len(df)} of {total_files} files"],
            ),
        )
        .configure_view(strokeWidth=0)
    )

596 

597 

def weaknesses_overview(scanresult):
    """Create a bar chart of the most common security weaknesses.

    Counts every non-empty string "validation" value found in the
    "sast_result" findings of all files, and displays the 50 most frequent
    ones as horizontal bars. The five most frequent bars get a black outline.
    Labels are truncated to 40 characters.

    Args:
        scanresult (dict): Scan result containing "file_security_info"
            with SAST validation findings per file.

    Returns:
        altair.Chart: Bar chart of top security weaknesses, or a fallback
            text chart if no data is available.
    """

    def message_chart(message, **mark_options):
        # Single-cell text chart used for every "nothing to show" outcome.
        return (
            alt.Chart(pd.DataFrame({"msg": [message]}))
            .mark_text(**mark_options)
            .encode(text="msg:N")
        )

    if not scanresult or not isinstance(scanresult, dict):
        return message_chart("⚠️ No scan result")

    file_security_info = scanresult.get("file_security_info")
    if not isinstance(file_security_info, dict) or not file_security_info:
        return message_chart("⚠️ No file security info found")

    # --- Count every 'validation' across all files ---
    counter = Counter()
    for file_info in file_security_info.values():
        if not isinstance(file_info, dict):
            continue
        sast_result = file_info.get("sast_result")
        if not isinstance(sast_result, dict):
            continue
        for finding in sast_result.values():
            if not isinstance(finding, dict):
                continue
            validation = finding.get("validation")
            if validation and isinstance(validation, str):
                counter[validation] += 1

    if not counter:
        return message_chart("✅ No security weaknesses found.", size=20)

    # --- Top 50 + formatting ---
    # most_common() returns entries by descending count, with ties kept in
    # first-seen order. Every counted value is >= 1, so no zero-count filter
    # is needed.
    df = pd.DataFrame(counter.most_common(50), columns=["construct", "count"])
    df["construct"] = df["construct"].str.slice(0, 40)
    df["is_top5"] = df.index < 5

    n_constructs = len(df)

    # --- Dynamic sizing ---
    if n_constructs == 1:
        # Single construct -> one large bar in a wider, shorter chart.
        bar_options = {"size": 160}
        chart_height = 280
        chart_width = 680
    else:
        # Multiple constructs -> default bar size, height grows with bar count.
        bar_options = {}
        chart_height = max(380, n_constructs * 22)
        chart_width = 550

    # --- Bar chart (the two layouts differ only in the bar size) ---
    chart = (
        alt.Chart(df)
        .mark_bar(**bar_options)
        .encode(
            y=alt.Y(
                "construct:N", sort="-x", title=None, axis=alt.Axis(labelLimit=350)
            ),
            x=alt.X(
                "count:Q",
                title="Number of Occurrences",
                scale=alt.Scale(type="sqrt"),
            ),
            color=alt.Color(
                "count:Q",
                scale=alt.Scale(scheme="reds"),
                legend=alt.Legend(title="Count"),
            ),
            stroke=alt.condition(
                alt.datum.is_top5, alt.value("black"), alt.value(None)
            ),
            strokeWidth=alt.condition(
                alt.datum.is_top5, alt.value(2.5), alt.value(0)
            ),
            tooltip=["construct:N", "count:Q"],
        )
    )

    # --- Labels on bars ---
    text = (
        alt.Chart(df)
        .mark_text(align="left", dx=5, fontSize=11, color="black")
        .encode(y=alt.Y("construct:N", sort="-x"), x="count:Q", text="count:Q")
    )

    # --- Final chart ---
    final_chart = (
        (chart + text)
        .properties(
            title=alt.TitleParams(
                text="Top Security Weaknesses (by Validation)",
                anchor="start",
                fontSize=15,
            ),
            width=chart_width,
            height=chart_height,
            padding={"left": 10, "right": 35, "top": 15, "bottom": 10},
        )
        .configure_view(stroke=None)
        .configure_axis(grid=False, labelFontSize=12, titleFontSize=13)
    )

    return final_chart

758 

759 

def sast_files_overview(scanresult):
    """Create a bar chart of security issues per file.

    Every file with a non-empty "sast_result" dict contributes one bar whose
    length is the number of entries in that dict. File names that occur more
    than once are prefixed with their parent folder, when one is known.

    Args:
        scanresult (dict): Scan result containing "file_security_info"
            with per-file SAST findings and metadata.

    Returns:
        altair.Chart: Bar chart of files with security issues, or a
            fallback text chart if no valid data is available.
    """

    def notice(message, **mark_options):
        # Single-cell text chart used for every "nothing to show" outcome.
        frame = pd.DataFrame({"msg": [message]})
        return alt.Chart(frame).mark_text(**mark_options).encode(text="msg:N")

    if not isinstance(scanresult, dict) or not scanresult:
        return notice("⚠️ No scan result", size=20)

    file_security_info = scanresult.get("file_security_info")
    if not isinstance(file_security_info, dict) or not file_security_info:
        return notice("⚠️ No file security info found")

    rows = []
    for entry in file_security_info.values():
        if not isinstance(entry, dict):
            continue

        findings = entry.get("sast_result")
        if not isinstance(findings, dict) or not findings:
            continue

        location = entry.get("FilePath") or entry.get("file_name", "")
        location_path = Path(str(location))

        # Only a path with more than one component has a usable parent.
        folder = None
        if len(location_path.parts) > 1:
            folder = location_path.parent.name
        if folder in ("", ".", "/"):
            folder = None

        rows.append(
            {
                "base_name": entry.get("FileName") or location_path.name or "Unknown",
                "parent_folder": folder,
                "full_path": str(location),
                "issues": len(findings),
                "complexity": entry.get("Complexity_Score", 0),
            }
        )

    if not rows:
        return notice("✅ No security weaknesses identified.", size=14)

    df = pd.DataFrame(rows)

    # --- Smart labeling for duplicates ---
    occurrences = df.groupby("base_name")["base_name"].transform("count")
    display_names = []
    for folder, name, seen in zip(df["parent_folder"], df["base_name"], occurrences):
        if seen > 1 and folder:
            display_names.append(f"{folder}/{name}")
        else:
            display_names.append(name)
    df["display_name"] = display_names

    # --- Sort by issue count ---
    df = df.sort_values("issues", ascending=False).reset_index(drop=True)

    # --- Chart ---
    y_encoding = alt.Y(
        "display_name:N",
        sort="-x",
        title=None,
        axis=alt.Axis(labelLimit=420, labelFontSize=12),
    )
    x_encoding = alt.X(
        "issues:Q", title="Number of Security Issues", axis=alt.Axis(tickMinStep=1)
    )
    colour_encoding = alt.Color(
        "issues:Q",
        scale=alt.Scale(scheme="orangered"),
        legend=alt.Legend(title="Issues"),
    )
    tooltips = [
        alt.Tooltip("display_name:N", title="File"),
        alt.Tooltip("issues:Q", title="Security Issues"),
        alt.Tooltip("complexity:Q", title="Complexity Score"),
        alt.Tooltip("full_path:N", title="Full Path"),
    ]
    base_chart = alt.Chart(df).encode(
        y=y_encoding, x=x_encoding, color=colour_encoding, tooltip=tooltips
    )

    bars = base_chart.mark_bar(cornerRadiusEnd=6, size=22)
    labels = base_chart.mark_text(
        align="left", baseline="middle", dx=8, fontSize=12, fontWeight="bold"
    ).encode(text="issues:Q")

    heading = alt.TitleParams(
        text=f"Files with Security Issues — {scanresult.get('package_name', 'Unknown Package')}",
        subtitle=f"Total files with findings: {len(df)}",
        anchor="start",
        fontSize=16,
        subtitleFontSize=12,
    )

    layered = (bars + labels).properties(
        title=heading,
        width=720,
        height=max(340, len(df) * 28),
    )
    layered = layered.configure_view(stroke=None)
    return layered.configure_axis(
        grid=True, gridColor="#f0f0f0", labelFontSize=12, titleFontSize=13
    )

890 

891 

def weaknesses_radial_overview(scanresult):
    """Create a radial (polar area) chart of 'validation' occurrences.

    Counts every non-empty string "validation" value found in the
    "sast_result" findings of all files and draws the 50 most frequent as
    slices. A slice's angular width is its share of the displayed total and
    its radius is scaled by its count. Labels are truncated to 40 characters.

    Args:
        scanresult (dict): Scan result containing "file_security_info"
            with SAST validation findings per file.

    Returns:
        altair.Chart: Radial chart, or a fallback text chart if there is
            nothing to show.
    """

    def notice(message, **mark_options):
        # Single-cell text chart used for every "nothing to show" outcome.
        frame = pd.DataFrame({"msg": [message]})
        return alt.Chart(frame).mark_text(**mark_options).encode(text="msg:N")

    nothing_found = "✅ No security weaknesses found. No radial chart created."

    # --- Input validation ---
    if not scanresult or not isinstance(scanresult, dict):
        return notice("⚠️ No scan result")

    file_security_info = scanresult.get("file_security_info")
    if not isinstance(file_security_info, dict) or not file_security_info:
        return notice("⚠️ No file security info found")

    # --- Count every 'validation' across all files ---
    tally = Counter()
    for entry in file_security_info.values():
        if not isinstance(entry, dict):
            continue
        findings = entry.get("sast_result")
        if not isinstance(findings, dict):
            continue
        for finding in findings.values():
            if not isinstance(finding, dict):
                continue
            label = finding.get("validation")
            if label and isinstance(label, str):
                tally[label] += 1

    if not tally:
        return notice(nothing_found, size=14)

    # --- Build DataFrame ---
    df = pd.DataFrame(list(tally.items()), columns=["construct", "count"])
    df = df[df["count"] > 0]
    if df.empty:
        return notice(nothing_found, size=14)

    # --- Top 50 + formatting ---
    df = df.sort_values("count", ascending=False).head(50).reset_index(drop=True)
    df["construct"] = df["construct"].str.slice(0, 40)
    df["legend_label"] = df["construct"] + " (" + df["count"].astype(str) + ")"

    # --- Compute fractions and angles for polar area chart ---
    df["fraction"] = df["count"] / df["count"].sum()

    if len(df) != 1:
        running_total = df["fraction"].cumsum()
        df["theta0"] = running_total - df["fraction"]
        df["theta1"] = df["fraction"].cumsum()
        inner_radius = 20
        radius_scale = alt.Scale(type="sqrt", zero=True)
    else:
        # A single construct spans the whole circle, with a larger hole.
        df["theta0"] = 0
        df["theta1"] = 1
        inner_radius = 120
        radius_scale = alt.Scale(
            type="sqrt", zero=True, domain=[0, df["count"].max() * 1.2]
        )

    # --- Radial chart ---
    slice_colour = alt.Color(
        "legend_label:N",
        scale=alt.Scale(scheme="category20"),
        legend=alt.Legend(title="Weaknesses (Count)"),
    )
    radial = alt.Chart(df).mark_arc(innerRadius=inner_radius)
    radial = radial.encode(
        theta=alt.Theta("theta1:Q", stack=None, title=None),
        theta2="theta0:Q",
        radius=alt.Radius("count:Q", scale=radius_scale),
        color=slice_colour,
        tooltip=["construct:N", "count:Q"],
    )
    return radial.properties(
        title="Overview of Security Weaknesses", width=600, height=600
    )