Coverage for src / codeaudit / altairplots.py: 7%
229 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-09 09:33 +0200
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-09 09:33 +0200
1"""
2License GPLv3 or higher.
4(C) 2025 - 2026 Created by Maikel Mardjan - https://nocomplexity.com/
6This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
8This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
10You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.
12Altair Plotting functions for Python Code Audit (aka codeaudit)
13"""
15from collections import Counter
16from pathlib import Path
18import altair as alt
19import pandas as pd
def module_count_barchart(scanresult):
    """Create a bar chart showing module counts by category.

    Generates an Altair bar chart comparing the number of Python standard
    library modules with the number of third-party modules found in the
    provided scan result.

    Args:
        scanresult (dict): Scan result data containing a "module_overview"
            key with "core_modules" and "imported_modules" entries.

    Returns:
        altair.Chart | str: An Altair bar chart visualizing module counts.
            Returns a warning message string if the scan result is missing,
            is not a dict, or has no usable "module_overview" entry.
    """
    if not scanresult or not isinstance(scanresult, dict):
        return "⚠️ No scan result available.\n\nPlease run a scan first."

    # A scan result without a module overview is invalid input too: report
    # it as a message instead of letting a KeyError escape to the caller.
    data = scanresult.get("module_overview")
    if not isinstance(data, dict):
        return "⚠️ No module overview found in scan result."

    # Missing or empty module collections simply count as zero modules.
    core_count = len(data.get("core_modules") or ())
    imported_count = len(data.get("imported_modules") or ())

    counts_df = pd.DataFrame(
        {
            "Category": ["Python Standard Libraries", "Third-party modules"],
            "Count": [core_count, imported_count],
        }
    )

    bar_chart = (
        alt.Chart(counts_df)
        .mark_bar(size=60, cornerRadius=8)
        .encode(
            x=alt.X("Category:N", title=None, axis=alt.Axis(labelFontSize=12)),
            y=alt.Y("Count:Q", title="Number of Modules"),
            # Explicit domain/range keeps each category on a fixed colour.
            color=alt.Color(
                "Category:N",
                scale=alt.Scale(
                    domain=["Python Standard Libraries", "Third-party modules"],
                    range=["#4C78A8", "#F58518"],
                ),
            ),
            tooltip=["Category", "Count"],
        )
        .properties(title="Package Modules Overview", width=400, height=300)
        .configure_title(fontSize=16, anchor="start")
    )

    return bar_chart
def module_distribution_view(scanresult):
    """Create a donut chart showing module distribution.

    Args:
        scanresult (dict): Scan result containing "module_overview" with
            "core_modules" and "imported_modules".

    Returns:
        altair.Chart | str: Donut chart of module distribution, or a warning
            message if the scan result is missing, is not a dict, or has no
            usable "module_overview" entry.
    """
    if not scanresult or not isinstance(scanresult, dict):
        return "⚠️ No scan result available.\n\nPlease run a scan first."

    # Treat a scan result without a module overview as invalid input and
    # answer with a message rather than raising KeyError.
    data = scanresult.get("module_overview")
    if not isinstance(data, dict):
        return "⚠️ No module overview found in scan result."

    # Missing or empty module collections simply count as zero modules.
    core_count = len(data.get("core_modules") or ())
    imported_count = len(data.get("imported_modules") or ())

    pie_df = pd.DataFrame(
        {
            "Category": ["Python Standard Library modules", "Imported Libraries"],
            "Count": [core_count, imported_count],
            # "Angle" duplicates "Count"; no encoding below reads it.
            "Angle": [core_count, imported_count],
        }
    )

    pie_chart = (
        alt.Chart(pie_df)
        .mark_arc(innerRadius=80, outerRadius=140)
        .encode(
            theta=alt.Theta(field="Count", type="quantitative"),
            color=alt.Color(
                "Category:N",
                scale=alt.Scale(range=["#4C78A8", "#F58518"]),
                legend=alt.Legend(title="Category"),
            ),
            tooltip=["Category", "Count"],
        )
        .properties(title="Module Composition", width=380, height=380)
    )

    # Overlay the raw counts (not percentages) as bold white text.
    # NOTE(review): the text layer has no theta/radius encoding, so both
    # labels presumably render on top of each other at the centre - confirm
    # visually. The calculated "total" field is not used by any encoding here.
    text = (
        alt.Chart(pie_df)
        .mark_text(size=16, fontWeight="bold")
        .encode(text=alt.Text("Count:Q"), color=alt.value("white"))
        .transform_calculate(total="datum.Count")
    )

    donut = (pie_chart + text).configure_title(fontSize=16)

    return donut
def make_chart(y_field, df):
    """Build one bar chart of ``y_field`` per file.

    Bars whose value lies strictly above the median of ``y_field`` are drawn
    red, all other bars grey. The caller's DataFrame is left untouched.
    """
    # The median of the metric is the cut-off between "red" and "grey".
    cutoff = df[y_field].median()

    def _shade(value):
        if value > cutoff:
            return "red"
        return "grey"

    # Work on a copy so the helper column never leaks into the input frame.
    shaded = df.copy()
    shaded["color"] = shaded[y_field].apply(_shade)

    bar_colors = alt.Color(
        "color:N",
        scale=alt.Scale(domain=["red", "grey"], range=["#d62728", "#7f7f7f"]),
        legend=None,
    )
    bars = alt.Chart(shaded).mark_bar()
    bars = bars.encode(
        x=alt.X("FileName:N", sort=None, title="File Name"),
        y=alt.Y(f"{y_field}:Q", title=y_field),
        color=bar_colors,
        tooltip=["FileName", y_field],
    )
    return bars.properties(width=400, height=400, title=y_field)
def multi_bar_chart(df):
    """Lay out one bar chart per metric in a grid that is two charts wide."""
    metric_names = (
        "Number_Of_Lines",
        "AST_Nodes",
        "External-Modules",
        "Functions",
        "Comment_Lines",
        "Complexity_Score",
    )
    charts_per_row = 2

    grid_rows = []
    for start in range(0, len(metric_names), charts_per_row):
        # Each row holds the charts for the next pair of metrics.
        row_metrics = metric_names[start : start + charts_per_row]
        row_charts = [make_chart(name, df) for name in row_metrics]
        grid_rows.append(alt.hconcat(*row_charts))

    # Stack the horizontal rows on top of each other.
    return alt.vconcat(*grid_rows)
def issue_plot(input_dict):
    """
    Create a radial (polar area) chart using Altair.

    Each entry becomes one arc: its angular width is the entry's share of
    the total count and its radius grows with the square root of the count.

    Parameters
    ----------
    input_dict : dict
        Dictionary where keys are 'construct' and values are 'count'.

    Returns
    -------
    alt.Chart
        Altair chart object.
    """
    # The frame is built with fixed column names, so no further column
    # validation is needed (the former check could never fail).
    df = pd.DataFrame(list(input_dict.items()), columns=["construct", "count"])

    # Combined "name (count)" label for the legend; cast the key to str so
    # that non-string keys do not break the concatenation.
    df["legend_label"] = (
        df["construct"].astype(str) + " (" + df["count"].astype(str) + ")"
    )

    # Fraction of the total decides the angular width of each slice.
    # A zero total would turn every angle into NaN, so fall back to 0.
    total = df["count"].sum()
    df["fraction"] = df["count"] / total if total else 0.0

    # Cumulative fractions give the start and end angle of each slice.
    cumulative = df["fraction"].cumsum()
    df["theta0"] = cumulative - df["fraction"]
    df["theta1"] = cumulative

    chart = (
        alt.Chart(df)
        .mark_arc(innerRadius=20)
        .encode(
            theta=alt.Theta("theta1:Q", stack=None, title=None),
            theta2="theta0:Q",  # start angle of the slice
            radius=alt.Radius("count:Q", scale=alt.Scale(type="sqrt")),
            color=alt.Color(
                "legend_label:N",
                scale=alt.Scale(scheme="category20"),
                legend=alt.Legend(title="Weaknesses (Count)"),
            ),
            tooltip=["construct", "count"],
        )
        .properties(title="Overview of Security Weaknesses", width=600, height=600)
    )

    return chart
def issue_overview(df):
    """
    Build a donut chart of security weaknesses from a DataFrame.

    The frame must provide 'call' and 'count' columns; the legend shows each
    call together with its count. The input frame is not modified.
    """
    # Legend text of the form "call (count)", added on a copy of the input.
    labelled = df.copy()
    legend_text = labelled["call"] + " (" + labelled["count"].astype(str) + ")"
    labelled["label"] = legend_text

    arcs = alt.Chart(labelled).mark_arc(innerRadius=50, outerRadius=120)
    arcs = arcs.encode(
        theta=alt.Theta("count:Q", title="Count"),
        color=alt.Color("label:N", title="Calls (Count)"),
        tooltip=["call", "count"],
    )
    return arcs.properties(
        title="Overview of Security Weaknesses", width=600, height=600
    )
def complexity_heatmap(scanresult):
    """Create an interactive heatmap of file complexity and size.

    Highlights high-risk files based on complexity and lines of code,
    with dynamic filtering and threshold controls. Only the union of the
    top 30 files by complexity and the top 30 files by lines is shown.

    Args:
        scanresult (dict): Scan result containing "file_security_info"
            with file-level complexity and size metrics.

    Returns:
        altair.Chart | str: Interactive heatmap chart, or a warning
            message if the scan result is missing, is not a dict, or
            contains no file data.
    """
    if not scanresult or not isinstance(scanresult, dict):
        return "⚠️ No scan result available.\n\nPlease run a scan first."

    # Without per-file data there is nothing to plot; answer with a message
    # instead of failing later on missing keys or missing columns.
    data = scanresult.get("file_security_info")
    if not isinstance(data, dict) or not data:
        return "⚠️ No file data found in scan result."

    df = pd.DataFrame(
        [
            {
                "File": f["file_name"],
                "Lines": f["Number_Of_Lines"],
                "Complexity": f["Complexity_Score"],
            }
            for f in data.values()
        ]
    )

    total_files = len(df)  # Total number of files (for subtitle)
    # Risk score: complexity scaled by 80 plus lines scaled by 2000.
    df["RiskScore"] = (df["Complexity"] / 80) + (df["Lines"] / 2000)
    top_complexity = df.nlargest(30, "Complexity")  # Top 30 by complexity
    top_lines = df.nlargest(30, "Lines")  # Top 30 by lines

    # --- Combine + deduplicate ---
    df_filtered = (
        pd.concat([top_complexity, top_lines])
        .drop_duplicates(subset="File")
        .sort_values("RiskScore", ascending=False)
        .reset_index(drop=True)
    )

    # --- Melt AFTER filtering: one row per (file, metric) cell ---
    df_melted = df_filtered.melt(
        id_vars=["File", "RiskScore"],
        value_vars=["Lines", "Complexity"],
        var_name="Metric",
        value_name="Value",
    )

    # Dynamic slider ranges derived from the data that is actually shown.
    max_complexity = int(df_filtered["Complexity"].max())
    max_lines = int(df_filtered["Lines"].max())

    complexity_slider = alt.param(
        name="ComplexityThreshold",
        value=int(max_complexity * 0.7),
        bind=alt.binding_range(
            min=0, max=max_complexity, step=max(1, max_complexity // 100)
        ),
    )

    lines_slider = alt.param(
        name="LinesThreshold",
        value=int(max_lines * 0.7),
        bind=alt.binding_range(min=0, max=max_lines, step=max(10, max_lines // 100)),
    )

    show_highrisk_only = alt.param(
        name="ShowHighRiskOnly",
        value=False,
        bind=alt.binding_checkbox(name="Show only high-risk files"),
    )

    # --- High-risk condition: a cell exceeds the slider of its own metric ---
    highrisk_expr = (
        (alt.datum.Metric == "Complexity") & (alt.datum.Value > complexity_slider)
    ) | ((alt.datum.Metric == "Lines") & (alt.datum.Value > lines_slider))

    # --- Filter expression: keep everything unless the checkbox is ticked ---
    filter_expr = (~show_highrisk_only) | highrisk_expr

    base = (
        alt.Chart(df_melted)
        .add_params(complexity_slider, lines_slider, show_highrisk_only)
        .transform_filter(filter_expr)
    )

    # Colour logic: fixed red for high-risk cells, value gradient otherwise.
    color_scale = alt.condition(
        highrisk_expr,
        alt.value("#ff6b6b"),
        alt.Color(
            "Value:Q",
            scale=alt.Scale(scheme="yellowgreenblue"),
            legend=alt.Legend(title="Value"),
        ),
    )

    # Heatmap
    heatmap = (
        base.mark_rect()
        .encode(
            x=alt.X("Metric:N", title="Metric"),
            y=alt.Y(
                "File:N",
                sort=alt.SortField(field="RiskScore", order="descending"),
                title=f"Filtered Files ({len(df_filtered)})",
            ),
            color=color_scale,
            tooltip=[
                "File",
                "Metric",
                "Value",
                alt.Tooltip("RiskScore:Q", format=".2f"),
            ],
        )
        .properties(
            width=500,
            height=450,
            title=alt.TitleParams(
                text="🔥 Code Risk Heatmap",
                subtitle=["Risk heatmap", f"Based on {total_files} files"],
            ),
        )
    )

    # Text overlay with the numeric value of each cell.
    text = base.mark_text(size=11).encode(
        x="Metric:N",
        y=alt.Y("File:N", sort=alt.SortField(field="RiskScore", order="descending")),
        text=alt.Text("Value:Q", format=".0f"),
        color=alt.condition(highrisk_expr, alt.value("white"), alt.value("black")),
    )

    return heatmap + text
def lines_of_code_overview(scanresult, width=800, height=400):
    """Create a bar chart of top files by lines of code.

    Displays the top 30 files ranked by lines of code, with disambiguated
    filenames and tooltips showing full path and complexity.

    Args:
        scanresult (dict): Scan result containing "file_security_info".
        width (int, optional): Chart width in pixels. Defaults to 800.
        height (int, optional): Chart height in pixels. Defaults to 400.

    Returns:
        altair.Chart | str: Bar chart visualization, or a warning
            message if the scan result is not a dict or no valid data
            is available.
    """
    # Same guard as the other overview charts: a missing scan result yields
    # a message rather than an AttributeError on ``.get``.
    if not isinstance(scanresult, dict):
        return "⚠️ No scan result available.\n\nPlease run a scan first."

    # --- 1. Data Extraction ---
    files_dict = scanresult.get("file_security_info", {})
    if not files_dict:
        return "⚠️ No file data found."

    data = []
    for f in files_dict.values():
        full_path = str(f.get("FilePath", f.get("file_name", "")))
        p = Path(full_path)
        data.append(
            {
                "full_path": full_path,
                "base_name": p.name,
                "parent_folder": p.parent.name if len(p.parts) > 1 else "",
                "lines": f.get("Number_Of_Lines", 0),
                "complexity": f.get("Complexity_Score", 0),
            }
        )

    df = pd.DataFrame(data)

    if df.empty:
        return "⚠️ No file info available."

    total_files = len(df)

    # --- 2. Top 30 filter based on lines ---
    df = (
        df.nlargest(30, "lines")
        .sort_values("lines", ascending=False)
        .reset_index(drop=True)
    )

    # --- 3. Smart labeling: prefix the parent folder only when the bare
    # file name occurs more than once among the displayed files ---
    counts = df.groupby("base_name")["base_name"].transform("count")
    df["display_name"] = [
        f"{parent}/{name}" if count > 1 and parent else name
        for name, parent, count in zip(df["base_name"], df["parent_folder"], counts)
    ]

    # --- 4. Color scale ---
    color_scale = alt.Scale(scheme="reds", domain=[0, df["lines"].max()])

    # --- 5. Chart ---
    chart = alt.Chart(df).encode(
        y=alt.Y(
            "display_name:N",
            sort=alt.EncodingSortField(field="lines", order="descending"),
            title=f"Top Files ({len(df)})",
        ),
        x=alt.X("lines:Q", title="Lines of Code"),
        tooltip=[
            alt.Tooltip("full_path:N", title="Full Path"),
            alt.Tooltip("lines:Q", title="Lines"),
            alt.Tooltip("complexity:Q", title="Complexity"),
        ],
    )

    bars = chart.mark_bar().encode(
        color=alt.Color("lines:Q", scale=color_scale, title="LoC")
    )

    # Numeric label placed just right of each bar end.
    text = chart.mark_text(align="left", baseline="middle", dx=5).encode(
        text=alt.Text("lines:Q", format=",")
    )

    return (
        (bars + text)
        .properties(
            width=width,
            height=height,
            title=alt.TitleParams(
                text="📊 Lines of Code per File",
                subtitle=[f"Top {len(df)} of {total_files} files"],
            ),
        )
        .configure_view(strokeWidth=0)
    )
def ast_nodes_overview(scanresult, width=800, height=400):
    """Create a bar chart of top files by AST node count.

    Displays the top 30 files ranked by AST nodes, with disambiguated
    filenames, a derived density metric (AST nodes per line), and tooltips
    showing file details. A dashed rule marks the 75th percentile of the
    displayed AST node counts, and files with warnings are drawn in crimson.

    Args:
        scanresult (dict): Scan result containing "file_security_info".
        width (int, optional): Chart width in pixels. Defaults to 800.
        height (int, optional): Chart height in pixels. Defaults to 400.

    Returns:
        altair.Chart | str: Bar chart visualization, or a warning
            message if no valid data is available.
    """
    if not scanresult or not isinstance(scanresult, dict):
        return "⚠️ No scan result available.\n\nPlease run a scan first."

    files = scanresult.get("file_security_info", {})
    if not files:
        return "⚠️ No file data found in scan result."

    # --- Extract data ---
    data = []
    for f in files.values():
        full_path = str(f.get("FilePath", f.get("file_name", "unknown")))
        p = Path(full_path)
        data.append(
            {
                "full_path": full_path,
                "base_name": p.name,
                "parent_folder": p.parent.name if len(p.parts) > 1 else "",
                "ast_nodes": f.get("AST_Nodes", 0),
                "lines": f.get("Number_Of_Lines", 1),
                "complexity": f.get("Complexity_Score", 0),
                "warnings": f.get("warnings", 0),
            }
        )

    df = pd.DataFrame(data)
    if df.empty:
        return "⚠️ No file info available."

    total_files = len(df)

    # --- Top 30 filter by AST nodes ---
    df = (
        df.nlargest(30, "ast_nodes")
        .sort_values("ast_nodes", ascending=False)
        .reset_index(drop=True)
    )

    # --- Derived metric ---
    # The default above only covers a missing line count; an explicit 0
    # (e.g. an empty file) would still divide by zero. Clamp the denominator
    # to 1 while keeping the real line count for the tooltip.
    df["ast_density"] = df["ast_nodes"] / df["lines"].clip(lower=1)

    # --- Smart Y-axis labels: prefix the parent folder only when the bare
    # file name occurs more than once among the displayed files ---
    counts = df.groupby("base_name")["base_name"].transform("count")
    df["display_name"] = [
        f"{parent}/{name}" if count > 1 and parent else name
        for name, parent, count in zip(df["base_name"], df["parent_folder"], counts)
    ]

    color_scale = alt.Scale(scheme="reds", domain=[0, df["ast_nodes"].max()])

    # Dashed vertical rule at the 75th percentile of the displayed files.
    threshold = df["ast_nodes"].quantile(0.75)
    rule = (
        alt.Chart(pd.DataFrame({"threshold": [threshold]}))
        .mark_rule(color="black", strokeDash=[6, 4])
        .encode(x="threshold:Q")
    )

    chart = alt.Chart(df).encode(
        y=alt.Y(
            "display_name:N",
            sort=alt.EncodingSortField(field="ast_nodes", order="descending"),
            title=f"Top Files ({len(df)})",
        ),
        x=alt.X("ast_nodes:Q", title="AST Nodes"),
        tooltip=[
            alt.Tooltip("full_path:N", title="Full Path"),
            alt.Tooltip("ast_nodes:Q", title="AST Nodes"),
            alt.Tooltip("lines:Q", title="Lines"),
            alt.Tooltip("complexity:Q", title="Complexity"),
            alt.Tooltip("ast_density:Q", title="AST Density", format=".2f"),
            alt.Tooltip("warnings:Q", title="Warnings"),
        ],
    )

    # Files with at least one warning are crimson; others follow the scale.
    bars = chart.mark_bar().encode(
        color=alt.condition(
            "datum.warnings > 0",
            alt.value("crimson"),
            alt.Color("ast_nodes:Q", scale=color_scale, title="AST Nodes"),
        )
    )

    text = chart.mark_text(align="left", baseline="middle", dx=5, color="black").encode(
        text=alt.Text("ast_nodes:Q", format=",")
    )

    return (
        (bars + text + rule)
        .properties(
            width=width,
            height=height,
            title=alt.TitleParams(
                text="📊 AST Nodes per File",
                subtitle=[f"Top {len(df)} of {total_files} files"],
            ),
        )
        .configure_view(strokeWidth=0)
    )
def weaknesses_overview(scanresult):
    """Create a bar chart of the most common security weaknesses.

    Counts every "validation" value found in the SAST findings of all files
    in the scan result and plots the 50 most frequent ones as horizontal
    bars; the five most frequent bars get a black outline.

    Args:
        scanresult (dict): Scan result containing "file_security_info"
            with SAST validation findings per file.

    Returns:
        altair.Chart: Bar chart of top security weaknesses, or a fallback
            text chart if no data is available.
    """

    def _notice(message, **mark_options):
        # One-cell text chart used for every "nothing to show" outcome.
        frame = pd.DataFrame({"msg": [message]})
        return alt.Chart(frame).mark_text(**mark_options).encode(text="msg:N")

    if not scanresult or not isinstance(scanresult, dict):
        return _notice("⚠️ No scan result")

    file_security_info = scanresult.get("file_security_info")
    if not isinstance(file_security_info, dict) or len(file_security_info) == 0:
        return _notice("⚠️ No file security info found")

    # --- Tally each 'validation' string over all findings of all files ---
    tally = Counter()
    for info in file_security_info.values():
        findings = info.get("sast_result") if isinstance(info, dict) else None
        if not isinstance(findings, dict):
            continue
        for entry in findings.values():
            if not isinstance(entry, dict):
                continue
            label = entry.get("validation")
            if label and isinstance(label, str):
                tally[label] += 1

    if not tally:
        return _notice("✅ No security weaknesses found.", size=20)

    # --- Tabulate and keep strictly positive counts only ---
    df = pd.DataFrame(list(tally.items()), columns=["construct", "count"])
    df = df[df["count"] > 0]
    if df.empty:
        return _notice("⚠️ No security weaknesses found", size=20)

    # --- Top 50, labels cut to 40 characters, first five rows flagged ---
    df = df.sort_values("count", ascending=False).head(50).reset_index(drop=True)
    df["construct"] = df["construct"].str.slice(0, 40)
    df["is_top5"] = df.index < 5

    n_constructs = len(df)
    single = n_constructs == 1

    # --- Sizing: one construct gets a single wide bar in a compact chart,
    # several constructs get default bars and a height that grows per bar ---
    if single:
        bar_options = {"size": 160}
        chart_width, chart_height = 680, 280
    else:
        bar_options = {}
        chart_width, chart_height = 550, max(380, n_constructs * 22)

    bars = (
        alt.Chart(df)
        .mark_bar(**bar_options)
        .encode(
            y=alt.Y(
                "construct:N", sort="-x", title=None, axis=alt.Axis(labelLimit=350)
            ),
            x=alt.X(
                "count:Q",
                title="Number of Occurrences",
                scale=alt.Scale(type="sqrt"),
            ),
            color=alt.Color(
                "count:Q",
                scale=alt.Scale(scheme="reds"),
                legend=alt.Legend(title="Count"),
            ),
            # Black outline for the five most frequent weaknesses.
            stroke=alt.condition(
                alt.datum.is_top5, alt.value("black"), alt.value(None)
            ),
            strokeWidth=alt.condition(
                alt.datum.is_top5, alt.value(2.5), alt.value(0)
            ),
            tooltip=["construct:N", "count:Q"],
        )
    )

    # --- Count labels next to the bar ends ---
    count_labels = (
        alt.Chart(df)
        .mark_text(align="left", dx=5, fontSize=11, color="black")
        .encode(y=alt.Y("construct:N", sort="-x"), x="count:Q", text="count:Q")
    )

    # --- Assemble layers and apply the chart-level configuration ---
    layered = (bars + count_labels).properties(
        title=alt.TitleParams(
            text="Top Security Weaknesses (by Validation)",
            anchor="start",
            fontSize=15,
        ),
        width=chart_width,
        height=chart_height,
        padding={"left": 10, "right": 35, "top": 15, "bottom": 10},
    )
    layered = layered.configure_view(stroke=None)
    return layered.configure_axis(grid=False, labelFontSize=12, titleFontSize=13)
def sast_files_overview(scanresult):
    """Create a bar chart of security issues per file.

    Every file with a non-empty "sast_result" contributes one bar whose
    length is the number of findings. File names that occur more than once
    are prefixed with their parent folder when one is known.

    Args:
        scanresult (dict): Scan result containing "file_security_info"
            with per-file SAST findings and metadata.

    Returns:
        altair.Chart: Bar chart of files with security issues, or a
            fallback text chart if no valid data is available.
    """

    def _notice(message, **mark_options):
        # One-cell text chart used for every "nothing to show" outcome.
        frame = pd.DataFrame({"msg": [message]})
        return alt.Chart(frame).mark_text(**mark_options).encode(text="msg:N")

    if not isinstance(scanresult, dict) or not scanresult:
        return _notice("⚠️ No scan result", size=20)

    file_security_info = scanresult.get("file_security_info")
    if not isinstance(file_security_info, dict) or not file_security_info:
        return _notice("⚠️ No file security info found")

    # --- One record per file that has at least one SAST finding ---
    rows = []
    for info in file_security_info.values():
        if not isinstance(info, dict):
            continue
        findings = info.get("sast_result")
        if not isinstance(findings, dict) or not findings:
            continue

        location = info.get("FilePath") or info.get("file_name", "")
        location_path = Path(str(location))

        file_label = info.get("FileName") or location_path.name or "Unknown"
        if len(location_path.parts) > 1:
            folder = location_path.parent.name
        else:
            folder = None
        # Folder names that carry no information are treated as absent.
        if folder in ("", ".", "/"):
            folder = None

        rows.append(
            {
                "base_name": file_label,
                "parent_folder": folder,
                "full_path": str(location),
                "issues": len(findings),
                "complexity": info.get("Complexity_Score", 0),
            }
        )

    if not rows:
        return _notice("✅ No security weaknesses identified.", size=14)

    df = pd.DataFrame(rows)

    # --- Disambiguate duplicate file names with their parent folder ---
    occurrences = df.groupby("base_name")["base_name"].transform("count")
    df["display_name"] = [
        f"{folder}/{name}" if seen > 1 and folder else name
        for name, folder, seen in zip(
            df["base_name"], df["parent_folder"], occurrences
        )
    ]

    # --- Most affected files first ---
    df = df.sort_values("issues", ascending=False).reset_index(drop=True)

    # --- Shared encodings for the bar layer and the label layer ---
    shared = alt.Chart(df).encode(
        y=alt.Y(
            "display_name:N",
            sort="-x",
            title=None,
            axis=alt.Axis(labelLimit=420, labelFontSize=12),
        ),
        x=alt.X(
            "issues:Q", title="Number of Security Issues", axis=alt.Axis(tickMinStep=1)
        ),
        color=alt.Color(
            "issues:Q",
            scale=alt.Scale(scheme="orangered"),
            legend=alt.Legend(title="Issues"),
        ),
        tooltip=[
            alt.Tooltip("display_name:N", title="File"),
            alt.Tooltip("issues:Q", title="Security Issues"),
            alt.Tooltip("complexity:Q", title="Complexity Score"),
            alt.Tooltip("full_path:N", title="Full Path"),
        ],
    )

    bar_layer = shared.mark_bar(cornerRadiusEnd=6, size=22)
    label_layer = shared.mark_text(
        align="left", baseline="middle", dx=8, fontSize=12, fontWeight="bold"
    ).encode(text="issues:Q")

    heading = alt.TitleParams(
        text=f"Files with Security Issues — {scanresult.get('package_name', 'Unknown Package')}",
        subtitle=f"Total files with findings: {len(df)}",
        anchor="start",
        fontSize=16,
        subtitleFontSize=12,
    )

    # Height grows with the number of files so bars never get squeezed.
    layered = (bar_layer + label_layer).properties(
        title=heading,
        width=720,
        height=max(340, len(df) * 28),
    )
    layered = layered.configure_view(stroke=None)
    return layered.configure_axis(
        grid=True, gridColor="#f0f0f0", labelFontSize=12, titleFontSize=13
    )
def weaknesses_radial_overview(scanresult):
    """
    Build a radial (polar area) chart of how often each 'validation' value
    occurs across all files of a full scan result.

    Only the 50 most frequent values are drawn. A text-only chart is returned
    when there is no scan result, no file information or no finding.
    """

    def _notice(message, **mark_options):
        # One-cell text chart used for every "nothing to show" outcome.
        frame = pd.DataFrame({"msg": [message]})
        return alt.Chart(frame).mark_text(**mark_options).encode(text="msg:N")

    nothing_found = "✅ No security weaknesses found. No radial chart created."

    # --- Input validation ---
    if not scanresult or not isinstance(scanresult, dict):
        return _notice("⚠️ No scan result")

    file_security_info = scanresult.get("file_security_info")
    if not isinstance(file_security_info, dict) or len(file_security_info) == 0:
        return _notice("⚠️ No file security info found")

    # --- Tally each 'validation' string over all findings of all files ---
    tally = Counter()
    for info in file_security_info.values():
        findings = info.get("sast_result") if isinstance(info, dict) else None
        if not isinstance(findings, dict):
            continue
        for entry in findings.values():
            if not isinstance(entry, dict):
                continue
            label = entry.get("validation")
            if label and isinstance(label, str):
                tally[label] += 1

    if not tally:
        return _notice(nothing_found, size=14)

    # --- Tabulate and keep strictly positive counts only ---
    df = pd.DataFrame(list(tally.items()), columns=["construct", "count"])
    df = df[df["count"] > 0]
    if df.empty:
        return _notice(nothing_found, size=14)

    # --- Top 50, labels cut to 40 characters, legend text "name (count)" ---
    df = df.sort_values("count", ascending=False).head(50).reset_index(drop=True)
    df["construct"] = df["construct"].str.slice(0, 40)
    df["legend_label"] = df["construct"] + " (" + df["count"].astype(str) + ")"

    # --- Share of the total decides the angular width of each slice ---
    df["fraction"] = df["count"] / df["count"].sum()

    if len(df) != 1:
        # Several constructs: consecutive slices from cumulative fractions.
        running = df["fraction"].cumsum()
        df["theta0"] = running - df["fraction"]
        df["theta1"] = running
        inner_radius = 20
        radius_scale = alt.Scale(type="sqrt", zero=True)
    else:
        # A single construct fills the whole circle; use a wider hole and
        # leave 20% headroom on the radius scale.
        df["theta0"] = 0
        df["theta1"] = 1
        inner_radius = 120
        radius_scale = alt.Scale(
            type="sqrt", zero=True, domain=[0, df["count"].max() * 1.2]
        )

    # --- Radial chart ---
    arcs = alt.Chart(df).mark_arc(innerRadius=inner_radius)
    arcs = arcs.encode(
        theta=alt.Theta("theta1:Q", stack=None, title=None),
        theta2="theta0:Q",
        radius=alt.Radius("count:Q", scale=radius_scale),
        color=alt.Color(
            "legend_label:N",
            scale=alt.Scale(scheme="category20"),
            legend=alt.Legend(title="Weaknesses (Count)"),
        ),
        tooltip=["construct:N", "count:Q"],
    )
    return arcs.properties(
        title="Overview of Security Weaknesses", width=600, height=600
    )