44import random
55from itertools import chain
66from pathlib import Path
7- from typing import Any , TypedDict
7+ from typing import Any , Literal , TypedDict
88
99from jinja2 import Environment , FileSystemLoader , PackageLoader
1010
@@ -49,6 +49,35 @@ class DiffStatistics(TypedDict):
4949 merged_by_project : list [MergedProjectStats ]
5050
5151
52+ _FAILURE_STATUS_LABELS = {
53+ "new" : "❌ newly failing" ,
54+ "persistent" : "➖ persistent" ,
55+ "fixed" : "🎉 crashes fixed" ,
56+ }
57+
58+
59+ def _failure_status_label (status : str ) -> str :
60+ return _FAILURE_STATUS_LABELS .get (status , "—" )
61+
62+
63+ def _failure_descriptor (
64+ project : dict [str , Any ], direction : Literal ["new" , "fixed" ]
65+ ) -> str :
66+ """Describe the kind of failure for `direction` ('new' or 'fixed')."""
67+ status_key = "new_status" if direction == "new" else "old_status"
68+ status = project .get (status_key , "" )
69+ panic_messages_key = (
70+ "introduced_panic_messages" if direction == "new" else "fixed_panic_messages"
71+ )
72+ if project .get (panic_messages_key ):
73+ return "panic"
74+ if status == "timeout" :
75+ return "timeout"
76+ if status == "abnormal exit" :
77+ return "crash"
78+ return "failure"
79+
80+
5281class DiagnosticDiff :
5382 """Class for comparing diagnostic data between two JSON files."""
5483
@@ -289,16 +318,43 @@ def _compute_diffs(self) -> dict[str, Any]:
289318 old_failed , old_status = self ._is_project_failed (old_project )
290319 new_failed , new_status = self ._is_project_failed (new_project )
291320
321+ old_panics = list (old_project .get ("panic_messages" , []))
322+ new_panics = list (new_project .get ("panic_messages" , []))
323+
292324 if old_failed or new_failed :
325+ old_panic_set = set (old_panics )
326+ new_panic_set = set (new_panics )
327+ introduced_panics = sorted (new_panic_set - old_panic_set )
328+ fixed_panics = sorted (old_panic_set - new_panic_set )
329+ persistent_panics = sorted (new_panic_set & old_panic_set )
330+
331+ # `failure_status` celebrates or flags only *overall* state
332+ # transitions — going from passing to failing (or vice
333+ # versa). A project that stays failing but picks up a new
334+ # panic still counts as a regression ("new"). A project
335+ # that stays failing but loses a panic is still failing —
336+ # surface the partial improvement in the HTML report and
337+ # raw diff, but don't celebrate it in the PR title/table.
338+ if introduced_panics or (not old_failed and new_failed ):
339+ failure_status = "new"
340+ elif old_failed and not new_failed :
341+ failure_status = "fixed"
342+ else :
343+ failure_status = "persistent"
344+
293345 result ["failed_projects" ].append ({
294346 "project" : project_name ,
295347 "project_location" : new_project .get ("project_location" , "" ),
296348 "old_status" : old_status ,
297349 "new_status" : new_status ,
298350 "old_return_code" : old_project .get ("return_code" ),
299351 "new_return_code" : new_project .get ("return_code" ),
300- "old_panic_messages" : old_project .get ("panic_messages" , []),
301- "new_panic_messages" : new_project .get ("panic_messages" , []),
352+ "old_panic_messages" : old_panics ,
353+ "new_panic_messages" : new_panics ,
354+ "introduced_panic_messages" : introduced_panics ,
355+ "fixed_panic_messages" : fixed_panics ,
356+ "persistent_panic_messages" : persistent_panics ,
357+ "failure_status" : failure_status ,
302358 })
303359 # Skip detailed diff analysis for failed projects
304360 continue
@@ -433,20 +489,32 @@ def _compute_diffs(self) -> dict[str, Any]:
433489 )
434490 result ["modified_projects" ].append (entry )
435491
436- # Sort failed projects to prioritize abnormal exits over timeouts
492+ # Sort failed projects so PR reviewers see new regressions first,
493+ # then persistent failures, then anything that was fixed.
494+ failure_status_priority = {"new" : 3 , "persistent" : 2 , "fixed" : 1 }
495+
437496 def failed_project_sort_key (project ):
497+ status_priority = failure_status_priority .get (
498+ project .get ("failure_status" , "persistent" ), 0
499+ )
438500 old_abnormal = project ["old_status" ] == "abnormal exit"
439501 new_abnormal = project ["new_status" ] == "abnormal exit"
440502 old_timeout = project ["old_status" ] == "timeout"
441503 new_timeout = project ["new_status" ] == "timeout"
442504
443505 if old_abnormal or new_abnormal :
444- return (2 , project ["project" ]) # Abnormal exits first
445- if old_timeout or new_timeout :
446- return (1 , project ["project" ]) # Timeouts second
447- return (0 , project ["project" ]) # Other failures last
506+ exit_priority = 2 # Abnormal exits (incl. panics/stack overflows) first
507+ elif old_timeout or new_timeout :
508+ exit_priority = 1 # Timeouts second
509+ else :
510+ exit_priority = 0
511+
512+ # Negate so the natural sort puts the highest-priority entries
513+ # at the top without relying on `reverse=True` (which would also
514+ # reverse the project-name tiebreaker).
515+ return (- status_priority , - exit_priority , project ["project" ])
448516
449- result ["failed_projects" ].sort (key = failed_project_sort_key , reverse = True )
517+ result ["failed_projects" ].sort (key = failed_project_sort_key )
450518
451519 return result
452520
@@ -889,33 +957,41 @@ def introduced_project_failures(self) -> list[str]:
889957
890958 return introduced
891959
892- def has_new_panics (self ) -> bool :
893- """Check if any project has new panic messages that weren't present before."""
894- for project in self .diffs .get ("failed_projects" , []):
895- old_panics = project .get ("old_panic_messages" , [])
896- new_panics = project .get ("new_panic_messages" , [])
897- if set (new_panics ) - set (old_panics ):
898- return True
899- return False
960+ def has_new_failures (self ) -> bool :
961+ """Check if any project regressed from passing to failing."""
962+ return any (
963+ project .get ("failure_status" ) == "new"
964+ for project in self .diffs .get ("failed_projects" , [])
965+ )
900966
901- def has_new_timeouts (self ) -> bool :
902- """Check if any project newly timed out (was not a timeout before) ."""
967+ def has_fixed_failures (self ) -> bool :
968+ """Check if any project went from failing to passing ."""
903969 return any (
904- project [ "new_status" ] == "timeout" and project [ "old_status" ] != "timeout "
970+ project . get ( "failure_status" ) == "fixed "
905971 for project in self .diffs .get ("failed_projects" , [])
906972 )
907973
908974 def generate_comment_title (self ) -> str :
909- """Generate the PR comment title with status indicators for panics and timeouts ."""
975+ """Generate the PR comment title with status indicators for new failures ."""
910976 title = "## `ecosystem-analyzer` results"
911- new_panics = self .has_new_panics ()
912- new_timeouts = self .has_new_timeouts ()
913- if new_panics and not new_timeouts :
914- title += ": new panics detected ❌"
915- elif new_timeouts and not new_panics :
916- title += ": new timeouts detected ❌"
917- elif new_panics and new_timeouts :
918- title += ": new panics/timeouts detected ❌"
977+ new_projects = [
978+ project
979+ for project in self .diffs .get ("failed_projects" , [])
980+ if project .get ("failure_status" ) == "new"
981+ ]
982+ if new_projects :
983+ # If every regression is a timeout, say so — a blanket "new
984+ # crashes detected" would be misleading. Any panic or crash
985+ # in the mix falls back to "crashes" so the title stays short.
986+ if all (
987+ _failure_descriptor (project , "new" ) == "timeout"
988+ for project in new_projects
989+ ):
990+ title += ": new timeouts detected ❌"
991+ else :
992+ title += ": new crashes detected ❌"
993+ elif self .has_fixed_failures ():
994+ title += ": ecosystem failure fixed 🎉"
919995 return title
920996
921997 def _render_raw_diff_sections (
@@ -971,14 +1047,41 @@ def add_entry(
9711047 sections .setdefault (header , []).append ((lines , counts_as_change ))
9721048
9731049 for project in self .diffs ["failed_projects" ]:
1050+ status = project .get ("failure_status" )
1051+ fixed_panics = project .get ("fixed_panic_messages" , [])
1052+ # Persistent failures with no panic delta aren't PR-specific
1053+ # news; they live in the HTML report instead of the comment's
1054+ # raw diff. Persistent-but-partially-improved failures (some
1055+ # panics resolved, project still failing) are surfaced here so
1056+ # reviewers can see the partial progress.
1057+ if status == "persistent" and not fixed_panics :
1058+ continue
1059+ # Prefix drives GitHub's `diff` highlighting: `-` is red (bad),
1060+ # `+` is green (good). Newly failing → red; fully fixed →
1061+ # green; partial fix on a still-failing project → green line
1062+ # noting the improvement.
1063+ if status == "new" :
1064+ line = (
1065+ f"- FAILED "
1066+ f"old={ project ['old_status' ]} ({ project .get ('old_return_code' )} ) "
1067+ f"new={ project ['new_status' ]} ({ project .get ('new_return_code' )} )"
1068+ )
1069+ elif status == "fixed" :
1070+ line = (
1071+ f"+ FIXED "
1072+ f"old={ project ['old_status' ]} ({ project .get ('old_return_code' )} ) "
1073+ f"new={ project ['new_status' ]} ({ project .get ('new_return_code' )} )"
1074+ )
1075+ else :
1076+ line = (
1077+ f"+ PARTIAL FIX { len (fixed_panics )} "
1078+ f"panic{ 's' if len (fixed_panics ) != 1 else '' } "
1079+ f"resolved, project still failing"
1080+ )
9741081 add_entry (
9751082 project ["project" ],
9761083 project .get ("project_location" ),
977- [
978- "- "
979- f"FAILED old={ project ['old_status' ]} ({ project .get ('old_return_code' )} ) "
980- f"new={ project ['new_status' ]} ({ project .get ('new_return_code' )} )"
981- ],
1084+ [line ],
9821085 counts_as_change = False ,
9831086 )
9841087
@@ -1079,19 +1182,39 @@ def render_statistics_markdown(
10791182
10801183 markdown_content = self .generate_comment_title () + "\n \n "
10811184
1082- if failed_projects :
1185+ # Projects that were already failing on the baseline and are still
1186+ # failing on the PR aren't news — omit them from the PR-comment
1187+ # summary table so reviewers can focus on what changed. They
1188+ # still show up in the full HTML report.
1189+ table_projects = [
1190+ project
1191+ for project in failed_projects
1192+ if project .get ("failure_status" ) != "persistent"
1193+ ]
1194+
1195+ if table_projects :
10831196 markdown_content += "**Failing projects**:\n \n "
1084- markdown_content += "| Project | Old Status | New Status | Old Return Code | New Return Code |\n "
1085- markdown_content += "|---------|------------|------------|-----------------|------------------|\n "
1197+ markdown_content += (
1198+ "| Project | Status | Old Status | New Status | "
1199+ "Old Return Code | New Return Code |\n "
1200+ )
1201+ markdown_content += (
1202+ "|---------|--------|------------|------------|"
1203+ "-----------------|------------------|\n "
1204+ )
10861205
1087- for project in failed_projects :
1206+ for project in table_projects :
10881207 old_status = project ["old_status" ]
10891208 new_status = project ["new_status" ]
10901209 old_rc = project .get ("old_return_code" , "None" )
10911210 new_rc = project .get ("new_return_code" , "None" )
1211+ status_label = _failure_status_label (
1212+ project .get ("failure_status" , "persistent" )
1213+ )
10921214
10931215 markdown_content += (
1094- f"| `{ project ['project' ]} ` | { old_status } | { new_status } | "
1216+ f"| `{ project ['project' ]} ` | { status_label } | "
1217+ f"{ old_status } | { new_status } | "
10951218 f"`{ old_rc } ` | `{ new_rc } ` |\n "
10961219 )
10971220
@@ -1104,7 +1227,7 @@ def render_statistics_markdown(
11041227 ):
11051228 markdown_content += "No diagnostic changes detected ✅\n "
11061229 else :
1107- if failed_projects :
1230+ if table_projects :
11081231 markdown_content += "**Diagnostic changes:**\n "
11091232
11101233 markdown_content += """
@@ -1283,6 +1406,8 @@ def generate_html_report(self, output_path: str) -> None:
12831406 "new_diagnostics" : self .new_diagnostics ,
12841407 "diffs" : self .diffs ,
12851408 "statistics" : statistics ,
1409+ "failure_descriptor" : _failure_descriptor ,
1410+ "failure_status_labels" : _FAILURE_STATUS_LABELS ,
12861411 }
12871412
12881413 # Ensure the output directory exists
0 commit comments