astral-sh
diff --git a/‎AGENTS.md‎
Lines changed: 1 addition & 1 deletion b/‎AGENTS.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/ecosystem_analyzer/diff.py‎
Lines changed: 165 additions & 40 deletions b/‎src/ecosystem_analyzer/diff.py‎
Lines changed: 165 additions & 40 deletions
diff --git a/‎src/ecosystem_analyzer/main.py‎
Lines changed: 37 additions & 9 deletions b/‎src/ecosystem_analyzer/main.py‎
Lines changed: 37 additions & 9 deletions
@@ -24,7 +24,7 @@ Use `uv` for all local work.
 
 The CI job checks out `ruff` (which includes `ty`'s implementation), copies `.github/ty-ecosystem.toml` into `~/.config/ty/ty.toml`, creates two local branches (`new_commit` at the PR head and `old_commit` at the merge base against the PR base branch), and snapshots the primer project lists from each revision. It installs `ecosystem-analyzer` with `uv tool install` from a pinned Git commit, then runs:
 
-- `ecosystem-analyzer --repository ruff --flaky-runs 10 diff ...`: compare diagnostics between `old_commit` and `new_commit` using the old/new/flaky project lists.
+- `ecosystem-analyzer --repository ruff --flaky-runs 10 diff ...`: compare diagnostics between `old_commit` and `new_commit` using the old/new/flaky project lists. The `--projects-old` and `--projects-new` options are optional; when omitted, each side defaults to every project known to `mypy_primer`.
 - `ecosystem-analyzer generate-diff ...`: produce the HTML diagnostics diff report.
 - `ecosystem-analyzer generate-diff-statistics ...`: produce the Markdown summary that is posted back to the PR.
 - `ecosystem-analyzer generate-timing-diff ...`: produce the HTML timing comparison report.
 
@@ -4,7 +4,7 @@
 import random
 from itertools import chain
 from pathlib import Path
-from typing import Any, TypedDict
+from typing import Any, Literal, TypedDict
 
 from jinja2 import Environment, FileSystemLoader, PackageLoader
 
@@ -49,6 +49,35 @@ class DiffStatistics(TypedDict):
     merged_by_project: list[MergedProjectStats]
 
 
+_FAILURE_STATUS_LABELS = {
+    "new": "❌ newly failing",
+    "persistent": "➖ persistent",
+    "fixed": "🎉 crashes fixed",
+}
+
+
+def _failure_status_label(status: str) -> str:
+    return _FAILURE_STATUS_LABELS.get(status, "—")
+
+
+def _failure_descriptor(
+    project: dict[str, Any], direction: Literal["new", "fixed"]
+) -> str:
+    """Describe the kind of failure for `direction` ('new' or 'fixed')."""
+    status_key = "new_status" if direction == "new" else "old_status"
+    status = project.get(status_key, "")
+    panic_messages_key = (
+        "introduced_panic_messages" if direction == "new" else "fixed_panic_messages"
+    )
+    if project.get(panic_messages_key):
+        return "panic"
+    if status == "timeout":
+        return "timeout"
+    if status == "abnormal exit":
+        return "crash"
+    return "failure"
+
+
 class DiagnosticDiff:
     """Class for comparing diagnostic data between two JSON files."""
 
@@ -289,16 +318,43 @@ def _compute_diffs(self) -> dict[str, Any]:
             old_failed, old_status = self._is_project_failed(old_project)
             new_failed, new_status = self._is_project_failed(new_project)
 
+            old_panics = list(old_project.get("panic_messages", []))
+            new_panics = list(new_project.get("panic_messages", []))
+
             if old_failed or new_failed:
+                old_panic_set = set(old_panics)
+                new_panic_set = set(new_panics)
+                introduced_panics = sorted(new_panic_set - old_panic_set)
+                fixed_panics = sorted(old_panic_set - new_panic_set)
+                persistent_panics = sorted(new_panic_set & old_panic_set)
+
+                # `failure_status` celebrates or flags only *overall* state
+                # transitions — going from passing to failing (or vice
+                # versa).  A project that stays failing but picks up a new
+                # panic still counts as a regression ("new").  A project
+                # that stays failing but loses a panic is still failing —
+                # surface the partial improvement in the HTML report and
+                # raw diff, but don't celebrate it in the PR title/table.
+                if introduced_panics or (not old_failed and new_failed):
+                    failure_status = "new"
+                elif old_failed and not new_failed:
+                    failure_status = "fixed"
+                else:
+                    failure_status = "persistent"
+
                 result["failed_projects"].append({
                     "project": project_name,
                     "project_location": new_project.get("project_location", ""),
                     "old_status": old_status,
                     "new_status": new_status,
                     "old_return_code": old_project.get("return_code"),
                     "new_return_code": new_project.get("return_code"),
-                    "old_panic_messages": old_project.get("panic_messages", []),
-                    "new_panic_messages": new_project.get("panic_messages", []),
+                    "old_panic_messages": old_panics,
+                    "new_panic_messages": new_panics,
+                    "introduced_panic_messages": introduced_panics,
+                    "fixed_panic_messages": fixed_panics,
+                    "persistent_panic_messages": persistent_panics,
+                    "failure_status": failure_status,
                 })
                 # Skip detailed diff analysis for failed projects
                 continue
@@ -433,20 +489,32 @@ def _compute_diffs(self) -> dict[str, Any]:
                     )
                 result["modified_projects"].append(entry)
 
-        # Sort failed projects to prioritize abnormal exits over timeouts
+        # Sort failed projects so PR reviewers see new regressions first,
+        # then persistent failures, then anything that was fixed.
+        failure_status_priority = {"new": 3, "persistent": 2, "fixed": 1}
+
         def failed_project_sort_key(project):
+            status_priority = failure_status_priority.get(
+                project.get("failure_status", "persistent"), 0
+            )
             old_abnormal = project["old_status"] == "abnormal exit"
             new_abnormal = project["new_status"] == "abnormal exit"
             old_timeout = project["old_status"] == "timeout"
             new_timeout = project["new_status"] == "timeout"
 
             if old_abnormal or new_abnormal:
-                return (2, project["project"])  # Abnormal exits first
-            if old_timeout or new_timeout:
-                return (1, project["project"])  # Timeouts second
-            return (0, project["project"])  # Other failures last
+                exit_priority = 2  # Abnormal exits (incl. panics/stack overflows) first
+            elif old_timeout or new_timeout:
+                exit_priority = 1  # Timeouts second
+            else:
+                exit_priority = 0
+
+            # Negate so the natural sort puts the highest-priority entries
+            # at the top without relying on `reverse=True` (which would also
+            # reverse the project-name tiebreaker).
+            return (-status_priority, -exit_priority, project["project"])
 
-        result["failed_projects"].sort(key=failed_project_sort_key, reverse=True)
+        result["failed_projects"].sort(key=failed_project_sort_key)
 
         return result
 
@@ -889,33 +957,41 @@ def introduced_project_failures(self) -> list[str]:
 
         return introduced
 
-    def has_new_panics(self) -> bool:
-        """Check if any project has new panic messages that weren't present before."""
-        for project in self.diffs.get("failed_projects", []):
-            old_panics = project.get("old_panic_messages", [])
-            new_panics = project.get("new_panic_messages", [])
-            if set(new_panics) - set(old_panics):
-                return True
-        return False
+    def has_new_failures(self) -> bool:
+        """Check if any project regressed from passing to failing."""
+        return any(
+            project.get("failure_status") == "new"
+            for project in self.diffs.get("failed_projects", [])
+        )
 
-    def has_new_timeouts(self) -> bool:
-        """Check if any project newly timed out (was not a timeout before)."""
+    def has_fixed_failures(self) -> bool:
+        """Check if any project went from failing to passing."""
         return any(
-            project["new_status"] == "timeout" and project["old_status"] != "timeout"
+            project.get("failure_status") == "fixed"
             for project in self.diffs.get("failed_projects", [])
         )
 
     def generate_comment_title(self) -> str:
-        """Generate the PR comment title with status indicators for panics and timeouts."""
+        """Generate the PR comment title with status indicators for new failures."""
         title = "## `ecosystem-analyzer` results"
-        new_panics = self.has_new_panics()
-        new_timeouts = self.has_new_timeouts()
-        if new_panics and not new_timeouts:
-            title += ": new panics detected ❌"
-        elif new_timeouts and not new_panics:
-            title += ": new timeouts detected ❌"
-        elif new_panics and new_timeouts:
-            title += ": new panics/timeouts detected ❌"
+        new_projects = [
+            project
+            for project in self.diffs.get("failed_projects", [])
+            if project.get("failure_status") == "new"
+        ]
+        if new_projects:
+            # If every regression is a timeout, say so — a blanket "new
+            # crashes detected" would be misleading.  Any panic or crash
+            # in the mix falls back to "crashes" so the title stays short.
+            if all(
+                _failure_descriptor(project, "new") == "timeout"
+                for project in new_projects
+            ):
+                title += ": new timeouts detected ❌"
+            else:
+                title += ": new crashes detected ❌"
+        elif self.has_fixed_failures():
+            title += ": ecosystem failure fixed 🎉"
         return title
 
     def _render_raw_diff_sections(
@@ -971,14 +1047,41 @@ def add_entry(
             sections.setdefault(header, []).append((lines, counts_as_change))
 
         for project in self.diffs["failed_projects"]:
+            status = project.get("failure_status")
+            fixed_panics = project.get("fixed_panic_messages", [])
+            # Persistent failures with no panic delta aren't PR-specific
+            # news; they live in the HTML report instead of the comment's
+            # raw diff.  Persistent-but-partially-improved failures (some
+            # panics resolved, project still failing) are surfaced here so
+            # reviewers can see the partial progress.
+            if status == "persistent" and not fixed_panics:
+                continue
+            # Prefix drives GitHub's `diff` highlighting: `-` is red (bad),
+            # `+` is green (good).  Newly failing → red; fully fixed →
+            # green; partial fix on a still-failing project → green line
+            # noting the improvement.
+            if status == "new":
+                line = (
+                    f"- FAILED "
+                    f"old={project['old_status']}({project.get('old_return_code')}) "
+                    f"new={project['new_status']}({project.get('new_return_code')})"
+                )
+            elif status == "fixed":
+                line = (
+                    f"+ FIXED "
+                    f"old={project['old_status']}({project.get('old_return_code')}) "
+                    f"new={project['new_status']}({project.get('new_return_code')})"
+                )
+            else:
+                line = (
+                    f"+ PARTIAL FIX {len(fixed_panics)} "
+                    f"panic{'s' if len(fixed_panics) != 1 else ''} "
+                    f"resolved, project still failing"
+                )
             add_entry(
                 project["project"],
                 project.get("project_location"),
-                [
-                    "- "
-                    f"FAILED old={project['old_status']}({project.get('old_return_code')}) "
-                    f"new={project['new_status']}({project.get('new_return_code')})"
-                ],
+                [line],
                 counts_as_change=False,
             )
 
@@ -1079,19 +1182,39 @@ def render_statistics_markdown(
 
         markdown_content = self.generate_comment_title() + "\n\n"
 
-        if failed_projects:
+        # Projects that were already failing on the baseline and are still
+        # failing on the PR aren't news — omit them from the PR-comment
+        # summary table so reviewers can focus on what changed.  They
+        # still show up in the full HTML report.
+        table_projects = [
+            project
+            for project in failed_projects
+            if project.get("failure_status") != "persistent"
+        ]
+
+        if table_projects:
             markdown_content += "**Failing projects**:\n\n"
-            markdown_content += "| Project | Old Status | New Status | Old Return Code | New Return Code |\n"
-            markdown_content += "|---------|------------|------------|-----------------|------------------|\n"
+            markdown_content += (
+                "| Project | Status | Old Status | New Status | "
+                "Old Return Code | New Return Code |\n"
+            )
+            markdown_content += (
+                "|---------|--------|------------|------------|"
+                "-----------------|------------------|\n"
+            )
 
-            for project in failed_projects:
+            for project in table_projects:
                 old_status = project["old_status"]
                 new_status = project["new_status"]
                 old_rc = project.get("old_return_code", "None")
                 new_rc = project.get("new_return_code", "None")
+                status_label = _failure_status_label(
+                    project.get("failure_status", "persistent")
+                )
 
                 markdown_content += (
-                    f"| `{project['project']}` | {old_status} | {new_status} | "
+                    f"| `{project['project']}` | {status_label} | "
+                    f"{old_status} | {new_status} | "
                     f"`{old_rc}` | `{new_rc}` |\n"
                 )
 
@@ -1104,7 +1227,7 @@ def render_statistics_markdown(
         ):
             markdown_content += "No diagnostic changes detected ✅\n"
         else:
-            if failed_projects:
+            if table_projects:
                 markdown_content += "**Diagnostic changes:**\n"
 
             markdown_content += """
@@ -1283,6 +1406,8 @@ def generate_html_report(self, output_path: str) -> None:
             "new_diagnostics": self.new_diagnostics,
             "diffs": self.diffs,
             "statistics": statistics,
+            "failure_descriptor": _failure_descriptor,
+            "failure_status_labels": _FAILURE_STATUS_LABELS,
         }
 
         # Ensure the output directory exists
 
@@ -9,7 +9,7 @@
 from .diff import DiagnosticDiff
 from .ecosystem_report import generate
 from .git import get_latest_ty_commits, resolve_ty_repo
-from .manager import Manager
+from .manager import Manager, get_all_project_names
 
 logger = logging.getLogger(__name__)
 
@@ -212,15 +212,17 @@ def analyze(
 @cli.command()
 @click.option(
     "--projects-old",
-    help="List to a file with projects to analyze for old commit",
+    help="File with projects to analyze for the old commit "
+    "(defaults to every mypy_primer project if omitted)",
     type=click.Path(exists=True, dir_okay=False, readable=True),
-    required=True,
+    required=False,
 )
 @click.option(
     "--projects-new",
-    help="List to a file with projects to analyze for new commit",
+    help="File with projects to analyze for the new commit "
+    "(defaults to every mypy_primer project if omitted)",
     type=click.Path(exists=True, dir_okay=False, readable=True),
-    required=True,
+    required=False,
 )
 @click.option(
     "--old",
@@ -292,8 +294,8 @@ def analyze(
 @click.pass_context
 def diff(
     ctx,
-    projects_old: str,
-    projects_new: str,
+    projects_old: str | None,
+    projects_new: str | None,
     old: str,
     new: str,
     output_old: str,
@@ -328,8 +330,34 @@ def diff(
     ):
         raise click.UsageError(f"--shard must be in range [0, {num_shards})")
 
-    project_names_old = Path(projects_old).read_text().splitlines()
-    project_names_new = Path(projects_new).read_text().splitlines()
+    if projects_old is None or projects_new is None:
+        default_project_names = get_all_project_names()
+        defaulted = [
+            name
+            for name, value in (
+                ("--projects-old", projects_old),
+                ("--projects-new", projects_new),
+            )
+            if value is None
+        ]
+        logger.info(
+            "Defaulting %s to all %d mypy_primer projects",
+            "/".join(defaulted),
+            len(default_project_names),
+        )
+    else:
+        default_project_names = []
+
+    project_names_old = (
+        Path(projects_old).read_text().splitlines()
+        if projects_old is not None
+        else list(default_project_names)
+    )
+    project_names_new = (
+        Path(projects_new).read_text().splitlines()
+        if projects_new is not None
+        else list(default_project_names)
+    )
     flaky_project_names = (
         set(Path(projects_flaky).read_text().splitlines()) if projects_flaky else None
     )