Skip to content

Commit 5ac0ec4

Browse files
committed
Run on all mypy_primer projects
1 parent 8d68649 commit 5ac0ec4

6 files changed

Lines changed: 866 additions & 61 deletions

File tree

AGENTS.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ Use `uv` for all local work.
2424

2525
The CI job checks out `ruff` (which includes `ty`'s implementation), copies `.github/ty-ecosystem.toml` into `~/.config/ty/ty.toml`, creates two local branches (`new_commit` at the PR head and `old_commit` at the merge base against the PR base branch), and snapshots the primer project lists from each revision. It installs `ecosystem-analyzer` with `uv tool install` from a pinned Git commit, then runs:
2626

27-
- `ecosystem-analyzer --repository ruff --flaky-runs 10 diff ...`: compare diagnostics between `old_commit` and `new_commit` using the old/new/flaky project lists.
27+
- `ecosystem-analyzer --repository ruff --flaky-runs 10 diff ...`: compare diagnostics between `old_commit` and `new_commit` using the old/new/flaky project lists. The `--projects-old` and `--projects-new` options are optional; when omitted, each side defaults to every project known to `mypy_primer`.
2828
- `ecosystem-analyzer generate-diff ...`: produce the HTML diagnostics diff report.
2929
- `ecosystem-analyzer generate-diff-statistics ...`: produce the Markdown summary that is posted back to the PR.
3030
- `ecosystem-analyzer generate-timing-diff ...`: produce the HTML timing comparison report.

src/ecosystem_analyzer/diff.py

Lines changed: 165 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import random
55
from itertools import chain
66
from pathlib import Path
7-
from typing import Any, TypedDict
7+
from typing import Any, Literal, TypedDict
88

99
from jinja2 import Environment, FileSystemLoader, PackageLoader
1010

@@ -49,6 +49,35 @@ class DiffStatistics(TypedDict):
4949
merged_by_project: list[MergedProjectStats]
5050

5151

52+
_FAILURE_STATUS_LABELS = {
53+
"new": "❌ newly failing",
54+
"persistent": "➖ persistent",
55+
"fixed": "🎉 crashes fixed",
56+
}
57+
58+
59+
def _failure_status_label(status: str) -> str:
60+
return _FAILURE_STATUS_LABELS.get(status, "—")
61+
62+
63+
def _failure_descriptor(
64+
project: dict[str, Any], direction: Literal["new", "fixed"]
65+
) -> str:
66+
"""Describe the kind of failure for `direction` ('new' or 'fixed')."""
67+
status_key = "new_status" if direction == "new" else "old_status"
68+
status = project.get(status_key, "")
69+
panic_messages_key = (
70+
"introduced_panic_messages" if direction == "new" else "fixed_panic_messages"
71+
)
72+
if project.get(panic_messages_key):
73+
return "panic"
74+
if status == "timeout":
75+
return "timeout"
76+
if status == "abnormal exit":
77+
return "crash"
78+
return "failure"
79+
80+
5281
class DiagnosticDiff:
5382
"""Class for comparing diagnostic data between two JSON files."""
5483

@@ -289,16 +318,43 @@ def _compute_diffs(self) -> dict[str, Any]:
289318
old_failed, old_status = self._is_project_failed(old_project)
290319
new_failed, new_status = self._is_project_failed(new_project)
291320

321+
old_panics = list(old_project.get("panic_messages", []))
322+
new_panics = list(new_project.get("panic_messages", []))
323+
292324
if old_failed or new_failed:
325+
old_panic_set = set(old_panics)
326+
new_panic_set = set(new_panics)
327+
introduced_panics = sorted(new_panic_set - old_panic_set)
328+
fixed_panics = sorted(old_panic_set - new_panic_set)
329+
persistent_panics = sorted(new_panic_set & old_panic_set)
330+
331+
# `failure_status` celebrates or flags only *overall* state
332+
# transitions — going from passing to failing (or vice
333+
# versa). A project that stays failing but picks up a new
334+
# panic still counts as a regression ("new"). A project
335+
# that stays failing but loses a panic is still failing —
336+
# surface the partial improvement in the HTML report and
337+
# raw diff, but don't celebrate it in the PR title/table.
338+
if introduced_panics or (not old_failed and new_failed):
339+
failure_status = "new"
340+
elif old_failed and not new_failed:
341+
failure_status = "fixed"
342+
else:
343+
failure_status = "persistent"
344+
293345
result["failed_projects"].append({
294346
"project": project_name,
295347
"project_location": new_project.get("project_location", ""),
296348
"old_status": old_status,
297349
"new_status": new_status,
298350
"old_return_code": old_project.get("return_code"),
299351
"new_return_code": new_project.get("return_code"),
300-
"old_panic_messages": old_project.get("panic_messages", []),
301-
"new_panic_messages": new_project.get("panic_messages", []),
352+
"old_panic_messages": old_panics,
353+
"new_panic_messages": new_panics,
354+
"introduced_panic_messages": introduced_panics,
355+
"fixed_panic_messages": fixed_panics,
356+
"persistent_panic_messages": persistent_panics,
357+
"failure_status": failure_status,
302358
})
303359
# Skip detailed diff analysis for failed projects
304360
continue
@@ -433,20 +489,32 @@ def _compute_diffs(self) -> dict[str, Any]:
433489
)
434490
result["modified_projects"].append(entry)
435491

436-
# Sort failed projects to prioritize abnormal exits over timeouts
492+
# Sort failed projects so PR reviewers see new regressions first,
493+
# then persistent failures, then anything that was fixed.
494+
failure_status_priority = {"new": 3, "persistent": 2, "fixed": 1}
495+
437496
def failed_project_sort_key(project):
497+
status_priority = failure_status_priority.get(
498+
project.get("failure_status", "persistent"), 0
499+
)
438500
old_abnormal = project["old_status"] == "abnormal exit"
439501
new_abnormal = project["new_status"] == "abnormal exit"
440502
old_timeout = project["old_status"] == "timeout"
441503
new_timeout = project["new_status"] == "timeout"
442504

443505
if old_abnormal or new_abnormal:
444-
return (2, project["project"]) # Abnormal exits first
445-
if old_timeout or new_timeout:
446-
return (1, project["project"]) # Timeouts second
447-
return (0, project["project"]) # Other failures last
506+
exit_priority = 2 # Abnormal exits (incl. panics/stack overflows) first
507+
elif old_timeout or new_timeout:
508+
exit_priority = 1 # Timeouts second
509+
else:
510+
exit_priority = 0
511+
512+
# Negate so the natural sort puts the highest-priority entries
513+
# at the top without relying on `reverse=True` (which would also
514+
# reverse the project-name tiebreaker).
515+
return (-status_priority, -exit_priority, project["project"])
448516

449-
result["failed_projects"].sort(key=failed_project_sort_key, reverse=True)
517+
result["failed_projects"].sort(key=failed_project_sort_key)
450518

451519
return result
452520

@@ -889,33 +957,41 @@ def introduced_project_failures(self) -> list[str]:
889957

890958
return introduced
891959

892-
def has_new_panics(self) -> bool:
893-
"""Check if any project has new panic messages that weren't present before."""
894-
for project in self.diffs.get("failed_projects", []):
895-
old_panics = project.get("old_panic_messages", [])
896-
new_panics = project.get("new_panic_messages", [])
897-
if set(new_panics) - set(old_panics):
898-
return True
899-
return False
960+
def has_new_failures(self) -> bool:
961+
"""Check if any project regressed from passing to failing."""
962+
return any(
963+
project.get("failure_status") == "new"
964+
for project in self.diffs.get("failed_projects", [])
965+
)
900966

901-
def has_new_timeouts(self) -> bool:
902-
"""Check if any project newly timed out (was not a timeout before)."""
967+
def has_fixed_failures(self) -> bool:
968+
"""Check if any project went from failing to passing."""
903969
return any(
904-
project["new_status"] == "timeout" and project["old_status"] != "timeout"
970+
project.get("failure_status") == "fixed"
905971
for project in self.diffs.get("failed_projects", [])
906972
)
907973

908974
def generate_comment_title(self) -> str:
909-
"""Generate the PR comment title with status indicators for panics and timeouts."""
975+
"""Generate the PR comment title with status indicators for new failures."""
910976
title = "## `ecosystem-analyzer` results"
911-
new_panics = self.has_new_panics()
912-
new_timeouts = self.has_new_timeouts()
913-
if new_panics and not new_timeouts:
914-
title += ": new panics detected ❌"
915-
elif new_timeouts and not new_panics:
916-
title += ": new timeouts detected ❌"
917-
elif new_panics and new_timeouts:
918-
title += ": new panics/timeouts detected ❌"
977+
new_projects = [
978+
project
979+
for project in self.diffs.get("failed_projects", [])
980+
if project.get("failure_status") == "new"
981+
]
982+
if new_projects:
983+
# If every regression is a timeout, say so — a blanket "new
984+
# crashes detected" would be misleading. Any panic or crash
985+
# in the mix falls back to "crashes" so the title stays short.
986+
if all(
987+
_failure_descriptor(project, "new") == "timeout"
988+
for project in new_projects
989+
):
990+
title += ": new timeouts detected ❌"
991+
else:
992+
title += ": new crashes detected ❌"
993+
elif self.has_fixed_failures():
994+
title += ": ecosystem failure fixed 🎉"
919995
return title
920996

921997
def _render_raw_diff_sections(
@@ -971,14 +1047,41 @@ def add_entry(
9711047
sections.setdefault(header, []).append((lines, counts_as_change))
9721048

9731049
for project in self.diffs["failed_projects"]:
1050+
status = project.get("failure_status")
1051+
fixed_panics = project.get("fixed_panic_messages", [])
1052+
# Persistent failures with no panic delta aren't PR-specific
1053+
# news; they live in the HTML report instead of the comment's
1054+
# raw diff. Persistent-but-partially-improved failures (some
1055+
# panics resolved, project still failing) are surfaced here so
1056+
# reviewers can see the partial progress.
1057+
if status == "persistent" and not fixed_panics:
1058+
continue
1059+
# Prefix drives GitHub's `diff` highlighting: `-` is red (bad),
1060+
# `+` is green (good). Newly failing → red; fully fixed →
1061+
# green; partial fix on a still-failing project → green line
1062+
# noting the improvement.
1063+
if status == "new":
1064+
line = (
1065+
f"- FAILED "
1066+
f"old={project['old_status']}({project.get('old_return_code')}) "
1067+
f"new={project['new_status']}({project.get('new_return_code')})"
1068+
)
1069+
elif status == "fixed":
1070+
line = (
1071+
f"+ FIXED "
1072+
f"old={project['old_status']}({project.get('old_return_code')}) "
1073+
f"new={project['new_status']}({project.get('new_return_code')})"
1074+
)
1075+
else:
1076+
line = (
1077+
f"+ PARTIAL FIX {len(fixed_panics)} "
1078+
f"panic{'s' if len(fixed_panics) != 1 else ''} "
1079+
f"resolved, project still failing"
1080+
)
9741081
add_entry(
9751082
project["project"],
9761083
project.get("project_location"),
977-
[
978-
"- "
979-
f"FAILED old={project['old_status']}({project.get('old_return_code')}) "
980-
f"new={project['new_status']}({project.get('new_return_code')})"
981-
],
1084+
[line],
9821085
counts_as_change=False,
9831086
)
9841087

@@ -1079,19 +1182,39 @@ def render_statistics_markdown(
10791182

10801183
markdown_content = self.generate_comment_title() + "\n\n"
10811184

1082-
if failed_projects:
1185+
# Projects that were already failing on the baseline and are still
1186+
# failing on the PR aren't news — omit them from the PR-comment
1187+
# summary table so reviewers can focus on what changed. They
1188+
# still show up in the full HTML report.
1189+
table_projects = [
1190+
project
1191+
for project in failed_projects
1192+
if project.get("failure_status") != "persistent"
1193+
]
1194+
1195+
if table_projects:
10831196
markdown_content += "**Failing projects**:\n\n"
1084-
markdown_content += "| Project | Old Status | New Status | Old Return Code | New Return Code |\n"
1085-
markdown_content += "|---------|------------|------------|-----------------|------------------|\n"
1197+
markdown_content += (
1198+
"| Project | Status | Old Status | New Status | "
1199+
"Old Return Code | New Return Code |\n"
1200+
)
1201+
markdown_content += (
1202+
"|---------|--------|------------|------------|"
1203+
"-----------------|------------------|\n"
1204+
)
10861205

1087-
for project in failed_projects:
1206+
for project in table_projects:
10881207
old_status = project["old_status"]
10891208
new_status = project["new_status"]
10901209
old_rc = project.get("old_return_code", "None")
10911210
new_rc = project.get("new_return_code", "None")
1211+
status_label = _failure_status_label(
1212+
project.get("failure_status", "persistent")
1213+
)
10921214

10931215
markdown_content += (
1094-
f"| `{project['project']}` | {old_status} | {new_status} | "
1216+
f"| `{project['project']}` | {status_label} | "
1217+
f"{old_status} | {new_status} | "
10951218
f"`{old_rc}` | `{new_rc}` |\n"
10961219
)
10971220

@@ -1104,7 +1227,7 @@ def render_statistics_markdown(
11041227
):
11051228
markdown_content += "No diagnostic changes detected ✅\n"
11061229
else:
1107-
if failed_projects:
1230+
if table_projects:
11081231
markdown_content += "**Diagnostic changes:**\n"
11091232

11101233
markdown_content += """
@@ -1283,6 +1406,8 @@ def generate_html_report(self, output_path: str) -> None:
12831406
"new_diagnostics": self.new_diagnostics,
12841407
"diffs": self.diffs,
12851408
"statistics": statistics,
1409+
"failure_descriptor": _failure_descriptor,
1410+
"failure_status_labels": _FAILURE_STATUS_LABELS,
12861411
}
12871412

12881413
# Ensure the output directory exists

src/ecosystem_analyzer/main.py

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from .diff import DiagnosticDiff
1010
from .ecosystem_report import generate
1111
from .git import get_latest_ty_commits, resolve_ty_repo
12-
from .manager import Manager
12+
from .manager import Manager, get_all_project_names
1313

1414
logger = logging.getLogger(__name__)
1515

@@ -212,15 +212,17 @@ def analyze(
212212
@cli.command()
213213
@click.option(
214214
"--projects-old",
215-
help="List to a file with projects to analyze for old commit",
215+
help="File with projects to analyze for the old commit "
216+
"(defaults to every mypy_primer project if omitted)",
216217
type=click.Path(exists=True, dir_okay=False, readable=True),
217-
required=True,
218+
required=False,
218219
)
219220
@click.option(
220221
"--projects-new",
221-
help="List to a file with projects to analyze for new commit",
222+
help="File with projects to analyze for the new commit "
223+
"(defaults to every mypy_primer project if omitted)",
222224
type=click.Path(exists=True, dir_okay=False, readable=True),
223-
required=True,
225+
required=False,
224226
)
225227
@click.option(
226228
"--old",
@@ -292,8 +294,8 @@ def analyze(
292294
@click.pass_context
293295
def diff(
294296
ctx,
295-
projects_old: str,
296-
projects_new: str,
297+
projects_old: str | None,
298+
projects_new: str | None,
297299
old: str,
298300
new: str,
299301
output_old: str,
@@ -328,8 +330,34 @@ def diff(
328330
):
329331
raise click.UsageError(f"--shard must be in range [0, {num_shards})")
330332

331-
project_names_old = Path(projects_old).read_text().splitlines()
332-
project_names_new = Path(projects_new).read_text().splitlines()
333+
if projects_old is None or projects_new is None:
334+
default_project_names = get_all_project_names()
335+
defaulted = [
336+
name
337+
for name, value in (
338+
("--projects-old", projects_old),
339+
("--projects-new", projects_new),
340+
)
341+
if value is None
342+
]
343+
logger.info(
344+
"Defaulting %s to all %d mypy_primer projects",
345+
"/".join(defaulted),
346+
len(default_project_names),
347+
)
348+
else:
349+
default_project_names = []
350+
351+
project_names_old = (
352+
Path(projects_old).read_text().splitlines()
353+
if projects_old is not None
354+
else list(default_project_names)
355+
)
356+
project_names_new = (
357+
Path(projects_new).read_text().splitlines()
358+
if projects_new is not None
359+
else list(default_project_names)
360+
)
333361
flaky_project_names = (
334362
set(Path(projects_flaky).read_text().splitlines()) if projects_flaky else None
335363
)

0 commit comments

Comments
 (0)