Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions src/ecosystem_analyzer/flaky.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,6 @@ def _location_key(diag: Diagnostic) -> tuple[str, int, int]:
return (diag["path"], diag["line"], diag["column"])


def diagnostic_keys(
    diagnostics: list[Diagnostic],
) -> frozenset[tuple[str, int, int, str, str, str]]:
    """Collapse *diagnostics* into an immutable set of their identity keys.

    Each diagnostic is mapped through ``_diagnostic_key``; duplicates
    (diagnostics that produce the same key) are merged, and the result is
    hashable and order-independent, so two diagnostic lists can be compared
    for "same content" with ``==``.
    """
    return frozenset(map(_diagnostic_key, diagnostics))


def classify_diagnostics(
all_runs: list[list[Diagnostic]],
) -> tuple[list[Diagnostic], list[FlakyLocation]]:
Expand Down
25 changes: 4 additions & 21 deletions src/ecosystem_analyzer/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,14 +289,6 @@ def analyze(
type=click.Path(exists=True, dir_okay=False, readable=True, path_type=Path),
required=False,
)
@click.option(
"--dynamic-flaky",
is_flag=True,
help="Enable dynamic flaky detection: skip reruns when there are no changes "
"relative to baseline, and short-circuit when all changes are flaky. "
"--flaky-runs becomes the maximum number of runs. Note: flakiness is only "
"detected for diagnostics that differ from baseline.",
)
@click.pass_context
def diff(
ctx,
Expand All @@ -313,7 +305,6 @@ def diff(
num_shards: int | None,
ty_binary_old: Path | None,
ty_binary_new: Path | None,
dynamic_flaky: bool,
) -> None:
"""
Compare diagnostics between two commits.
Expand All @@ -337,10 +328,6 @@ def diff(
):
raise click.UsageError(f"--shard must be in range [0, {num_shards})")

if dynamic_flaky and ctx.obj["flaky_runs"] < 2:
click.echo("Error: --dynamic-flaky requires --flaky-runs >= 2", err=True)
ctx.exit(1)

project_names_old = Path(projects_old).read_text().splitlines()
project_names_new = Path(projects_new).read_text().splitlines()
flaky_project_names = (
Expand Down Expand Up @@ -372,17 +359,15 @@ def diff(
)

# Build (or use pre-built) old ty — building overlaps with background
# project installation. In dynamic mode, the old side runs once (no flaky
# detection) and its output is passed as a baseline so the new side can
# skip reruns for unchanged projects and short-circuit when all changes
# are flaky.
# project installation
if ty_binary_old is not None:
manager.use_prebuilt(ty_binary_old, old)
else:
manager.build(old)

# Run for old commit with old projects
manager.activate(project_names_old)
run_outputs_old = manager.run_active_projects(single_run=dynamic_flaky)
run_outputs_old = manager.run_active_projects()
manager.write_run_outputs(run_outputs_old, output_old)

# Build (or use pre-built) new ty — incremental build is near-instant
Expand All @@ -391,9 +376,7 @@ def diff(
else:
manager.build(new)
manager.activate(project_names_new)
run_outputs_new = manager.run_active_projects(
baseline=run_outputs_old if dynamic_flaky else None
)
run_outputs_new = manager.run_active_projects()
manager.write_run_outputs(run_outputs_new, output_new)


Expand Down
60 changes: 15 additions & 45 deletions src/ecosystem_analyzer/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,56 +164,26 @@ def run_for_commit(self, commit: str | Commit) -> list[RunOutput]:
self._ensure_installed()
return self._run_active_projects()

def run_active_projects(
self,
*,
baseline: list[RunOutput] | None = None,
single_run: bool = False,
) -> list[RunOutput]:
"""Run the current ty build on active projects.

When *baseline* is provided, flaky projects use dynamic detection
that can skip reruns or short-circuit early (see
``Ty.run_on_project_dynamic``). Without a baseline the fixed
``--flaky-runs`` behaviour is used.

When *single_run* is True, every project runs exactly once
regardless of ``--flaky-runs`` — useful for establishing a
baseline for dynamic detection.
"""
def run_active_projects(self) -> list[RunOutput]:
"""Run the current ty build on active projects."""
self._ensure_installed()
return self._run_active_projects(baseline=baseline, single_run=single_run)

def _is_flaky_project(self, project: InstalledProject) -> bool:
return self._flaky_runs > 1 and (
not self._flaky_projects or project.name in self._flaky_projects
)

def _run_active_projects(
self,
*,
baseline: list[RunOutput] | None = None,
single_run: bool = False,
) -> list[RunOutput]:
assert not (single_run and baseline is not None), (
"single_run=True and baseline are mutually exclusive: a baseline "
"is only meaningful for dynamic flaky detection"
)
baseline_by_project: dict[str, RunOutput] = (
{o["project"]: o for o in baseline} if baseline is not None else {}
)
return self._run_active_projects()

def _run_active_projects(self) -> list[RunOutput]:
run_outputs = []
for project in self._active_projects:
if not single_run and self._is_flaky_project(project):
if baseline is not None:
output = self._ty.run_on_project_dynamic(
project,
self._flaky_runs,
baseline_by_project.get(project.name),
n = (
self._flaky_runs
if (
self._flaky_runs > 1
and (
not self._flaky_projects or project.name in self._flaky_projects
)
else:
output = self._ty.run_on_project_multiple(project, self._flaky_runs)
)
else 1
)
if n > 1:
output = self._ty.run_on_project_multiple(project, n)
else:
output = self._ty.run_on_project(project)
run_outputs.append(output)
Expand Down
204 changes: 47 additions & 157 deletions src/ecosystem_analyzer/ty.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@

from git import Commit, Repo

from .diagnostic import Diagnostic, DiagnosticsParser
from .flaky import classify_diagnostics, diagnostic_keys
from .diagnostic import DiagnosticsParser
from .flaky import classify_diagnostics
from .installed_project import InstalledProject
from .run_output import FlakyLocation, RunOutput
from .run_output import RunOutput

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -169,16 +169,49 @@ def run_on_project(self, project: InstalledProject) -> RunOutput:
output["panic_messages"] = panic_messages
return output

def _build_multi_run_result(
self,
project: InstalledProject,
stable: list[Diagnostic],
flaky_locations: list[FlakyLocation],
n: int,
times: list[float],
return_codes: list[int],
) -> RunOutput:
"""Build a RunOutput from the results of multiple ty runs."""
def run_on_project_multiple(self, project: InstalledProject, n: int) -> RunOutput:
"""Run ty on a project N times and classify diagnostics as stable/flaky.

Returns a single RunOutput where `diagnostics` contains only stable
diagnostics and `flaky_diagnostics` contains grouped flaky ones.
"""
assert n >= 2, "Use run_on_project for single runs"
logger.info(
f"Running ty on project '{project.name}' {n} times for flaky detection"
)

all_diagnostics: list[list] = []
times: list[float] = []
return_codes: list[int | None] = []

for i in range(n):
logger.info(f" Run {i + 1}/{n} for '{project.name}'")
output = self.run_on_project(project)

# If any run fails abnormally, bail out and return the failure
if output.get("return_code") is not None and output["return_code"] not in (
0,
1,
):
logger.warning(
f"Run {i + 1}/{n} for '{project.name}' failed with return code "
f"{output['return_code']}; aborting flaky detection"
)
return output
if output.get("return_code") is None:
# Timeout
logger.warning(
f"Run {i + 1}/{n} for '{project.name}' timed out; aborting flaky detection"
)
return output

all_diagnostics.append(output["diagnostics"])
if (time_s := output.get("time_s")) is not None:
times.append(time_s)
return_codes.append(output.get("return_code"))

stable, flaky_locations = classify_diagnostics(all_diagnostics)

# Use median time
median_time: float | None = None
if times:
Expand All @@ -187,7 +220,7 @@ def _build_multi_run_result(
median_time = sorted_times[mid]

# Use most common return code
rc_counts = Counter(return_codes)
rc_counts = Counter(rc for rc in return_codes if rc is not None)
most_common_rc = rc_counts.most_common(1)[0][0] if rc_counts else None

result = RunOutput({
Expand All @@ -211,146 +244,3 @@ def _build_multi_run_result(
)

return result

@staticmethod
def _run_aborted(
    output: RunOutput, project: InstalledProject, run_idx: int, total: int
) -> bool:
    """Decide whether a single run's exit status ends flaky detection.

    A return code of 0 or 1 lets detection continue and yields ``False``.
    A missing return code (``None``, reported as a timeout) or any other
    return code logs a warning naming the run (``run_idx`` of ``total``)
    and the project, and yields ``True``.
    """
    exit_code = output.get("return_code")
    # 0 and 1 are the only exit codes that let flaky detection continue.
    if exit_code in (0, 1):
        return False

    if exit_code is None:
        message = (
            f"Run {run_idx}/{total} for '{project.name}' timed out; "
            f"aborting flaky detection"
        )
    else:
        message = (
            f"Run {run_idx}/{total} for '{project.name}' failed with return "
            f"code {exit_code}; aborting flaky detection"
        )
    logger.warning(message)
    return True

def run_on_project_multiple(self, project: InstalledProject, n: int) -> RunOutput:
    """Run ty on *project* ``n`` times and merge the runs into one result.

    Every run goes through ``run_on_project``. If ``_run_aborted`` flags a
    run, that run's output is returned as-is and no further runs happen.
    Otherwise the per-run diagnostics are handed to
    ``classify_diagnostics`` to separate stable diagnostics from flaky
    locations, and ``_build_multi_run_result`` assembles the final
    ``RunOutput`` from those plus the collected timings and return codes.

    ``n`` must be at least 2; use ``run_on_project`` for a single run.
    """
    assert n >= 2, "Use run_on_project for single runs"
    logger.info(
        f"Running ty on project '{project.name}' {n} times for flaky detection"
    )

    per_run_diagnostics: list[list] = []
    durations: list[float] = []
    exit_codes: list[int] = []

    for run_number in range(1, n + 1):
        logger.info(f" Run {run_number}/{n} for '{project.name}'")
        run_result = self.run_on_project(project)
        if self._run_aborted(run_result, project, run_number, n):
            return run_result

        per_run_diagnostics.append(run_result["diagnostics"])

        # A run may not report a duration; only collect the ones that do.
        elapsed = run_result.get("time_s")
        if elapsed is not None:
            durations.append(elapsed)

        exit_code = run_result["return_code"]
        # _run_aborted has already rejected runs without a return code.
        assert exit_code is not None
        exit_codes.append(exit_code)

    stable, flaky_locations = classify_diagnostics(per_run_diagnostics)
    return self._build_multi_run_result(
        project, stable, flaky_locations, n, durations, exit_codes
    )

def run_on_project_dynamic(
    self,
    project: InstalledProject,
    max_runs: int,
    baseline: RunOutput | None,
) -> RunOutput:
    """Run ty up to *max_runs* times, stopping as soon as more runs are moot.

    Unlike ``run_on_project_multiple``, which always performs a fixed
    number of runs, this method can return early:

    * If a run is flagged by ``_run_aborted``, that run's output is
      returned unchanged.
    * If the first run's diagnostic keys equal the baseline's, the first
      run's output is returned without any reruns.
    * From the second run on, the runs collected so far are classified
      after each run. Once the keys of the stable diagnostics equal the
      baseline's keys (every difference from the baseline turned out to
      be flaky), the remaining runs are skipped.

    A falsy *baseline* (e.g. ``None``) is treated as an empty key set, so
    the first shortcut then applies only when the first run reports no
    diagnostics at all.

    Because the first shortcut skips reruns entirely, flakiness among
    diagnostics shared with the baseline goes unobserved whenever the
    first run happens to match the baseline exactly.

    ``max_runs`` must be at least 2; use ``run_on_project`` for a single
    run. Except for the early returns above, the result is built by
    ``_build_multi_run_result`` using the number of runs actually
    performed.
    """
    assert max_runs >= 2, "Use run_on_project for single runs"
    logger.info(
        f"Running ty on project '{project.name}' with dynamic flaky detection "
        f"(max {max_runs} runs)"
    )

    baseline_keys = frozenset()
    if baseline:
        baseline_keys = diagnostic_keys(baseline["diagnostics"])

    per_run_diagnostics: list[list[Diagnostic]] = []
    durations: list[float] = []
    exit_codes: list[int] = []

    def record(run_result: RunOutput) -> None:
        """Store one successful run's diagnostics, duration and return code."""
        per_run_diagnostics.append(run_result["diagnostics"])
        elapsed = run_result.get("time_s")
        if elapsed is not None:
            durations.append(elapsed)
        exit_code = run_result["return_code"]
        # _run_aborted has already rejected runs without a return code.
        assert exit_code is not None
        exit_codes.append(exit_code)

    logger.info(f" Run 1/{max_runs} for '{project.name}'")
    initial = self.run_on_project(project)
    if self._run_aborted(initial, project, 1, max_runs):
        return initial

    # Shortcut 1: the first run already matches the baseline.
    if diagnostic_keys(initial["diagnostics"]) == baseline_keys:
        logger.info(f" '{project.name}': no changes vs baseline, skipping reruns")
        return initial

    record(initial)

    for run_number in range(2, max_runs + 1):
        logger.info(f" Run {run_number}/{max_runs} for '{project.name}'")
        rerun = self.run_on_project(project)
        if self._run_aborted(rerun, project, run_number, max_runs):
            return rerun

        record(rerun)
        stable, flaky_locations = classify_diagnostics(per_run_diagnostics)

        # Shortcut 2: every difference from the baseline is flaky.
        if diagnostic_keys(stable) == baseline_keys:
            logger.info(
                f" '{project.name}': all changes are flaky after "
                f"{len(per_run_diagnostics)} runs, short-circuiting"
            )
            break

    return self._build_multi_run_result(
        project,
        stable,
        flaky_locations,
        len(per_run_diagnostics),
        durations,
        exit_codes,
    )
Loading