Skip to content

Commit e7576e6

Browse files
authored
Revert "Add a --dynamic-flaky flag, allowing ecosystem-analyzer to short-circuit if it detects that all diagnostic changes are flaky" (#47)
1 parent fabf0dc commit e7576e6

5 files changed

Lines changed: 66 additions & 801 deletions

File tree

src/ecosystem_analyzer/flaky.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,6 @@ def _location_key(diag: Diagnostic) -> tuple[str, int, int]:
2121
return (diag["path"], diag["line"], diag["column"])
2222

2323

24-
def diagnostic_keys(
25-
diagnostics: list[Diagnostic],
26-
) -> frozenset[tuple[str, int, int, str, str, str]]:
27-
"""Return the set of unique diagnostic keys for a list of diagnostics."""
28-
return frozenset(_diagnostic_key(d) for d in diagnostics)
29-
30-
3124
def classify_diagnostics(
3225
all_runs: list[list[Diagnostic]],
3326
) -> tuple[list[Diagnostic], list[FlakyLocation]]:

src/ecosystem_analyzer/main.py

Lines changed: 4 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -289,14 +289,6 @@ def analyze(
289289
type=click.Path(exists=True, dir_okay=False, readable=True, path_type=Path),
290290
required=False,
291291
)
292-
@click.option(
293-
"--dynamic-flaky",
294-
is_flag=True,
295-
help="Enable dynamic flaky detection: skip reruns when there are no changes "
296-
"relative to baseline, and short-circuit when all changes are flaky. "
297-
"--flaky-runs becomes the maximum number of runs. Note: flakiness is only "
298-
"detected for diagnostics that differ from baseline.",
299-
)
300292
@click.pass_context
301293
def diff(
302294
ctx,
@@ -313,7 +305,6 @@ def diff(
313305
num_shards: int | None,
314306
ty_binary_old: Path | None,
315307
ty_binary_new: Path | None,
316-
dynamic_flaky: bool,
317308
) -> None:
318309
"""
319310
Compare diagnostics between two commits.
@@ -337,10 +328,6 @@ def diff(
337328
):
338329
raise click.UsageError(f"--shard must be in range [0, {num_shards})")
339330

340-
if dynamic_flaky and ctx.obj["flaky_runs"] < 2:
341-
click.echo("Error: --dynamic-flaky requires --flaky-runs >= 2", err=True)
342-
ctx.exit(1)
343-
344331
project_names_old = Path(projects_old).read_text().splitlines()
345332
project_names_new = Path(projects_new).read_text().splitlines()
346333
flaky_project_names = (
@@ -372,17 +359,15 @@ def diff(
372359
)
373360

374361
# Build (or use pre-built) old ty — building overlaps with background
375-
# project installation. In dynamic mode, the old side runs once (no flaky
376-
# detection) and its output is passed as a baseline so the new side can
377-
# skip reruns for unchanged projects and short-circuit when all changes
378-
# are flaky.
362+
# project installation
379363
if ty_binary_old is not None:
380364
manager.use_prebuilt(ty_binary_old, old)
381365
else:
382366
manager.build(old)
383367

368+
# Run for old commit with old projects
384369
manager.activate(project_names_old)
385-
run_outputs_old = manager.run_active_projects(single_run=dynamic_flaky)
370+
run_outputs_old = manager.run_active_projects()
386371
manager.write_run_outputs(run_outputs_old, output_old)
387372

388373
# Build (or use pre-built) new ty — incremental build is near-instant
@@ -391,9 +376,7 @@ def diff(
391376
else:
392377
manager.build(new)
393378
manager.activate(project_names_new)
394-
run_outputs_new = manager.run_active_projects(
395-
baseline=run_outputs_old if dynamic_flaky else None
396-
)
379+
run_outputs_new = manager.run_active_projects()
397380
manager.write_run_outputs(run_outputs_new, output_new)
398381

399382

src/ecosystem_analyzer/manager.py

Lines changed: 15 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -164,56 +164,26 @@ def run_for_commit(self, commit: str | Commit) -> list[RunOutput]:
164164
self._ensure_installed()
165165
return self._run_active_projects()
166166

167-
def run_active_projects(
168-
self,
169-
*,
170-
baseline: list[RunOutput] | None = None,
171-
single_run: bool = False,
172-
) -> list[RunOutput]:
173-
"""Run the current ty build on active projects.
174-
175-
When *baseline* is provided, flaky projects use dynamic detection
176-
that can skip reruns or short-circuit early (see
177-
``Ty.run_on_project_dynamic``). Without a baseline the fixed
178-
``--flaky-runs`` behaviour is used.
179-
180-
When *single_run* is True, every project runs exactly once
181-
regardless of ``--flaky-runs`` — useful for establishing a
182-
baseline for dynamic detection.
183-
"""
167+
def run_active_projects(self) -> list[RunOutput]:
168+
"""Run the current ty build on active projects."""
184169
self._ensure_installed()
185-
return self._run_active_projects(baseline=baseline, single_run=single_run)
186-
187-
def _is_flaky_project(self, project: InstalledProject) -> bool:
188-
return self._flaky_runs > 1 and (
189-
not self._flaky_projects or project.name in self._flaky_projects
190-
)
191-
192-
def _run_active_projects(
193-
self,
194-
*,
195-
baseline: list[RunOutput] | None = None,
196-
single_run: bool = False,
197-
) -> list[RunOutput]:
198-
assert not (single_run and baseline is not None), (
199-
"single_run=True and baseline are mutually exclusive: a baseline "
200-
"is only meaningful for dynamic flaky detection"
201-
)
202-
baseline_by_project: dict[str, RunOutput] = (
203-
{o["project"]: o for o in baseline} if baseline is not None else {}
204-
)
170+
return self._run_active_projects()
205171

172+
def _run_active_projects(self) -> list[RunOutput]:
206173
run_outputs = []
207174
for project in self._active_projects:
208-
if not single_run and self._is_flaky_project(project):
209-
if baseline is not None:
210-
output = self._ty.run_on_project_dynamic(
211-
project,
212-
self._flaky_runs,
213-
baseline_by_project.get(project.name),
175+
n = (
176+
self._flaky_runs
177+
if (
178+
self._flaky_runs > 1
179+
and (
180+
not self._flaky_projects or project.name in self._flaky_projects
214181
)
215-
else:
216-
output = self._ty.run_on_project_multiple(project, self._flaky_runs)
182+
)
183+
else 1
184+
)
185+
if n > 1:
186+
output = self._ty.run_on_project_multiple(project, n)
217187
else:
218188
output = self._ty.run_on_project(project)
219189
run_outputs.append(output)

src/ecosystem_analyzer/ty.py

Lines changed: 47 additions & 157 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@
99

1010
from git import Commit, Repo
1111

12-
from .diagnostic import Diagnostic, DiagnosticsParser
13-
from .flaky import classify_diagnostics, diagnostic_keys
12+
from .diagnostic import DiagnosticsParser
13+
from .flaky import classify_diagnostics
1414
from .installed_project import InstalledProject
15-
from .run_output import FlakyLocation, RunOutput
15+
from .run_output import RunOutput
1616

1717
logger = logging.getLogger(__name__)
1818

@@ -169,16 +169,49 @@ def run_on_project(self, project: InstalledProject) -> RunOutput:
169169
output["panic_messages"] = panic_messages
170170
return output
171171

172-
def _build_multi_run_result(
173-
self,
174-
project: InstalledProject,
175-
stable: list[Diagnostic],
176-
flaky_locations: list[FlakyLocation],
177-
n: int,
178-
times: list[float],
179-
return_codes: list[int],
180-
) -> RunOutput:
181-
"""Build a RunOutput from the results of multiple ty runs."""
172+
def run_on_project_multiple(self, project: InstalledProject, n: int) -> RunOutput:
173+
"""Run ty on a project N times and classify diagnostics as stable/flaky.
174+
175+
Returns a single RunOutput where `diagnostics` contains only stable
176+
diagnostics and `flaky_diagnostics` contains grouped flaky ones.
177+
"""
178+
assert n >= 2, "Use run_on_project for single runs"
179+
logger.info(
180+
f"Running ty on project '{project.name}' {n} times for flaky detection"
181+
)
182+
183+
all_diagnostics: list[list] = []
184+
times: list[float] = []
185+
return_codes: list[int | None] = []
186+
187+
for i in range(n):
188+
logger.info(f" Run {i + 1}/{n} for '{project.name}'")
189+
output = self.run_on_project(project)
190+
191+
# If any run fails abnormally, bail out and return the failure
192+
if output.get("return_code") is not None and output["return_code"] not in (
193+
0,
194+
1,
195+
):
196+
logger.warning(
197+
f"Run {i + 1}/{n} for '{project.name}' failed with return code "
198+
f"{output['return_code']}; aborting flaky detection"
199+
)
200+
return output
201+
if output.get("return_code") is None:
202+
# Timeout
203+
logger.warning(
204+
f"Run {i + 1}/{n} for '{project.name}' timed out; aborting flaky detection"
205+
)
206+
return output
207+
208+
all_diagnostics.append(output["diagnostics"])
209+
if (time_s := output.get("time_s")) is not None:
210+
times.append(time_s)
211+
return_codes.append(output.get("return_code"))
212+
213+
stable, flaky_locations = classify_diagnostics(all_diagnostics)
214+
182215
# Use median time
183216
median_time: float | None = None
184217
if times:
@@ -187,7 +220,7 @@ def _build_multi_run_result(
187220
median_time = sorted_times[mid]
188221

189222
# Use most common return code
190-
rc_counts = Counter(return_codes)
223+
rc_counts = Counter(rc for rc in return_codes if rc is not None)
191224
most_common_rc = rc_counts.most_common(1)[0][0] if rc_counts else None
192225

193226
result = RunOutput({
@@ -211,146 +244,3 @@ def _build_multi_run_result(
211244
)
212245

213246
return result
214-
215-
@staticmethod
216-
def _run_aborted(
217-
output: RunOutput, project: InstalledProject, run_idx: int, total: int
218-
) -> bool:
219-
"""Log and return True if this run's exit status aborts flaky detection."""
220-
rc = output.get("return_code")
221-
if rc is None:
222-
logger.warning(
223-
f"Run {run_idx}/{total} for '{project.name}' timed out; "
224-
f"aborting flaky detection"
225-
)
226-
return True
227-
if rc not in (0, 1):
228-
logger.warning(
229-
f"Run {run_idx}/{total} for '{project.name}' failed with return "
230-
f"code {rc}; aborting flaky detection"
231-
)
232-
return True
233-
return False
234-
235-
def run_on_project_multiple(self, project: InstalledProject, n: int) -> RunOutput:
236-
"""Run ty on a project N times and classify diagnostics as stable/flaky.
237-
238-
Returns a single RunOutput where `diagnostics` contains only stable
239-
diagnostics and `flaky_diagnostics` contains grouped flaky ones.
240-
"""
241-
assert n >= 2, "Use run_on_project for single runs"
242-
logger.info(
243-
f"Running ty on project '{project.name}' {n} times for flaky detection"
244-
)
245-
246-
all_diagnostics: list[list] = []
247-
times: list[float] = []
248-
return_codes: list[int] = []
249-
250-
for i in range(n):
251-
logger.info(f" Run {i + 1}/{n} for '{project.name}'")
252-
output = self.run_on_project(project)
253-
if self._run_aborted(output, project, i + 1, n):
254-
return output
255-
256-
all_diagnostics.append(output["diagnostics"])
257-
if (time_s := output.get("time_s")) is not None:
258-
times.append(time_s)
259-
rc = output["return_code"]
260-
assert rc is not None
261-
return_codes.append(rc)
262-
263-
stable, flaky_locations = classify_diagnostics(all_diagnostics)
264-
return self._build_multi_run_result(
265-
project, stable, flaky_locations, n, times, return_codes
266-
)
267-
268-
def run_on_project_dynamic(
269-
self,
270-
project: InstalledProject,
271-
max_runs: int,
272-
baseline: RunOutput | None,
273-
) -> RunOutput:
274-
"""Run ty with dynamic flaky detection that can short-circuit.
275-
276-
Compared to ``run_on_project_multiple`` (which always runs exactly N
277-
times), this method can finish early:
278-
279-
1. If the first run produces identical diagnostics to *baseline*,
280-
all reruns are skipped — there are no changes to investigate.
281-
2. After each subsequent run (starting from run 2), if every
282-
diagnostic that *differs* from the baseline has been classified as
283-
flaky, the remaining runs are skipped.
284-
285-
*baseline* is typically the single-run output from the old commit in
286-
a ``diff`` invocation. When *baseline* is ``None`` (e.g. for a
287-
newly added project), the empty set is used — so optimisation 1
288-
fires only when the first run itself is empty, and optimisation 2
289-
fires when every diagnostic turns out to be flaky.
290-
291-
Note: flakiness in diagnostics shared with *baseline* is only
292-
missed when the first run happens to match *baseline* exactly —
293-
Optimisation 1 skips reruns and we never get a chance to observe
294-
the variation. Once reruns do happen, every diagnostic is
295-
classified via ``classify_diagnostics`` regardless of whether it
296-
is shared with *baseline*.
297-
"""
298-
assert max_runs >= 2, "Use run_on_project for single runs"
299-
logger.info(
300-
f"Running ty on project '{project.name}' with dynamic flaky detection "
301-
f"(max {max_runs} runs)"
302-
)
303-
304-
baseline_keys = (
305-
diagnostic_keys(baseline["diagnostics"]) if baseline else frozenset()
306-
)
307-
308-
logger.info(f" Run 1/{max_runs} for '{project.name}'")
309-
first_output = self.run_on_project(project)
310-
if self._run_aborted(first_output, project, 1, max_runs):
311-
return first_output
312-
313-
# Optimisation 1: no changes relative to baseline → skip reruns
314-
if diagnostic_keys(first_output["diagnostics"]) == baseline_keys:
315-
logger.info(f" '{project.name}': no changes vs baseline, skipping reruns")
316-
return first_output
317-
318-
all_diagnostics: list[list[Diagnostic]] = [first_output["diagnostics"]]
319-
times: list[float] = []
320-
if (t := first_output.get("time_s")) is not None:
321-
times.append(t)
322-
first_rc = first_output["return_code"]
323-
assert first_rc is not None
324-
return_codes: list[int] = [first_rc]
325-
326-
for i in range(1, max_runs):
327-
logger.info(f" Run {i + 1}/{max_runs} for '{project.name}'")
328-
output = self.run_on_project(project)
329-
if self._run_aborted(output, project, i + 1, max_runs):
330-
return output
331-
332-
all_diagnostics.append(output["diagnostics"])
333-
if (t := output.get("time_s")) is not None:
334-
times.append(t)
335-
rc = output["return_code"]
336-
assert rc is not None
337-
return_codes.append(rc)
338-
339-
stable, flaky_locations = classify_diagnostics(all_diagnostics)
340-
341-
# Optimisation 2: all changes vs baseline are flaky → short-circuit
342-
if diagnostic_keys(stable) == baseline_keys:
343-
logger.info(
344-
f" '{project.name}': all changes are flaky after "
345-
f"{len(all_diagnostics)} runs, short-circuiting"
346-
)
347-
break
348-
349-
return self._build_multi_run_result(
350-
project,
351-
stable,
352-
flaky_locations,
353-
len(all_diagnostics),
354-
times,
355-
return_codes,
356-
)

0 commit comments

Comments (0)