99
1010from git import Commit , Repo
1111
12- from .diagnostic import Diagnostic , DiagnosticsParser
13- from .flaky import classify_diagnostics , diagnostic_keys
12+ from .diagnostic import DiagnosticsParser
13+ from .flaky import classify_diagnostics
1414from .installed_project import InstalledProject
15- from .run_output import FlakyLocation , RunOutput
15+ from .run_output import RunOutput
1616
1717logger = logging .getLogger (__name__ )
1818
@@ -169,16 +169,49 @@ def run_on_project(self, project: InstalledProject) -> RunOutput:
169169 output ["panic_messages" ] = panic_messages
170170 return output
171171
172- def _build_multi_run_result (
173- self ,
174- project : InstalledProject ,
175- stable : list [Diagnostic ],
176- flaky_locations : list [FlakyLocation ],
177- n : int ,
178- times : list [float ],
179- return_codes : list [int ],
180- ) -> RunOutput :
181- """Build a RunOutput from the results of multiple ty runs."""
172+ def run_on_project_multiple (self , project : InstalledProject , n : int ) -> RunOutput :
173+ """Run ty on a project N times and classify diagnostics as stable/flaky.
174+
175+ Returns a single RunOutput where `diagnostics` contains only stable
176+ diagnostics and `flaky_diagnostics` contains grouped flaky ones.
177+ """
178+ assert n >= 2 , "Use run_on_project for single runs"
179+ logger .info (
180+ f"Running ty on project '{ project .name } ' { n } times for flaky detection"
181+ )
182+
183+ all_diagnostics : list [list ] = []
184+ times : list [float ] = []
185+ return_codes : list [int | None ] = []
186+
187+ for i in range (n ):
188+ logger .info (f" Run { i + 1 } /{ n } for '{ project .name } '" )
189+ output = self .run_on_project (project )
190+
191+ # If any run fails abnormally, bail out and return the failure
192+ if output .get ("return_code" ) is not None and output ["return_code" ] not in (
193+ 0 ,
194+ 1 ,
195+ ):
196+ logger .warning (
197+ f"Run { i + 1 } /{ n } for '{ project .name } ' failed with return code "
198+ f"{ output ['return_code' ]} ; aborting flaky detection"
199+ )
200+ return output
201+ if output .get ("return_code" ) is None :
202+ # Timeout
203+ logger .warning (
204+ f"Run { i + 1 } /{ n } for '{ project .name } ' timed out; aborting flaky detection"
205+ )
206+ return output
207+
208+ all_diagnostics .append (output ["diagnostics" ])
209+ if (time_s := output .get ("time_s" )) is not None :
210+ times .append (time_s )
211+ return_codes .append (output .get ("return_code" ))
212+
213+ stable , flaky_locations = classify_diagnostics (all_diagnostics )
214+
182215 # Use median time
183216 median_time : float | None = None
184217 if times :
@@ -187,7 +220,7 @@ def _build_multi_run_result(
187220 median_time = sorted_times [mid ]
188221
189222 # Use most common return code
190- rc_counts = Counter (return_codes )
223+ rc_counts = Counter (rc for rc in return_codes if rc is not None )
191224 most_common_rc = rc_counts .most_common (1 )[0 ][0 ] if rc_counts else None
192225
193226 result = RunOutput ({
@@ -211,146 +244,3 @@ def _build_multi_run_result(
211244 )
212245
213246 return result
214-
215- @staticmethod
216- def _run_aborted (
217- output : RunOutput , project : InstalledProject , run_idx : int , total : int
218- ) -> bool :
219- """Log and return True if this run's exit status aborts flaky detection."""
220- rc = output .get ("return_code" )
221- if rc is None :
222- logger .warning (
223- f"Run { run_idx } /{ total } for '{ project .name } ' timed out; "
224- f"aborting flaky detection"
225- )
226- return True
227- if rc not in (0 , 1 ):
228- logger .warning (
229- f"Run { run_idx } /{ total } for '{ project .name } ' failed with return "
230- f"code { rc } ; aborting flaky detection"
231- )
232- return True
233- return False
234-
235- def run_on_project_multiple (self , project : InstalledProject , n : int ) -> RunOutput :
236- """Run ty on a project N times and classify diagnostics as stable/flaky.
237-
238- Returns a single RunOutput where `diagnostics` contains only stable
239- diagnostics and `flaky_diagnostics` contains grouped flaky ones.
240- """
241- assert n >= 2 , "Use run_on_project for single runs"
242- logger .info (
243- f"Running ty on project '{ project .name } ' { n } times for flaky detection"
244- )
245-
246- all_diagnostics : list [list ] = []
247- times : list [float ] = []
248- return_codes : list [int ] = []
249-
250- for i in range (n ):
251- logger .info (f" Run { i + 1 } /{ n } for '{ project .name } '" )
252- output = self .run_on_project (project )
253- if self ._run_aborted (output , project , i + 1 , n ):
254- return output
255-
256- all_diagnostics .append (output ["diagnostics" ])
257- if (time_s := output .get ("time_s" )) is not None :
258- times .append (time_s )
259- rc = output ["return_code" ]
260- assert rc is not None
261- return_codes .append (rc )
262-
263- stable , flaky_locations = classify_diagnostics (all_diagnostics )
264- return self ._build_multi_run_result (
265- project , stable , flaky_locations , n , times , return_codes
266- )
267-
    def run_on_project_dynamic(
        self,
        project: InstalledProject,
        max_runs: int,
        baseline: RunOutput | None,
    ) -> RunOutput:
        """Run ty with dynamic flaky detection that can short-circuit.

        Compared to ``run_on_project_multiple`` (which always runs exactly N
        times), this method can finish early:

        1. If the first run produces identical diagnostics to *baseline*,
           all reruns are skipped — there are no changes to investigate.
        2. After each subsequent run (starting from run 2), if every
           diagnostic that *differs* from the baseline has been classified as
           flaky, the remaining runs are skipped.

        *baseline* is typically the single-run output from the old commit in
        a ``diff`` invocation. When *baseline* is ``None`` (e.g. for a
        newly added project), the empty set is used — so optimisation 1
        fires only when the first run itself is empty, and optimisation 2
        fires when every diagnostic turns out to be flaky.

        Note: flakiness in diagnostics shared with *baseline* is only
        missed when the first run happens to match *baseline* exactly —
        Optimisation 1 skips reruns and we never get a chance to observe
        the variation. Once reruns do happen, every diagnostic is
        classified via ``classify_diagnostics`` regardless of whether it
        is shared with *baseline*.
        """
        assert max_runs >= 2, "Use run_on_project for single runs"
        logger.info(
            f"Running ty on project '{project.name}' with dynamic flaky detection "
            f"(max {max_runs} runs)"
        )

        # Keys of the baseline diagnostics; empty when there is no baseline.
        baseline_keys = (
            diagnostic_keys(baseline["diagnostics"]) if baseline else frozenset()
        )

        logger.info(f" Run 1/{max_runs} for '{project.name}'")
        first_output = self.run_on_project(project)
        # Timeout or abnormal exit on the first run: return its output as-is.
        if self._run_aborted(first_output, project, 1, max_runs):
            return first_output

        # Optimisation 1: no changes relative to baseline → skip reruns
        if diagnostic_keys(first_output["diagnostics"]) == baseline_keys:
            logger.info(f" '{project.name}': no changes vs baseline, skipping reruns")
            return first_output

        all_diagnostics: list[list[Diagnostic]] = [first_output["diagnostics"]]
        times: list[float] = []
        if (t := first_output.get("time_s")) is not None:
            times.append(t)
        first_rc = first_output["return_code"]
        assert first_rc is not None  # _run_aborted() already rejected None
        return_codes: list[int] = [first_rc]

        for i in range(1, max_runs):
            logger.info(f" Run {i + 1}/{max_runs} for '{project.name}'")
            output = self.run_on_project(project)
            if self._run_aborted(output, project, i + 1, max_runs):
                return output

            all_diagnostics.append(output["diagnostics"])
            if (t := output.get("time_s")) is not None:
                times.append(t)
            rc = output["return_code"]
            assert rc is not None  # _run_aborted() already rejected None
            return_codes.append(rc)

            # Re-classify after every run so we can short-circuit as soon as
            # all non-baseline diagnostics are known to be flaky.
            stable, flaky_locations = classify_diagnostics(all_diagnostics)

            # Optimisation 2: all changes vs baseline are flaky → short-circuit
            if diagnostic_keys(stable) == baseline_keys:
                logger.info(
                    f" '{project.name}': all changes are flaky after "
                    f"{len(all_diagnostics)} runs, short-circuiting"
                )
                break

        # NOTE: `stable`/`flaky_locations` deliberately leak out of the loop
        # body — the loop runs at least once because max_runs >= 2, so they
        # always hold the classification over every completed run.
        return self._build_multi_run_result(
            project,
            stable,
            flaky_locations,
            len(all_diagnostics),
            times,
            return_codes,
        )
0 commit comments