-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Serialize engine config in new pdsh benchmark CLI #22365
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 13 commits
904cfb3
9cb80fd
8f497d7
044eb4c
91faeb8
3e383fa
a0b7c7f
bfb6dba
b98a0b4
62cb242
b766790
e9433e0
0f2a37a
bdc907c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -80,6 +80,7 @@ | |
| from cudf_polars.experimental.explain import SerializablePlan | ||
| from cudf_polars.experimental.rapidsmpf.frontend.core import StreamingEngine | ||
| from cudf_polars.experimental.rapidsmpf.frontend.options import StreamingOptions | ||
|
|
||
| POLARS_VALIDATION_OPTIONS = { | ||
| "check_row_order": True, | ||
| "check_column_order": True, | ||
|
|
@@ -544,7 +545,7 @@ def from_args(cls, args: argparse.Namespace) -> RunConfig: | |
| duckdb_temp_dir=args.duckdb_temp_dir, | ||
| ) | ||
|
|
||
| def serialize(self, engine: pl.GPUEngine | None) -> dict: | ||
| def serialize(self, engine: StreamingEngine | None) -> dict: | ||
| """Serialize the run config to a dictionary.""" | ||
| opts = self.streaming_options | ||
| result: dict[str, Any] = { | ||
|
|
@@ -583,7 +584,21 @@ def serialize(self, engine: pl.GPUEngine | None) -> dict: | |
| } | ||
| if engine is not None: | ||
| config_options = ConfigOptions.from_polars_engine(engine) | ||
| result["config_options"] = dataclasses.asdict(config_options) | ||
| # Drop non-serializable contexts. | ||
| config_options = dataclasses.replace( | ||
| config_options, | ||
| executor=dataclasses.replace( | ||
| config_options.executor, | ||
| spmd_context=None, | ||
| ray_context=None, | ||
| dask_context=None, | ||
| ), | ||
| ) | ||
| rapidsmpf_options = engine.rapidsmpf_options.get_strings() | ||
| result["config_options"] = { | ||
| "config_options": dataclasses.asdict(config_options), | ||
| "rapidsmpf_options": rapidsmpf_options, | ||
| } | ||
| return result | ||
|
|
||
| def summarize(self) -> None: | ||
|
|
@@ -1060,6 +1075,7 @@ def _finalize_benchmark_run( | |
| run_config: RunConfig, | ||
| validation_failures: list[int], | ||
| query_failures: list[tuple[int, int]], | ||
| engine: StreamingEngine | None, | ||
| ) -> None: | ||
| """Summarize, serialize, and exit after a benchmark run.""" | ||
| if args.summarize: | ||
|
|
@@ -1074,7 +1090,7 @@ def _finalize_benchmark_run( | |
| ) | ||
| else: | ||
| print("✅ All validated queries passed.") | ||
| args.output.write(json.dumps(run_config.serialize(engine=None))) | ||
| args.output.write(json.dumps(run_config.serialize(engine=engine))) | ||
| args.output.write("\n") | ||
| sys.exit(1 if (query_failures or validation_failures) else 0) | ||
|
|
||
|
|
@@ -1133,7 +1149,9 @@ def _allgather_result(df: pl.DataFrame) -> pl.DataFrame: | |
| run_config = _consolidate_logs( | ||
| run_config, engine=engine, gather_client_logs=False | ||
| ) | ||
| _finalize_benchmark_run(args, run_config, validation_failures, query_failures) | ||
| _finalize_benchmark_run( | ||
| args, run_config, validation_failures, query_failures, engine=engine | ||
| ) | ||
|
|
||
|
|
||
| def run_polars_ray( | ||
|
|
@@ -1180,7 +1198,9 @@ def run_polars_ray( | |
| run_config = dataclasses.replace(run_config, records=dict(records), plans=plans) | ||
| run_config = _consolidate_logs(run_config, engine=engine) | ||
|
|
||
| _finalize_benchmark_run(args, run_config, validation_failures, query_failures) | ||
| _finalize_benchmark_run( | ||
| args, run_config, validation_failures, query_failures, engine=engine | ||
| ) | ||
|
Comment on lines
+1201
to
+1203
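There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Serialize the Ray engine before leaving the context manager.
This now runs after the `with RayEngine(...)` block has exited, so `run_config.serialize(engine=engine)` is handed an engine whose context has already been torn down. Move the `_finalize_benchmark_run(...)` call inside the `with` block so the engine is still alive when it is serialized.
Suggested fix:
with RayEngine(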
rapidsmpf_options=run_config.streaming_options.to_rapidsmpf_options(),
executor_options=executor_options,
engine_options=engine_options,
ray_init_options=ray_init_options,
) as engine:
run_config = dataclasses.replace(run_config, n_workers=engine.nranks)
records, plans, validation_failures, query_failures = _run_query_loop(
benchmark,
args,
run_config,
engine,
numeric_type,
date_type,
validation_files,
)
run_config = dataclasses.replace(run_config, records=dict(records), plans=plans)
run_config = _consolidate_logs(run_config, engine=engine)
-
- _finalize_benchmark_run(
- args, run_config, validation_failures, query_failures, engine=engine
- )
+ _finalize_benchmark_run(
+ args, run_config, validation_failures, query_failures, engine=engine
+ )🤖 Prompt for AI Agents |
||
|
|
||
|
|
||
| def run_polars_dask( | ||
|
|
@@ -1240,7 +1260,9 @@ def run_polars_dask( | |
| finally: | ||
| if dask_client is not None: | ||
| dask_client.close() | ||
| _finalize_benchmark_run(args, run_config, validation_failures, query_failures) | ||
| _finalize_benchmark_run( | ||
| args, run_config, validation_failures, query_failures, engine=engine | ||
| ) | ||
|
Comment on lines
+1263
to
+1265
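There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Finalize the Dask run while the engine is still alive.
By the time this call executes, the `with DaskEngine(...)` block has exited and the `finally` clause has already closed `dask_client`, so `run_config.serialize(engine=engine)` is handed an engine that has been shut down. Move the `_finalize_benchmark_run(...)` call inside the `with` block.
Suggested fix:
try: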
with DaskEngine(
rapidsmpf_options=run_config.streaming_options.to_rapidsmpf_options(),
executor_options=executor_options,
engine_options=engine_options,
dask_client=dask_client,
) as engine:
run_config = dataclasses.replace(run_config, n_workers=engine.nranks)
records, plans, validation_failures, query_failures = _run_query_loop(
benchmark,
args,
run_config,
engine,
numeric_type,
date_type,
validation_files,
)
run_config = dataclasses.replace(
run_config, records=dict(records), plans=plans
)
run_config = _consolidate_logs(run_config, engine)
+ _finalize_benchmark_run(
+ args, run_config, validation_failures, query_failures, engine=engine
+ )
finally:
if dask_client is not None:
dask_client.close()
- _finalize_benchmark_run(
- args, run_config, validation_failures, query_failures, engine=engine
- )🤖 Prompt for AI Agents |
||
|
|
||
|
|
||
| def setup_logging(query_id: int, iteration: int) -> None: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Keep serialized RapidsMPF options in sync with engine resets.
`serialize()` now trusts `engine.rapidsmpf_options`, but the frontend `_reset()` paths rebuild their live `Context` from new `Options` objects without updating that cached attribute. After a reset, this JSON will report the old settings instead of the ones actually running. Please either refresh `self.rapidsmpf_options` in each `_reset()` or serialize from the same freshly resolved options object used by the reset path.
🤖 Prompt for AI Agents