-
Notifications
You must be signed in to change notification settings - Fork 3.8k
[Fix](pyudf) clear stale UDAF state cache on drop #63062
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -455,6 +455,7 @@ class PythonUDFMeta: | |
|
|
||
| def __init__( | ||
| self, | ||
| function_id: int, | ||
| name: str, | ||
| symbol: str, | ||
| location: str, | ||
|
|
@@ -470,6 +471,7 @@ def __init__( | |
| Initialize Python UDF metadata. | ||
|
|
||
| Args: | ||
| function_id: FE catalog function id | ||
| name: UDF function name | ||
| symbol: Symbol to load (function name or module.function) | ||
| location: File path or directory containing the UDF | ||
|
|
@@ -481,6 +483,7 @@ def __init__( | |
| output_type: PyArrow data type for return value | ||
| client_type: 0 for UDF, 1 for UDAF, 2 for UDTF | ||
| """ | ||
| self.id = function_id | ||
| self.name = name | ||
| self.symbol = symbol | ||
| self.location = location | ||
|
|
@@ -508,7 +511,7 @@ def __str__(self) -> str: | |
| """Returns a string representation of the UDF metadata.""" | ||
| udf_load_type_str = "INLINE" if self.udf_load_type == 0 else "MODULE" | ||
| return ( | ||
| f"PythonUDFMeta(name={self.name}, symbol={self.symbol}, " | ||
| f"PythonUDFMeta(id={self.id}, name={self.name}, symbol={self.symbol}, " | ||
| f"location={self.location}, udf_load_type={udf_load_type_str}, runtime_version={self.runtime_version}, " | ||
| f"always_nullable={self.always_nullable}, client_type={self.client_type.name}, " | ||
| f"input_types={self.input_types}, output_type={self.output_type})" | ||
|
|
@@ -1575,8 +1578,9 @@ def __init__(self, location: str): | |
| location: Unix socket path for the server | ||
| """ | ||
| super().__init__(location) | ||
| # Use a dictionary to maintain separate state managers for each UDAF function | ||
| # Key: function signature (name + input_types), Value: UDAFStateManager instance | ||
| # Use a dictionary to maintain separate state managers for each UDAF function. | ||
| # Key includes function_id so DROP/CREATE with the same name and signature | ||
| # cannot reuse a class loaded from old inline code. | ||
| self.udaf_state_managers: Dict[str, UDAFStateManager] = {} | ||
| self.udaf_managers_lock = threading.Lock() | ||
|
|
||
|
|
@@ -1593,9 +1597,10 @@ def _get_udaf_state_manager( | |
| Returns: | ||
| UDAFStateManager instance for this specific UDAF | ||
| """ | ||
| # Create a unique key based on function name and argument types | ||
| type_names = [str(field.type) for field in python_udaf_meta.input_types] | ||
| func_key = f"{python_udaf_meta.name}({','.join(type_names)})" | ||
| func_key = ( | ||
| f"{python_udaf_meta.id}:{python_udaf_meta.name}({','.join(type_names)})" | ||
| ) | ||
|
|
||
| with self.udaf_managers_lock: | ||
| if func_key not in self.udaf_state_managers: | ||
|
|
@@ -1607,6 +1612,31 @@ def _get_udaf_state_manager( | |
|
|
||
| return self.udaf_state_managers[func_key] | ||
|
|
||
| def _clear_udaf_state_cache_by_function_id(self, function_id: int) -> int: | ||
| """ | ||
| Clear UDAF managers for a dropped function id. | ||
|
|
||
| DROP FUNCTION cache cleanup is asynchronous. The runtime key still includes | ||
| function_id for correctness, while this action releases old states and class | ||
| objects after the drop task reaches this Python process. | ||
| """ | ||
| prefix = f"{function_id}:" | ||
| cleared = 0 | ||
|
|
||
| with self.udaf_managers_lock: | ||
| keys_to_remove = [ | ||
| key for key in self.udaf_state_managers if key.startswith(prefix) | ||
| ] | ||
| for key in keys_to_remove: | ||
| manager = self.udaf_state_managers.pop(key) | ||
| manager.states.clear() | ||
| cleared += 1 | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Clearing |
||
|
|
||
| if cleared: | ||
| gc.collect() | ||
|
|
||
| return cleared | ||
|
|
||
| @staticmethod | ||
| def parse_python_udf_meta( | ||
| descriptor: flight.FlightDescriptor, | ||
|
|
@@ -1623,6 +1653,7 @@ def parse_python_udf_meta( | |
| return None | ||
|
|
||
| cmd_json = json.loads(descriptor.command) | ||
| function_id = cmd_json["id"] | ||
| name = cmd_json["name"] | ||
| symbol = cmd_json["symbol"] | ||
| location = cmd_json["location"] | ||
|
|
@@ -1648,6 +1679,7 @@ def parse_python_udf_meta( | |
| output_type = output_schema.field(0).type | ||
|
|
||
| python_udf_meta = PythonUDFMeta( | ||
| function_id=function_id, | ||
| name=name, | ||
| symbol=symbol, | ||
| location=location, | ||
|
|
@@ -2513,14 +2545,42 @@ def do_action( | |
| Supported actions: | ||
| - "clear_module_cache": Clear Python module cache for a specific location | ||
| Body: JSON with "location" field (the UDF cache directory path) | ||
| - "clear_udaf_state_cache": Clear UDAF runtime state for a dropped function id | ||
| Body: JSON with "function_id" field | ||
| """ | ||
| action_type = action.type | ||
|
|
||
| if action_type == "clear_module_cache": | ||
| yield from self._handle_clear_module_cache(action.body.to_pybytes()) | ||
| elif action_type == "clear_udaf_state_cache": | ||
| yield from self._handle_clear_udaf_state_cache(action.body.to_pybytes()) | ||
| else: | ||
| raise flight.FlightUnavailableError(f"Unknown action: {action_type}") | ||
|
|
||
| def _handle_clear_udaf_state_cache(self, body: bytes): | ||
| """ | ||
| Clear cached UDAF state managers for a dropped function id. | ||
| """ | ||
| try: | ||
| params = json.loads(body.decode("utf-8")) | ||
| function_id = int(params["function_id"]) | ||
|
|
||
| cleared_managers = self._clear_udaf_state_cache_by_function_id(function_id) | ||
|
|
||
| result = { | ||
| "success": True, | ||
| "cleared_managers": cleared_managers, | ||
| "function_id": function_id, | ||
| } | ||
| yield flight.Result(json.dumps(result).encode("utf-8")) | ||
|
|
||
| except Exception as e: | ||
| logging.error("clear_udaf_state_cache failed: %s", e) | ||
| yield flight.Result(json.dumps({ | ||
| "success": False, | ||
| "error": str(e) | ||
| }).encode("utf-8")) | ||
|
|
||
| def _handle_clear_module_cache(self, body: bytes): | ||
| """ | ||
| Clear Python module cache for a specific UDF location. | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This can crash the BE from a DROP FUNCTION cleanup task.
_broadcast_action_to_processes()returns an error whenever an active Python process failsDoAction/Next, andTHROW_IF_ERRORconverts that into adoris::Exception. The caller isclean_udf_cache_callback(), which is run byTaskWorkerPoolvia_callback(task)without any catch boundary, so an uncaught exception terminates the worker thread/process instead of reporting a best-effort cache cleanup failure. The existing module-cache cleanup path logs and continues; this should returnStatusand be handled with a warning (or use the existing cleanup-styleWARN_IF_ERROR) rather than throwing out of the task callback.