-
Notifications
You must be signed in to change notification settings - Fork 3.8k
[Fix](pyudf) clear stale UDAF state cache on drop #63062
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,21 +21,22 @@ | |
| #include <sys/socket.h> | ||
| #include <sys/un.h> | ||
|
|
||
| #include <boost/process.hpp> | ||
| #include <filesystem> | ||
| #include <fstream> | ||
| #include <future> | ||
| #include <string> | ||
| #include <vector> | ||
|
|
||
| #include "common/config.h" | ||
| #include "common/status.h" | ||
| #include "udf/python/python_env.h" | ||
| #include "udf/python/python_udf_client.h" | ||
| #include "udf/python/python_udf_meta.h" | ||
| #include "udf/python/python_udf_runtime.h" | ||
|
|
||
| namespace doris { | ||
|
|
||
| namespace fs = std::filesystem; | ||
| namespace bp = boost::process; | ||
|
|
||
| class PythonServerTest : public ::testing::Test { | ||
| protected: | ||
|
|
@@ -136,6 +137,13 @@ class PythonServerTest : public ::testing::Test { | |
| ofs << "# fake server\n"; | ||
| ofs.close(); | ||
| } | ||
|
|
||
| ProcessPtr create_sleep_process() { | ||
| bp::ipstream output_stream; | ||
| std::string sleep_path = fs::exists("/bin/sleep") ? "/bin/sleep" : "/usr/bin/sleep"; | ||
| bp::child child(sleep_path, "60", bp::std_out > output_stream, bp::std_err > bp::null); | ||
| return std::make_shared<PythonUDFProcess>(std::move(child), std::move(output_stream)); | ||
| } | ||
| }; | ||
|
|
||
| // ============================================================================ | ||
|
|
@@ -304,6 +312,39 @@ TEST_F(PythonServerTest, ShutdownAfterFailedInitializationDoesNotCrash) { | |
| EXPECT_NO_THROW(mgr.shutdown()); | ||
| } | ||
|
|
||
| TEST_F(PythonServerTest, ClearUdafStateCacheWithoutProcessesIsNoOp) { | ||
| PythonServerManager mgr; | ||
|
|
||
| EXPECT_NO_THROW(mgr.clear_udaf_state_cache(12345)); | ||
| } | ||
|
|
||
| TEST_F(PythonServerTest, ClearModuleCacheWithoutProcessesIsNoOp) { | ||
| PythonServerManager mgr; | ||
|
|
||
| auto status = mgr.clear_module_cache("/tmp/python_udf_cache"); | ||
| EXPECT_TRUE(status.ok()) << status.to_string(); | ||
| } | ||
|
|
||
| TEST_F(PythonServerTest, BroadcastActionWithInvalidProcessUriReturnsError) { | ||
| PythonServerManager mgr; | ||
| PythonVersion version("3.9.16", test_dir_, test_dir_ + "/bin/python3"); | ||
| ProcessPtr process = create_sleep_process(); | ||
| ASSERT_NE(process, nullptr); | ||
| ASSERT_TRUE(process->is_alive()); | ||
| process->_uri = "invalid-python-flight-uri"; | ||
|
|
||
| mgr.set_process_pool_for_test(version, {process}); | ||
| auto status = mgr._broadcast_action_to_processes( | ||
| "clear_udaf_state_cache", R"({"function_id": 12345})", "function_id=12345"); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test will not compile as written. |
||
|
|
||
| EXPECT_FALSE(status.ok()); | ||
| EXPECT_NE(status.to_string().find("clear_udaf_state_cache failed for function_id=12345"), | ||
| std::string::npos); | ||
| EXPECT_NE(status.to_string().find("success=0, failed=1"), std::string::npos); | ||
|
|
||
| mgr.shutdown(); | ||
| } | ||
|
|
||
| // ============================================================================ | ||
| // PythonServerManager::get_client() - client retrieval test | ||
| // ============================================================================ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Clearing
manager.statescan invalidate in-flight UDAF queries. The DROP cleanup task is submitted asynchronously after FE removes the function, while an already-started query can still have a Flight exchange using this sameUDAFStateManagerfor the old function id. If this action runs between that query's CREATE/ACCUMULATE and later SERIALIZE/FINALIZE/DESTROY calls, those operations will find theirplace_identries removed and fail withKeyError/failed UDAF results. Since addingfunction_idto the key already prevents a recreated function from reusing the old class, cleanup should detach the manager fromudaf_state_managerswithout clearing a manager that active exchanges may still reference, or add explicit lifecycle/ref-count coordination. Also consider returning the manager from_get_udaf_state_manager()while still under the lock so a concurrent pop cannot occur between lookup and return.