diff --git a/cpp/include/cudf/strings/replace_re.hpp b/cpp/include/cudf/strings/replace_re.hpp index 74a413cc286..c83c65c42ab 100644 --- a/cpp/include/cudf/strings/replace_re.hpp +++ b/cpp/include/cudf/strings/replace_re.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #pragma once @@ -53,6 +53,8 @@ std::unique_ptr replace_re( * @brief For each string, replaces any character sequence matching the given patterns * with the corresponding string in the `replacements` column. * + * @deprecated in 26.06. To be removed in a future release. + * * Any null string entries return corresponding null output column entries. * * See the @ref md_regex "Regex Features" page for details on patterns supported by this API. @@ -65,7 +67,7 @@ std::unique_ptr replace_re( * @param mr Device memory resource used to allocate the returned column's device memory * @return New strings column */ -std::unique_ptr replace_re( +[[deprecated]] std::unique_ptr replace_re( strings_column_view const& input, std::vector const& patterns, strings_column_view const& replacements, diff --git a/cpp/src/strings/regex/regexec.cpp b/cpp/src/strings/regex/regexec.cpp index 4ae4a238e2a..9e94c044bb1 100644 --- a/cpp/src/strings/regex/regexec.cpp +++ b/cpp/src/strings/regex/regexec.cpp @@ -145,7 +145,7 @@ void reprog_device::set_working_memory(void* buffer, int32_t thread_count, int32 { _buffer = buffer; _thread_count = thread_count; - _max_insts = _max_insts > 0 ? _max_insts : _insts_count; + _max_insts = max_insts > 0 ? max_insts : _insts_count; } int32_t reprog_device::compute_shared_memory_size() const diff --git a/cpp/src/strings/replace/multi_re.cu b/cpp/src/strings/replace/multi_re.cu index 587a8970dbc..b163c8b4fcf 100644 --- a/cpp/src/strings/replace/multi_re.cu +++ b/cpp/src/strings/replace/multi_re.cu @@ -158,6 +158,7 @@ std::unique_ptr replace_re(strings_column_view const& input, }); auto d_max_prog = **max_prog; + auto const max_insts = d_max_prog.insts_counts(); auto const buffer_size = d_max_prog.working_memory_size(input.size()); auto d_buffer = rmm::device_buffer(buffer_size, stream); @@ -166,8 +167,8 @@ std::unique_ptr replace_re(strings_column_view const& input, std::transform(h_progs.begin(), h_progs.end(), std::back_inserter(progs), - [d_buffer = d_buffer.data(), size = input.size()](auto& prog) { - prog->set_working_memory(d_buffer, size); + [d_buffer = d_buffer.data(), size = input.size(), max_insts](auto& prog) { + prog->set_working_memory(d_buffer, size, max_insts); return *prog; }); auto d_progs = @@ -184,6 +185,8 @@ std::unique_ptr replace_re(strings_column_view const& input, stream, mr); + stream.synchronize(); + return make_strings_column(input.size(), std::move(offsets_column), chars.release(), diff --git a/cpp/tests/streams/strings/replace_test.cpp b/cpp/tests/streams/strings/replace_test.cpp index 7f6cf338416..fccdf60b49c 100644 --- a/cpp/tests/streams/strings/replace_test.cpp +++ b/cpp/tests/streams/strings/replace_test.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -25,17 +25,6 @@ TEST_F(StringsReplaceTest, Replace) cudf::strings::replace(view, target, repl, -1, cudf::test::get_default_stream()); cudf::strings::replace_multiple(view, view, view, cudf::test::get_default_stream()); cudf::strings::replace_slice(view, repl, 1, 2, cudf::test::get_default_stream()); - - auto const pattern = std::string("[a-z]"); - auto const prog = cudf::strings::regex_program::create(pattern); - cudf::strings::replace_re(view, *prog, repl, 1, cudf::test::get_default_stream()); - - cudf::test::strings_column_wrapper repls({"1", "a", " "}); - cudf::strings::replace_re(view, - {pattern, pattern, pattern}, - cudf::strings_column_view(repls), - cudf::strings::regex_flags::DEFAULT, - cudf::test::get_default_stream()); } TEST_F(StringsReplaceTest, ReplaceRegex) @@ -47,13 +36,6 @@ TEST_F(StringsReplaceTest, ReplaceRegex) auto const pattern = std::string("[a-z]"); auto const prog = cudf::strings::regex_program::create(pattern); cudf::strings::replace_re(view, *prog, repl, 1, cudf::test::get_default_stream()); - - cudf::test::strings_column_wrapper repls({"1", "a", " "}); - cudf::strings::replace_re(view, - {pattern, pattern, pattern}, - cudf::strings_column_view(repls), - cudf::strings::regex_flags::DEFAULT, - cudf::test::get_default_stream()); } TEST_F(StringsReplaceTest, ReplaceRegexBackref) diff --git a/cpp/tests/strings/replace_regex_tests.cpp b/cpp/tests/strings/replace_regex_tests.cpp index 9530760f9ca..c9b6aa10568 100644 --- a/cpp/tests/strings/replace_regex_tests.cpp +++ b/cpp/tests/strings/replace_regex_tests.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -51,38 +51,6 @@ TEST_F(StringsReplaceRegexTest, ReplaceRegexTest) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); } -TEST_F(StringsReplaceRegexTest, ReplaceMultiRegexTest) -{ - std::vector h_strings{"the quick brown fox jumps over the lazy dog", - "the fat cat lays next to the other accénted cat", - "a slow moving turtlé cannot catch the bird", - "which can be composéd together to form a more complete", - "thé result does not include the value in the sum in", - "", - nullptr}; - - cudf::test::strings_column_wrapper strings( - h_strings.begin(), h_strings.end(), cudf::test::iterators::nulls_from_nullptrs(h_strings)); - auto strings_view = cudf::strings_column_view(strings); - - std::vector h_expected{" quick brown fox jumps over lazy dog", - " fat cat lays next to other accénted cat", - "** slow moving turtlé cannot catch bird", - "which can be composéd together to form ** more complete", - "thé result does not include value N sum N", - "", - nullptr}; - - std::vector patterns{"\\bthe\\b", "\\bin\\b", "\\ba\\b"}; - std::vector h_repls{"", "N", "**"}; - cudf::test::strings_column_wrapper repls(h_repls.begin(), h_repls.end()); - auto repls_view = cudf::strings_column_view(repls); - auto results = cudf::strings::replace_re(strings_view, patterns, repls_view); - cudf::test::strings_column_wrapper expected( - h_expected.begin(), h_expected.end(), cudf::test::iterators::nulls_from_nullptrs(h_expected)); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); -} - TEST_F(StringsReplaceRegexTest, InvalidRegex) { // these are quantifiers that do not have a preceding character/class @@ -103,13 +71,8 @@ TEST_F(StringsReplaceRegexTest, WithEmptyPattern) auto empty_pattern = std::string(""); auto repl = cudf::string_scalar("bbb"); - std::vector patterns({empty_pattern}); - cudf::test::strings_column_wrapper repls({"bbb"}); - auto repls_view = cudf::strings_column_view(repls); - auto results = cudf::strings::replace_re(strings_view, patterns, repls_view); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, strings); - auto prog = cudf::strings::regex_program::create(empty_pattern); - results = cudf::strings::replace_re(strings_view, *prog, repl); + auto prog = cudf::strings::regex_program::create(empty_pattern); + auto results = cudf::strings::replace_re(strings_view, *prog, repl); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, strings); } @@ -237,17 +200,6 @@ TEST_F(StringsReplaceRegexTest, Multiline) results = cudf::strings::replace_re(sv, *prog, repl); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); - // multi-replace - std::vector patterns({"aba$", "^aba"}); - cudf::test::strings_column_wrapper repls({">", "<"}); - results = cudf::strings::replace_re(sv, patterns, cudf::strings_column_view(repls), multiline); - cudf::test::strings_column_wrapper multi_expected_ml({"bcd\n>\nefg", ">\n< abab\n>", ">"}); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, multi_expected_ml); - - results = cudf::strings::replace_re(sv, patterns, cudf::strings_column_view(repls)); - cudf::test::strings_column_wrapper multi_expected({"bcd\naba\nefg", "<\naba abab\n>", ">"}); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, multi_expected); - // backref-replace auto repl_template = std::string("[\\1]"); pattern = std::string("(^aba)"); diff --git a/python/cudf/cudf/core/accessors/string.py b/python/cudf/cudf/core/accessors/string.py index cc23771cd7f..e29c63695e4 100644 --- a/python/cudf/cudf/core/accessors/string.py +++ b/python/cudf/cudf/core/accessors/string.py @@ -1027,6 +1027,11 @@ def replace( ) if regex: + warnings.warn( + "regex support for multiple replace patterns " + "will be removed in a future version.", + FutureWarning, + ) result = self._column.replace_re( list(pat), as_column(repl, dtype=CUDF_STRING_DTYPE), # type: ignore[arg-type] diff --git a/python/cudf/cudf/tests/series/accessors/test_str.py b/python/cudf/cudf/tests/series/accessors/test_str.py index 969ca8a4c19..e7f1a25b32a 100644 --- a/python/cudf/cudf/tests/series/accessors/test_str.py +++ b/python/cudf/cudf/tests/series/accessors/test_str.py @@ -1535,7 +1535,7 @@ def test_string_replace_multi(): ps = pd.Series(["hello", "goodbye"]) gs = cudf.Series(["hello", "goodbye"]) expect = ps.str.replace("e", "E").str.replace("o", "O") - got = gs.str.replace(["e", "o"], ["E", "O"]) + got = gs.str.replace(["e", "o"], ["E", "O"], regex=False) assert_eq(expect, got) @@ -1543,7 +1543,7 @@ def test_string_replace_multi(): gs = cudf.Series(ps) expect = ps.str.replace("f.", "ba", regex=True) - got = gs.str.replace(["f."], ["ba"], regex=True) + got = gs.str.replace("f.", "ba", regex=True) assert_eq(expect, got) ps = pd.Series(["f.o", "fuz", np.nan])