Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions cpp/include/cudf/strings/replace_re.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
Expand Down Expand Up @@ -53,6 +53,8 @@ std::unique_ptr<column> replace_re(
* @brief For each string, replaces any character sequence matching the given patterns
* with the corresponding string in the `replacements` column.
*
* @deprecated in 26.06. To be removed in a future release.
*
* Any null string entries return corresponding null output column entries.
*
* See the @ref md_regex "Regex Features" page for details on patterns supported by this API.
Expand All @@ -65,7 +67,7 @@ std::unique_ptr<column> replace_re(
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New strings column
*/
std::unique_ptr<column> replace_re(
[[deprecated]] std::unique_ptr<column> replace_re(
strings_column_view const& input,
std::vector<std::string> const& patterns,
strings_column_view const& replacements,
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/regex/regexec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ void reprog_device::set_working_memory(void* buffer, int32_t thread_count, int32
{
_buffer = buffer;
_thread_count = thread_count;
_max_insts = _max_insts > 0 ? _max_insts : _insts_count;
_max_insts = max_insts > 0 ? max_insts : _insts_count;
}

int32_t reprog_device::compute_shared_memory_size() const
Expand Down
7 changes: 5 additions & 2 deletions cpp/src/strings/replace/multi_re.cu
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ std::unique_ptr<column> replace_re(strings_column_view const& input,
});

auto d_max_prog = **max_prog;
auto const max_insts = d_max_prog.insts_counts();
auto const buffer_size = d_max_prog.working_memory_size(input.size());
auto d_buffer = rmm::device_buffer(buffer_size, stream);

Expand All @@ -166,8 +167,8 @@ std::unique_ptr<column> replace_re(strings_column_view const& input,
std::transform(h_progs.begin(),
h_progs.end(),
std::back_inserter(progs),
[d_buffer = d_buffer.data(), size = input.size()](auto& prog) {
prog->set_working_memory(d_buffer, size);
[d_buffer = d_buffer.data(), size = input.size(), max_insts](auto& prog) {
prog->set_working_memory(d_buffer, size, max_insts);
return *prog;
});
auto d_progs =
Expand All @@ -184,6 +185,8 @@ std::unique_ptr<column> replace_re(strings_column_view const& input,
stream,
mr);

stream.synchronize();

return make_strings_column(input.size(),
std::move(offsets_column),
chars.release(),
Expand Down
20 changes: 1 addition & 19 deletions cpp/tests/streams/strings/replace_test.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

Expand All @@ -25,17 +25,6 @@ TEST_F(StringsReplaceTest, Replace)
cudf::strings::replace(view, target, repl, -1, cudf::test::get_default_stream());
cudf::strings::replace_multiple(view, view, view, cudf::test::get_default_stream());
cudf::strings::replace_slice(view, repl, 1, 2, cudf::test::get_default_stream());

auto const pattern = std::string("[a-z]");
auto const prog = cudf::strings::regex_program::create(pattern);
cudf::strings::replace_re(view, *prog, repl, 1, cudf::test::get_default_stream());

cudf::test::strings_column_wrapper repls({"1", "a", " "});
cudf::strings::replace_re(view,
{pattern, pattern, pattern},
cudf::strings_column_view(repls),
cudf::strings::regex_flags::DEFAULT,
cudf::test::get_default_stream());
}

TEST_F(StringsReplaceTest, ReplaceRegex)
Expand All @@ -47,13 +36,6 @@ TEST_F(StringsReplaceTest, ReplaceRegex)
auto const pattern = std::string("[a-z]");
auto const prog = cudf::strings::regex_program::create(pattern);
cudf::strings::replace_re(view, *prog, repl, 1, cudf::test::get_default_stream());

cudf::test::strings_column_wrapper repls({"1", "a", " "});
cudf::strings::replace_re(view,
{pattern, pattern, pattern},
cudf::strings_column_view(repls),
cudf::strings::regex_flags::DEFAULT,
cudf::test::get_default_stream());
}

TEST_F(StringsReplaceTest, ReplaceRegexBackref)
Expand Down
54 changes: 3 additions & 51 deletions cpp/tests/strings/replace_regex_tests.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

Expand Down Expand Up @@ -51,38 +51,6 @@ TEST_F(StringsReplaceRegexTest, ReplaceRegexTest)
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
}

// Exercises the multi-pattern overload of cudf::strings::replace_re: every
// match of patterns[i] is replaced with the i-th entry of the replacements
// column, and the result is compared against a hand-written expected column.
TEST_F(StringsReplaceRegexTest, ReplaceMultiRegexTest)
{
  // Inputs mix ASCII and accented (multi-byte UTF-8) text, plus an empty
  // string and a null entry (nullptr) as edge cases.
  std::vector<char const*> h_strings{"the quick brown fox jumps over the lazy dog",
                                     "the fat cat lays next to the other accénted cat",
                                     "a slow moving turtlé cannot catch the bird",
                                     "which can be composéd together to form a more complete",
                                     "thé result does not include the value in the sum in",
                                     "",
                                     nullptr};

  cudf::test::strings_column_wrapper strings(
    h_strings.begin(), h_strings.end(), cudf::test::iterators::nulls_from_nullptrs(h_strings));
  auto strings_view = cudf::strings_column_view(strings);

  // Replacing a whole word with the empty string keeps the spaces on both
  // sides of it, so each removed interior "the" leaves two adjacent spaces
  // and a removed leading "the" leaves one leading space.
  // "thé" is not matched by `\bthe\b` and therefore stays unchanged.
  std::vector<char const*> h_expected{" quick brown fox jumps over  lazy dog",
                                      " fat cat lays next to  other accénted cat",
                                      "** slow moving turtlé cannot catch  bird",
                                      "which can be composéd together to form ** more complete",
                                      "thé result does not include  value N  sum N",
                                      "",
                                      nullptr};

  // Whole-word patterns; replacements are paired with patterns by position:
  //   "the" -> "" (removed), "in" -> "N", "a" -> "**"
  std::vector<std::string> patterns{"\\bthe\\b", "\\bin\\b", "\\ba\\b"};
  std::vector<std::string> h_repls{"", "N", "**"};
  cudf::test::strings_column_wrapper repls(h_repls.begin(), h_repls.end());
  auto repls_view = cudf::strings_column_view(repls);
  auto results    = cudf::strings::replace_re(strings_view, patterns, repls_view);

  // The expected column carries a null in the same row as the null input.
  cudf::test::strings_column_wrapper expected(
    h_expected.begin(), h_expected.end(), cudf::test::iterators::nulls_from_nullptrs(h_expected));
  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
}

TEST_F(StringsReplaceRegexTest, InvalidRegex)
{
// these are quantifiers that do not have a preceding character/class
Expand All @@ -103,13 +71,8 @@ TEST_F(StringsReplaceRegexTest, WithEmptyPattern)

auto empty_pattern = std::string("");
auto repl = cudf::string_scalar("bbb");
std::vector<std::string> patterns({empty_pattern});
cudf::test::strings_column_wrapper repls({"bbb"});
auto repls_view = cudf::strings_column_view(repls);
auto results = cudf::strings::replace_re(strings_view, patterns, repls_view);
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, strings);
auto prog = cudf::strings::regex_program::create(empty_pattern);
results = cudf::strings::replace_re(strings_view, *prog, repl);
auto prog = cudf::strings::regex_program::create(empty_pattern);
auto results = cudf::strings::replace_re(strings_view, *prog, repl);
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, strings);
}

Expand Down Expand Up @@ -237,17 +200,6 @@ TEST_F(StringsReplaceRegexTest, Multiline)
results = cudf::strings::replace_re(sv, *prog, repl);
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);

// multi-replace
std::vector<std::string> patterns({"aba$", "^aba"});
cudf::test::strings_column_wrapper repls({">", "<"});
results = cudf::strings::replace_re(sv, patterns, cudf::strings_column_view(repls), multiline);
cudf::test::strings_column_wrapper multi_expected_ml({"bcd\n>\nefg", ">\n< abab\n>", ">"});
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, multi_expected_ml);

results = cudf::strings::replace_re(sv, patterns, cudf::strings_column_view(repls));
cudf::test::strings_column_wrapper multi_expected({"bcd\naba\nefg", "<\naba abab\n>", ">"});
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, multi_expected);

// backref-replace
auto repl_template = std::string("[\\1]");
pattern = std::string("(^aba)");
Expand Down
5 changes: 5 additions & 0 deletions python/cudf/cudf/core/accessors/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -1027,6 +1027,11 @@ def replace(
)

if regex:
warnings.warn(
"regex support for multiple replace patterns "
"will be removed in a future version.",
FutureWarning,
)
result = self._column.replace_re(
list(pat),
as_column(repl, dtype=CUDF_STRING_DTYPE), # type: ignore[arg-type]
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/tests/series/accessors/test_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -1535,15 +1535,15 @@ def test_string_replace_multi():
ps = pd.Series(["hello", "goodbye"])
gs = cudf.Series(["hello", "goodbye"])
expect = ps.str.replace("e", "E").str.replace("o", "O")
got = gs.str.replace(["e", "o"], ["E", "O"])
got = gs.str.replace(["e", "o"], ["E", "O"], regex=False)

assert_eq(expect, got)

ps = pd.Series(["foo", "fuz", np.nan])
gs = cudf.Series(ps)

expect = ps.str.replace("f.", "ba", regex=True)
got = gs.str.replace(["f."], ["ba"], regex=True)
got = gs.str.replace("f.", "ba", regex=True)
assert_eq(expect, got)

ps = pd.Series(["f.o", "fuz", np.nan])
Expand Down
Loading