Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 27 additions & 5 deletions tests/unit/test_query_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,6 @@ def test_text_query_with_string_filter():
assert "AND" not in query_string_wildcard


@pytest.mark.skip("Test is flaking")
def test_text_query_word_weights():
# verify word weights get added into the raw Redis query syntax
query = TextQuery(
Expand All @@ -344,10 +343,33 @@ def test_text_query_word_weights():
text_weights={"alpha": 2, "delta": 0.555, "gamma": 0.95},
)

assert (
str(query)
== "@description:(query | string | alpha=>{$weight:2} | bravo | delta=>{$weight:0.555} | tango | alpha=>{$weight:2}) SCORER BM25STD WITHSCORES DIALECT 2 LIMIT 0 10"
)
# Check query components with structural guarantees,
# not exact token ordering (which is non-deterministic).
Comment on lines +346 to +347
Copy link

Copilot AI Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment about token ordering being non-deterministic is likely misleading here: TextQuery._tokenize_and_escape_query() preserves the input token order and only does in-place substitutions for weighted tokens. If the goal is to avoid brittleness, consider rewording this to the actual nondeterminism/flakiness source (or just say the test avoids exact full-string matching).

Suggested change
# Check query components with structural guarantees,
# not exact token ordering (which is non-deterministic).
# Check query components with structural guarantees rather than relying
# on exact full-string or token-order matching, to avoid brittle tests.

Copilot uses AI. Check for mistakes.
query_str = str(query)

# Description clause is properly delimited
assert "@description:(" in query_str
desc_start = query_str.index("@description:(")
desc_close = query_str.index(")", desc_start)
desc_clause = query_str[desc_start + len("@description:(") : desc_close]

# Weighted terms appear inside the description clause
assert "alpha=>{$weight:2}" in desc_clause
assert "delta=>{$weight:0.555}" in desc_clause

# alpha appears twice (once per occurrence in input text) — count
assert desc_clause.count("alpha") == 2
Comment thread
cursor[bot] marked this conversation as resolved.
Outdated

Copy link

Copilot AI Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The alpha frequency check can pass even if only one occurrence is weighted (e.g., one alpha=>{$weight:2} plus one plain alpha still makes desc_clause.count("alpha") == 2). To ensure both occurrences are properly weighted, count the full weighted token (e.g., occurrences of alpha=>{$weight:2}) and/or explicitly assert that an unweighted alpha token is absent after tokenization.

Suggested change
# alpha appears twice (once per occurrence in input text) — count
assert desc_clause.count("alpha") == 2
# alpha appears twice (once per occurrence in input text), and each is weighted
alpha_weighted = "alpha=>{$weight:2}"
assert desc_clause.count(alpha_weighted) == 2
# Ensure there are no unweighted 'alpha' tokens
idx = 0
while True:
idx = desc_clause.find("alpha", idx)
if idx == -1:
break
# Every occurrence of 'alpha' must be part of the weighted token
assert desc_clause.startswith(alpha_weighted, idx)
idx += len("alpha")

Copilot uses AI. Check for mistakes.
# Unweighted terms are present
for term in ["query", "string", "bravo", "tango"]:
assert term in desc_clause
Comment on lines +371 to +373
Copy link

Copilot AI Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

assert term in desc_clause checks substrings, not whole tokens, so it may pass even if the tokenization/output changes in a way that shouldn’t be acceptable (or fail for escaped tokens). Consider tokenizing desc_clause (split on | and strip) and asserting the expected token set is present.

Suggested change
# Unweighted terms are present
for term in ["query", "string", "bravo", "tango"]:
assert term in desc_clause
# Unweighted terms are present as standalone tokens within the clause
desc_tokens = {token.strip() for token in desc_clause.split("|") if token.strip()}
for term in ["query", "string", "bravo", "tango"]:
assert term in desc_tokens

Copilot uses AI. Check for mistakes.

# Post-query modifiers follow after the closing paren
suffix = query_str[desc_close + 1 :]
assert suffix.lstrip().startswith("SCORER BM25STD")
assert "WITHSCORES" in suffix
assert "DIALECT 2" in suffix
assert "LIMIT 0 10" in suffix
Copy link

Copilot AI Mar 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The suffix assertions only check for presence of WITHSCORES, DIALECT 2, and LIMIT 0 10, but not their relative order. Since the goal is to catch regressions in modifier ordering, consider asserting the index/position order (e.g., SCORER < WITHSCORES < DIALECT < LIMIT) rather than just substring inclusion.

Suggested change
assert suffix.lstrip().startswith("SCORER BM25STD")
assert "WITHSCORES" in suffix
assert "DIALECT 2" in suffix
assert "LIMIT 0 10" in suffix
suffix_stripped = suffix.lstrip()
assert suffix_stripped.startswith("SCORER BM25STD")
scorer_idx = suffix_stripped.index("SCORER BM25STD")
withscores_idx = suffix_stripped.index("WITHSCORES")
dialect_idx = suffix_stripped.index("DIALECT 2")
limit_idx = suffix_stripped.index("LIMIT 0 10")
# Ensure modifiers appear in the expected order:
# SCORER < WITHSCORES < DIALECT < LIMIT
assert scorer_idx <= withscores_idx <= dialect_idx <= limit_idx

Copilot uses AI. Check for mistakes.

# raise an error if weights are not positive floats
with pytest.raises(ValueError):
Expand Down
Loading