-
Notifications
You must be signed in to change notification settings - Fork 87
fix: stabilize test_text_query_word_weights with structural assertions #541
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -335,7 +335,6 @@ def test_text_query_with_string_filter(): | |||||||||||||||||||||||||||||||
| assert "AND" not in query_string_wildcard | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| @pytest.mark.skip("Test is flaking") | ||||||||||||||||||||||||||||||||
| def test_text_query_word_weights(): | ||||||||||||||||||||||||||||||||
| # verify word weights get added into the raw Redis query syntax | ||||||||||||||||||||||||||||||||
| query = TextQuery( | ||||||||||||||||||||||||||||||||
|
|
@@ -344,10 +343,33 @@ def test_text_query_word_weights(): | |||||||||||||||||||||||||||||||
| text_weights={"alpha": 2, "delta": 0.555, "gamma": 0.95}, | ||||||||||||||||||||||||||||||||
| ) | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| assert ( | ||||||||||||||||||||||||||||||||
| str(query) | ||||||||||||||||||||||||||||||||
| == "@description:(query | string | alpha=>{$weight:2} | bravo | delta=>{$weight:0.555} | tango | alpha=>{$weight:2}) SCORER BM25STD WITHSCORES DIALECT 2 LIMIT 0 10" | ||||||||||||||||||||||||||||||||
| ) | ||||||||||||||||||||||||||||||||
| # Check query components with structural guarantees, | ||||||||||||||||||||||||||||||||
| # not exact token ordering (which is non-deterministic). | ||||||||||||||||||||||||||||||||
| query_str = str(query) | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| # Description clause is properly delimited | ||||||||||||||||||||||||||||||||
| assert "@description:(" in query_str | ||||||||||||||||||||||||||||||||
| desc_start = query_str.index("@description:(") | ||||||||||||||||||||||||||||||||
| desc_close = query_str.index(")", desc_start) | ||||||||||||||||||||||||||||||||
| desc_clause = query_str[desc_start + len("@description:(") : desc_close] | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| # Weighted terms appear inside the description clause | ||||||||||||||||||||||||||||||||
| assert "alpha=>{$weight:2}" in desc_clause | ||||||||||||||||||||||||||||||||
| assert "delta=>{$weight:0.555}" in desc_clause | ||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
| # alpha appears twice (once per occurrence in input text) — count | ||||||||||||||||||||||||||||||||
| assert desc_clause.count("alpha") == 2 | ||||||||||||||||||||||||||||||||
|
cursor[bot] marked this conversation as resolved.
Outdated
|
||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||
| # alpha appears twice (once per occurrence in input text) — count | |
| assert desc_clause.count("alpha") == 2 | |
| # alpha appears twice (once per occurrence in input text), and each is weighted | |
| alpha_weighted = "alpha=>{$weight:2}" | |
| assert desc_clause.count(alpha_weighted) == 2 | |
| # Ensure there are no unweighted 'alpha' tokens | |
| idx = 0 | |
| while True: | |
| idx = desc_clause.find("alpha", idx) | |
| if idx == -1: | |
| break | |
| # Every occurrence of 'alpha' must be part of the weighted token | |
| assert desc_clause.startswith(alpha_weighted, idx) | |
| idx += len("alpha") |
Copilot
AI
Mar 25, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
assert term in desc_clause checks substrings, not whole tokens, so it may pass even if the tokenization/output changes in a way that shouldn’t be acceptable (or fail for escaped tokens). Consider tokenizing desc_clause (split on | and strip) and asserting the expected token set is present.
| # Unweighted terms are present | |
| for term in ["query", "string", "bravo", "tango"]: | |
| assert term in desc_clause | |
| # Unweighted terms are present as standalone tokens within the clause | |
| desc_tokens = {token.strip() for token in desc_clause.split("|") if token.strip()} | |
| for term in ["query", "string", "bravo", "tango"]: | |
| assert term in desc_tokens |
Copilot
AI
Mar 25, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The suffix assertions only check for presence of WITHSCORES, DIALECT 2, and LIMIT 0 10, but not their relative order. Since the goal is to catch regressions in modifier ordering, consider asserting the index/position order (e.g., SCORER < WITHSCORES < DIALECT < LIMIT) rather than just substring inclusion.
| assert suffix.lstrip().startswith("SCORER BM25STD") | |
| assert "WITHSCORES" in suffix | |
| assert "DIALECT 2" in suffix | |
| assert "LIMIT 0 10" in suffix | |
| suffix_stripped = suffix.lstrip() | |
| assert suffix_stripped.startswith("SCORER BM25STD") | |
| scorer_idx = suffix_stripped.index("SCORER BM25STD") | |
| withscores_idx = suffix_stripped.index("WITHSCORES") | |
| dialect_idx = suffix_stripped.index("DIALECT 2") | |
| limit_idx = suffix_stripped.index("LIMIT 0 10") | |
| # Ensure modifiers appear in the expected order: | |
| # SCORER < WITHSCORES < DIALECT < LIMIT | |
| assert scorer_idx <= withscores_idx <= dialect_idx <= limit_idx |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The comment about token ordering being non-deterministic is likely misleading here:
TextQuery._tokenize_and_escape_query()preserves the input token order and only does in-place substitutions for weighted tokens. If the goal is to avoid brittleness, consider rewording this to the actual nondeterminism/flakiness source (or just say the test avoids exact full-string matching).