Skip to content

Commit 03d0ba7

Browse files
authored
Use exact search in VectorSimilarityQuery when traversalSimilarity is -infinity (#15836)
1 parent 1ccfbf8 commit 03d0ba7

3 files changed

Lines changed: 34 additions & 15 deletions

File tree

lucene/CHANGES.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,9 @@ Bug Fixes
282282
native code at all (wrong platform); disable native code on 32 bit platforms.
283283
(Uwe Schindler, thanks to Christian Ortlepp for finding the issue)
284284

285+
* GITHUB#15836: Use exact search in VectorSimilarityQuery when traversalSimilarity is -infinity to
286+
guarantee completeness. (Sagar Upadhyaya)
287+
285288
* GITHUB#12419, GITHUB#15119, GITHUB#15864: Fix circular dependency deadlock in
286289
TestSecrets initialization (Namgyu Kim, Uwe Schindler)
287290

lucene/core/src/java/org/apache/lucene/search/AbstractVectorSimilarityQuery.java

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,15 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti
122122

123123
// If there is no filter
124124
if (filterWeight == null) {
125-
// Return exhaustive results
125+
if (traversalSimilarity == Float.NEGATIVE_INFINITY) {
126+
// When traversalSimilarity is -∞, the intent is to find all vectors above
127+
// resultSimilarity. The approximate graph search may miss nodes,
128+
// so use exact search to guarantee completeness.
129+
AcceptDocs acceptDocs = AcceptDocs.fromLiveDocs(liveDocs, leafReader.maxDoc());
130+
return VectorSimilarityScorerSupplier.fromAcceptDocs(
131+
boost, createVectorScorer(context), acceptDocs.iterator(), resultSimilarity);
132+
}
133+
// Return results via approximate graph search
126134
TopDocs results =
127135
approximateSearch(
128136
context,
@@ -150,6 +158,13 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti
150158
return null;
151159
}
152160

161+
if (traversalSimilarity == Float.NEGATIVE_INFINITY) {
162+
// When traversalSimilarity is -∞, skip approximate search and go straight
163+
// to exact search over the filtered docs.
164+
return VectorSimilarityScorerSupplier.fromAcceptDocs(
165+
boost, createVectorScorer(context), acceptDocs.iterator(), resultSimilarity);
166+
}
167+
153168
// Perform an approximate search
154169
TopDocs results =
155170
approximateSearch(context, acceptDocs, cardinality, timeLimitingKnnCollectorManager);

lucene/core/src/test/org/apache/lucene/search/BaseVectorSimilarityQueryTestCase.java

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,14 @@ public void testFallbackToExact() throws IOException {
456456

457457
// Falls back to exact search
458458
expectThrows(UnsupportedOperationException.class, () -> searcher.count(query));
459+
460+
// When traversalSimilarity is -∞ without a filter, exact search should be used directly
461+
Query exactQuery =
462+
getThrowingVectorQuery(
463+
vectorField, queryVector, Float.NEGATIVE_INFINITY, Float.NEGATIVE_INFINITY, null);
464+
465+
// Goes straight to exact search (no approximate attempt) because traversalSimilarity is -∞
466+
expectThrows(UnsupportedOperationException.class, () -> searcher.count(exactQuery));
459467
}
460468
}
461469

@@ -510,15 +518,13 @@ public void testTimeout() throws IOException {
510518
// This query is cacheable, explicitly prevent it
511519
searcher.setQueryCache(null);
512520

521+
// Use Math.nextUp(Float.NEGATIVE_INFINITY) to ensure approximate graph search is used,
522+
// since Float.NEGATIVE_INFINITY now triggers exact search which bypasses graph traversal
523+
float nearNegInf = Math.nextUp(Float.NEGATIVE_INFINITY);
524+
513525
Query query =
514-
new CountingQuery(
515-
getVectorQuery(
516-
vectorField,
517-
queryVector,
518-
Float.NEGATIVE_INFINITY,
519-
Float.NEGATIVE_INFINITY,
520-
null));
521-
assertEquals(numDocs, searcher.count(query)); // Expect some results without timeout
526+
new CountingQuery(getVectorQuery(vectorField, queryVector, nearNegInf, nearNegInf, null));
527+
assertEquals(numDocs, searcher.count(query)); // Expect all results without timeout
522528

523529
searcher.setTimeout(() -> true); // Immediately timeout
524530
assertEquals(0, searcher.count(query)); // Expect no results with the timeout
@@ -534,12 +540,7 @@ public void testTimeout() throws IOException {
534540
Query filter = IntField.newSetQuery(idField, getFiltered(numFiltered));
535541
Query filteredQuery =
536542
new CountingQuery(
537-
getVectorQuery(
538-
vectorField,
539-
queryVector,
540-
Float.NEGATIVE_INFINITY,
541-
Float.NEGATIVE_INFINITY,
542-
filter));
543+
getVectorQuery(vectorField, queryVector, nearNegInf, nearNegInf, filter));
543544

544545
searcher.setTimeout(() -> false); // Set a timeout which is never met
545546
assertEquals(numFiltered, searcher.count(filteredQuery));

0 commit comments

Comments
 (0)