Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ API Changes

* GITHUB#15295 : Switched to a fixed CFS threshold (Shubham Sharma)

* GITHUB#15897: Change TopGroups.merge() to accept a list of TopGroups (Binlong Gao)

* GITHUB#15627 : Deferred lambda in TermStates.java according to prefetch (Shubham Sharma)

New Features
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ public class BlockGroupingCollector extends SimpleCollector {
private LeafReaderContext currentReaderContext;

private int topGroupDoc;
private int topGroupDocIdx;
private int totalHitCount;
private int totalGroupCount;
private int docBase;
Expand All @@ -100,6 +101,7 @@ private static final class OneGroup {
LeafReaderContext readerContext;
// int groupOrd;
int topGroupDoc;
int topGroupDocIdx;
int[] docs;
float[] scores;
int count;
Expand Down Expand Up @@ -145,6 +147,7 @@ private void processGroup() throws IOException {
final OneGroup og = new OneGroup();
og.count = subDocUpto;
og.topGroupDoc = docBase + topGroupDoc;
og.topGroupDocIdx = topGroupDocIdx;
og.docs = pendingSubDocs;
pendingSubDocs = new int[10];
if (needsScores) {
Expand Down Expand Up @@ -177,6 +180,7 @@ private void processGroup() throws IOException {
assert og != null;
og.count = subDocUpto;
og.topGroupDoc = docBase + topGroupDoc;
og.topGroupDocIdx = topGroupDocIdx;
// Swap pending docs
final int[] savDocs = og.docs;
og.docs = pendingSubDocs;
Expand Down Expand Up @@ -276,8 +280,6 @@ public TopGroups<?> getTopGroups(

final Score fakeScorer = new Score();

float maxScore = Float.MIN_VALUE;

@SuppressWarnings({"unchecked", "rawtypes"})
final GroupDocs<Object>[] groups = new GroupDocs[groupQueue.size() - groupOffset];
for (int downTo = groupQueue.size() - groupOffset - 1; downTo >= 0; downTo--) {
Expand All @@ -302,14 +304,13 @@ public TopGroups<?> getTopGroups(
.newCollector(); // TODO: disable exact counts?
}

float groupMaxScore = needsScores ? Float.NEGATIVE_INFINITY : Float.NaN;
float groupMaxScore = needsScores ? og.scores[og.topGroupDocIdx] : Float.NaN;
LeafCollector leafCollector = collector.getLeafCollector(og.readerContext);
leafCollector.setScorer(fakeScorer);
for (int docIDX = 0; docIDX < og.count; docIDX++) {
final int doc = og.docs[docIDX];
if (needsScores) {
fakeScorer.score = og.scores[docIDX];
groupMaxScore = Math.max(groupMaxScore, fakeScorer.score);
}
leafCollector.collect(doc);
}
Expand All @@ -334,7 +335,6 @@ public TopGroups<?> getTopGroups(
topDocs.scoreDocs,
null,
groupSortValues);
maxScore = Math.max(maxScore, groupMaxScore);
}

/*
Expand All @@ -352,7 +352,7 @@ public TopGroups<?> getTopGroups(
totalHitCount,
totalGroupedHitCount,
groups,
maxScore),
groups[0].maxScore()),
totalGroupCount);
}

Expand Down Expand Up @@ -405,6 +405,7 @@ public void collect(int doc) throws IOException {
fc.setBottom(bottomSlot);
}
topGroupDoc = doc;
topGroupDocIdx = subDocUpto - 1;
} else {
// Compare to bottomSlot
for (int compIDX = 0; ; compIDX++) {
Expand Down Expand Up @@ -433,6 +434,7 @@ public void collect(int doc) throws IOException {
fc.setBottom(bottomSlot);
}
topGroupDoc = doc;
topGroupDocIdx = subDocUpto - 1;
}
} else {
// We're not sure this group will make it into the
Expand Down Expand Up @@ -463,6 +465,7 @@ public void collect(int doc) throws IOException {
fc.setBottom(bottomSlot);
}
topGroupDoc = doc;
topGroupDocIdx = subDocUpto - 1;
// System.out.println(" doc competes w/ top groups");
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.lucene.search.grouping;

import java.util.List;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
Expand Down Expand Up @@ -102,9 +103,9 @@ private static float nonNANmax(float a, float b) {
}

/**
* Merges an array of TopGroups, for example obtained from the second-pass collector across
* multiple shards. Each TopGroups must have been sorted by the same groupSort and docSort, and
* the top groups passed to all second-pass collectors must be the same.
* Merges a list of TopGroups, for example obtained from the second-pass collector across multiple
* shards. Each TopGroups must have been sorted by the same groupSort and docSort, and the top
* groups passed to all second-pass collectors must be the same.
*
* <p><b>NOTE</b>: We can't always compute an exact totalGroupCount. Documents belonging to a
* group may occur on more than one shard and thus the merged totalGroupCount can be higher than
Expand All @@ -114,7 +115,7 @@ private static float nonNANmax(float a, float b) {
* <p><b>NOTE</b>: the topDocs in each GroupDocs is actually an instance of TopDocsAndShards
*/
public static <T> TopGroups<T> merge(
TopGroups<T>[] shardGroups,
List<TopGroups<T>> shardGroups,
Sort groupSort,
Sort docSort,
int docOffset,
Expand All @@ -123,7 +124,7 @@ public static <T> TopGroups<T> merge(

// System.out.println("TopGroups.merge");

if (shardGroups.length == 0) {
if (shardGroups.isEmpty()) {
return null;
}

Expand All @@ -132,7 +133,9 @@ public static <T> TopGroups<T> merge(
// Optionally merge the totalGroupCount.
Integer totalGroupCount = null;

final int numGroups = shardGroups[0].groups.length;
TopGroups<T> firstShardGroup = shardGroups.getFirst();

final int numGroups = firstShardGroup.groups.length;
for (TopGroups<T> shard : shardGroups) {
if (numGroups != shard.groups.length) {
throw new IllegalArgumentException(
Expand All @@ -154,22 +157,22 @@ public static <T> TopGroups<T> merge(

final TopDocs[] shardTopDocs;
if (docSort.equals(Sort.RELEVANCE)) {
shardTopDocs = new TopDocs[shardGroups.length];
shardTopDocs = new TopDocs[shardGroups.size()];
} else {
shardTopDocs = new TopFieldDocs[shardGroups.length];
shardTopDocs = new TopFieldDocs[shardGroups.size()];
}
float totalMaxScore = Float.NaN;

for (int groupIDX = 0; groupIDX < numGroups; groupIDX++) {
final T groupValue = shardGroups[0].groups[groupIDX].groupValue();
final T groupValue = firstShardGroup.groups[groupIDX].groupValue();
// System.out.println(" merge groupValue=" + groupValue + " sortValues=" +
// Arrays.toString(shardGroups[0].groups[groupIDX].groupSortValues));
float maxScore = Float.NaN;
int totalHits = 0;
double scoreSum = 0.0;
for (int shardIDX = 0; shardIDX < shardGroups.length; shardIDX++) {
for (int shardIDX = 0; shardIDX < shardGroups.size(); shardIDX++) {
// System.out.println(" shard=" + shardIDX);
final TopGroups<T> shard = shardGroups[shardIDX];
final TopGroups<T> shard = shardGroups.get(shardIDX);
final GroupDocs<?> shardGroupDocs = shard.groups[groupIDX];
if (groupValue == null) {
if (shardGroupDocs.groupValue() != null) {
Expand Down Expand Up @@ -256,7 +259,7 @@ public static <T> TopGroups<T> merge(
new TotalHits(totalHits, TotalHits.Relation.EQUAL_TO),
mergedScoreDocs,
groupValue,
shardGroups[0].groups[groupIDX].groupSortValues());
firstShardGroup.groups[groupIDX].groupSortValues());
totalMaxScore = nonNANmax(totalMaxScore, maxScore);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,8 @@ public TopGroups<T> reduce(Collection<TopGroupsCollector<T>> collectors) throws
return null;
}

@SuppressWarnings({"unchecked", "rawtypes"})
TopGroups<T>[] shardGroups = (TopGroups<T>[]) shardGroupsList.toArray(TopGroups[]::new);
return TopGroups.merge(
shardGroups, groupSort, sortWithinGroup, 0, maxDocsPerGroup, scoreMergeMode);
shardGroupsList, groupSort, sortWithinGroup, 0, maxDocsPerGroup, scoreMergeMode);
}

public List<TopGroupsCollector<T>> getCollectors() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -330,16 +330,12 @@ public void testShardedGrouping() throws IOException {
TopGroups<T> singletonTopGroups =
control.getIndexSearcher().search(topLevel, topGroupsCollectorManager);

// TODO why does SearchGroup.merge() take a list but TopGroups.merge() take an array?
@SuppressWarnings("unchecked")
TopGroups<T>[] shardTopGroups = (TopGroups<T>[]) new TopGroups<?>[shards.length];
int j = 0;
List<TopGroups<T>> shardTopGroups = new ArrayList<>(shards.length);
for (Shard shard : shards) {
TopGroupsCollectorManager<T> scm =
new TopGroupsCollectorManager<>(
this::getGroupSelector, mergedGroups, sort, Sort.RELEVANCE, 5, true);
shardTopGroups[j] = shard.getIndexSearcher().search(topLevel, scm);
j++;
shardTopGroups.add(shard.getIndexSearcher().search(topLevel, scm));
}
TopGroups<T> mergedTopGroups =
TopGroups.merge(shardTopGroups, sort, Sort.RELEVANCE, 0, 5, TopGroups.ScoreMergeMode.None);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1528,8 +1528,7 @@ private TopGroups<BytesRef> searchShards(

if (mergedTopGroups != null) {
// Now 2nd pass:
@SuppressWarnings({"unchecked", "rawtypes"})
final TopGroups<BytesRef>[] shardTopGroups = new TopGroups[subSearchers.length];
final List<TopGroups<BytesRef>> shardTopGroups = new ArrayList<>(subSearchers.length);
for (int shardIDX = 0; shardIDX < subSearchers.length; shardIDX++) {
final TopGroupsCollector<?> secondPassCollector =
createSecondPassCollector(
Expand All @@ -1541,11 +1540,15 @@ private TopGroups<BytesRef> searchShards(
docOffset + topNDocs,
getMaxScores);
subSearchers[shardIDX].search(w, secondPassCollector);
shardTopGroups[shardIDX] = getTopGroups(secondPassCollector, 0);
shardTopGroups.add(getTopGroups(secondPassCollector, 0));
if (VERBOSE) {
System.out.println(
" " + shardTopGroups[shardIDX].groups.length + " shard[" + shardIDX + "] groups:");
for (GroupDocs<BytesRef> group : shardTopGroups[shardIDX].groups) {
" "
+ shardTopGroups.get(shardIDX).groups.length
+ " shard["
+ shardIDX
+ "] groups:");
for (GroupDocs<BytesRef> group : shardTopGroups.get(shardIDX).groups) {
System.out.println(
" ["
+ groupToString(group.groupValue())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.lucene.search.grouping;

import java.util.List;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TotalHits;
Expand Down Expand Up @@ -172,7 +173,7 @@ private void narrativeMergeTestImplementation(

final TopGroups<String> mergedTopGroups =
TopGroups.<String>merge(
combineTopGroups(shard1TopGroups, shard2TopGroups),
List.of(shard1TopGroups, shard2TopGroups),
sort /* groupSort */,
sort /* docSort */,
0 /* docOffset */,
Expand Down Expand Up @@ -254,13 +255,4 @@ private static GroupDocs<String>[] combineGroupDocs(
groups[1] = group1;
return groups;
}

private static TopGroups<String>[] combineTopGroups(
TopGroups<String> group0, TopGroups<String> group1) {
@SuppressWarnings({"unchecked", "rawtypes"})
final TopGroups<String>[] groups = new TopGroups[2];
groups[0] = group0;
groups[1] = group1;
return groups;
}
}
Loading