Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ API Changes

* GITHUB#15295 : Switched to a fixed CFS threshold (Shubham Sharma)

* GITHUB#15486: Change TopGroups.merge() to accept a list of TopGroups (Binlong Gao)

New Features
---------------------
* GITHUB#15505: Upgrade snowball to 2d2e312df56f2ede014a4ffb3e91e6dea43c24be. New stemmer: PolishStemmer (and
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.lucene.search.grouping;

import java.util.List;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
Expand Down Expand Up @@ -102,9 +103,9 @@ private static float nonNANmax(float a, float b) {
}

/**
* Merges an array of TopGroups, for example obtained from the second-pass collector across
* multiple shards. Each TopGroups must have been sorted by the same groupSort and docSort, and
* the top groups passed to all second-pass collectors must be the same.
* Merges a list of TopGroups, for example obtained from the second-pass collector across multiple
* shards. Each TopGroups must have been sorted by the same groupSort and docSort, and the top
* groups passed to all second-pass collectors must be the same.
*
* <p><b>NOTE</b>: We can't always compute an exact totalGroupCount. Documents belonging to a
* group may occur on more than one shard and thus the merged totalGroupCount can be higher than
Expand All @@ -114,7 +115,7 @@ private static float nonNANmax(float a, float b) {
* <p><b>NOTE</b>: the topDocs in each GroupDocs is actually an instance of TopDocsAndShards
*/
public static <T> TopGroups<T> merge(
TopGroups<T>[] shardGroups,
List<TopGroups<T>> shardGroups,
Sort groupSort,
Sort docSort,
int docOffset,
Expand All @@ -123,7 +124,7 @@ public static <T> TopGroups<T> merge(

// System.out.println("TopGroups.merge");

if (shardGroups.length == 0) {
if (shardGroups.isEmpty()) {
return null;
}

Expand All @@ -132,7 +133,9 @@ public static <T> TopGroups<T> merge(
// Optionally merge the totalGroupCount.
Integer totalGroupCount = null;

final int numGroups = shardGroups[0].groups.length;
TopGroups<T> firstShardGroup = shardGroups.getFirst();

final int numGroups = firstShardGroup.groups.length;
for (TopGroups<T> shard : shardGroups) {
if (numGroups != shard.groups.length) {
throw new IllegalArgumentException(
Expand All @@ -154,22 +157,22 @@ public static <T> TopGroups<T> merge(

final TopDocs[] shardTopDocs;
if (docSort.equals(Sort.RELEVANCE)) {
shardTopDocs = new TopDocs[shardGroups.length];
shardTopDocs = new TopDocs[shardGroups.size()];
} else {
shardTopDocs = new TopFieldDocs[shardGroups.length];
shardTopDocs = new TopFieldDocs[shardGroups.size()];
}
float totalMaxScore = Float.NaN;

for (int groupIDX = 0; groupIDX < numGroups; groupIDX++) {
final T groupValue = shardGroups[0].groups[groupIDX].groupValue();
final T groupValue = firstShardGroup.groups[groupIDX].groupValue();
// System.out.println(" merge groupValue=" + groupValue + " sortValues=" +
// Arrays.toString(shardGroups[0].groups[groupIDX].groupSortValues));
float maxScore = Float.NaN;
int totalHits = 0;
double scoreSum = 0.0;
for (int shardIDX = 0; shardIDX < shardGroups.length; shardIDX++) {
for (int shardIDX = 0; shardIDX < shardGroups.size(); shardIDX++) {
// System.out.println(" shard=" + shardIDX);
final TopGroups<T> shard = shardGroups[shardIDX];
final TopGroups<T> shard = shardGroups.get(shardIDX);
final GroupDocs<?> shardGroupDocs = shard.groups[groupIDX];
if (groupValue == null) {
if (shardGroupDocs.groupValue() != null) {
Expand Down Expand Up @@ -256,7 +259,7 @@ public static <T> TopGroups<T> merge(
new TotalHits(totalHits, TotalHits.Relation.EQUAL_TO),
mergedScoreDocs,
groupValue,
shardGroups[0].groups[groupIDX].groupSortValues());
firstShardGroup.groups[groupIDX].groupSortValues());
totalMaxScore = nonNANmax(totalMaxScore, maxScore);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,8 @@ public TopGroups<T> reduce(Collection<TopGroupsCollector<T>> collectors) throws
return null;
}

@SuppressWarnings({"unchecked", "rawtypes"})
TopGroups<T>[] shardGroups = (TopGroups<T>[]) shardGroupsList.toArray(TopGroups[]::new);
return TopGroups.merge(
shardGroups, groupSort, sortWithinGroup, 0, maxDocsPerGroup, scoreMergeMode);
shardGroupsList, groupSort, sortWithinGroup, 0, maxDocsPerGroup, scoreMergeMode);
}

public List<TopGroupsCollector<T>> getCollectors() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -330,16 +330,12 @@ public void testShardedGrouping() throws IOException {
TopGroups<T> singletonTopGroups =
control.getIndexSearcher().search(topLevel, topGroupsCollectorManager);

// TODO why does SearchGroup.merge() take a list but TopGroups.merge() take an array?
@SuppressWarnings("unchecked")
TopGroups<T>[] shardTopGroups = (TopGroups<T>[]) new TopGroups<?>[shards.length];
int j = 0;
List<TopGroups<T>> shardTopGroups = new ArrayList<>();
Comment thread
gaobinlong marked this conversation as resolved.
Outdated
for (Shard shard : shards) {
TopGroupsCollectorManager<T> scm =
new TopGroupsCollectorManager<>(
this::getGroupSelector, mergedGroups, sort, Sort.RELEVANCE, 5, true);
shardTopGroups[j] = shard.getIndexSearcher().search(topLevel, scm);
j++;
shardTopGroups.add(shard.getIndexSearcher().search(topLevel, scm));
}
TopGroups<T> mergedTopGroups =
TopGroups.merge(shardTopGroups, sort, Sort.RELEVANCE, 0, 5, TopGroups.ScoreMergeMode.None);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1528,8 +1528,7 @@ private TopGroups<BytesRef> searchShards(

if (mergedTopGroups != null) {
// Now 2nd pass:
@SuppressWarnings({"unchecked", "rawtypes"})
final TopGroups<BytesRef>[] shardTopGroups = new TopGroups[subSearchers.length];
final List<TopGroups<BytesRef>> shardTopGroups = new ArrayList<>();
Comment thread
gaobinlong marked this conversation as resolved.
Outdated
for (int shardIDX = 0; shardIDX < subSearchers.length; shardIDX++) {
final TopGroupsCollector<?> secondPassCollector =
createSecondPassCollector(
Expand All @@ -1541,11 +1540,15 @@ private TopGroups<BytesRef> searchShards(
docOffset + topNDocs,
getMaxScores);
subSearchers[shardIDX].search(w, secondPassCollector);
shardTopGroups[shardIDX] = getTopGroups(secondPassCollector, 0);
shardTopGroups.add(getTopGroups(secondPassCollector, 0));
if (VERBOSE) {
System.out.println(
" " + shardTopGroups[shardIDX].groups.length + " shard[" + shardIDX + "] groups:");
for (GroupDocs<BytesRef> group : shardTopGroups[shardIDX].groups) {
" "
+ shardTopGroups.get(shardIDX).groups.length
+ " shard["
+ shardIDX
+ "] groups:");
for (GroupDocs<BytesRef> group : shardTopGroups.get(shardIDX).groups) {
System.out.println(
" ["
+ groupToString(group.groupValue())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.lucene.search.grouping;

import java.util.List;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TotalHits;
Expand Down Expand Up @@ -172,7 +173,7 @@ private void narrativeMergeTestImplementation(

final TopGroups<String> mergedTopGroups =
TopGroups.<String>merge(
combineTopGroups(shard1TopGroups, shard2TopGroups),
List.of(shard1TopGroups, shard2TopGroups),
sort /* groupSort */,
sort /* docSort */,
0 /* docOffset */,
Expand Down Expand Up @@ -254,13 +255,4 @@ private static GroupDocs<String>[] combineGroupDocs(
groups[1] = group1;
return groups;
}

private static TopGroups<String>[] combineTopGroups(
TopGroups<String> group0, TopGroups<String> group1) {
@SuppressWarnings({"unchecked", "rawtypes"})
final TopGroups<String>[] groups = new TopGroups[2];
groups[0] = group0;
groups[1] = group1;
return groups;
}
}
Loading