Skip to content

Commit 2aa8442

Browse files
authored
Merge branch 'apache:main' into optimize-maxscore-eliminate-cardinality
2 parents 3186e2f + 34c9495 commit 2aa8442

34 files changed

Lines changed: 681 additions & 296 deletions

File tree

lucene/CHANGES.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,12 @@ API Changes
8282
To force a classic DFA execution, use Operations.determinize() and AutomatonQuery.
8383
(Dimitris Rempapis)
8484

85+
* GITHUB#15961: Removed WildcardQuery constructor overloads that exposed determinization controls
86+
(determinizeWorkLimit). Determinization is now done only as-needed.
87+
To force a classic DFA execution, use Operations.determinize() and AutomatonQuery.
88+
Also removed QueryParserBase.setDeterminizeWorkLimit/getDeterminizeWorkLimit, which are
89+
no longer needed. (Dimitris Rempapis)
90+
8591
New Features
8692
---------------------
8793
* GITHUB#15505: Upgrade snowball to 2d2e312df56f2ede014a4ffb3e91e6dea43c24be. New stemmer: PolishStemmer (and
@@ -298,6 +304,8 @@ Improvements
298304

299305
Optimizations
300306
---------------------
307+
* GITHUB#15861: Optimize PhraseScorer by short-circuiting non-competitive documents in TOP_SCORES mode. (Prithvi S)
308+
301309
* GITHUB#15637: Lazily allocate ByteArrayDataInputs in SegmentTermsEnumFrame#15637 (Misha Dmitriev)
302310

303311
* GITHUB#15718 Skip per-document stored field reads on sorted indices when no stored fields are present (Francisco Fernández Castaño)
@@ -389,6 +397,8 @@ Other
389397

390398
* GITHUB#15951: Fix WindowsFS onClose race condition (Szymon Bialkowski)
391399

400+
* GITHUB#15960: Move parent field from DWPT to IndexingChain. (Tim Brooks)
401+
392402
======================= Lucene 10.4.0 =======================
393403

394404
API Changes

lucene/MIGRATE.md

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,9 +159,9 @@ iwc.getConfig().getCodec().compoundFormat().getShouldUseCompoundFile();
159159
iwc.getConfig().getCodec().compoundFormat().getMaxCFSSegmentSizeMB();
160160
```
161161

162-
### Implicit determinization removed from RegexpQuery
162+
### Implicit determinization removed from RegexpQuery and WildcardQuery
163163

164-
Previously, RegexpQuery would use DFA execution by default, even if it might be inefficient.
164+
Previously, RegexpQuery and WildcardQuery would use DFA execution by default, even if it might be inefficient.
165165

166166
RegexpQuery will now only [determinize as-needed](https://swtch.com/~rsc/regexp/regexp1.html). This might be
167167
faster or slower depending upon your queries.
@@ -174,6 +174,18 @@ Automaton dfa = Operations.determinize(new RegExp(re).toAutomaton(), 10000);
174174
Query query = new AutomatonQuery(new Term("myfield", re), dfa);
175175
```
176176

177+
Similarly for WildcardQuery, the `determinizeWorkLimit` parameter has been removed from `WildcardQuery` constructors and from
178+
`WildcardQuery.toAutomaton`. `QueryParserBase.setDeterminizeWorkLimit` and `getDeterminizeWorkLimit`
179+
have also been removed.
180+
181+
To force the previous behavior, use:
182+
183+
```java
184+
String pattern = "foo*bar";
185+
Automaton dfa = Operations.determinize(WildcardQuery.toAutomaton(new Term("myfield", pattern)), 10000);
186+
Query query = new AutomatonQuery(new Term("myfield", pattern), dfa);
187+
```
188+
177189
### CollectionStatistics and TermStatistics have been renamed to FieldStats and TermStats (GITHUB#15929)
178190

179191
Corresponding methods and parameters have been renamed accordingly.
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.lucene.benchmark.jmh;
19+
20+
import java.io.IOException;
21+
import java.util.concurrent.TimeUnit;
22+
import org.apache.lucene.document.Document;
23+
import org.apache.lucene.document.Field;
24+
import org.apache.lucene.document.TextField;
25+
import org.apache.lucene.index.DirectoryReader;
26+
import org.apache.lucene.index.IndexReader;
27+
import org.apache.lucene.index.IndexWriter;
28+
import org.apache.lucene.index.IndexWriterConfig;
29+
import org.apache.lucene.search.IndexSearcher;
30+
import org.apache.lucene.search.PhraseQuery;
31+
import org.apache.lucene.search.Sort;
32+
import org.apache.lucene.search.TopDocs;
33+
import org.apache.lucene.search.TopFieldCollectorManager;
34+
import org.apache.lucene.search.TopScoreDocCollectorManager;
35+
import org.apache.lucene.store.Directory;
36+
import org.apache.lucene.store.MMapDirectory;
37+
import org.openjdk.jmh.annotations.Benchmark;
38+
import org.openjdk.jmh.annotations.BenchmarkMode;
39+
import org.openjdk.jmh.annotations.Fork;
40+
import org.openjdk.jmh.annotations.Level;
41+
import org.openjdk.jmh.annotations.Measurement;
42+
import org.openjdk.jmh.annotations.Mode;
43+
import org.openjdk.jmh.annotations.OutputTimeUnit;
44+
import org.openjdk.jmh.annotations.Scope;
45+
import org.openjdk.jmh.annotations.Setup;
46+
import org.openjdk.jmh.annotations.State;
47+
import org.openjdk.jmh.annotations.TearDown;
48+
import org.openjdk.jmh.annotations.Warmup;
49+
50+
@BenchmarkMode(Mode.Throughput)
51+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
52+
@State(Scope.Benchmark)
53+
@Warmup(iterations = 5, time = 3)
54+
@Measurement(iterations = 10, time = 5)
55+
@Fork(
56+
value = 3,
57+
jvmArgsAppend = {"-Xmx2g", "-Xms2g"})
58+
public class PhraseScorerBenchmark {
59+
60+
private static final int NUM_HITS = 10;
61+
62+
private Directory dir;
63+
private IndexReader reader;
64+
private IndexSearcher searcher;
65+
private PhraseQuery exactQuery;
66+
private PhraseQuery sloppyQuery;
67+
68+
@Setup(Level.Trial)
69+
public void setUp() throws IOException {
70+
dir = new MMapDirectory(java.nio.file.Files.createTempDirectory("benchmark"));
71+
IndexWriterConfig config = new IndexWriterConfig();
72+
try (IndexWriter writer = new IndexWriter(dir, config)) {
73+
// Create a corpus where most docs contain the individual query terms but only a small
74+
// fraction contain the actual phrase. This maximises the number of documents whose maxFreq
75+
// upper-bound check allows short-circuiting.
76+
for (int i = 0; i < 1_000_000; i++) {
77+
Document doc = new Document();
78+
if (i % 1000 == 0) {
79+
// 0.1% of docs: exact phrase match
80+
doc.add(
81+
new TextField(
82+
"text", "the quick brown fox jumped over the lazy dog", Field.Store.NO));
83+
} else if (i % 2 == 0) {
84+
// 50% of docs: terms present but not as a phrase (high freq, no match)
85+
StringBuilder sb = new StringBuilder("quick ");
86+
for (int j = 0; j < 100; j++) sb.append("padding ");
87+
sb.append("fox");
88+
doc.add(new TextField("text", sb.toString(), Field.Store.NO));
89+
} else {
90+
// 50% of docs: no query terms at all
91+
doc.add(new TextField("text", "unrelated words", Field.Store.NO));
92+
}
93+
writer.addDocument(doc);
94+
}
95+
}
96+
reader = DirectoryReader.open(dir);
97+
searcher = new IndexSearcher(reader);
98+
exactQuery = new PhraseQuery("text", "quick", "brown", "fox");
99+
sloppyQuery = new PhraseQuery(10, "text", "quick", "fox");
100+
}
101+
102+
@TearDown(Level.Trial)
103+
public void tearDown() throws IOException {
104+
reader.close();
105+
dir.close();
106+
}
107+
108+
@Benchmark
109+
public TopDocs benchmarkExactTopScores() throws IOException {
110+
return searcher.search(exactQuery, 10);
111+
}
112+
113+
@Benchmark
114+
public TopDocs benchmarkSloppyTopScores() throws IOException {
115+
return searcher.search(sloppyQuery, 10);
116+
}
117+
118+
@Benchmark
119+
public TopDocs benchmarkExactComplete() throws IOException {
120+
return searcher.search(
121+
exactQuery, new TopScoreDocCollectorManager(NUM_HITS, Integer.MAX_VALUE));
122+
}
123+
124+
@Benchmark
125+
public TopDocs benchmarkExactCompleteNoScores() throws IOException {
126+
return searcher.search(
127+
exactQuery, new TopFieldCollectorManager(Sort.INDEXORDER, NUM_HITS, Integer.MAX_VALUE));
128+
}
129+
130+
@Benchmark
131+
public TopDocs benchmarkSloppyComplete() throws IOException {
132+
return searcher.search(
133+
sloppyQuery, new TopScoreDocCollectorManager(NUM_HITS, Integer.MAX_VALUE));
134+
}
135+
136+
@Benchmark
137+
public TopDocs benchmarkSloppyCompleteNoScores() throws IOException {
138+
return searcher.search(
139+
sloppyQuery, new TopFieldCollectorManager(Sort.INDEXORDER, NUM_HITS, Integer.MAX_VALUE));
140+
}
141+
}

lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiQueryMaker.java

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131
import org.apache.lucene.search.MultiTermQuery;
3232
import org.apache.lucene.search.Query;
3333
import org.apache.lucene.search.WildcardQuery;
34-
import org.apache.lucene.util.automaton.Operations;
3534

3635
/**
3736
* A QueryMaker that uses common and uncommon actual Wikipedia queries for searching the English
@@ -132,10 +131,7 @@ public class EnwikiQueryMaker extends AbstractQueryMaker {
132131

133132
private static Query[] getPrebuiltQueries(String field) {
134133
WildcardQuery wcq =
135-
new WildcardQuery(
136-
new Term(field, "fo*"),
137-
Operations.DEFAULT_DETERMINIZE_WORK_LIMIT,
138-
MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE);
134+
new WildcardQuery(new Term(field, "fo*"), MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE);
139135
// be wary of unanalyzed text
140136
return new Query[] {
141137
new SpanFirstQuery(new SpanTermQuery(new Term(field, "ford")), 5),

lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java

Lines changed: 7 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
import java.util.Iterator;
2525
import java.util.List;
2626
import java.util.Locale;
27-
import java.util.NoSuchElementException;
2827
import java.util.Objects;
2928
import java.util.Set;
3029
import java.util.concurrent.TimeUnit;
@@ -33,7 +32,6 @@
3332
import java.util.concurrent.locks.Lock;
3433
import java.util.concurrent.locks.ReentrantLock;
3534
import org.apache.lucene.codecs.Codec;
36-
import org.apache.lucene.document.NumericDocValuesField;
3735
import org.apache.lucene.index.DocumentsWriterDeleteQueue.DeleteSlice;
3836
import org.apache.lucene.search.DocIdSetIterator;
3937
import org.apache.lucene.store.Directory;
@@ -140,7 +138,7 @@ void abort() throws IOException {
140138
private int[] deleteDocIDs = new int[0];
141139
private int numDeletedDocIds = 0;
142140
private final int indexMajorVersionCreated;
143-
private final IndexingChain.ReservedField<NumericDocValuesField> parentField;
141+
private final boolean hasParentField;
144142

145143
DocumentsWriterPerThread(
146144
int indexMajorVersionCreated,
@@ -197,13 +195,7 @@ void abort() throws IOException {
197195
fieldInfos,
198196
indexWriterConfig,
199197
this::onAbortingException);
200-
if (indexWriterConfig.getParentField() != null) {
201-
this.parentField =
202-
indexingChain.markAsReserved(
203-
new NumericDocValuesField(indexWriterConfig.getParentField(), -1));
204-
} else {
205-
this.parentField = null;
206-
}
198+
this.hasParentField = indexWriterConfig.getParentField() != null;
207199
}
208200

209201
final void testPoint(String message) {
@@ -249,12 +241,10 @@ long updateDocuments(
249241
final Iterator<? extends Iterable<? extends IndexableField>> iterator = docs.iterator();
250242
while (iterator.hasNext()) {
251243
Iterable<? extends IndexableField> doc = iterator.next();
252-
if (parentField != null) {
253-
if (iterator.hasNext() == false) {
254-
doc = addParentField(doc, parentField);
255-
}
256-
} else if (segmentInfo.getIndexSort() != null
257-
&& iterator.hasNext()
244+
final boolean isLastDoc = iterator.hasNext() == false;
245+
if (hasParentField == false
246+
&& segmentInfo.getIndexSort() != null
247+
&& isLastDoc == false
258248
&& indexMajorVersionCreated >= Version.LUCENE_10_0_0.major) {
259249
// sort is configured but parent field is missing, yet we have a doc-block
260250
// yet we must not fail if this index was created in an earlier version where this
@@ -271,7 +261,7 @@ long updateDocuments(
271261
// vs non-aborting exceptions):
272262
reserveOneDoc();
273263
try {
274-
indexingChain.processDocument(numDocsInRAM++, doc);
264+
indexingChain.processDocument(numDocsInRAM++, doc, isLastDoc);
275265
} finally {
276266
onNewDocOnRAM.run();
277267
}
@@ -294,34 +284,6 @@ long updateDocuments(
294284
}
295285
}
296286

297-
private Iterable<? extends IndexableField> addParentField(
298-
Iterable<? extends IndexableField> doc, IndexableField parentField) {
299-
return () -> {
300-
final Iterator<? extends IndexableField> first = doc.iterator();
301-
return new Iterator<>() {
302-
IndexableField additionalField = parentField;
303-
304-
@Override
305-
public boolean hasNext() {
306-
return additionalField != null || first.hasNext();
307-
}
308-
309-
@Override
310-
public IndexableField next() {
311-
if (additionalField != null) {
312-
IndexableField field = additionalField;
313-
additionalField = null;
314-
return field;
315-
}
316-
if (first.hasNext()) {
317-
return first.next();
318-
}
319-
throw new NoSuchElementException();
320-
}
321-
};
322-
};
323-
}
324-
325287
private long finishDocuments(DocumentsWriterDeleteQueue.Node<?> deleteNode, int docIdUpTo) {
326288
/*
327289
* here we actually finish the document in two steps 1. push the delete into

0 commit comments

Comments
 (0)