Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 0 additions & 50 deletions ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
import java.util.Optional;
import java.util.function.Function;
import java.util.regex.Pattern;
import org.antlr.runtime.ClassicToken;
import org.antlr.runtime.CommonToken;
import org.antlr.runtime.tree.Tree;
import org.antlr.runtime.tree.TreeVisitor;
Expand Down Expand Up @@ -132,7 +131,6 @@
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.ImmutableNullableList;
import org.apache.calcite.util.Pair;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.TableName;
import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.conf.CteSuggesterType;
Expand All @@ -144,7 +142,6 @@
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.QueryProperties;
import org.apache.hadoop.hive.ql.QueryState;
import org.apache.hadoop.hive.ql.ddl.table.create.CreateTableAnalyzer;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.FunctionInfo;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
Expand Down Expand Up @@ -319,7 +316,6 @@
import org.apache.hadoop.hive.ql.parse.type.TypeCheckProcFactory;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.ql.plan.mapper.EmptyStatsSource;
import org.apache.hadoop.hive.ql.plan.mapper.StatsSource;
Expand All @@ -338,7 +334,6 @@
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.joda.time.Interval;
import java.io.IOException;
import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
import java.math.BigDecimal;
Expand Down Expand Up @@ -1051,51 +1046,6 @@ boolean continueJoinMerge() {
return !(runCBO && disableSemJoinReordering);
}

/**
 * Materializes a CTE as a temporary table: synthesizes a
 * {@code TOK_CREATETABLE} AST for the CTE, analyzes it with a
 * {@link CreateTableAnalyzer}, pre-creates the table's storage location,
 * and registers the resulting table (with its sink statistics) in the
 * current context.
 *
 * @param cteName name under which the CTE is materialized
 * @param cte     CTE clause whose query subtree becomes the table's definition
 * @return the materialized temporary table
 * @throws HiveException if analysis fails or the table location cannot be created
 */
@Override
Table materializeCTE(String cteName, CTEClause cte) throws HiveException {

  // Build: TOK_CREATETABLE(TOK_TABNAME(cteName), KW_TEMPORARY(marker), <cte query>)
  ASTNode createTable = new ASTNode(new ClassicToken(HiveParser.TOK_CREATETABLE));

  ASTNode tableName = new ASTNode(new ClassicToken(HiveParser.TOK_TABNAME));
  tableName.addChild(new ASTNode(new ClassicToken(HiveParser.Identifier, cteName)));

  // The marker token text flags this temporary table as a CTE materialization.
  ASTNode temporary = new ASTNode(new ClassicToken(HiveParser.KW_TEMPORARY, MATERIALIZATION_MARKER));

  createTable.addChild(tableName);
  createTable.addChild(temporary);
  createTable.addChild(cte.cteNode);

  CreateTableAnalyzer analyzer = new CreateTableAnalyzer(queryState);
  analyzer.initCtx(ctx);
  analyzer.init(false);

  // should share cte contexts so nested CTE references resolve
  analyzer.aliasToCTEs.putAll(aliasToCTEs);

  // analyzeInternal() overwrites the command type on the shared queryState;
  // save and restore the outer query's operation so it is not clobbered.
  HiveOperation operation = queryState.getHiveOperation();
  try {
    analyzer.analyzeInternal(createTable);
  } finally {
    queryState.setCommandType(operation);
  }

  Table table = analyzer.tableDesc.toTable(conf);
  Path location = table.getDataLocation();
  try {
    // Pre-create the storage directory for the materialized data.
    location.getFileSystem(conf).mkdirs(location);
  } catch (IOException e) {
    throw new HiveException(e);
  }
  table.setMaterializedTable(true);

  // SLF4J idiom: parameterized logging instead of string concatenation,
  // so the message is only built when INFO is enabled.
  LOG.info("{} will be materialized into {}", cteName, location);
  cte.source = analyzer;

  ctx.addMaterializedTable(cteName, table, getMaterializedTableStats(analyzer.getSinkOp()));

  return table;
}

@Override
String fixCtasColumnName(String colName) {
if (runCBO) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@
import org.apache.hadoop.hive.ql.ddl.DDLDescWithTableProperties;
import org.apache.hadoop.hive.ql.ddl.DDLWork;
import org.apache.hadoop.hive.ql.ddl.misc.hooks.InsertCommitHookDesc;
import org.apache.hadoop.hive.ql.ddl.DDLSemanticAnalyzerFactory;
import org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc;
import org.apache.hadoop.hive.ql.ddl.table.misc.preinsert.PreInsertTableDesc;
import org.apache.hadoop.hive.ql.ddl.table.misc.properties.AlterTableUnsetPropertiesDesc;
Expand Down Expand Up @@ -1568,7 +1569,7 @@ Table materializeCTE(String cteName, CTEClause cte) throws HiveException {
createTable.addChild(temporary);
createTable.addChild(cte.cteNode);

SemanticAnalyzer analyzer = new SemanticAnalyzer(queryState);
SemanticAnalyzer analyzer = (SemanticAnalyzer) DDLSemanticAnalyzerFactory.getAnalyzer(createTable, queryState);
analyzer.initCtx(ctx);
analyzer.init(false);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,15 @@

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.CALLS_REAL_METHODS;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.mockStatic;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.when;

Expand All @@ -46,6 +49,8 @@
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.ddl.DDLSemanticAnalyzerFactory;
import org.apache.hadoop.hive.ql.ddl.table.create.CreateTableAnalyzer;
import org.apache.hadoop.hive.ql.QueryProperties;
import org.apache.hadoop.hive.ql.QueryProperties.QueryType;
import org.apache.hadoop.hive.ql.QueryState;
Expand All @@ -65,6 +70,7 @@
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import org.mockito.MockedStatic;
import org.mockito.stubbing.Answer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -493,4 +499,51 @@ private void checkTablesUsed(String query, Set<String> tables) throws Exception

Assert.assertEquals(new TreeSet<>(tables), new TreeSet<>(result));
}

/**
 * Verifies that CTE materialization resolves its analyzer through
 * DDLSemanticAnalyzerFactory when CBO is disabled (the non-CBO path that
 * previously NPE'd; see the HIVE-28724 discussion in this change).
 */
@Test
public void testMaterializeCTEWithCBODisabled() throws Exception {
testMaterializeCTEUsesDDLFactory(false);
}

/**
 * Verifies that CTE materialization resolves its analyzer through
 * DDLSemanticAnalyzerFactory when CBO is enabled (the default planning path).
 */
@Test
public void testMaterializeCTEWithCBOEnabled() throws Exception {
testMaterializeCTEUsesDDLFactory(true);
}

/**
 * Shared driver for the CBO-on/CBO-off cases: analyzes a query whose CTE is
 * referenced often enough to be materialized, intercepts the static
 * DDLSemanticAnalyzerFactory.getAnalyzer(...) call, and asserts that the
 * materialization path obtained a CreateTableAnalyzer from the factory.
 *
 * @param cboEnabled whether hive.cbo.enable is set for this run
 */
private void testMaterializeCTEUsesDDLFactory(boolean cboEnabled) throws Exception {
HiveConf testConf = new HiveConf(conf);
testConf.setBoolVar(HiveConf.ConfVars.HIVE_CBO_ENABLED, cboEnabled);

SessionState.start(testConf);
// NOTE(review): ctx is never closed/released in this test — confirm whether
// Context holds scratch-dir resources that should be cleaned up here.
Context ctx = new Context(testConf);

// Reference CTE 3 times to exceed default materialization threshold of 2
String query = "WITH cte AS (SELECT COUNT(*) as cnt FROM table1) " +
"SELECT * FROM cte UNION ALL SELECT * FROM cte UNION ALL SELECT * FROM cte";
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can also lower the threshold (hive.optimize.cte.materialize.threshold) to 1 in the scope of this test case and use a simpler query


ASTNode astNode = ParseUtils.parse(query, ctx);
QueryState queryState = new QueryState.Builder().withHiveConf(testConf).build();
BaseSemanticAnalyzer analyzer = SemanticAnalyzerFactory.get(queryState, astNode);
analyzer.initCtx(ctx);

// CALLS_REAL_METHODS: the factory still does its real work; we only spy on
// the call to capture the analyzer created for the CTE's CREATE TABLE AST.
try (MockedStatic<DDLSemanticAnalyzerFactory> mocked =
mockStatic(DDLSemanticAnalyzerFactory.class, CALLS_REAL_METHODS)) {
// Single-element array: lets the lambda below write to an effectively-final slot.
BaseSemanticAnalyzer[] cteAnalyzer = new BaseSemanticAnalyzer[1];

mocked.when(() -> DDLSemanticAnalyzerFactory.getAnalyzer(any(ASTNode.class), any(QueryState.class)))
.thenAnswer(invocation -> {
BaseSemanticAnalyzer result = (BaseSemanticAnalyzer) invocation.callRealMethod();
// Only capture the analyzer built for the synthesized CREATE TABLE node.
if (invocation.getArgument(0, ASTNode.class).getType() == HiveParser.TOK_CREATETABLE) {
cteAnalyzer[0] = result;
}
return result;
});

analyzer.analyze(astNode, ctx);

assertNotNull("DDLSemanticAnalyzerFactory should be called for CTE materialization", cteAnalyzer[0]);
assertTrue("CTE materialization should use CreateTableAnalyzer",
cteAnalyzer[0] instanceof CreateTableAnalyzer);
}
}
}
46 changes: 46 additions & 0 deletions ql/src/test/queries/clientpositive/cte_materialize.q
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
-- Test CTE materialization with both CBO enabled and disabled
-- Confirms CTE materialization planning succeeds end to end; the specific
-- analyzer class obtained from DDLSemanticAnalyzerFactory is verified in
-- the TestSemanticAnalyzer unit tests, not here
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How can this be verified from q test?

Copy link
Copy Markdown
Contributor Author

@konstantinb konstantinb Apr 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that this is a bit too bold a statement - the test does confirm that the NPE is fixed, but does not really validate classes used

-- Also ensures that an NPE is no longer triggered with CBO off (HIVE-28724 regression)

-- Test with CBO enabled (default)
-- The CTE is referenced three times to exceed the default materialization
-- threshold (hive.optimize.cte.materialize.threshold = 2), forcing the
-- materialization code path that this change exercises.
explain
WITH cte AS (
SELECT COUNT(*) as cnt FROM (SELECT 1 as id) t
)
SELECT * FROM cte
UNION ALL
SELECT * FROM cte
UNION ALL
SELECT * FROM cte;

-- Test with CBO disabled
-- Same query; previously this non-CBO path failed with an NPE.
set hive.cbo.enable=false;

explain
WITH cte AS (
SELECT COUNT(*) as cnt FROM (SELECT 1 as id) t
)
SELECT * FROM cte
UNION ALL
SELECT * FROM cte
UNION ALL
SELECT * FROM cte;

-- Test the recompile-without-CBO auto-trigger path.
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've left very verbose comments in here to better explain the scenario. I will gladly remove or shorten this comment section once reviewed

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on the scope of this patch, this path does not need to be covered because the previous test case already covers the non-CBO path.

TBH, I’m not sure it is necessary to add this .q file, since we already have many tests for CTE materialization (cte_mat_*.q). Those tests cover the CBO path, which should always be the default. In general, the non-CBO path is no longer supported.

https://issues.apache.org/jira/browse/HIVE-27830
https://issues.apache.org/jira/browse/HIVE-28741

Copy link
Copy Markdown
Contributor Author

@konstantinb konstantinb Apr 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TBH, I’m not sure it is necessary to add this .q file

@kasakrisz, while I can agree that testing the non-CBO configured path becomes redundant once full CBO deprecation gets addressed, I believe that having at least one new query that guards against similar regressions could be beneficial. The last query of the test file generates the following when executed against master: cte_materialize.q.test.log.txt

Based on the scope of this patch, this path does not need to be covered because the previous test case already covers the non-CBO path.

The very first test in the file is not broken in the current master. I thought it would be useful to leave it in to confirm that my changes introduce no additional regressions for the CBO path, but you helped me to see that it is indeed redundant now

-- With hive.cbo.fallback.strategy=ALWAYS, a CBO crash on `= ALL (subquery)`
-- causes ReCompileWithoutCBOPlugin to set hive.cbo.enable=false in conf and
-- recompile. The recompile constructs a plain SemanticAnalyzer via the factory,
-- and must materialize the CTE through the fixed SemanticAnalyzer.materializeCTE
-- path. Without the fix this path NPEs at SemanticAnalyzer.materializeCTE.
set hive.cbo.enable=true;
set hive.cbo.fallback.strategy=ALWAYS;

-- Three references to the CTE again exceed the materialization threshold,
-- so the recompiled (non-CBO) plan must materialize it.
explain
WITH cte AS (
SELECT MAX(s) AS m FROM (SELECT 'a' AS s) t
)
SELECT s FROM (SELECT 'a' AS s) u WHERE s = ALL(SELECT m FROM cte)
UNION ALL
SELECT s FROM (SELECT 'a' AS s) u WHERE s = ALL(SELECT m FROM cte)
UNION ALL
SELECT s FROM (SELECT 'a' AS s) u WHERE s = ALL(SELECT m FROM cte);
Loading
Loading