From 60f7702de4a05e2f94279fbad858cf3051fadbf9 Mon Sep 17 00:00:00 2001 From: Felipe Fujiy Pessoto Date: Sun, 14 Jun 2026 06:37:32 +0000 Subject: [PATCH 1/3] [VL][DO NOT MERGE] Test-only repro for nested FieldReference crash (verifies #12290) Contains ONLY the regression test from #12290, WITHOUT the SubstraitToVeloxExpr.cc fix, to confirm the test genuinely reproduces the crash. The cpp unit test job (ctest) is expected to FAIL here -- toVeloxExpr still dereferences a null RowType and the test process crashes with a SIGSEGV -- and passes in #12290 which includes the fix. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- cpp/velox/tests/CMakeLists.txt | 1 + .../tests/SubstraitVeloxExprConverterTest.cc | 66 +++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 cpp/velox/tests/SubstraitVeloxExprConverterTest.cc diff --git a/cpp/velox/tests/CMakeLists.txt b/cpp/velox/tests/CMakeLists.txt index 3052ce23f6..0f5e634b9d 100644 --- a/cpp/velox/tests/CMakeLists.txt +++ b/cpp/velox/tests/CMakeLists.txt @@ -129,6 +129,7 @@ add_velox_test( Substrait2VeloxPlanValidatorTest.cc Substrait2VeloxValuesNodeConversionTest.cc SubstraitExtensionCollectorTest.cc + SubstraitVeloxExprConverterTest.cc VeloxSubstraitRoundTripTest.cc VeloxSubstraitSignatureTest.cc VeloxToSubstraitTypeTest.cc) diff --git a/cpp/velox/tests/SubstraitVeloxExprConverterTest.cc b/cpp/velox/tests/SubstraitVeloxExprConverterTest.cc new file mode 100644 index 0000000000..0a87b8ee56 --- /dev/null +++ b/cpp/velox/tests/SubstraitVeloxExprConverterTest.cc @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "substrait/SubstraitToVeloxExpr.h" + +#include "velox/common/base/tests/GTestUtils.h" +#include "velox/type/Type.h" + +using namespace facebook::velox; + +namespace gluten { + +// Regression test for a SIGSEGV in +// SubstraitVeloxExprConverter::toVeloxExpr(Expression::FieldReference, ...). +// The direct-reference loop descends one nested struct_field at a time with +// `inputColumnType = asRowType(childAt(idx))`. When the field path traverses a +// non-struct child -- e.g. a field nested under an array, as produced by Delta's +// nested-array UPDATE rewrite ("nested data support - ... updating array type") +// -- asRowType() returns null and the next iteration dereferenced that null +// RowType, crashing the whole forked JVM. A SIGSEGV is not catchable, so plan +// validation could not fall back. The converter must instead throw a +// VeloxUserError, which SubstraitToVeloxPlanValidator catches to fall back to +// vanilla execution. +TEST(SubstraitVeloxExprConverterTest, nestedFieldReferenceIntoNonStructThrows) { + // Schema with a single array column. + RowTypePtr inputType = ROW({"arr"}, {ARRAY(INTEGER())}); + + // Reference column 0 (the array), then descend one more level via a child + // struct_field -- i.e. into the array's element, which is not a struct/row. + ::substrait::Expression::FieldReference fieldReference; + auto* structField = fieldReference.mutable_direct_reference()->mutable_struct_field(); + structField->set_field(0); + structField->mutable_child()->mutable_struct_field()->set_field(0); + + VELOX_ASSERT_THROW( + SubstraitVeloxExprConverter::toVeloxExpr(fieldReference, inputType), + "Nested field reference into a non-struct type"); +} + +// A field-reference index past the end of the row type must be rejected cleanly +// instead of indexing out of bounds (the raw RowType::childAt/nameOf accessors +// use unchecked operator[]). +TEST(SubstraitVeloxExprConverterTest, fieldReferenceIndexOutOfRangeThrows) { + RowTypePtr inputType = ROW({"a", "b"}, {INTEGER(), INTEGER()}); + + ::substrait::Expression::FieldReference fieldReference; + fieldReference.mutable_direct_reference()->mutable_struct_field()->set_field(5); + + VELOX_ASSERT_THROW(SubstraitVeloxExprConverter::toVeloxExpr(fieldReference, inputType), "out of range"); +} + +} // namespace gluten From 544bc8dcf278436cc45c230de925a256ab31b3fb Mon Sep 17 00:00:00 2001 From: Felipe Fujiy Pessoto Date: Tue, 16 Jun 2026 22:16:14 +0000 Subject: [PATCH 2/3] [VL][TEST] Add Spark-level repro for the nested-array FieldReference crash Adds VeloxDeltaNestedFieldArraySuite: a Delta MERGE that updates a field nested under an array (`value.a` where `value` is `array>`). On an unfixed build this drives SubstraitVeloxExprConverter::toVeloxExpr to descend into the array, dereference the null RowType returned by asRowType(), and crash the forked JVM with a SIGSEGV -- the Spark-level companion to the existing SubstraitVeloxExprConverterTest.cc. This reaches the native converter (and the crash) under the Spark 4.0/4.1 -Pdelta CI jobs (Delta 4.x). With the fix in #12290 the converter throws a catchable VeloxUserError, Gluten falls back, and Delta raises the expected AnalysisException. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../VeloxDeltaNestedFieldArraySuite.scala | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 backends-velox/src-delta/test/scala/org/apache/gluten/execution/VeloxDeltaNestedFieldArraySuite.scala diff --git a/backends-velox/src-delta/test/scala/org/apache/gluten/execution/VeloxDeltaNestedFieldArraySuite.scala b/backends-velox/src-delta/test/scala/org/apache/gluten/execution/VeloxDeltaNestedFieldArraySuite.scala new file mode 100644 index 0000000000..27403755b3 --- /dev/null +++ b/backends-velox/src-delta/test/scala/org/apache/gluten/execution/VeloxDeltaNestedFieldArraySuite.scala @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.gluten.execution + +import org.apache.spark.SparkConf +import org.apache.spark.sql.AnalysisException + +/** + * Reproduction for the native crash fixed in cpp/velox/substrait/SubstraitToVeloxExpr.cc + * (SubstraitVeloxExprConverter::toVeloxExpr). + * + * Delta rewrites a MERGE/UPDATE that assigns to a field nested under an array (e.g. `value.a` where + * `value` is `array>`) into a field reference whose path descends through the array + * element. While resolving that reference Gluten's converter did + * `inputColumnType = asRowType(childAt(idx))`; for the array child `asRowType()` returns null, and + * the next loop iteration dereferenced the null RowType, crashing the whole forked JVM with a + * SIGSEGV in `toVeloxExpr(FieldReference, ...)`. Because a SIGSEGV is not a catchable C++ + * exception, plan validation could not fall back. The crash was observed in the Delta Spark UT + * pipeline running Delta's `MergeIntoNestedDataSQLPathBasedSuite` test "nested data support - + * analysis error - updating array type". + * + * Updating a field under an array is unsupported, so Delta is expected to raise an + * AnalysisException; the query must never crash the JVM. With the fix the converter throws a + * VeloxUserError that SubstraitToVeloxPlanValidator catches, Gluten falls back, and the query fails + * cleanly with that AnalysisException. + * + * NOTE: this only reaches the native converter (and so the SIGSEGV on an unfixed build) when Gluten + * actually offloads the rewritten MERGE expression, which happens with the Delta 4.x plan used by + * the Spark 4.0/4.1 `-Pdelta` CI jobs. On Delta 3.3.x (the Spark 3.x jobs / a typical local run) + * the statement is rejected during analysis before the converter runs, so this test passes there + * without exercising the crash. It is the Spark-level companion to + * cpp/velox/tests/SubstraitVeloxExprConverterTest.cc, which reproduces the crash directly. + */ +class VeloxDeltaNestedFieldArraySuite extends WholeStageTransformerSuite { + protected val rootPath: String = getClass.getResource("/").getPath + override protected val resourcePath: String = "/tpch-data-parquet" + override protected val fileFormat: String = "parquet" + + override protected def sparkConf: SparkConf = { + super.sparkConf + .set("spark.shuffle.manager", "org.apache.spark.shuffle.sort.ColumnarShuffleManager") + .set("spark.sql.files.maxPartitionBytes", "1g") + .set("spark.sql.shuffle.partitions", "1") + .set("spark.memory.offHeap.size", "2g") + .set("spark.sql.autoBroadcastJoinThreshold", "-1") + // Gluten blanket-falls-back when ANSI mode is on (Spark 4.x default); disable it so the + // MERGE update expression is actually offloaded and reaches the native converter. + .set("spark.sql.ansi.enabled", "false") + .set("spark.sql.sources.useV1SourceList", "avro") + .set("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") + .set("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") + } + + testWithMinSparkVersion( + "delta: merge update of a field nested under an array does not crash natively", + "3.2") { + withTable("merge_array_src", "merge_array_tgt") { + spark.sql( + "CREATE TABLE merge_array_tgt (key STRING, value ARRAY>) USING delta") + spark.sql("INSERT INTO merge_array_tgt VALUES ('A', array(named_struct('a', 1)))") + spark.sql( + "CREATE TABLE merge_array_src (key STRING, value ARRAY>) USING delta") + spark.sql("INSERT INTO merge_array_src VALUES ('A', array(named_struct('a', 0)))") + + // Updating `value.a` (a field inside the array) is unsupported. The converter must surface a + // catchable error so Gluten falls back and Delta can report the analysis error, rather than + // dereferencing a null RowType and crashing the JVM with a SIGSEGV. + val e = intercept[AnalysisException] { + spark.sql(""" + |MERGE INTO merge_array_tgt t + |USING merge_array_src s + |ON s.key = t.key + |WHEN MATCHED THEN UPDATE SET value.a = 2 + |""".stripMargin) + } + assert(e.getMessage.contains("Updating nested fields is only supported for StructType")) + } + } +} From a9fb8edf85b320d5dc1c8e7c23be6942277fc612 Mon Sep 17 00:00:00 2001 From: Felipe Fujiy Pessoto Date: Wed, 17 Jun 2026 01:57:08 +0000 Subject: [PATCH 3/3] [VL][TEST] Add UPDATE variant of the nested-array repro Adds an UPDATE-based test (`UPDATE t SET value.a = 2` where value is array) next to the existing MERGE test, to empirically check whether UPDATE reaches the native converter. Delta's PreprocessTableUpdate is an analyzer rule that raises the AnalysisException during analysis, so this is expected to fall back/throw without crashing -- but the velox CI (Delta 4.x) will confirm whether that holds, in contrast to MERGE. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../VeloxDeltaNestedFieldArraySuite.scala | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/backends-velox/src-delta/test/scala/org/apache/gluten/execution/VeloxDeltaNestedFieldArraySuite.scala b/backends-velox/src-delta/test/scala/org/apache/gluten/execution/VeloxDeltaNestedFieldArraySuite.scala index 27403755b3..bbda3539e9 100644 --- a/backends-velox/src-delta/test/scala/org/apache/gluten/execution/VeloxDeltaNestedFieldArraySuite.scala +++ b/backends-velox/src-delta/test/scala/org/apache/gluten/execution/VeloxDeltaNestedFieldArraySuite.scala @@ -90,4 +90,23 @@ class VeloxDeltaNestedFieldArraySuite extends WholeStageTransformerSuite { assert(e.getMessage.contains("Updating nested fields is only supported for StructType")) } } + + testWithMinSparkVersion( + "delta: update of a field nested under an array does not crash natively", + "3.2") { + withTable("update_array_tgt") { + spark.sql( + "CREATE TABLE update_array_tgt (key STRING, value ARRAY>) USING delta") + spark.sql("INSERT INTO update_array_tgt VALUES ('A', array(named_struct('a', 1)))") + + // Same nested-array reference as the MERGE case, but via UPDATE. Delta is expected to reject + // it; the query must never crash the JVM. NOTE: Delta's PreprocessTableUpdate is an analyzer + // rule, so it raises this AnalysisException during analysis -- before Gluten's columnar + // planning -- which is why (unlike MERGE) this is not expected to reach the native converter. + val e = intercept[AnalysisException] { + spark.sql("UPDATE update_array_tgt SET value.a = 2 WHERE key = 'A'") + } + assert(e.getMessage.contains("Updating nested fields is only supported for StructType")) + } + } }