From 185eabbac3abff0dc9ca16a1821ed13cf44fa5b5 Mon Sep 17 00:00:00 2001 From: Felipe Fujiy Pessoto Date: Sun, 14 Jun 2026 05:57:18 +0000 Subject: [PATCH 1/3] [VL] Fix native crash in toVeloxExpr for nested field reference into array/map SubstraitVeloxExprConverter::toVeloxExpr(FieldReference) resolves a nested struct_field chain by calling asRowType() on each child to descend a level. When the reference traverses a non-struct child -- e.g. a field nested under an array, as exercised by Delta's "nested data support - analysis error - updating array type" UpdateSuite test -- asRowType() returns null and the next loop iteration dereferenced that null RowType, crashing the whole forked JVM with a SIGSEGV in libvelox.so (gluten::SubstraitVeloxExprConverter::toVeloxExpr). A SIGSEGV is not catchable, so plan validation could not fall back and the test process died outright. Replace the unchecked navigation with VELOX_USER_CHECK guards (field-index bounds + non-null struct child) that throw a VeloxUserError. The plan validator already wraps expression conversion in try/catch(VeloxException), so the query now falls back to vanilla execution cleanly instead of crashing. Add SubstraitVeloxExprConverterTest with a regression repro: a field reference descending into an array column, and an out-of-range field index, both of which must now throw instead of crashing. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- cpp/velox/substrait/SubstraitToVeloxExpr.cc | 20 +++++- cpp/velox/tests/CMakeLists.txt | 1 + .../tests/SubstraitVeloxExprConverterTest.cc | 66 +++++++++++++++++++ 3 files changed, 85 insertions(+), 2 deletions(-) create mode 100644 cpp/velox/tests/SubstraitVeloxExprConverterTest.cc diff --git a/cpp/velox/substrait/SubstraitToVeloxExpr.cc b/cpp/velox/substrait/SubstraitToVeloxExpr.cc index b7615d844d9..6ef99685c15 100755 --- a/cpp/velox/substrait/SubstraitToVeloxExpr.cc +++ b/cpp/velox/substrait/SubstraitToVeloxExpr.cc @@ -217,13 +217,29 @@ std::shared_ptr SubstraitVeloxExprConverter::t auto inputColumnType = inputType; for (;;) { auto idx = tmp->field(); - fieldAccess = makeFieldAccessExpr(inputColumnType->nameOf(idx), inputColumnType->childAt(idx), fieldAccess); + VELOX_USER_CHECK( + idx >= 0 && static_cast(idx) < inputColumnType->size(), + "Field reference index {} is out of range for the {}-field row type.", + idx, + inputColumnType->size()); + const TypePtr childType = inputColumnType->childAt(idx); + fieldAccess = makeFieldAccessExpr(inputColumnType->nameOf(idx), childType, fieldAccess); if (!tmp->has_child()) { break; } - inputColumnType = asRowType(inputColumnType->childAt(idx)); + // Descending into a nested field is only valid when the current child is + // itself a struct/row. For array/map/primitive children (e.g. a field + // nested under an array, as in Delta's "updating array type" case) + // asRowType() returns null; previously the next loop iteration + // dereferenced that null RowType and crashed the process with a SIGSEGV. + // Throw a user error instead so plan validation fails cleanly and the + // query falls back to vanilla execution. + inputColumnType = asRowType(childType); + VELOX_USER_CHECK_NOT_NULL( + inputColumnType, + "Nested field reference into a non-struct type (e.g. an array or map element) is not supported."); tmp = &tmp->child().struct_field(); } return fieldAccess; diff --git a/cpp/velox/tests/CMakeLists.txt b/cpp/velox/tests/CMakeLists.txt index 3052ce23f6b..0f5e634b9de 100644 --- a/cpp/velox/tests/CMakeLists.txt +++ b/cpp/velox/tests/CMakeLists.txt @@ -129,6 +129,7 @@ add_velox_test( Substrait2VeloxPlanValidatorTest.cc Substrait2VeloxValuesNodeConversionTest.cc SubstraitExtensionCollectorTest.cc + SubstraitVeloxExprConverterTest.cc VeloxSubstraitRoundTripTest.cc VeloxSubstraitSignatureTest.cc VeloxToSubstraitTypeTest.cc) diff --git a/cpp/velox/tests/SubstraitVeloxExprConverterTest.cc b/cpp/velox/tests/SubstraitVeloxExprConverterTest.cc new file mode 100644 index 00000000000..0a87b8ee56b --- /dev/null +++ b/cpp/velox/tests/SubstraitVeloxExprConverterTest.cc @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "substrait/SubstraitToVeloxExpr.h" + +#include "velox/common/base/tests/GTestUtils.h" +#include "velox/type/Type.h" + +using namespace facebook::velox; + +namespace gluten { + +// Regression test for a SIGSEGV in +// SubstraitVeloxExprConverter::toVeloxExpr(Expression::FieldReference, ...). +// The direct-reference loop descends one nested struct_field at a time with +// `inputColumnType = asRowType(childAt(idx))`. When the field path traverses a +// non-struct child -- e.g. a field nested under an array, as produced by Delta's +// nested-array UPDATE rewrite ("nested data support - ... updating array type") +// -- asRowType() returns null and the next iteration dereferenced that null +// RowType, crashing the whole forked JVM. A SIGSEGV is not catchable, so plan +// validation could not fall back. The converter must instead throw a +// VeloxUserError, which SubstraitToVeloxPlanValidator catches to fall back to +// vanilla execution. +TEST(SubstraitVeloxExprConverterTest, nestedFieldReferenceIntoNonStructThrows) { + // Schema with a single array column. + RowTypePtr inputType = ROW({"arr"}, {ARRAY(INTEGER())}); + + // Reference column 0 (the array), then descend one more level via a child + // struct_field -- i.e. into the array's element, which is not a struct/row. + ::substrait::Expression::FieldReference fieldReference; + auto* structField = fieldReference.mutable_direct_reference()->mutable_struct_field(); + structField->set_field(0); + structField->mutable_child()->mutable_struct_field()->set_field(0); + + VELOX_ASSERT_THROW( + SubstraitVeloxExprConverter::toVeloxExpr(fieldReference, inputType), + "Nested field reference into a non-struct type"); +} + +// A field-reference index past the end of the row type must be rejected cleanly +// instead of indexing out of bounds (the raw RowType::childAt/nameOf accessors +// use unchecked operator[]). +TEST(SubstraitVeloxExprConverterTest, fieldReferenceIndexOutOfRangeThrows) { + RowTypePtr inputType = ROW({"a", "b"}, {INTEGER(), INTEGER()}); + + ::substrait::Expression::FieldReference fieldReference; + fieldReference.mutable_direct_reference()->mutable_struct_field()->set_field(5); + + VELOX_ASSERT_THROW(SubstraitVeloxExprConverter::toVeloxExpr(fieldReference, inputType), "out of range"); +} + +} // namespace gluten From 68c93dd81311226a3f47450e7249f671254c3406 Mon Sep 17 00:00:00 2001 From: Felipe Fujiy Pessoto Date: Thu, 18 Jun 2026 08:29:32 +0000 Subject: [PATCH 2/3] [VL] Remove redundant index bounds check - validated by PR #12278 CI After validating with PR #12278 (all 79 CI checks passed including Delta tests), confirmed that the explicit VELOX_USER_CHECK(idx < size) is redundant - Velox's RowType::childAt/nameOf already have VELOX_CHECK_LT bounds checks that throw catchable VeloxUserError, providing sufficient error handling for Gluten. Kept the essential VELOX_USER_CHECK_NOT_NULL for the rowType, which prevents SIGSEGV by catching null before dereferencing. Updated test comment to reflect actual Velox behavior (built-in bounds checks). Validation: https://github.com/apache/gluten/pull/12278/commits/4a9d3d846 --- cpp/velox/substrait/SubstraitToVeloxExpr.cc | 5 ----- cpp/velox/tests/SubstraitVeloxExprConverterTest.cc | 8 +++++--- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/cpp/velox/substrait/SubstraitToVeloxExpr.cc b/cpp/velox/substrait/SubstraitToVeloxExpr.cc index 6ef99685c15..10cdaae490d 100755 --- a/cpp/velox/substrait/SubstraitToVeloxExpr.cc +++ b/cpp/velox/substrait/SubstraitToVeloxExpr.cc @@ -217,11 +217,6 @@ std::shared_ptr SubstraitVeloxExprConverter::t auto inputColumnType = inputType; for (;;) { auto idx = tmp->field(); - VELOX_USER_CHECK( - idx >= 0 && static_cast(idx) < inputColumnType->size(), - "Field reference index {} is out of range for the {}-field row type.", - idx, - inputColumnType->size()); const TypePtr childType = inputColumnType->childAt(idx); fieldAccess = makeFieldAccessExpr(inputColumnType->nameOf(idx), childType, fieldAccess); diff --git a/cpp/velox/tests/SubstraitVeloxExprConverterTest.cc b/cpp/velox/tests/SubstraitVeloxExprConverterTest.cc index 0a87b8ee56b..eff71a7bd02 100644 --- a/cpp/velox/tests/SubstraitVeloxExprConverterTest.cc +++ b/cpp/velox/tests/SubstraitVeloxExprConverterTest.cc @@ -51,9 +51,11 @@ TEST(SubstraitVeloxExprConverterTest, nestedFieldReferenceIntoNonStructThrows) { "Nested field reference into a non-struct type"); } -// A field-reference index past the end of the row type must be rejected cleanly -// instead of indexing out of bounds (the raw RowType::childAt/nameOf accessors -// use unchecked operator[]). +// A field-reference index past the end of the row type must be rejected cleanly. +// Velox's RowType::childAt/nameOf have built-in VELOX_CHECK_LT bounds checks that +// throw VeloxUserError, which Gluten catches and falls back. This test validates +// that out-of-range field access results in a clean fallback instead of undefined +// behavior. TEST(SubstraitVeloxExprConverterTest, fieldReferenceIndexOutOfRangeThrows) { RowTypePtr inputType = ROW({"a", "b"}, {INTEGER(), INTEGER()}); From b6fe845b9e1ea95b094a470d7b2c2d35eb3cadaf Mon Sep 17 00:00:00 2001 From: Felipe Fujiy Pessoto Date: Thu, 18 Jun 2026 09:48:30 +0000 Subject: [PATCH 3/3] [VL] Fix test assertion to match Velox's error message format The test was checking for 'out of range' in the error message, but Velox's VELOX_CHECK_LT throws with format 'Expression: idx < children_.size() (5 vs. 2)'. Updated the test assertion to match the actual Velox error message. --- cpp/velox/tests/SubstraitVeloxExprConverterTest.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/velox/tests/SubstraitVeloxExprConverterTest.cc b/cpp/velox/tests/SubstraitVeloxExprConverterTest.cc index eff71a7bd02..01ffb2f428e 100644 --- a/cpp/velox/tests/SubstraitVeloxExprConverterTest.cc +++ b/cpp/velox/tests/SubstraitVeloxExprConverterTest.cc @@ -62,7 +62,8 @@ TEST(SubstraitVeloxExprConverterTest, fieldReferenceIndexOutOfRangeThrows) { ::substrait::Expression::FieldReference fieldReference; fieldReference.mutable_direct_reference()->mutable_struct_field()->set_field(5); - VELOX_ASSERT_THROW(SubstraitVeloxExprConverter::toVeloxExpr(fieldReference, inputType), "out of range"); + // Velox's VELOX_CHECK_LT throws with format "Expression: idx < children_.size() (5 vs. 2)" + VELOX_ASSERT_THROW(SubstraitVeloxExprConverter::toVeloxExpr(fieldReference, inputType), "idx < children_.size()"); } } // namespace gluten