Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions cpp/velox/substrait/SubstraitToVeloxExpr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -217,13 +217,24 @@ std::shared_ptr<const core::FieldAccessTypedExpr> SubstraitVeloxExprConverter::t
auto inputColumnType = inputType;
for (;;) {
auto idx = tmp->field();
fieldAccess = makeFieldAccessExpr(inputColumnType->nameOf(idx), inputColumnType->childAt(idx), fieldAccess);
const TypePtr childType = inputColumnType->childAt(idx);
fieldAccess = makeFieldAccessExpr(inputColumnType->nameOf(idx), childType, fieldAccess);

if (!tmp->has_child()) {
break;
}

inputColumnType = asRowType(inputColumnType->childAt(idx));
// Descending into a nested field is only valid when the current child is
// itself a struct/row. For array/map/primitive children (e.g. a field
// nested under an array, as in Delta's "updating array type" case)
// asRowType() returns null; previously the next loop iteration
// dereferenced that null RowType and crashed the process with a SIGSEGV.
// Throw a user error instead so plan validation fails cleanly and the
// query falls back to vanilla execution.
inputColumnType = asRowType(childType);
VELOX_USER_CHECK_NOT_NULL(
inputColumnType,
"Nested field reference into a non-struct type (e.g. an array or map element) is not supported.");
tmp = &tmp->child().struct_field();
}
return fieldAccess;
Expand Down
1 change: 1 addition & 0 deletions cpp/velox/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ add_velox_test(
Substrait2VeloxPlanValidatorTest.cc
Substrait2VeloxValuesNodeConversionTest.cc
SubstraitExtensionCollectorTest.cc
SubstraitVeloxExprConverterTest.cc
VeloxSubstraitRoundTripTest.cc
VeloxSubstraitSignatureTest.cc
VeloxToSubstraitTypeTest.cc)
Expand Down
69 changes: 69 additions & 0 deletions cpp/velox/tests/SubstraitVeloxExprConverterTest.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "substrait/SubstraitToVeloxExpr.h"

#include "velox/common/base/tests/GTestUtils.h"
#include "velox/type/Type.h"

using namespace facebook::velox;

namespace gluten {

// Guards against a SIGSEGV in
// SubstraitVeloxExprConverter::toVeloxExpr(Expression::FieldReference, ...).
//
// The converter walks a direct reference one struct_field at a time and, before
// each further step, narrows the current type with asRowType(). If the path
// steps through a child that is not a struct/row (for example a field nested
// under an array, as produced by Delta's nested-array UPDATE rewrite), the
// narrowed type is null. Dereferencing it used to take down the whole forked
// JVM, and because a SIGSEGV cannot be caught, plan validation had no chance to
// fall back. The expected behaviour is a Velox exception carrying the
// "non-struct type" message, which SubstraitToVeloxPlanValidator can catch in
// order to fall back to vanilla execution.
TEST(SubstraitVeloxExprConverterTest, nestedFieldReferenceIntoNonStructThrows) {
  // One column named "arr" whose type is ARRAY(INTEGER).
  const RowTypePtr rowType = ROW({"arr"}, {ARRAY(INTEGER())});

  // Build the path "column 0 -> child field 0". Column 0 is the array, so the
  // second step tries to descend into something that is not a row.
  ::substrait::Expression::FieldReference reference;
  auto* outerField = reference.mutable_direct_reference()->mutable_struct_field();
  outerField->set_field(0);
  auto* innerField = outerField->mutable_child()->mutable_struct_field();
  innerField->set_field(0);

  VELOX_ASSERT_THROW(
      SubstraitVeloxExprConverter::toVeloxExpr(reference, rowType),
      "Nested field reference into a non-struct type");
}

// A field-reference index past the end of the row type must be rejected with a
// Velox exception instead of causing undefined behavior. The converter passes
// the requested index to RowType::nameOf/childAt, which are expected to
// bounds-check it (VELOX_CHECK_LT) and throw; Gluten can then catch the
// exception and fall back.
//
// NOTE(review): the non-USER VELOX_CHECK_* macros raise VeloxRuntimeError, not
// VeloxUserError -- confirm the validator's catch clause covers the base
// VeloxException type.
TEST(SubstraitVeloxExprConverterTest, fieldReferenceIndexOutOfRangeThrows) {
  // Two columns, so the only valid indices are 0 and 1.
  RowTypePtr inputType = ROW({"a", "b"}, {INTEGER(), INTEGER()});

  // Reference column 5, which does not exist.
  ::substrait::Expression::FieldReference fieldReference;
  fieldReference.mutable_direct_reference()->mutable_struct_field()->set_field(5);

  // VELOX_ASSERT_THROW compares against the exception's message(), which for a
  // VELOX_CHECK_LT failure holds the formatted operands "(5 vs. 2)"; the text of
  // the failing expression is reported separately. Matching on the operands
  // also keeps the test independent of which accessor (nameOf or childAt)
  // performs the check first.
  VELOX_ASSERT_THROW(SubstraitVeloxExprConverter::toVeloxExpr(fieldReference, inputType), "(5 vs. 2)");
}

} // namespace gluten
Loading