Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions onnxruntime/core/providers/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,9 +155,11 @@ inline Status ComputePadAndOutputShape(const int64_t in_dim,
return Status::OK();
}

constexpr inline int64_t ComputeTotalPad(int64_t in_size, int64_t stride, int64_t adj,
int64_t kernel, int64_t dilation, int64_t out_size) {
return std::max<int64_t>(0, (in_size - 1) * stride + adj + (kernel - 1) * dilation + 1 - out_size);
inline int64_t ComputeTotalPad(int64_t in_size, int64_t stride, int64_t adj,
int64_t kernel, int64_t dilation, int64_t out_size) {
SafeInt<int64_t> safe_pad = (SafeInt<int64_t>(in_size) - 1) * stride + adj +
(SafeInt<int64_t>(kernel) - 1) * dilation + 1 - out_size;
return std::max<int64_t>(0, safe_pad);
}

inline void DistributePadding(AutoPadType pad_type, const int64_t& total_pad,
Expand Down
36 changes: 34 additions & 2 deletions onnxruntime/core/providers/cpu/nn/conv_transpose_attributes.h
Original file line number Diff line number Diff line change
Expand Up @@ -305,14 +305,46 @@ struct ConvTransposeAttributes : public ConvAttributes {
int64_t* out_size) const {
// Output shape is explicitly provided - pad values will have to be computed
if (*out_size != -1) {
if (*out_size < 0) {
if (*out_size <= 0) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"Explicit output size is negative: ", *out_size);
"Explicit output size must be positive. Got: ", *out_size);
}
if (in_size <= 0) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"Input spatial dimension must be positive. Got: ", in_size);
}
if (stride <= 0) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Stride must be positive. Got: ", stride);
}
if (kernel <= 0) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Kernel size must be positive. Got: ", kernel);
}
if (dilation <= 0) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Dilation must be positive. Got: ", dilation);
}
if (adj < 0) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"Output padding must be non-negative. Got: ", adj);
}
// total pad
auto total_pad = ComputeTotalPad(in_size, stride, adj,
kernel, dilation, *out_size);
DistributePadding(pad_type, total_pad, *pad_head, *pad_tail);

// Verify that the forward-conv re-derivation of input size from the output size and pads
// is consistent with the actual input size. Col2im re-derives the input spatial extent as:
// derived_in = (out_size + pad_head + pad_tail - dkernel) / stride + 1
// If this exceeds in_size, Col2im would read past the col_buffer allocation.
SafeInt<int64_t> dkernel = (SafeInt<int64_t>(kernel) - 1) * dilation + 1;
int64_t derived_in = (SafeInt<int64_t>(*out_size) + *pad_head + *pad_tail - dkernel) / stride + 1;
if (derived_in != in_size) {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
"Explicit output_shape is inconsistent with input spatial dimensions. "
"Expected input size ",
derived_in, " but got ", in_size,
" (output_size=", *out_size, ", kernel=", kernel,
", stride=", stride, ", dilation=", dilation, ", adj=", adj, ").");
}
Comment thread
yuslepukhin marked this conversation as resolved.
return Status::OK();
}

Expand Down
282 changes: 282 additions & 0 deletions onnxruntime/test/providers/cpu/nn/conv_transpose_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1729,5 +1729,287 @@ TEST(ConvTransposeTest, ConvTranspose_OutputPaddingExceedsStride) {
{kTensorrtExecutionProvider, kQnnExecutionProvider, kDmlExecutionProvider, kWebGpuExecutionProvider});
}

// An explicit output_shape that cannot be reconciled with the input's spatial size must be
// rejected. X and W are both 1x1x3x3 with no strides/dilations attributes set; per the
// original note the unpadded output is (3 - 1) * 1 + 3 = 5 per axis, so 100x100 is bogus.
TEST(ConvTransposeTest, ConvTranspose_InconsistentOutputShape) {
  const std::vector<int64_t> requested_output_shape{100, 100};
  const std::vector<float> nine_ones(9, 1.0f);

  OpTester test("ConvTranspose", 11);
  test.AddShapeToTensorData(false);
  test.AddAttribute("output_shape", requested_output_shape);
  test.AddInput<float>("X", {1, 1, 3, 3}, nine_ones);
  test.AddInput<float>("W", {1, 1, 3, 3}, nine_ones);
  test.AddOutput<float>("Y", {0}, {});

  // The excluded providers either lack explicit output_shape support or validate on their own.
  test.Run(OpTester::ExpectResult::kExpectFailure, "inconsistent with input spatial dimensions",
           {kTensorrtExecutionProvider, kQnnExecutionProvider, kDmlExecutionProvider,
            kCudaExecutionProvider, kCudaNHWCExecutionProvider, kWebGpuExecutionProvider});
}

// An explicit output_shape of 0 along every axis must be refused with the
// "output size must be positive" diagnostic.
TEST(ConvTransposeTest, ConvTranspose_ZeroOutputShape) {
  const std::vector<int64_t> zero_output_shape{0, 0};
  const std::vector<float> nine_ones(9, 1.0f);

  OpTester test("ConvTranspose", 11);
  test.AddShapeToTensorData(false);
  test.AddAttribute("output_shape", zero_output_shape);
  test.AddInput<float>("X", {1, 1, 3, 3}, nine_ones);
  test.AddInput<float>("W", {1, 1, 3, 3}, nine_ones);
  test.AddOutput<float>("Y", {0}, {});

  test.Run(OpTester::ExpectResult::kExpectFailure, "output size must be positive",
           {kTensorrtExecutionProvider, kQnnExecutionProvider, kDmlExecutionProvider, kWebGpuExecutionProvider});
}

// 1-D variant of the inconsistent output_shape check.
// X is 1x1x5, kernel_shape = 3, stride = 2 (dilation left unset). The unpadded output is
// (5 - 1) * 2 + 3 = 11, so an output_shape of 50 cannot be satisfied.
TEST(ConvTransposeTest, ConvTranspose_1D_InconsistentOutputShape) {
  const std::vector<int64_t> kernel_dims{3};
  const std::vector<int64_t> stride_dims{2};
  const std::vector<int64_t> requested_output_shape{50};
  const std::vector<float> x_values(5, 1.0f);
  const std::vector<float> w_values(3, 1.0f);

  OpTester test("ConvTranspose", 11);
  test.AddShapeToTensorData(false);
  test.AddAttribute("kernel_shape", kernel_dims);
  test.AddAttribute("strides", stride_dims);
  test.AddAttribute("output_shape", requested_output_shape);
  test.AddInput<float>("X", {1, 1, 5}, x_values);
  test.AddInput<float>("W", {1, 1, 3}, w_values);
  test.AddOutput<float>("Y", {0}, {});

  test.Run(OpTester::ExpectResult::kExpectFailure, "inconsistent with input spatial dimensions",
           {kTensorrtExecutionProvider, kQnnExecutionProvider, kDmlExecutionProvider,
            kCudaExecutionProvider, kCudaNHWCExecutionProvider, kWebGpuExecutionProvider});
}

// 3-D variant of the inconsistent output_shape check.
// X is 1x1x2x2x2 with a 2x2x2 kernel and no strides/dilations attributes. The unpadded output
// is (2 - 1) * 1 + 2 = 3 per axis, so requesting 10x10x10 must be rejected.
TEST(ConvTransposeTest, ConvTranspose_3D_InconsistentOutputShape) {
  const std::vector<int64_t> kernel_dims{2, 2, 2};
  const std::vector<int64_t> requested_output_shape{10, 10, 10};
  const std::vector<float> eight_ones(8, 1.0f);

  OpTester test("ConvTranspose", 11);
  test.AddShapeToTensorData(false);
  test.AddAttribute("kernel_shape", kernel_dims);
  test.AddAttribute("output_shape", requested_output_shape);
  test.AddInput<float>("X", {1, 1, 2, 2, 2}, eight_ones);
  test.AddInput<float>("W", {1, 1, 2, 2, 2}, eight_ones);
  test.AddOutput<float>("Y", {0}, {});

  // Per the original note, CUDA/WebGPU lack 3D ConvTranspose in most builds, hence excluded.
  test.Run(OpTester::ExpectResult::kExpectFailure, "inconsistent with input spatial dimensions",
           {kTensorrtExecutionProvider, kQnnExecutionProvider, kDmlExecutionProvider,
            kCudaExecutionProvider, kCudaNHWCExecutionProvider, kWebGpuExecutionProvider});
}

// Positive 3-D case: an explicit output_shape equal to the unpadded output
// ((2 - 1) * 1 + 2 = 3 per axis) must be accepted and produce the expected values.
TEST(ConvTransposeTest, ConvTranspose_3D_ValidOutputShape) {
  ConvTransposeOpAttributes op_attrs = {
      std::vector<int64_t>{2, 2, 2},           // kernel_shape
      {},                                      // output_padding
      std::vector<int64_t>{3, 3, 3},           // output_shape
      std::vector<int64_t>{0, 0, 0, 0, 0, 0},  // pads
      std::vector<int64_t>{1, 1, 1},           // strides
      std::vector<int64_t>{1, 1, 1},           // dilations
      1,                                       // group
      "NOTSET"                                 // auto_pad
  };

  // All-ones 1x1x2x2x2 input and all-ones 1x1x2x2x2 kernel.
  std::vector<float> input_values(8, 1.0f);
  std::vector<float> weight_values(8, 1.0f);
  std::vector<int64_t> input_dims = {1, 1, 2, 2, 2};
  std::vector<int64_t> weight_dims = {1, 1, 2, 2, 2};
  std::vector<int64_t> output_dims = {1, 1, 3, 3, 3};

  // Each output voxel counts the kernel placements overlapping it: the per-axis profile is
  // [1, 2, 1], so the volume is its outer product (corner 1, edge 2, face 4, center 8).
  std::vector<float> expected_output = {
      // depth slice 0
      1.0f, 2.0f, 1.0f, 2.0f, 4.0f, 2.0f, 1.0f, 2.0f, 1.0f,
      // depth slice 1
      2.0f, 4.0f, 2.0f, 4.0f, 8.0f, 4.0f, 2.0f, 4.0f, 2.0f,
      // depth slice 2
      1.0f, 2.0f, 1.0f, 2.0f, 4.0f, 2.0f, 1.0f, 2.0f, 1.0f};

  TestConvTransposeOp(op_attrs, {input_values, weight_values}, {input_dims, weight_dims},
                      expected_output, output_dims,
                      OpTester::ExpectResult::kExpectSuccess, "",
                      {kTensorrtExecutionProvider, kCudaExecutionProvider,
                       kCudaNHWCExecutionProvider, kQnnExecutionProvider, kWebGpuExecutionProvider});
}

// Grouped (group = 2) ConvTranspose with an explicit output_shape equal to the unpadded
// output for in = 3, k = 3, s = 1, i.e. 5x5.
TEST(ConvTransposeTest, ConvTranspose_2D_Group2_OutputShape) {
  ConvTransposeOpAttributes op_attrs = {
      std::vector<int64_t>{3, 3},        // kernel_shape
      {},                                // output_padding
      std::vector<int64_t>{5, 5},        // output_shape
      std::vector<int64_t>{0, 0, 0, 0},  // pads
      std::vector<int64_t>{1, 1},        // strides
      std::vector<int64_t>{1, 1},        // dilations
      2,                                 // group
      "NOTSET"                           // auto_pad
  };

  // X is 1x2x3x3: two input channels, one per group.
  // W is 2x1x3x3: one output channel per group, two output channels in total.
  std::vector<float> input_values(18, 1.0f);
  std::vector<float> weight_values(18, 1.0f);
  std::vector<int64_t> input_dims = {1, 2, 3, 3};
  std::vector<int64_t> weight_dims = {2, 1, 3, 3};
  std::vector<int64_t> output_dims = {1, 2, 5, 5};

  // With an all-ones 3x3 input and an all-ones 3x3 kernel, each group yields the outer
  // product of [1, 2, 3, 2, 1] with itself.
  std::vector<float> expected_output = {
      // output channel 0 (group 0)
      1.0f, 2.0f, 3.0f, 2.0f, 1.0f,
      2.0f, 4.0f, 6.0f, 4.0f, 2.0f,
      3.0f, 6.0f, 9.0f, 6.0f, 3.0f,
      2.0f, 4.0f, 6.0f, 4.0f, 2.0f,
      1.0f, 2.0f, 3.0f, 2.0f, 1.0f,
      // output channel 1 (group 1), same pattern
      1.0f, 2.0f, 3.0f, 2.0f, 1.0f,
      2.0f, 4.0f, 6.0f, 4.0f, 2.0f,
      3.0f, 6.0f, 9.0f, 6.0f, 3.0f,
      2.0f, 4.0f, 6.0f, 4.0f, 2.0f,
      1.0f, 2.0f, 3.0f, 2.0f, 1.0f};

  TestConvTransposeOp(op_attrs, {input_values, weight_values}, {input_dims, weight_dims},
                      expected_output, output_dims,
                      OpTester::ExpectResult::kExpectSuccess, "",
                      {kTensorrtExecutionProvider, kQnnExecutionProvider,
                       kOpenVINOExecutionProvider, kCudaNHWCExecutionProvider});
}

// Explicit output_shape combined with a batch dimension greater than one (batch = 4).
TEST(ConvTransposeTest, ConvTranspose_2D_LargeBatch_OutputShape) {
  ConvTransposeOpAttributes op_attrs = {
      std::vector<int64_t>{2, 2},        // kernel_shape
      {},                                // output_padding
      std::vector<int64_t>{3, 3},        // output_shape: (2 - 1) * 1 + 2 = 3
      std::vector<int64_t>{0, 0, 0, 0},  // pads
      std::vector<int64_t>{1, 1},        // strides
      std::vector<int64_t>{1, 1},        // dilations
      1,                                 // group
      "NOTSET"                           // auto_pad
  };

  // X is 4x1x2x2 (four all-ones images), W is a single all-ones 1x1x2x2 kernel.
  std::vector<float> input_values(16, 1.0f);
  std::vector<float> weight_values = {1.0f, 1.0f, 1.0f, 1.0f};
  std::vector<int64_t> input_dims = {4, 1, 2, 2};
  std::vector<int64_t> weight_dims = {1, 1, 2, 2};
  std::vector<int64_t> output_dims = {4, 1, 3, 3};

  // Every image in the batch maps to the same 3x3 result, so the expected tensor is that
  // tile repeated once per batch entry.
  std::vector<float> per_image_output = {1.0f, 2.0f, 1.0f,
                                         2.0f, 4.0f, 2.0f,
                                         1.0f, 2.0f, 1.0f};
  std::vector<float> expected_output;
  for (int batch_idx = 0; batch_idx < 4; ++batch_idx) {
    expected_output.insert(expected_output.end(), per_image_output.begin(), per_image_output.end());
  }

  TestConvTransposeOp(op_attrs, {input_values, weight_values}, {input_dims, weight_dims},
                      expected_output, output_dims,
                      OpTester::ExpectResult::kExpectSuccess, "",
                      {kTensorrtExecutionProvider, kQnnExecutionProvider, kCudaNHWCExecutionProvider});
}

// The inconsistent output_shape check must also fire when the stride is not 1.
// X and W are 1x1x3x3 with stride 2 (dilation left unset). The unpadded output is
// (3 - 1) * 2 + 3 = 7 per axis, so requesting 20x20 must be rejected.
TEST(ConvTransposeTest, ConvTranspose_2D_Stride2_InconsistentOutputShape) {
  OpTester test("ConvTranspose", 11);
  test.AddShapeToTensorData(false);
  test.AddAttribute("strides", std::vector<int64_t>{2, 2});
  test.AddAttribute("output_shape", std::vector<int64_t>{20, 20});
  test.AddInput<float>("X", {1, 1, 3, 3}, std::vector<float>(9, 1.0f));
  test.AddInput<float>("W", {1, 1, 3, 3}, std::vector<float>(9, 1.0f));
  test.AddOutput<float>("Y", {0}, {});

  test.Run(OpTester::ExpectResult::kExpectFailure, "inconsistent with input spatial dimensions",
           {kTensorrtExecutionProvider, kQnnExecutionProvider, kDmlExecutionProvider,
            kCudaExecutionProvider, kCudaNHWCExecutionProvider, kWebGpuExecutionProvider});
}

// The inconsistent output_shape check must also fire when dilation is greater than 1.
// X and W are 1x1x3x3 with dilation 2 (stride left unset). The dilated kernel extent is
// (3 - 1) * 2 + 1 = 5 and the unpadded output is (3 - 1) * 1 + 5 = 7 per axis, so 30x30
// cannot be satisfied.
TEST(ConvTransposeTest, ConvTranspose_2D_Dilation_InconsistentOutputShape) {
  const std::vector<int64_t> dilation_dims{2, 2};
  const std::vector<int64_t> requested_output_shape{30, 30};
  const std::vector<float> nine_ones(9, 1.0f);

  OpTester test("ConvTranspose", 11);
  test.AddShapeToTensorData(false);
  test.AddAttribute("dilations", dilation_dims);
  test.AddAttribute("output_shape", requested_output_shape);
  test.AddInput<float>("X", {1, 1, 3, 3}, nine_ones);
  test.AddInput<float>("W", {1, 1, 3, 3}, nine_ones);
  test.AddOutput<float>("Y", {0}, {});

  test.Run(OpTester::ExpectResult::kExpectFailure, "inconsistent with input spatial dimensions",
           {kTensorrtExecutionProvider, kQnnExecutionProvider, kDmlExecutionProvider,
            kCudaExecutionProvider, kCudaNHWCExecutionProvider, kWebGpuExecutionProvider});
}

// Legitimate use of output_shape: the requested size (3) is smaller than the unpadded
// output (5), so padding has to be derived from it.
TEST(ConvTransposeTest, ConvTranspose_2D_OutputShape_RequiringPadding) {
  ConvTransposeOpAttributes op_attrs = {
      std::vector<int64_t>{3, 3},        // kernel_shape
      {},                                // output_padding
      std::vector<int64_t>{3, 3},        // output_shape: below the unpadded size of 5
      std::vector<int64_t>{0, 0, 0, 0},  // pads (per the original note, replaced by computed pads)
      std::vector<int64_t>{1, 1},        // strides
      std::vector<int64_t>{1, 1},        // dilations
      1,                                 // group
      "NOTSET"                           // auto_pad
  };

  // All-ones 1x1x3x3 input and all-ones 1x1x3x3 kernel.
  // Total pad per axis = (3 - 1) * 1 + (3 - 1) * 1 + 1 - 3 = 2, which the original note
  // describes as split into pad_head = 1 and pad_tail = 1.
  std::vector<float> input_values(9, 1.0f);
  std::vector<float> weight_values(9, 1.0f);
  std::vector<int64_t> input_dims = {1, 1, 3, 3};
  std::vector<int64_t> weight_dims = {1, 1, 3, 3};
  std::vector<int64_t> output_dims = {1, 1, 3, 3};

  // The unpadded 5x5 result is
  //   1 2 3 2 1 / 2 4 6 4 2 / 3 6 9 6 3 / 2 4 6 4 2 / 1 2 3 2 1
  // and trimming one element from every side leaves its central 3x3 window.
  std::vector<float> expected_output = {4.0f, 6.0f, 4.0f,
                                        6.0f, 9.0f, 6.0f,
                                        4.0f, 6.0f, 4.0f};

  TestConvTransposeOp(op_attrs, {input_values, weight_values}, {input_dims, weight_dims},
                      expected_output, output_dims,
                      OpTester::ExpectResult::kExpectSuccess, "",
                      {kTensorrtExecutionProvider, kQnnExecutionProvider,
                       kOpenVINOExecutionProvider, kCudaNHWCExecutionProvider});
}

#if !defined(ORT_NO_EXCEPTIONS)
// Extreme attribute values that push the shape/pad arithmetic past int64_t must produce a
// failure rather than a wrapped result. SafeInt throws on overflow; in no-exceptions builds
// that aborts, which is why these tests are compiled out there.
TEST(ConvTransposeTest, ConvTranspose_OverflowInPadComputation) {
  // 4611686018427387903 == 2^62 - 1 == INT64_MAX / 2. With kernel = 3 the product
  // (kernel - 1) * dilation is INT64_MAX - 1, which still fits on its own; the overflow
  // presumably appears once the remaining terms of the size computation are added.
  // NOTE(review): confirm which expression in the implementation actually trips.
  const std::vector<int64_t> stride_dims{1, 1};
  const std::vector<int64_t> huge_dilations{4611686018427387903LL, 4611686018427387903LL};
  const std::vector<int64_t> kernel_dims{3, 3};
  const std::vector<float> nine_ones(9, 1.0f);

  OpTester test("ConvTranspose", 11);
  test.AddShapeToTensorData(false);
  test.AddAttribute("strides", stride_dims);
  test.AddAttribute("dilations", huge_dilations);
  test.AddAttribute("kernel_shape", kernel_dims);
  test.AddInput<float>("X", {1, 1, 3, 3}, nine_ones);
  test.AddInput<float>("W", {1, 1, 3, 3}, nine_ones);
  test.AddOutput<float>("Y", {0}, {});

  // The expected-failure string is empty, so any failure message is accepted.
  test.Run(OpTester::ExpectResult::kExpectFailure, "",
           {kTensorrtExecutionProvider, kQnnExecutionProvider, kDmlExecutionProvider,
            kCudaExecutionProvider, kCudaNHWCExecutionProvider, kWebGpuExecutionProvider});
}

// Overflow on the explicit output_shape path: the attribute values are chosen so that the
// total-pad expression in ComputeTotalPad exceeds int64_t.
TEST(ConvTransposeTest, ConvTranspose_OverflowInExplicitOutputShapePath) {
  // 2305843009213693952 == 2^61. Along the first axis, with in_size = 3 and kernel = 3:
  //   (in_size - 1) * stride   = 2 * 2^61 = 2^62   (fits)
  //   (kernel - 1) * dilation  = 2 * 2^61 = 2^62   (fits)
  // but their sum is 2^63, one past INT64_MAX, so the checked addition overflows.
  const std::vector<int64_t> huge_strides{2305843009213693952LL, 1};
  const std::vector<int64_t> huge_dilations{2305843009213693952LL, 1};
  const std::vector<int64_t> kernel_dims{3, 3};
  const std::vector<int64_t> requested_output_shape{5, 5};
  const std::vector<float> nine_ones(9, 1.0f);

  OpTester test("ConvTranspose", 11);
  test.AddShapeToTensorData(false);
  test.AddAttribute("strides", huge_strides);
  test.AddAttribute("dilations", huge_dilations);
  test.AddAttribute("kernel_shape", kernel_dims);
  test.AddAttribute("output_shape", requested_output_shape);
  test.AddInput<float>("X", {1, 1, 3, 3}, nine_ones);
  test.AddInput<float>("W", {1, 1, 3, 3}, nine_ones);
  test.AddOutput<float>("Y", {0}, {});

  // The expected-failure string is empty, so any failure message is accepted.
  test.Run(OpTester::ExpectResult::kExpectFailure, "",
           {kTensorrtExecutionProvider, kQnnExecutionProvider, kDmlExecutionProvider,
            kCudaExecutionProvider, kCudaNHWCExecutionProvider, kWebGpuExecutionProvider});
}
#endif // !defined(ORT_NO_EXCEPTIONS)

} // namespace test
} // namespace onnxruntime
Loading