microsoft · yuslepukhin · Jul 1, 2026 · Jun 30, 2026 · Jun 30, 2026 · Jun 30, 2026
diff --git a/onnxruntime/core/providers/common.h b/onnxruntime/core/providers/common.h
@@ -155,9 +155,11 @@ inline Status ComputePadAndOutputShape(const int64_t in_dim,
   return Status::OK();
 }
 
-constexpr inline int64_t ComputeTotalPad(int64_t in_size, int64_t stride, int64_t adj,
-                                         int64_t kernel, int64_t dilation, int64_t out_size) {
-  return std::max<int64_t>(0, (in_size - 1) * stride + adj + (kernel - 1) * dilation + 1 - out_size);
+inline int64_t ComputeTotalPad(int64_t in_size, int64_t stride, int64_t adj,
+                               int64_t kernel, int64_t dilation, int64_t out_size) {
+  SafeInt<int64_t> safe_pad = (SafeInt<int64_t>(in_size) - 1) * stride + adj +
+                              (SafeInt<int64_t>(kernel) - 1) * dilation + 1 - out_size;
+  return std::max<int64_t>(0, safe_pad);
 }
 
 inline void DistributePadding(AutoPadType pad_type, const int64_t& total_pad,

diff --git a/onnxruntime/core/providers/cpu/nn/conv_transpose_attributes.h b/onnxruntime/core/providers/cpu/nn/conv_transpose_attributes.h
@@ -305,14 +305,46 @@ struct ConvTransposeAttributes : public ConvAttributes {
       int64_t* out_size) const {
     // Output shape is explicitly provided - pad values will have to be computed
     if (*out_size != -1) {
-      if (*out_size < 0) {
+      if (*out_size <= 0) {
         return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
-                               "Explicit output size is negative: ", *out_size);
+                               "Explicit output size must be positive. Got: ", *out_size);
+      }
+      if (in_size <= 0) {
+        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                               "Input spatial dimension must be positive. Got: ", in_size);
+      }
+      if (stride <= 0) {
+        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Stride must be positive. Got: ", stride);
+      }
+      if (kernel <= 0) {
+        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Kernel size must be positive. Got: ", kernel);
+      }
+      if (dilation <= 0) {
+        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Dilation must be positive. Got: ", dilation);
+      }
+      if (adj < 0) {
+        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                               "Output padding must be non-negative. Got: ", adj);
       }
       // total pad
       auto total_pad = ComputeTotalPad(in_size, stride, adj,
                                        kernel, dilation, *out_size);
       DistributePadding(pad_type, total_pad, *pad_head, *pad_tail);
+
+      // Verify that the forward-conv re-derivation of input size from the output size and pads
+      // is consistent with the actual input size. Col2im re-derives the input spatial extent as:
+      //   derived_in = (out_size + pad_head + pad_tail - dkernel) / stride + 1
+      // If this exceeds in_size, Col2im would read past the col_buffer allocation.
+      SafeInt<int64_t> dkernel = (SafeInt<int64_t>(kernel) - 1) * dilation + 1;
+      int64_t derived_in = (SafeInt<int64_t>(*out_size) + *pad_head + *pad_tail - dkernel) / stride + 1;
+      if (derived_in != in_size) {
+        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
+                               "Explicit output_shape is inconsistent with input spatial dimensions. "
+                               "Expected input size ",
+                               derived_in, " but got ", in_size,
+                               " (output_size=", *out_size, ", kernel=", kernel,
+                               ", stride=", stride, ", dilation=", dilation, ", adj=", adj, ").");
+      }
       return Status::OK();
     }
 

diff --git a/onnxruntime/test/providers/cpu/nn/conv_transpose_op_test.cc b/onnxruntime/test/providers/cpu/nn/conv_transpose_op_test.cc
@@ -1729,5 +1729,287 @@ TEST(ConvTransposeTest, ConvTranspose_OutputPaddingExceedsStride) {
            {kTensorrtExecutionProvider, kQnnExecutionProvider, kDmlExecutionProvider, kWebGpuExecutionProvider});
 }
 
+// Test that an inconsistent explicit output_shape is rejected (output_shape too large
+// relative to input spatial dimensions causes a pad/buffer size mismatch).
+TEST(ConvTransposeTest, ConvTranspose_InconsistentOutputShape) {
+  OpTester test("ConvTranspose", 11);
+  test.AddShapeToTensorData(false);
+  // Input: 1x1x3x3, kernel 3x3, stride 1, no dilation.
+  // Natural output without padding = (3-1)*1 + 3 = 5.
+  // Setting output_shape to 100x100 is inconsistent.
+  test.AddAttribute("output_shape", std::vector<int64_t>{100, 100});
+  test.AddInput<float>("X", {1, 1, 3, 3}, std::vector<float>(9, 1.0f));
+  test.AddInput<float>("W", {1, 1, 3, 3}, std::vector<float>(9, 1.0f));
+  test.AddOutput<float>("Y", {0}, {});
+
+  // Other EPs either don't support explicit output_shape or perform their own validation.
+  test.Run(OpTester::ExpectResult::kExpectFailure, "inconsistent with input spatial dimensions",
+           {kTensorrtExecutionProvider, kQnnExecutionProvider, kDmlExecutionProvider,
+            kCudaExecutionProvider, kCudaNHWCExecutionProvider, kWebGpuExecutionProvider});
+}
+
+// Test that output_shape=0 is rejected.
+TEST(ConvTransposeTest, ConvTranspose_ZeroOutputShape) {
+  OpTester test("ConvTranspose", 11);
+  test.AddShapeToTensorData(false);
+  test.AddAttribute("output_shape", std::vector<int64_t>{0, 0});
+  test.AddInput<float>("X", {1, 1, 3, 3}, std::vector<float>(9, 1.0f));
+  test.AddInput<float>("W", {1, 1, 3, 3}, std::vector<float>(9, 1.0f));
+  test.AddOutput<float>("Y", {0}, {});
+
+  test.Run(OpTester::ExpectResult::kExpectFailure, "output size must be positive",
+           {kTensorrtExecutionProvider, kQnnExecutionProvider, kDmlExecutionProvider, kWebGpuExecutionProvider});
+}
+
+// Test that an inconsistent 1D explicit output_shape is rejected.
+TEST(ConvTransposeTest, ConvTranspose_1D_InconsistentOutputShape) {
+  OpTester test("ConvTranspose", 11);
+  test.AddShapeToTensorData(false);
+  // Input: 1x1x5, kernel_shape=3, stride=2, dilation=1.
+  // Natural (no-pad) output = (5-1)*2 + 3 = 11. output_shape=50 is way too large.
+  test.AddAttribute("kernel_shape", std::vector<int64_t>{3});
+  test.AddAttribute("strides", std::vector<int64_t>{2});
+  test.AddAttribute("output_shape", std::vector<int64_t>{50});
+  test.AddInput<float>("X", {1, 1, 5}, std::vector<float>(5, 1.0f));
+  test.AddInput<float>("W", {1, 1, 3}, std::vector<float>(3, 1.0f));
+  test.AddOutput<float>("Y", {0}, {});
+
+  test.Run(OpTester::ExpectResult::kExpectFailure, "inconsistent with input spatial dimensions",
+           {kTensorrtExecutionProvider, kQnnExecutionProvider, kDmlExecutionProvider,
+            kCudaExecutionProvider, kCudaNHWCExecutionProvider, kWebGpuExecutionProvider});
+}
+
+// Test that an inconsistent 3D explicit output_shape is rejected.
+TEST(ConvTransposeTest, ConvTranspose_3D_InconsistentOutputShape) {
+  OpTester test("ConvTranspose", 11);
+  test.AddShapeToTensorData(false);
+  // Input: 1x1x2x2x2, kernel 2x2x2, stride 1, dilation 1.
+  // Natural output = (2-1)*1 + 2 = 3 per dim. output_shape=10x10x10 is too large.
+  test.AddAttribute("kernel_shape", std::vector<int64_t>{2, 2, 2});
+  test.AddAttribute("output_shape", std::vector<int64_t>{10, 10, 10});
+  test.AddInput<float>("X", {1, 1, 2, 2, 2}, std::vector<float>(8, 1.0f));
+  test.AddInput<float>("W", {1, 1, 2, 2, 2}, std::vector<float>(8, 1.0f));
+  test.AddOutput<float>("Y", {0}, {});
+
+  // CUDA/WebGPU don't support 3D ConvTranspose in most builds.
+  test.Run(OpTester::ExpectResult::kExpectFailure, "inconsistent with input spatial dimensions",
+           {kTensorrtExecutionProvider, kQnnExecutionProvider, kDmlExecutionProvider,
+            kCudaExecutionProvider, kCudaNHWCExecutionProvider, kWebGpuExecutionProvider});
+}
+
+// Test that a valid 3D explicit output_shape with non-trivial padding works correctly.
+TEST(ConvTransposeTest, ConvTranspose_3D_ValidOutputShape) {
+  ConvTransposeOpAttributes attrs = {
+      std::vector<int64_t>{2, 2, 2},           // kernel_shape
+      {},                                      // output_padding
+      std::vector<int64_t>{3, 3, 3},           // output_shape (natural no-pad output for 2x2x2 input, k=2, s=1)
+      std::vector<int64_t>{0, 0, 0, 0, 0, 0},  // pads
+      std::vector<int64_t>{1, 1, 1},           // strides
+      std::vector<int64_t>{1, 1, 1},           // dilations
+      1,                                       // group
+      "NOTSET"                                 // auto_pad
+  };
+  // Input 1x1x2x2x2 with all ones, kernel 1x1x2x2x2 with all ones.
+  // Output should be 1x1x3x3x3. Each output voxel sums overlapping kernel positions.
+  std::vector<float> X(8, 1.0f);
+  std::vector<float> W(8, 1.0f);
+  std::vector<int64_t> X_shape = {1, 1, 2, 2, 2};
+  std::vector<int64_t> W_shape = {1, 1, 2, 2, 2};
+  std::vector<int64_t> Y_shape = {1, 1, 3, 3, 3};
+  // Corner=1, edge=2, face=4, center=8 (same as conv input for unit kernel).
+  std::vector<float> expected_vals = {
+      1.0f, 2.0f, 1.0f, 2.0f, 4.0f, 2.0f, 1.0f, 2.0f, 1.0f,
+      2.0f, 4.0f, 2.0f, 4.0f, 8.0f, 4.0f, 2.0f, 4.0f, 2.0f,
+      1.0f, 2.0f, 1.0f, 2.0f, 4.0f, 2.0f, 1.0f, 2.0f, 1.0f};
+  TestConvTransposeOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape,
+                      OpTester::ExpectResult::kExpectSuccess, "",
+                      {kTensorrtExecutionProvider, kCudaExecutionProvider,
+                       kCudaNHWCExecutionProvider, kQnnExecutionProvider, kWebGpuExecutionProvider});
+}
+
+// Test group > 1 with explicit output_shape.
+TEST(ConvTransposeTest, ConvTranspose_2D_Group2_OutputShape) {
+  ConvTransposeOpAttributes attrs = {
+      std::vector<int64_t>{3, 3},        // kernel_shape
+      {},                                // output_padding
+      std::vector<int64_t>{5, 5},        // output_shape: natural unpadded output for in=3, k=3, s=1
+      std::vector<int64_t>{0, 0, 0, 0},  // pads
+      std::vector<int64_t>{1, 1},        // strides
+      std::vector<int64_t>{1, 1},        // dilations
+      2,                                 // group
+      "NOTSET"                           // auto_pad
+  };
+  // X: 1x2x3x3 (2 input channels, group=2, so 1 channel per group)
+  // W: 2x1x3x3 (C=2, M/group=1, so output channels = 1*2 = 2)
+  std::vector<float> X(18, 1.0f);
+  std::vector<float> W(18, 1.0f);
+  std::vector<int64_t> X_shape = {1, 2, 3, 3};
+  std::vector<int64_t> W_shape = {2, 1, 3, 3};
+  std::vector<int64_t> Y_shape = {1, 2, 5, 5};
+  // Each group produces a 5x5 output. With all-ones input (3x3) and all-ones kernel (3x3),
+  // it's the correlation of two 3x3 boxes producing the expected pattern.
+  std::vector<float> expected_vals = {
+      1.0f, 2.0f, 3.0f, 2.0f, 1.0f,
+      2.0f, 4.0f, 6.0f, 4.0f, 2.0f,
+      3.0f, 6.0f, 9.0f, 6.0f, 3.0f,
+      2.0f, 4.0f, 6.0f, 4.0f, 2.0f,
+      1.0f, 2.0f, 3.0f, 2.0f, 1.0f,
+      // Second group — identical
+      1.0f, 2.0f, 3.0f, 2.0f, 1.0f,
+      2.0f, 4.0f, 6.0f, 4.0f, 2.0f,
+      3.0f, 6.0f, 9.0f, 6.0f, 3.0f,
+      2.0f, 4.0f, 6.0f, 4.0f, 2.0f,
+      1.0f, 2.0f, 3.0f, 2.0f, 1.0f};
+  TestConvTransposeOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape,
+                      OpTester::ExpectResult::kExpectSuccess, "",
+                      {kTensorrtExecutionProvider, kQnnExecutionProvider,
+                       kOpenVINOExecutionProvider, kCudaNHWCExecutionProvider});
+}
+
+// Test with larger batch size and explicit output_shape.
+TEST(ConvTransposeTest, ConvTranspose_2D_LargeBatch_OutputShape) {
+  ConvTransposeOpAttributes attrs = {
+      std::vector<int64_t>{2, 2},        // kernel_shape
+      {},                                // output_padding
+      std::vector<int64_t>{3, 3},        // output_shape: (2-1)*1+2 = 3
+      std::vector<int64_t>{0, 0, 0, 0},  // pads
+      std::vector<int64_t>{1, 1},        // strides
+      std::vector<int64_t>{1, 1},        // dilations
+      1,                                 // group
+      "NOTSET"                           // auto_pad
+  };
+  // X: 4x1x2x2 (batch=4), W: 1x1x2x2
+  std::vector<float> X(16, 1.0f);
+  std::vector<float> W = {1.0f, 1.0f, 1.0f, 1.0f};
+  std::vector<int64_t> X_shape = {4, 1, 2, 2};
+  std::vector<int64_t> W_shape = {1, 1, 2, 2};
+  std::vector<int64_t> Y_shape = {4, 1, 3, 3};
+  // Each batch image: ConvTranspose of 2x2 ones with 2x2 ones kernel → 3x3
+  std::vector<float> single_output = {1.0f, 2.0f, 1.0f,
+                                      2.0f, 4.0f, 2.0f,
+                                      1.0f, 2.0f, 1.0f};
+  std::vector<float> expected_vals;
+  for (int b = 0; b < 4; ++b) {
+    expected_vals.insert(expected_vals.end(), single_output.begin(), single_output.end());
+  }
+  TestConvTransposeOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape,
+                      OpTester::ExpectResult::kExpectSuccess, "",
+                      {kTensorrtExecutionProvider, kQnnExecutionProvider, kCudaNHWCExecutionProvider});
+}
+
+// Test that inconsistent output_shape with non-unit stride is caught.
+TEST(ConvTransposeTest, ConvTranspose_2D_Stride2_InconsistentOutputShape) {
+  OpTester test("ConvTranspose", 11);
+  test.AddShapeToTensorData(false);
+  // Input: 1x1x3x3, kernel 3x3, stride 2, dilation 1.
+  // Natural output = (3-1)*2 + 3 = 7. output_shape=20x20 is too large.
+  test.AddAttribute("strides", std::vector<int64_t>{2, 2});
+  test.AddAttribute("output_shape", std::vector<int64_t>{20, 20});
+  test.AddInput<float>("X", {1, 1, 3, 3}, std::vector<float>(9, 1.0f));
+  test.AddInput<float>("W", {1, 1, 3, 3}, std::vector<float>(9, 1.0f));
+  test.AddOutput<float>("Y", {0}, {});
+
+  test.Run(OpTester::ExpectResult::kExpectFailure, "inconsistent with input spatial dimensions",
+           {kTensorrtExecutionProvider, kQnnExecutionProvider, kDmlExecutionProvider,
+            kCudaExecutionProvider, kCudaNHWCExecutionProvider, kWebGpuExecutionProvider});
+}
+
+// Test that inconsistent output_shape with dilation > 1 is caught.
+TEST(ConvTransposeTest, ConvTranspose_2D_Dilation_InconsistentOutputShape) {
+  OpTester test("ConvTranspose", 11);
+  test.AddShapeToTensorData(false);
+  // Input: 1x1x3x3, kernel 3x3, stride 1, dilation 2.
+  // dkernel = (3-1)*2+1 = 5. Natural output = (3-1)*1 + 5 = 7.
+  // output_shape=30x30 is too large.
+  test.AddAttribute("dilations", std::vector<int64_t>{2, 2});
+  test.AddAttribute("output_shape", std::vector<int64_t>{30, 30});
+  test.AddInput<float>("X", {1, 1, 3, 3}, std::vector<float>(9, 1.0f));
+  test.AddInput<float>("W", {1, 1, 3, 3}, std::vector<float>(9, 1.0f));
+  test.AddOutput<float>("Y", {0}, {});
+
+  test.Run(OpTester::ExpectResult::kExpectFailure, "inconsistent with input spatial dimensions",
+           {kTensorrtExecutionProvider, kQnnExecutionProvider, kDmlExecutionProvider,
+            kCudaExecutionProvider, kCudaNHWCExecutionProvider, kWebGpuExecutionProvider});
+}
+
+// Test output_shape that is slightly smaller than natural (requiring positive padding).
+// This is the normal legitimate use case for output_shape.
+TEST(ConvTransposeTest, ConvTranspose_2D_OutputShape_RequiringPadding) {
+  ConvTransposeOpAttributes attrs = {
+      std::vector<int64_t>{3, 3},        // kernel_shape
+      {},                                // output_padding
+      std::vector<int64_t>{3, 3},        // output_shape: smaller than natural (5), so pads will be added
+      std::vector<int64_t>{0, 0, 0, 0},  // pads (will be overwritten by computed pads)
+      std::vector<int64_t>{1, 1},        // strides
+      std::vector<int64_t>{1, 1},        // dilations
+      1,                                 // group
+      "NOTSET"                           // auto_pad
+  };
+  // Input 1x1x3x3 all ones, kernel 1x1x3x3 all ones.
+  // Natural output = 5x5. With output_shape=3x3, total_pad = (3-1)*1+(3-1)*1+1-3 = 2 per dim.
+  // pad_head=1, pad_tail=1 (NOTSET → pad more on head).
+  // The result is the center 3x3 of the natural 5x5 output.
+  std::vector<float> X(9, 1.0f);
+  std::vector<float> W(9, 1.0f);
+  std::vector<int64_t> X_shape = {1, 1, 3, 3};
+  std::vector<int64_t> W_shape = {1, 1, 3, 3};
+  std::vector<int64_t> Y_shape = {1, 1, 3, 3};
+  // Full 5x5 output would be: corner=1,edge=2,center area=3-9.
+  // With pad=1 on each side, we take center 3x3 of the 5x5 output, which equals:
+  // The center 3x3 of ConvTranspose(ones_3x3, ones_3x3, no padding).
+  // Full result: 1 2 3 2 1 / 2 4 6 4 2 / 3 6 9 6 3 / 2 4 6 4 2 / 1 2 3 2 1
+  // Center 3x3: 4 6 4 / 6 9 6 / 4 6 4
+  std::vector<float> expected_vals = {4.0f, 6.0f, 4.0f,
+                                      6.0f, 9.0f, 6.0f,
+                                      4.0f, 6.0f, 4.0f};
+  TestConvTransposeOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape,
+                      OpTester::ExpectResult::kExpectSuccess, "",
+                      {kTensorrtExecutionProvider, kQnnExecutionProvider,
+                       kOpenVINOExecutionProvider, kCudaNHWCExecutionProvider});
+}
+
+#if !defined(ORT_NO_EXCEPTIONS)
+// Test that extreme attribute values causing arithmetic overflow are caught.
+// SafeInt throws on overflow; in no-exceptions builds this aborts, so skip there.
+TEST(ConvTransposeTest, ConvTranspose_OverflowInPadComputation) {
+  OpTester test("ConvTranspose", 11);
+  test.AddShapeToTensorData(false);
+  // Use extreme dilation value: dilation * (kernel-1) overflows for kernel=3, dilation=INT64_MAX/4.
+  // W shape is kept small (kernel_shape attribute drives the computation, but the ORT_ENFORCE
+  // in the constructor fires on the attribute value before shape validation can occur).
+  test.AddAttribute("strides", std::vector<int64_t>{1, 1});
+  test.AddAttribute("dilations", std::vector<int64_t>{4611686018427387903LL, 4611686018427387903LL});
+  test.AddAttribute("kernel_shape", std::vector<int64_t>{3, 3});
+  test.AddInput<float>("X", {1, 1, 3, 3}, std::vector<float>(9, 1.0f));
+  test.AddInput<float>("W", {1, 1, 3, 3}, std::vector<float>(9, 1.0f));
+  test.AddOutput<float>("Y", {0}, {});
+
+  test.Run(OpTester::ExpectResult::kExpectFailure, "",
+           {kTensorrtExecutionProvider, kQnnExecutionProvider, kDmlExecutionProvider,
+            kCudaExecutionProvider, kCudaNHWCExecutionProvider, kWebGpuExecutionProvider});
+}
+
+// Test overflow in explicit output_shape path: large out_size combined with parameters
+// that would overflow in ComputeTotalPad.
+TEST(ConvTransposeTest, ConvTranspose_OverflowInExplicitOutputShapePath) {
+  OpTester test("ConvTranspose", 11);
+  test.AddShapeToTensorData(false);
+  // Large stride * (in_size - 1) where in_size = 3, stride = INT64_MAX/4.
+  // (3-1) * (INT64_MAX/4) = INT64_MAX/2, which is fine. But adding kernel-1=2 * dilation=INT64_MAX/4
+  // overflows.
+  test.AddAttribute("strides", std::vector<int64_t>{2305843009213693952LL, 1});
+  test.AddAttribute("dilations", std::vector<int64_t>{2305843009213693952LL, 1});
+  test.AddAttribute("kernel_shape", std::vector<int64_t>{3, 3});
+  test.AddAttribute("output_shape", std::vector<int64_t>{5, 5});
+  test.AddInput<float>("X", {1, 1, 3, 3}, std::vector<float>(9, 1.0f));
+  test.AddInput<float>("W", {1, 1, 3, 3}, std::vector<float>(9, 1.0f));
+  test.AddOutput<float>("Y", {0}, {});
+
+  test.Run(OpTester::ExpectResult::kExpectFailure, "",
+           {kTensorrtExecutionProvider, kQnnExecutionProvider, kDmlExecutionProvider,
+            kCudaExecutionProvider, kCudaNHWCExecutionProvider, kWebGpuExecutionProvider});
+}
+#endif  // !defined(ORT_NO_EXCEPTIONS)
+
 }  // namespace test
 }  // namespace onnxruntime