diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 5a0b2f95e83..9ccf7cb9442 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -76,7 +76,6 @@ mark_as_advanced(CUDF_BUILD_STREAMS_TEST_UTIL)
 option(CUDF_CLANG_TIDY "Enable clang-tidy during compilation" OFF)
 option(CUDF_IWYU "Enable IWYU during compilation" OFF)
 option(CUDF_CLANG_TIDY_AUTOFIX "Enable clang-tidy autofixes" OFF)
-
 option(
   CUDF_KVIKIO_REMOTE_IO
   "Enable remote IO (e.g. AWS S3) support through KvikIO. If disabled, cudf-python will still be able to do remote IO through fsspec."
@@ -101,7 +100,6 @@ message(VERBOSE "CUDF: Statically link the CUDA runtime: ${CUDA_STATIC_RUNTIME}"
 message(VERBOSE
         "CUDF: Build with remote IO (e.g. AWS S3) support through KvikIO: ${CUDF_KVIKIO_REMOTE_IO}"
 )
-
 # Set a default build type if none was specified
 rapids_cmake_build_type("Release")
 set(CUDF_BUILD_TESTS ${BUILD_TESTS})
diff --git a/cpp/include/cudf/fixed_point/conv.hpp b/cpp/include/cudf/fixed_point/conv.hpp
index f10f5e15c92..b4a0bcc1126 100644
--- a/cpp/include/cudf/fixed_point/conv.hpp
+++ b/cpp/include/cudf/fixed_point/conv.hpp
@@ -41,13 +41,23 @@ CUDF_HOST_DEVICE Fixed convert_floating_to_fixed(Floating floating, numeric::sca
   using Rep        = typename Fixed::rep;
   auto const value = [&]() {
     if constexpr (Fixed::rad == numeric::Radix::BASE_10) {
-      return numeric::detail::convert_floating_to_integral<Rep>(floating, scale);
+      if constexpr (Fixed::track == numeric::overflow_tracking::on) {
+        auto const [v, overflow] =
+          numeric::detail::convert_floating_to_integral_checked<Rep>(floating, scale);
+        return cuda::std::pair{v, overflow};
+      } else {
+        return numeric::detail::convert_floating_to_integral<Rep>(floating, scale);
+      }
     } else {
       return static_cast<Rep>(numeric::detail::shift<Rep, Fixed::rad>(floating, scale));
     }
   }();
 
-  return Fixed(numeric::scaled_integer<Rep>{value, scale});
+  if constexpr (Fixed::rad == numeric::Radix::BASE_10 && Fixed::track == numeric::overflow_tracking::on) {
+    return Fixed(numeric::scaled_integer<Rep>{value.first, scale}, value.second);
+  } else {
+    return Fixed(numeric::scaled_integer<Rep>{value, scale});
+  }
 }
 
 /**
diff --git a/cpp/include/cudf/fixed_point/detail/floating_conversion.hpp b/cpp/include/cudf/fixed_point/detail/floating_conversion.hpp
index 6bf74613da7..20a5523eb16 100644
--- a/cpp/include/cudf/fixed_point/detail/floating_conversion.hpp
+++ b/cpp/include/cudf/fixed_point/detail/floating_conversion.hpp
@@ -514,6 +514,45 @@ CUDF_HOST_DEVICE inline IntegerType guarded_left_shift(IntegerType value, int bi
                                            : cuda::std::numeric_limits<IntegerType>::max();
 }
 
+/**
+ * @brief Saturating left bit-shift that reports overflow through a sticky flag
+ *
+ * `overflow` is set to `true` (and never cleared here) if:
+ * - the shift count is negative or at least the bit width of `IntegerType`, which would be
+ *   undefined behavior for a plain `<<`, or
+ * - any set bit of `value` would be shifted out of `IntegerType` (i.e. true overflow)
+ *
+ * @tparam IntegerType Type of input unsigned integer value
+ * @param value The integer whose bits are being shifted
+ * @param bit_shift The number of bits to shift left
+ * @param overflow Sticky overflow flag, raised on overflow and otherwise left untouched
+ * @return `value << bit_shift`, or the maximum `IntegerType` value whenever `overflow` is raised
+ */
+template <typename IntegerType, CUDF_ENABLE_IF(cuda::std::is_unsigned_v<IntegerType>)>
+CUDF_HOST_DEVICE inline IntegerType checked_left_shift(IntegerType value,
+                                                       int bit_shift,
+                                                       bool& overflow)
+{
+  using limits                 = cuda::std::numeric_limits<IntegerType>;
+  constexpr int num_value_bits = limits::digits;
+
+  // A count outside [0, num_value_bits) cannot be shifted safely. Negative counts (really a
+  // right-shift) are not expected from callers and are reported as overflow as well.
+  bool const count_is_valid = (bit_shift >= 0) && (bit_shift < num_value_bits);
+
+  // `max() >> bit_shift` is the largest input that keeps every set bit after the shift. The `&&`
+  // short-circuit guarantees it is only evaluated for a valid count. For a zero count the bound
+  // is max() itself, so `value` is returned unchanged.
+  if (count_is_valid && value <= (limits::max() >> bit_shift)) {
+    return static_cast<IntegerType>(value << bit_shift);
+  }
+
+  // Either the count is invalid or set bits would be lost: saturate to the maximum value and
+  // raise the sticky flag.
+  overflow = true;
+  return limits::max();
+}
+
 /**
  * @brief Perform a bit-shift right, guarding against undefined behavior
  *
@@ -530,6 +569,39 @@ CUDF_HOST_DEVICE inline IntegerType guarded_right_shift(IntegerType value, int b
   return (bit_shift <= max_safe_bit_shift) ? value >> bit_shift : 0;
 }
 
+/** @brief Cast `value` to the unsigned type `To`; when it does not fit, saturate to the maximum
+ * of `To` and raise the sticky `overflow` flag (which is never cleared here). */
+template <typename To,
+          typename From,
+          CUDF_ENABLE_IF(cuda::std::is_unsigned_v<To> && cuda::std::is_unsigned_v<From>)>
+CUDF_HOST_DEVICE inline To checked_narrow_cast(From value, bool& overflow)
+{
+  constexpr To to_max = cuda::std::numeric_limits<To>::max();
+  if (value <= static_cast<From>(to_max)) { return static_cast<To>(value); }
+  overflow = true;
+  return to_max;
+}
+
+/**
+ * @brief Multiply `value` by 10^`pow10`; if the product does not fit in `T`, saturate to the
+ * maximum of `T` and raise the sticky `overflow` flag. A non-positive `pow10` is a no-op.
+ * Intentionally simple (one decimal digit per iteration): pow10 can be up to ~3e2 for doubles.
+ */
+template <typename T, CUDF_ENABLE_IF(cuda::std::is_unsigned_v<T>)>
+CUDF_HOST_DEVICE inline T multiply_power10_saturating(T value, int pow10, bool& overflow)
+{
+  constexpr T saturated = cuda::std::numeric_limits<T>::max();
+  // Countdown loop; it simply does not run when pow10 <= 0.
+  for (int digits_left = pow10; digits_left > 0; --digits_left) {
+    if (value > saturated / 10) {  // one more `* 10` would wrap around
+      overflow = true;
+      return saturated;
+    }
+    value = static_cast<T>(value * 10);
+  }
+  return value;
+}
+
 /**
  * @brief Helper struct with common constants needed by the floating <--> decimal conversions
  */
@@ -758,6 +830,63 @@ CUDF_HOST_DEVICE inline cuda::std::make_unsigned_t<Rep> shift_to_decimal_pospow(
   return guarded_left_shift(static_cast<UnsignedRep>(shifting_rep), pow2);
 }
 
+/**
+ * @brief Overflow-tracking variant of `shift_to_decimal_pospow`: left-shifts `base2_value` by
+ * `pow2` bits and divides by 10^`pow10`; narrowing / final shift saturate and raise `overflow` */
+template <typename Rep,
+          typename FloatingType,
+          CUDF_ENABLE_IF(cuda::std::is_floating_point_v<FloatingType>)>
+CUDF_HOST_DEVICE inline cuda::std::make_unsigned_t<Rep> shift_to_decimal_pospow_checked(
+  typename shifting_constants<FloatingType>::IntegerRep const base2_value,
+  int pow2,
+  int pow10,
+  bool& overflow)
+{
+  using Constants   = shifting_constants<FloatingType>;
+  using ShiftingRep = typename Constants::ShiftingRep;  // type used for the intermediate math
+  using UnsignedRep = cuda::std::make_unsigned_t<Rep>;  // type of the returned magnitude
+
+  auto shifting_rep = static_cast<ShiftingRep>(base2_value);
+
+  static constexpr int shift_up_to = sizeof(ShiftingRep) * 8 - Constants::num_2s_shift_buffer_bits;
+  static constexpr int shift_from  = Constants::num_significand_bits + 1;
+  static constexpr int max_init_shift = shift_up_to - shift_from;  // cap on the first left-shift
+
+  if (pow2 <= max_init_shift) {  // one shift covers all of pow2: shift, divide, checked narrowing
+    shifting_rep = divide_power10<ShiftingRep>(shifting_rep << pow2, pow10);
+    return checked_narrow_cast<UnsignedRep>(static_cast<cuda::std::make_unsigned_t<ShiftingRep>>(shifting_rep),
+                                           overflow);
+  }
+
+  shifting_rep <<= max_init_shift;  // otherwise shift by the cap first ...
+  pow2 -= max_init_shift;  // ... and keep track of the bits still to be shifted
+
+  while (pow10 > Constants::max_digits_shift) {  // alternate dividing and shifting in chunks
+    shifting_rep /= Constants::max_digits_shift_pow;
+    pow10 -= Constants::max_digits_shift;
+
+    if (pow2 <= Constants::max_bits_shift) {  // the remaining shift fits in one step: finish here
+      shifting_rep = divide_power10<ShiftingRep>(shifting_rep << pow2, pow10);
+      return checked_narrow_cast<UnsignedRep>(static_cast<cuda::std::make_unsigned_t<ShiftingRep>>(shifting_rep),
+                                             overflow);
+    }
+
+    shifting_rep <<= Constants::max_bits_shift;
+    pow2 -= Constants::max_bits_shift;
+  }
+
+  if constexpr (Constants::is_double) {  // divide by whatever power of 10 is left
+    shifting_rep = divide_power10_64bit(shifting_rep, pow10);
+  } else {
+    shifting_rep = divide_power10_32bit(shifting_rep, pow10);
+  }
+
+  // Final cast + left-shift can overflow: both helpers saturate and raise `overflow`.
+  auto const narrowed =
+    checked_narrow_cast<UnsignedRep>(static_cast<cuda::std::make_unsigned_t<ShiftingRep>>(shifting_rep), overflow);
+  return checked_left_shift<UnsignedRep>(narrowed, pow2, overflow);
+}
+
 /**
  * @brief Perform base-2 -> base-10 fixed-point conversion for pow10 < 0
  *
@@ -844,6 +973,65 @@ CUDF_HOST_DEVICE inline cuda::std::make_unsigned_t<Rep> shift_to_decimal_negpow(
   return final_shifts_low10s();
 }
 
+/**
+ * @brief Overflow-tracking variant of `shift_to_decimal_negpow`: multiplies `base2_value` by
+ * 10^(-`pow10`) and right-shifts by -`pow2` bits; narrowing / multiply saturate and set `overflow` */
+template <typename Rep,
+          typename FloatingType,
+          CUDF_ENABLE_IF(cuda::std::is_floating_point_v<FloatingType>)>
+CUDF_HOST_DEVICE inline cuda::std::make_unsigned_t<Rep> shift_to_decimal_negpow_checked(
+  typename shifting_constants<FloatingType>::IntegerRep base2_value, int pow2, int pow10, bool& overflow)
+{
+  using Constants   = shifting_constants<FloatingType>;
+  using ShiftingRep = typename Constants::ShiftingRep;  // type used for the intermediate math
+  using UnsignedRep = cuda::std::make_unsigned_t<Rep>;  // type of the returned magnitude
+
+  auto shifting_rep = static_cast<ShiftingRep>(base2_value);
+
+  int pow10_mag = -pow10;  // power of 10 still to multiply by
+  int pow2_mag  = -pow2;  // number of bits still to shift right
+
+  auto final_shifts_low10s = [&]() {  // last step: multiply, right-shift, then checked narrowing
+    if constexpr (Constants::is_double) {
+      shifting_rep = multiply_power10_64bit(shifting_rep, pow10_mag);
+    } else {
+      shifting_rep = multiply_power10_32bit(shifting_rep, pow10_mag);
+    }
+    // guarded_right_shift does not "overflow" the representable range; it may drop to 0 on UB.
+    // We don't treat that as overflow here.
+    return checked_narrow_cast<UnsignedRep>(
+      static_cast<cuda::std::make_unsigned_t<ShiftingRep>>(guarded_right_shift(shifting_rep, pow2_mag)), overflow);
+  };
+
+  if (pow10_mag <= Constants::max_digits_shift) { return final_shifts_low10s(); }  // single pass
+
+  static constexpr int shift_up_to        = sizeof(ShiftingRep) * 8 - Constants::max_bits_shift;
+  static constexpr int shift_from         = Constants::num_significand_bits + 1;
+  static constexpr int num_init_bit_shift = shift_up_to - shift_from;
+
+  shifting_rep <<= num_init_bit_shift;
+  pow2_mag += num_init_bit_shift;  // the extra left-shift is added to the right-shift still owed
+
+  do {  // alternate multiplying and right-shifting in chunks
+    shifting_rep *= Constants::max_digits_shift_pow;
+    pow10_mag -= Constants::max_digits_shift;
+
+    if (pow2_mag <= Constants::max_bits_shift) {  // the remaining shift fits in one step
+      shifting_rep >>= pow2_mag;
+
+      auto const narrowed =
+        checked_narrow_cast<UnsignedRep>(static_cast<cuda::std::make_unsigned_t<ShiftingRep>>(shifting_rep),
+                                         overflow);
+      return multiply_power10_saturating<UnsignedRep>(narrowed, pow10_mag, overflow);
+    }
+
+    shifting_rep >>= Constants::max_bits_shift;
+    pow2_mag -= Constants::max_bits_shift;
+  } while (pow10_mag > Constants::max_digits_shift);
+
+  return final_shifts_low10s();
+}
+
 /**
  * @brief Perform base-2 -> base-10 fixed-point conversion
  *
@@ -897,6 +1085,45 @@ CUDF_HOST_DEVICE inline cuda::std::make_unsigned_t<Rep> convert_floating_to_inte
   }
 }
 
+/**
+ * @brief Overflow-tracking variant of `convert_floating_to_integral_shifting`; returns the
+ * saturated unsigned magnitude paired with the overflow flag */
+template <typename Rep,
+          typename FloatingType,
+          CUDF_ENABLE_IF(cuda::std::is_floating_point_v<FloatingType>)>
+CUDF_HOST_DEVICE inline cuda::std::pair<cuda::std::make_unsigned_t<Rep>, bool>
+convert_floating_to_integral_shifting_checked(
+  typename floating_converter<FloatingType>::IntegralType base2_value, int pow10, int pow2)
+{
+  using UnsignedRep = cuda::std::make_unsigned_t<Rep>;
+  bool overflow     = false;
+  // `base2_value` may be wider than `Rep`, so every narrowing to `UnsignedRep` goes through
+  // `checked_narrow_cast`; a bare `static_cast` would drop high bits without raising `overflow`.
+  if (pow10 == 0) {
+    if (pow2 >= 0) {
+      auto const narrowed = checked_narrow_cast<UnsignedRep>(base2_value, overflow);
+      return {checked_left_shift(narrowed, pow2, overflow), overflow};
+    }
+    auto const shifted = guarded_right_shift(base2_value, -pow2);
+    return {checked_narrow_cast<UnsignedRep>(shifted, overflow), overflow};
+  }
+  if (pow10 > 0) {
+    if (pow2 <= 0) {
+      // Dividing shrinks the value, but the quotient can still be too wide for `Rep`.
+      auto const shifted = guarded_right_shift(base2_value, -pow2);
+      auto const divided = divide_power10<decltype(shifted)>(shifted, pow10);
+      return {checked_narrow_cast<UnsignedRep>(divided, overflow), overflow};
+    }
+    return {shift_to_decimal_pospow_checked<Rep, FloatingType>(base2_value, pow2, pow10, overflow), overflow};
+  }
+  if (pow2 >= 0) {  // pow10 < 0
+    auto const narrowed = checked_narrow_cast<UnsignedRep>(base2_value, overflow);
+    auto const shifted  = checked_left_shift(narrowed, pow2, overflow);
+    return {multiply_power10_saturating<UnsignedRep>(shifted, -pow10, overflow), overflow};
+  }
+  return {shift_to_decimal_negpow_checked<Rep, FloatingType>(base2_value, pow2, pow10, overflow), overflow};
+}
+
 /**
  * @brief Perform floating-point -> integer decimal conversion
  *
@@ -936,6 +1163,58 @@ CUDF_HOST_DEVICE inline Rep convert_floating_to_integral(FloatingType const& flo
   return is_negative ? -signed_magnitude : signed_magnitude;
 }
 
+/**
+ * @brief Floating-point -> integer decimal conversion with overflow detection (saturating)
+ *
+ * @tparam Rep Integer representation type of the result
+ * @tparam FloatingType Floating-point type of the input
+ * @param floating The floating-point value to convert
+ * @param scale Scale of the result, used as a power of 10
+ * @return {value, overflow}. `overflow` is sticky across the conversion steps; when the magnitude
+ * does not fit, `value` is the maximum (positive input) or minimum (negative input) of `Rep`.
+ */
+template <typename Rep,
+          typename FloatingType,
+          CUDF_ENABLE_IF(cuda::std::is_floating_point_v<FloatingType>)>
+CUDF_HOST_DEVICE inline cuda::std::pair<Rep, bool> convert_floating_to_integral_checked(
+  FloatingType const& floating, scale_type const& scale)
+{
+  using converter   = floating_converter<FloatingType>;
+  using UnsignedRep = cuda::std::make_unsigned_t<Rep>;
+  using rep_limits  = cuda::std::numeric_limits<Rep>;
+
+  // Zero is returned directly, with the overflow flag clear.
+  auto const float_bits = converter::bit_cast_to_integer(floating);
+  if (converter::is_zero(float_bits)) { return {Rep{0}, false}; }
+
+  // Take the input apart: sign, significand and power of 2.
+  auto const negative_input            = converter::get_is_negative(float_bits);
+  auto const [significand, float_pow2] = converter::get_significand_and_pow2(float_bits);
+
+  auto const pow10 = static_cast<int>(scale);
+  auto const [base2_value, pow2] =
+    add_half_if_truncates(floating, significand, float_pow2, pow10);
+
+  // Unsigned magnitude at the requested scale, plus whether any shifting step overflowed.
+  auto const [magnitude, shift_overflowed] =
+    convert_floating_to_integral_shifting_checked<Rep, FloatingType>(base2_value, pow10, pow2);
+
+  // Reapply the sign, saturating when the magnitude is not representable in `Rep`.
+  auto const positive_limit = static_cast<UnsignedRep>(rep_limits::max());
+  if (!negative_input) {
+    if (magnitude > positive_limit) { return {rep_limits::max(), true}; }
+    return {static_cast<Rep>(magnitude), shift_overflowed};
+  }
+
+  // Two's complement has one extra negative value: a magnitude of max + 1 maps exactly to min.
+  auto const negative_limit = positive_limit + UnsignedRep{1};
+  if (magnitude > negative_limit) { return {rep_limits::min(), true}; }
+  if (magnitude == negative_limit) { return {rep_limits::min(), shift_overflowed}; }
+
+  // Below the limit the magnitude fits in `Rep`, so it can be negated directly.
+  return {static_cast<Rep>(-static_cast<Rep>(magnitude)), shift_overflowed};
+}
+
 /**
  * @brief Perform base-10 -> base-2 fixed-point conversion for pow10 > 0
  *
diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp
index 6dcc4aed20a..5703b8d98ac 100644
--- a/cpp/include/cudf/fixed_point/fixed_point.hpp
+++ b/cpp/include/cudf/fixed_point/fixed_point.hpp
@@ -43,6 +43,24 @@ enum scale_type : int32_t {};
  */
 enum class Radix : int32_t { BASE_2 = 2, BASE_10 = 10 };
 
+/**
+ * @brief Compile-time switch that enables sticky overflow tracking on a `fixed_point`
+ *
+ * When the `Track` template argument of `fixed_point` is `overflow_tracking::on`, the value
+ * carries an extra `bool` that is set whenever an arithmetic operation (or scale-change) on
+ * the value would overflow the underlying integer representation. The flag is sticky: it
+ * propagates through `+`, `-`, `*`, `/`, `%` and `rescaled()` so a downstream consumer can
+ * ask whether any overflow has occurred along the entire chain of operations that produced
+ * the value.
+ *
+ * The default, `overflow_tracking::off`, is meant to leave `fixed_point` byte-for-byte
+ * identical to the historical layout, with zero runtime or storage overhead. The
+ * `decimal32_safe` / `decimal64_safe` / `decimal128_safe` aliases instantiate the
+ * `on` variant for callers (e.g. velox-cudf) that need overflow detection without
+ * requiring a separate libcudf build.
+ */
+enum class overflow_tracking : bool { off = false, on = true };
+
 /**
  * @brief Returns `true` if the representation type is supported by `fixed_point`
  *
@@ -151,6 +169,11 @@ CUDF_HOST_DEVICE inline constexpr T shift(T const& val, scale_type const& scale)
   return left_shift<Rep, Rad>(val, scale);
 }
 
+// Forward declaration. The converting `fixed_point` constructor stores this function's result
+// as its overflow flag; defined after `multiplication_overflow` / `division_overflow` below.
+template <typename Rep, Radix Rad, typename T>
+CUDF_HOST_DEVICE constexpr bool shift_overflows(T const& val, scale_type const& scale);
+
 }  // namespace detail
 
 /**
@@ -191,17 +214,54 @@ struct scaled_integer {
  * Currently, only binary and decimal `fixed_point` numbers are supported.
  * Binary operations can only be performed with other `fixed_point` numbers
  *
- * @tparam Rep The representation type (either `int32_t` or `int64_t`)
- * @tparam Rad The radix/base (either `Radix::BASE_2` or `Radix::BASE_10`)
+ * @tparam Rep   The representation type (either `int32_t` or `int64_t`)
+ * @tparam Rad   The radix/base (either `Radix::BASE_2` or `Radix::BASE_10`)
+ * @tparam Track Whether to carry a sticky overflow flag through arithmetic and
+ *               scale-change operations. Defaults to `overflow_tracking::off`,
+ *               which keeps the layout and runtime behavior identical to a
+ *               non-tracking `fixed_point`.
+ *
+ * @note Sticky overflow tracking lives at the **value-type** level. The flag
+ *       propagates automatically through every operator that takes a
+ *       `fixed_point` value (`+`, `-`, `*`, `/`, `%`, comparisons, `rescaled()`),
+ *       which means binaryops, transforms, scans and any reduction expressed
+ *       on top of the value-level operators carry it for free. Aggregations
+ *       that bypass the value layer and atomically update the **raw integer
+ *       storage** (e.g. `cudf::detail::atomic_add(&target.element<DeviceTarget>(...), ...)`
+ *       in `cudf/detail/aggregation/device_aggregators.cuh`) do **not**
+ *       propagate the per-element bool. For groupby/reduce overflow detection,
+ *       use the existing `aggregation::SUM_WITH_OVERFLOW` pattern, which
+ *       maintains a sidecar overflow column rather than relying on the
+ *       per-element flag.
  */
-template <typename Rep, Radix Rad>
+template <typename Rep, Radix Rad, overflow_tracking Track = overflow_tracking::off>
 class fixed_point {
   Rep _value{};
   scale_type _scale;
 
+  // Storage helpers used to keep `sizeof(fixed_point<..., off>)` identical to the
+  // original non-tracking layout. When `Track == on`, `_overflow` carries a bool;
+  // otherwise it is an empty type and `[[no_unique_address]]` collapses it to zero
+  // bytes (no ABI change for `decimal32`/`decimal64`/`decimal128`).
+  struct _no_overflow_flag {};
+  struct _overflow_flag_storage {
+    bool value{false};
+  };
+
+  static constexpr bool _tracks_overflow = (Track == overflow_tracking::on);
+  using _overflow_storage_t =
+    cuda::std::conditional_t<_tracks_overflow, _overflow_flag_storage, _no_overflow_flag>;
+  [[no_unique_address]] _overflow_storage_t _overflow{};
+
+  // Befriend every `fixed_point` instantiation (any Rep/Rad/Track) so that one instantiation
+  // can access the private members, including `_overflow`, of another.
+  template <typename, Radix, overflow_tracking>
+  friend class fixed_point;
+
  public:
-  using rep                 = Rep;  ///< The representation type
-  static constexpr auto rad = Rad;  ///< The base
+  using rep                       = Rep;     ///< The representation type
+  static constexpr auto rad       = Rad;     ///< The base
+  static constexpr auto track     = Track;   ///< The overflow-tracking mode
 
   /**
    * @brief Constructor that will perform shifting to store value appropriately (from integral
@@ -219,6 +279,9 @@ class fixed_point {
     // constructing to `Rep` that is wider than `T`
     : _value{detail::shift<Rep, Rad>(static_cast<Rep>(value), scale)}, _scale{scale}
   {
+    if constexpr (_tracks_overflow) {
+      _overflow.value = detail::shift_overflows<Rep, Rad>(static_cast<Rep>(value), scale);
+    }
   }
 
   /**
@@ -231,6 +294,20 @@ class fixed_point {
   {
   }
 
+  /**
+   * @brief Constructor taking an already-scaled integer together with its overflow state
+   *
+   * Intended for conversions that compute the scaled representation and detect overflow on
+   * their own (e.g. float <-> decimal conversion helpers); no shifting is applied.
+   * @param s The scaled integer (value and scale), stored as given
+   * @param overflow Initial state of the sticky overflow flag
+   */
+  CUDF_HOST_DEVICE inline explicit fixed_point(scaled_integer<Rep> s, bool overflow)
+    requires(Track == overflow_tracking::on)
+    : _value{s.value}, _scale{s.scale}, _overflow{overflow}
+  {
+  }
+
   /**
    * @brief "Scale-less" constructor that constructs `fixed_point` number with a specified
    * value and scale of zero
@@ -290,6 +367,25 @@ class fixed_point {
    */
   CUDF_HOST_DEVICE [[nodiscard]] inline scale_type scale() const { return _scale; }
 
+  /**
+   * @brief Whether fixed-point overflow was detected while producing this value
+   *
+   * Only callable when `Track == overflow_tracking::on` (e.g. `decimal*_safe` aliases).
+   * Sticky: once set, propagates through operations that combine this value with others.
+   *
+   * @note The flag is propagated by the value-level `+`, `-`, `*`, `/`, `%` and `rescaled()`
+   * operations. Aggregations that bypass them (e.g. atomic adds on the raw integer storage in
+   * `cudf::detail::atomic_add(&target.element<DeviceTarget>(...), ...)`) do not carry it through;
+   * for groupby/reduce see `aggregation::SUM_WITH_OVERFLOW` in `device_aggregators.cuh`.
+   *
+   * @return `true` if the sticky overflow flag stored in this value is set
+   */
+  [[nodiscard]] CUDF_HOST_DEVICE inline bool overflow_occurred() const noexcept
+    requires(Track == overflow_tracking::on)
+  {
+    return _overflow.value;
+  }
+
   /**
    * @brief Explicit conversion operator to `bool`
    *
@@ -303,13 +399,15 @@ class fixed_point {
   /**
    * @brief operator +=
    *
-   * @tparam Rep1 Representation type of the operand `rhs`
-   * @tparam Rad1 Radix (base) type of the operand `rhs`
+   * @tparam Rep1   Representation type of the operand `rhs`
+   * @tparam Rad1   Radix (base) type of the operand `rhs`
+   * @tparam Track1 Overflow-tracking mode of the operand `rhs` (must match this)
    * @param rhs The number being added to `this`
    * @return The sum
    */
-  template <typename Rep1, Radix Rad1>
-  CUDF_HOST_DEVICE inline fixed_point<Rep1, Rad1>& operator+=(fixed_point<Rep1, Rad1> const& rhs)
+  template <typename Rep1, Radix Rad1, overflow_tracking Track1>
+  CUDF_HOST_DEVICE inline fixed_point<Rep1, Rad1, Track1>& operator+=(
+    fixed_point<Rep1, Rad1, Track1> const& rhs)
   {
     *this = *this + rhs;
     return *this;
@@ -318,13 +416,15 @@ class fixed_point {
   /**
    * @brief operator *=
    *
-   * @tparam Rep1 Representation type of the operand `rhs`
-   * @tparam Rad1 Radix (base) type of the operand `rhs`
+   * @tparam Rep1   Representation type of the operand `rhs`
+   * @tparam Rad1   Radix (base) type of the operand `rhs`
+   * @tparam Track1 Overflow-tracking mode of the operand `rhs` (must match this)
    * @param rhs The number being multiplied to `this`
    * @return The product
    */
-  template <typename Rep1, Radix Rad1>
-  CUDF_HOST_DEVICE inline fixed_point<Rep1, Rad1>& operator*=(fixed_point<Rep1, Rad1> const& rhs)
+  template <typename Rep1, Radix Rad1, overflow_tracking Track1>
+  CUDF_HOST_DEVICE inline fixed_point<Rep1, Rad1, Track1>& operator*=(
+    fixed_point<Rep1, Rad1, Track1> const& rhs)
   {
     *this = *this * rhs;
     return *this;
@@ -333,13 +433,15 @@ class fixed_point {
   /**
    * @brief operator -=
    *
-   * @tparam Rep1 Representation type of the operand `rhs`
-   * @tparam Rad1 Radix (base) type of the operand `rhs`
+   * @tparam Rep1   Representation type of the operand `rhs`
+   * @tparam Rad1   Radix (base) type of the operand `rhs`
+   * @tparam Track1 Overflow-tracking mode of the operand `rhs` (must match this)
    * @param rhs The number being subtracted from `this`
    * @return The difference
    */
-  template <typename Rep1, Radix Rad1>
-  CUDF_HOST_DEVICE inline fixed_point<Rep1, Rad1>& operator-=(fixed_point<Rep1, Rad1> const& rhs)
+  template <typename Rep1, Radix Rad1, overflow_tracking Track1>
+  CUDF_HOST_DEVICE inline fixed_point<Rep1, Rad1, Track1>& operator-=(
+    fixed_point<Rep1, Rad1, Track1> const& rhs)
   {
     *this = *this - rhs;
     return *this;
@@ -348,13 +450,15 @@ class fixed_point {
   /**
    * @brief operator /=
    *
-   * @tparam Rep1 Representation type of the operand `rhs`
-   * @tparam Rad1 Radix (base) type of the operand `rhs`
+   * @tparam Rep1   Representation type of the operand `rhs`
+   * @tparam Rad1   Radix (base) type of the operand `rhs`
+   * @tparam Track1 Overflow-tracking mode of the operand `rhs` (must match this)
    * @param rhs The number being divided from `this`
    * @return The quotient
    */
-  template <typename Rep1, Radix Rad1>
-  CUDF_HOST_DEVICE inline fixed_point<Rep1, Rad1>& operator/=(fixed_point<Rep1, Rad1> const& rhs)
+  template <typename Rep1, Radix Rad1, overflow_tracking Track1>
+  CUDF_HOST_DEVICE inline fixed_point<Rep1, Rad1, Track1>& operator/=(
+    fixed_point<Rep1, Rad1, Track1> const& rhs)
   {
     *this = *this / rhs;
     return *this;
@@ -365,9 +469,9 @@ class fixed_point {
    *
    * @return The incremented result
    */
-  CUDF_HOST_DEVICE inline fixed_point<Rep, Rad>& operator++()
+  CUDF_HOST_DEVICE inline fixed_point<Rep, Rad, Track>& operator++()
   {
-    *this = *this + fixed_point<Rep, Rad>{1, scale_type{_scale}};
+    *this = *this + fixed_point<Rep, Rad, Track>{1, scale_type{_scale}};
     return *this;
   }
 
@@ -378,15 +482,16 @@ class fixed_point {
    * If `_scale`s are not equal, the number with the larger `_scale` is shifted to the
    * smaller `_scale`, and then the `_value`s are added.
    *
-   * @tparam Rep1 Representation type of the operand `lhs` and `rhs`
-   * @tparam Rad1 Radix (base) type of the operand `lhs` and `rhs`
+   * @tparam Rep1   Representation type of the operand `lhs` and `rhs`
+   * @tparam Rad1   Radix (base) type of the operand `lhs` and `rhs`
+   * @tparam Track1 Overflow-tracking mode of `lhs` and `rhs`
    * @param lhs The left hand side operand
    * @param rhs The right hand side operand
    * @return The resulting `fixed_point` sum
    */
-  template <typename Rep1, Radix Rad1>
-  CUDF_HOST_DEVICE inline friend fixed_point<Rep1, Rad1> operator+(
-    fixed_point<Rep1, Rad1> const& lhs, fixed_point<Rep1, Rad1> const& rhs);
+  template <typename Rep1, Radix Rad1, overflow_tracking Track1>
+  CUDF_HOST_DEVICE inline friend fixed_point<Rep1, Rad1, Track1> operator+(
+    fixed_point<Rep1, Rad1, Track1> const& lhs, fixed_point<Rep1, Rad1, Track1> const& rhs);
 
   /**
    * @brief operator - (for subtracting two `fixed_point` numbers)
@@ -395,45 +500,48 @@ class fixed_point {
    * If `_scale`s are not equal, the number with the larger `_scale` is shifted to the
    * smaller `_scale`, and then the `_value`s are subtracted.
    *
-   * @tparam Rep1 Representation type of the operand `lhs` and `rhs`
-   * @tparam Rad1 Radix (base) type of the operand `lhs` and `rhs`
+   * @tparam Rep1   Representation type of the operand `lhs` and `rhs`
+   * @tparam Rad1   Radix (base) type of the operand `lhs` and `rhs`
+   * @tparam Track1 Overflow-tracking mode of `lhs` and `rhs`
    * @param lhs The left hand side operand
    * @param rhs The right hand side operand
    * @return The resulting `fixed_point` difference
    */
-  template <typename Rep1, Radix Rad1>
-  CUDF_HOST_DEVICE inline friend fixed_point<Rep1, Rad1> operator-(
-    fixed_point<Rep1, Rad1> const& lhs, fixed_point<Rep1, Rad1> const& rhs);
+  template <typename Rep1, Radix Rad1, overflow_tracking Track1>
+  CUDF_HOST_DEVICE inline friend fixed_point<Rep1, Rad1, Track1> operator-(
+    fixed_point<Rep1, Rad1, Track1> const& lhs, fixed_point<Rep1, Rad1, Track1> const& rhs);
 
   /**
    * @brief operator * (for multiplying two `fixed_point` numbers)
    *
    * `_scale`s are added and `_value`s are multiplied.
    *
-   * @tparam Rep1 Representation type of the operand `lhs` and `rhs`
-   * @tparam Rad1 Radix (base) type of the operand `lhs` and `rhs`
+   * @tparam Rep1   Representation type of the operand `lhs` and `rhs`
+   * @tparam Rad1   Radix (base) type of the operand `lhs` and `rhs`
+   * @tparam Track1 Overflow-tracking mode of `lhs` and `rhs`
    * @param lhs The left hand side operand
    * @param rhs The right hand side operand
    * @return The resulting `fixed_point` product
    */
-  template <typename Rep1, Radix Rad1>
-  CUDF_HOST_DEVICE inline friend fixed_point<Rep1, Rad1> operator*(
-    fixed_point<Rep1, Rad1> const& lhs, fixed_point<Rep1, Rad1> const& rhs);
+  template <typename Rep1, Radix Rad1, overflow_tracking Track1>
+  CUDF_HOST_DEVICE inline friend fixed_point<Rep1, Rad1, Track1> operator*(
+    fixed_point<Rep1, Rad1, Track1> const& lhs, fixed_point<Rep1, Rad1, Track1> const& rhs);
 
   /**
    * @brief operator / (for dividing two `fixed_point` numbers)
    *
    * `_scale`s are subtracted and `_value`s are divided.
    *
-   * @tparam Rep1 Representation type of the operand `lhs` and `rhs`
-   * @tparam Rad1 Radix (base) type of the operand `lhs` and `rhs`
+   * @tparam Rep1   Representation type of the operand `lhs` and `rhs`
+   * @tparam Rad1   Radix (base) type of the operand `lhs` and `rhs`
+   * @tparam Track1 Overflow-tracking mode of `lhs` and `rhs`
    * @param lhs The left hand side operand
    * @param rhs The right hand side operand
    * @return The resulting `fixed_point` quotient
    */
-  template <typename Rep1, Radix Rad1>
-  CUDF_HOST_DEVICE inline friend fixed_point<Rep1, Rad1> operator/(
-    fixed_point<Rep1, Rad1> const& lhs, fixed_point<Rep1, Rad1> const& rhs);
+  template <typename Rep1, Radix Rad1, overflow_tracking Track1>
+  CUDF_HOST_DEVICE inline friend fixed_point<Rep1, Rad1, Track1> operator/(
+    fixed_point<Rep1, Rad1, Track1> const& lhs, fixed_point<Rep1, Rad1, Track1> const& rhs);
 
   /**
    * @brief operator % (for computing the modulo operation of two `fixed_point` numbers)
@@ -442,15 +550,16 @@ class fixed_point {
    * If `_scale`s are not equal, the number with larger `_scale` is shifted to the
    * smaller `_scale`, and then the modulus is computed.
    *
-   * @tparam Rep1 Representation type of the operand `lhs` and `rhs`
-   * @tparam Rad1 Radix (base) type of the operand `lhs` and `rhs`
+   * @tparam Rep1   Representation type of the operand `lhs` and `rhs`
+   * @tparam Rad1   Radix (base) type of the operand `lhs` and `rhs`
+   * @tparam Track1 Overflow-tracking mode of `lhs` and `rhs`
    * @param lhs The left hand side operand
    * @param rhs The right hand side operand
    * @return The resulting `fixed_point` number
    */
-  template <typename Rep1, Radix Rad1>
-  CUDF_HOST_DEVICE inline friend fixed_point<Rep1, Rad1> operator%(
-    fixed_point<Rep1, Rad1> const& lhs, fixed_point<Rep1, Rad1> const& rhs);
+  template <typename Rep1, Radix Rad1, overflow_tracking Track1>
+  CUDF_HOST_DEVICE inline friend fixed_point<Rep1, Rad1, Track1> operator%(
+    fixed_point<Rep1, Rad1, Track1> const& lhs, fixed_point<Rep1, Rad1, Track1> const& rhs);
 
   /**
    * @brief operator == (for comparing two `fixed_point` numbers)
@@ -459,15 +568,16 @@ class fixed_point {
    * If `_scale`s are not equal, the number with the larger `_scale` is shifted to the
    * smaller `_scale`, and then the `_value`s are compared.
    *
-   * @tparam Rep1 Representation type of the operand `lhs` and `rhs`
-   * @tparam Rad1 Radix (base) type of the operand `lhs` and `rhs`
+   * @tparam Rep1   Representation type of the operand `lhs` and `rhs`
+   * @tparam Rad1   Radix (base) type of the operand `lhs` and `rhs`
+   * @tparam Track1 Overflow-tracking mode of `lhs` and `rhs`
    * @param lhs The left hand side operand
    * @param rhs The right hand side operand
    * @return true if `lhs` and `rhs` are equal, false if not
    */
-  template <typename Rep1, Radix Rad1>
-  CUDF_HOST_DEVICE inline friend bool operator==(fixed_point<Rep1, Rad1> const& lhs,
-                                                 fixed_point<Rep1, Rad1> const& rhs);
+  template <typename Rep1, Radix Rad1, overflow_tracking Track1>
+  CUDF_HOST_DEVICE inline friend bool operator==(fixed_point<Rep1, Rad1, Track1> const& lhs,
+                                                 fixed_point<Rep1, Rad1, Track1> const& rhs);
 
   /**
    * @brief operator != (for comparing two `fixed_point` numbers)
@@ -476,15 +586,16 @@ class fixed_point {
    * If `_scale`s are not equal, the number with the larger `_scale` is shifted to the
    * smaller `_scale`, and then the `_value`s are compared.
    *
-   * @tparam Rep1 Representation type of the operand `lhs` and `rhs`
-   * @tparam Rad1 Radix (base) type of the operand `lhs` and `rhs`
+   * @tparam Rep1   Representation type of the operand `lhs` and `rhs`
+   * @tparam Rad1   Radix (base) type of the operand `lhs` and `rhs`
+   * @tparam Track1 Overflow-tracking mode of `lhs` and `rhs`
    * @param lhs The left hand side operand
    * @param rhs The right hand side operand
    * @return true if `lhs` and `rhs` are not equal, false if not
    */
-  template <typename Rep1, Radix Rad1>
-  CUDF_HOST_DEVICE inline friend bool operator!=(fixed_point<Rep1, Rad1> const& lhs,
-                                                 fixed_point<Rep1, Rad1> const& rhs);
+  template <typename Rep1, Radix Rad1, overflow_tracking Track1>
+  CUDF_HOST_DEVICE inline friend bool operator!=(fixed_point<Rep1, Rad1, Track1> const& lhs,
+                                                 fixed_point<Rep1, Rad1, Track1> const& rhs);
 
   /**
    * @brief operator <= (for comparing two `fixed_point` numbers)
@@ -493,15 +604,16 @@ class fixed_point {
    * If `_scale`s are not equal, the number with the larger `_scale` is shifted to the
    * smaller `_scale`, and then the `_value`s are compared.
    *
-   * @tparam Rep1 Representation type of the operand `lhs` and `rhs`
-   * @tparam Rad1 Radix (base) type of the operand `lhs` and `rhs`
+   * @tparam Rep1   Representation type of the operand `lhs` and `rhs`
+   * @tparam Rad1   Radix (base) type of the operand `lhs` and `rhs`
+   * @tparam Track1 Overflow-tracking mode of `lhs` and `rhs`
    * @param lhs The left hand side operand
    * @param rhs The right hand side operand
    * @return true if `lhs` less than or equal to `rhs`, false if not
    */
-  template <typename Rep1, Radix Rad1>
-  CUDF_HOST_DEVICE inline friend bool operator<=(fixed_point<Rep1, Rad1> const& lhs,
-                                                 fixed_point<Rep1, Rad1> const& rhs);
+  template <typename Rep1, Radix Rad1, overflow_tracking Track1>
+  CUDF_HOST_DEVICE inline friend bool operator<=(fixed_point<Rep1, Rad1, Track1> const& lhs,
+                                                 fixed_point<Rep1, Rad1, Track1> const& rhs);
 
   /**
    * @brief operator >= (for comparing two `fixed_point` numbers)
@@ -510,15 +622,16 @@ class fixed_point {
    * If `_scale`s are not equal, the number with the larger `_scale` is shifted to the
    * smaller `_scale`, and then the `_value`s are compared.
    *
-   * @tparam Rep1 Representation type of the operand `lhs` and `rhs`
-   * @tparam Rad1 Radix (base) type of the operand `lhs` and `rhs`
+   * @tparam Rep1   Representation type of the operand `lhs` and `rhs`
+   * @tparam Rad1   Radix (base) type of the operand `lhs` and `rhs`
+   * @tparam Track1 Overflow-tracking mode of `lhs` and `rhs`
    * @param lhs The left hand side operand
    * @param rhs The right hand side operand
    * @return true if `lhs` greater than or equal to `rhs`, false if not
    */
-  template <typename Rep1, Radix Rad1>
-  CUDF_HOST_DEVICE inline friend bool operator>=(fixed_point<Rep1, Rad1> const& lhs,
-                                                 fixed_point<Rep1, Rad1> const& rhs);
+  template <typename Rep1, Radix Rad1, overflow_tracking Track1>
+  CUDF_HOST_DEVICE inline friend bool operator>=(fixed_point<Rep1, Rad1, Track1> const& lhs,
+                                                 fixed_point<Rep1, Rad1, Track1> const& rhs);
 
   /**
    * @brief operator < (for comparing two `fixed_point` numbers)
@@ -527,15 +640,16 @@ class fixed_point {
    * If `_scale`s are not equal, the number with the larger `_scale` is shifted to the
    * smaller `_scale`, and then the `_value`s are compared.
    *
-   * @tparam Rep1 Representation type of the operand `lhs` and `rhs`
-   * @tparam Rad1 Radix (base) type of the operand `lhs` and `rhs`
+   * @tparam Rep1   Representation type of the operand `lhs` and `rhs`
+   * @tparam Rad1   Radix (base) type of the operand `lhs` and `rhs`
+   * @tparam Track1 Overflow-tracking mode of `lhs` and `rhs`
    * @param lhs The left hand side operand
    * @param rhs The right hand side operand
    * @return true if `lhs` less than `rhs`, false if not
    */
-  template <typename Rep1, Radix Rad1>
-  CUDF_HOST_DEVICE inline friend bool operator<(fixed_point<Rep1, Rad1> const& lhs,
-                                                fixed_point<Rep1, Rad1> const& rhs);
+  template <typename Rep1, Radix Rad1, overflow_tracking Track1>
+  CUDF_HOST_DEVICE inline friend bool operator<(fixed_point<Rep1, Rad1, Track1> const& lhs,
+                                                fixed_point<Rep1, Rad1, Track1> const& rhs);
 
   /**
    * @brief operator > (for comparing two `fixed_point` numbers)
@@ -544,15 +658,16 @@ class fixed_point {
    * If `_scale`s are not equal, the number with the larger `_scale` is shifted to the
    * smaller `_scale`, and then the `_value`s are compared.
    *
-   * @tparam Rep1 Representation type of the operand `lhs` and `rhs`
-   * @tparam Rad1 Radix (base) type of the operand `lhs` and `rhs`
+   * @tparam Rep1   Representation type of the operand `lhs` and `rhs`
+   * @tparam Rad1   Radix (base) type of the operand `lhs` and `rhs`
+   * @tparam Track1 Overflow-tracking mode of `lhs` and `rhs`
    * @param lhs The left hand side operand
    * @param rhs The right hand side operand
    * @return true if `lhs` greater than `rhs`, false if not
    */
-  template <typename Rep1, Radix Rad1>
-  CUDF_HOST_DEVICE inline friend bool operator>(fixed_point<Rep1, Rad1> const& lhs,
-                                                fixed_point<Rep1, Rad1> const& rhs);
+  template <typename Rep1, Radix Rad1, overflow_tracking Track1>
+  CUDF_HOST_DEVICE inline friend bool operator>(fixed_point<Rep1, Rad1, Track1> const& lhs,
+                                                fixed_point<Rep1, Rad1, Track1> const& rhs);
 
   /**
    * @brief Method for creating a `fixed_point` number with a new `scale`
@@ -563,11 +678,18 @@ class fixed_point {
    * @param scale The `scale` of the returned `fixed_point` number
    * @return `fixed_point` number with a new `scale`
    */
-  CUDF_HOST_DEVICE [[nodiscard]] inline fixed_point<Rep, Rad> rescaled(scale_type scale) const
+  CUDF_HOST_DEVICE [[nodiscard]] inline fixed_point<Rep, Rad, Track> rescaled(
+    scale_type scale) const
   {
     if (scale == _scale) { return *this; }
-    Rep const value = detail::shift<Rep, Rad>(_value, scale_type{scale - _scale});
-    return fixed_point<Rep, Rad>{scaled_integer<Rep>{value, scale}};
+    auto const scale_delta = scale_type{scale - _scale};
+    Rep const value        = detail::shift<Rep, Rad>(_value, scale_delta);
+    fixed_point<Rep, Rad, Track> result{scaled_integer<Rep>{value, scale}};
+    if constexpr (_tracks_overflow) {
+      result._overflow.value =
+        _overflow.value || detail::shift_overflows<Rep, Rad>(_value, scale_delta);
+    }
+    return result;
   }
 
   /**
@@ -654,139 +776,243 @@ CUDF_HOST_DEVICE inline auto multiplication_overflow(T lhs, T rhs)
   return rhs == -1 && lhs == min;
 }
 
-// PLUS Operation
-template <typename Rep1, Radix Rad1>
-CUDF_HOST_DEVICE inline fixed_point<Rep1, Rad1> operator+(fixed_point<Rep1, Rad1> const& lhs,
-                                                          fixed_point<Rep1, Rad1> const& rhs)
+namespace detail {
+
+/**
+ * @brief Whether `shift<Rep, Rad>(val, scale)` overflows `Rep`: the power of `Rad` is itself
+ * unrepresentable, or the mul/div step trips `multiplication_overflow` / `division_overflow`.
+ */
+template <typename Rep, Radix Rad, typename T>
+CUDF_HOST_DEVICE inline constexpr bool shift_overflows(T const& val, scale_type const& scale)
 {
-  auto const scale = cuda::std::min(lhs._scale, rhs._scale);
-  auto const sum   = lhs.rescaled(scale)._value + rhs.rescaled(scale)._value;
+  auto const v = static_cast<Rep>(val);
+  auto const n = static_cast<int32_t>(scale);  // > 0: divide by Rad^n, < 0: multiply by Rad^-n
+  using limits = cuda::std::numeric_limits<Rep>;
+  // Past this exponent Rad^|n| does not fit in `Rep`, so the shift is reported as overflow.
+  constexpr int32_t max_exp = (Rad == Radix::BASE_10) ? limits::digits10 : limits::digits - 1;
+  if (n > max_exp || n < -max_exp) { return n > 0 || v != 0; }  // 0 * Rad^k is still exactly 0
+  if (n >= 0) { return n > 0 && division_overflow<Rep>(v, ipow<Rep, Rad>(n)); }
+  return multiplication_overflow<Rep>(v, ipow<Rep, Rad>(-n));
+}
 
+/**
+ * @brief Run binary integer-overflow predicate once; assert under `__CUDACC_DEBUG__`.
+ *
+ * Unconditionally defined: the call sites in the `fixed_point` operator overloads decide
+ * (via `if constexpr (Track1 == overflow_tracking::on)`, or otherwise only under
+ * `__CUDACC_DEBUG__`) whether to instantiate it. A template that is never instantiated is free.
+ *
+ * @tparam Rep1 Representation type
+ * @tparam F Function type `bool (Rep1, Rep1)` (e.g. `&addition_overflow<Rep1, Rep1>`)
+ * @param overflow_fn Predicate on the operation's integer operands
+ * @param lhs_value Left-hand integer operand at common scale (or lhs._value for `*`/`/`)
+ * @param rhs_value Right-hand integer operand
+ * @return Predicate result for sticky `fixed_point` overflow tracking
+ */
+template <typename Rep1, typename F>
+CUDF_HOST_DEVICE inline bool fixed_point_op_overflow_check(F overflow_fn,
+                                                           Rep1 lhs_value,
+                                                           Rep1 rhs_value)
+{
+  bool const op_overflow = static_cast<bool>(overflow_fn(lhs_value, rhs_value));
 #if defined(__CUDACC_DEBUG__)
+  assert(!op_overflow && "fixed_point overflow");
+#endif
+  return op_overflow;
+}
 
-  assert(!addition_overflow<Rep1>(lhs.rescaled(scale)._value, rhs.rescaled(scale)._value) &&
-         "fixed_point overflow");
+}  // namespace detail
 
+// PLUS Operation
+template <typename Rep1, Radix Rad1, overflow_tracking Track1>
+CUDF_HOST_DEVICE inline fixed_point<Rep1, Rad1, Track1> operator+(
+  fixed_point<Rep1, Rad1, Track1> const& lhs, fixed_point<Rep1, Rad1, Track1> const& rhs)
+{
+  auto const scale = cuda::std::min(lhs._scale, rhs._scale);
+  auto const lhs_r = lhs.rescaled(scale);
+  auto const rhs_r = rhs.rescaled(scale);
+  auto const sum   = lhs_r._value + rhs_r._value;
+  auto result      = fixed_point<Rep1, Rad1, Track1>{scaled_integer<Rep1>{sum, scale}};
+
+  if constexpr (Track1 == overflow_tracking::on) {
+    bool const op_overflow = detail::fixed_point_op_overflow_check<Rep1>(
+      &addition_overflow<Rep1, Rep1>, lhs_r._value, rhs_r._value);
+    result._overflow.value = op_overflow || lhs_r._overflow.value || rhs_r._overflow.value;
+  } else {
+#if defined(__CUDACC_DEBUG__)
+    static_cast<void>(detail::fixed_point_op_overflow_check<Rep1>(
+      &addition_overflow<Rep1, Rep1>, lhs_r._value, rhs_r._value));
 #endif
-
-  return fixed_point<Rep1, Rad1>{scaled_integer<Rep1>{sum, scale}};
+  }
+  return result;
 }
 
 // MINUS Operation
-template <typename Rep1, Radix Rad1>
-CUDF_HOST_DEVICE inline fixed_point<Rep1, Rad1> operator-(fixed_point<Rep1, Rad1> const& lhs,
-                                                          fixed_point<Rep1, Rad1> const& rhs)
+template <typename Rep1, Radix Rad1, overflow_tracking Track1>
+CUDF_HOST_DEVICE inline fixed_point<Rep1, Rad1, Track1> operator-(
+  fixed_point<Rep1, Rad1, Track1> const& lhs, fixed_point<Rep1, Rad1, Track1> const& rhs)
 {
   auto const scale = cuda::std::min(lhs._scale, rhs._scale);
-  auto const diff  = lhs.rescaled(scale)._value - rhs.rescaled(scale)._value;
-
+  auto const lhs_r = lhs.rescaled(scale);
+  auto const rhs_r = rhs.rescaled(scale);
+  auto const diff  = lhs_r._value - rhs_r._value;
+  auto result      = fixed_point<Rep1, Rad1, Track1>{scaled_integer<Rep1>{diff, scale}};
+
+  if constexpr (Track1 == overflow_tracking::on) {
+    bool const op_overflow = detail::fixed_point_op_overflow_check<Rep1>(
+      &subtraction_overflow<Rep1, Rep1>, lhs_r._value, rhs_r._value);
+    result._overflow.value = op_overflow || lhs_r._overflow.value || rhs_r._overflow.value;
+  } else {
 #if defined(__CUDACC_DEBUG__)
-
-  assert(!subtraction_overflow<Rep1>(lhs.rescaled(scale)._value, rhs.rescaled(scale)._value) &&
-         "fixed_point overflow");
-
+    static_cast<void>(detail::fixed_point_op_overflow_check<Rep1>(
+      &subtraction_overflow<Rep1, Rep1>, lhs_r._value, rhs_r._value));
 #endif
-
-  return fixed_point<Rep1, Rad1>{scaled_integer<Rep1>{diff, scale}};
+  }
+  return result;
 }
 
 // MULTIPLIES Operation
-template <typename Rep1, Radix Rad1>
-CUDF_HOST_DEVICE inline fixed_point<Rep1, Rad1> operator*(fixed_point<Rep1, Rad1> const& lhs,
-                                                          fixed_point<Rep1, Rad1> const& rhs)
+template <typename Rep1, Radix Rad1, overflow_tracking Track1>
+CUDF_HOST_DEVICE inline fixed_point<Rep1, Rad1, Track1> operator*(
+  fixed_point<Rep1, Rad1, Track1> const& lhs, fixed_point<Rep1, Rad1, Track1> const& rhs)
 {
-#if defined(__CUDACC_DEBUG__)
-
-  assert(!multiplication_overflow<Rep1>(lhs._value, rhs._value) && "fixed_point overflow");
+  auto result = fixed_point<Rep1, Rad1, Track1>{
+    scaled_integer<Rep1>(lhs._value * rhs._value, scale_type{lhs._scale + rhs._scale})};
 
+  if constexpr (Track1 == overflow_tracking::on) {
+    bool const op_overflow = detail::fixed_point_op_overflow_check<Rep1>(
+      &multiplication_overflow<Rep1, Rep1>, lhs._value, rhs._value);
+    result._overflow.value = op_overflow || lhs._overflow.value || rhs._overflow.value;
+  } else {
+#if defined(__CUDACC_DEBUG__)
+    static_cast<void>(detail::fixed_point_op_overflow_check<Rep1>(
+      &multiplication_overflow<Rep1, Rep1>, lhs._value, rhs._value));
 #endif
-
-  return fixed_point<Rep1, Rad1>{
-    scaled_integer<Rep1>(lhs._value * rhs._value, scale_type{lhs._scale + rhs._scale})};
+  }
+  return result;
 }
 
 // DIVISION Operation
-template <typename Rep1, Radix Rad1>
-CUDF_HOST_DEVICE inline fixed_point<Rep1, Rad1> operator/(fixed_point<Rep1, Rad1> const& lhs,
-                                                          fixed_point<Rep1, Rad1> const& rhs)
+template <typename Rep1, Radix Rad1, overflow_tracking Track1>
+CUDF_HOST_DEVICE inline fixed_point<Rep1, Rad1, Track1> operator/(
+  fixed_point<Rep1, Rad1, Track1> const& lhs, fixed_point<Rep1, Rad1, Track1> const& rhs)
 {
-#if defined(__CUDACC_DEBUG__)
-
-  assert(!division_overflow<Rep1>(lhs._value, rhs._value) && "fixed_point overflow");
+  auto result = fixed_point<Rep1, Rad1, Track1>{
+    scaled_integer<Rep1>(lhs._value / rhs._value, scale_type{lhs._scale - rhs._scale})};
 
+  if constexpr (Track1 == overflow_tracking::on) {
+    bool const op_overflow = detail::fixed_point_op_overflow_check<Rep1>(
+      &division_overflow<Rep1, Rep1>, lhs._value, rhs._value);
+    result._overflow.value = op_overflow || lhs._overflow.value || rhs._overflow.value;
+  } else {
+#if defined(__CUDACC_DEBUG__)
+    static_cast<void>(detail::fixed_point_op_overflow_check<Rep1>(
+      &division_overflow<Rep1, Rep1>, lhs._value, rhs._value));
 #endif
-
-  return fixed_point<Rep1, Rad1>{
-    scaled_integer<Rep1>(lhs._value / rhs._value, scale_type{lhs._scale - rhs._scale})};
+  }
+  return result;
 }
 
 // EQUALITY COMPARISON Operation
-template <typename Rep1, Radix Rad1>
-CUDF_HOST_DEVICE inline bool operator==(fixed_point<Rep1, Rad1> const& lhs,
-                                        fixed_point<Rep1, Rad1> const& rhs)
+template <typename Rep1, Radix Rad1, overflow_tracking Track1>
+CUDF_HOST_DEVICE inline bool operator==(fixed_point<Rep1, Rad1, Track1> const& lhs,
+                                        fixed_point<Rep1, Rad1, Track1> const& rhs)
 {
   auto const scale = cuda::std::min(lhs._scale, rhs._scale);
   return lhs.rescaled(scale)._value == rhs.rescaled(scale)._value;
 }
 
 // EQUALITY NOT COMPARISON Operation
-template <typename Rep1, Radix Rad1>
-CUDF_HOST_DEVICE inline bool operator!=(fixed_point<Rep1, Rad1> const& lhs,
-                                        fixed_point<Rep1, Rad1> const& rhs)
+template <typename Rep1, Radix Rad1, overflow_tracking Track1>
+CUDF_HOST_DEVICE inline bool operator!=(fixed_point<Rep1, Rad1, Track1> const& lhs,
+                                        fixed_point<Rep1, Rad1, Track1> const& rhs)
 {
   auto const scale = cuda::std::min(lhs._scale, rhs._scale);
   return lhs.rescaled(scale)._value != rhs.rescaled(scale)._value;
 }
 
 // LESS THAN OR EQUAL TO Operation
-template <typename Rep1, Radix Rad1>
-CUDF_HOST_DEVICE inline bool operator<=(fixed_point<Rep1, Rad1> const& lhs,
-                                        fixed_point<Rep1, Rad1> const& rhs)
+template <typename Rep1, Radix Rad1, overflow_tracking Track1>
+CUDF_HOST_DEVICE inline bool operator<=(fixed_point<Rep1, Rad1, Track1> const& lhs,
+                                        fixed_point<Rep1, Rad1, Track1> const& rhs)
 {
   auto const scale = cuda::std::min(lhs._scale, rhs._scale);
   return lhs.rescaled(scale)._value <= rhs.rescaled(scale)._value;
 }
 
 // GREATER THAN OR EQUAL TO Operation
-template <typename Rep1, Radix Rad1>
-CUDF_HOST_DEVICE inline bool operator>=(fixed_point<Rep1, Rad1> const& lhs,
-                                        fixed_point<Rep1, Rad1> const& rhs)
+template <typename Rep1, Radix Rad1, overflow_tracking Track1>
+CUDF_HOST_DEVICE inline bool operator>=(fixed_point<Rep1, Rad1, Track1> const& lhs,
+                                        fixed_point<Rep1, Rad1, Track1> const& rhs)
 {
   auto const scale = cuda::std::min(lhs._scale, rhs._scale);
   return lhs.rescaled(scale)._value >= rhs.rescaled(scale)._value;
 }
 
 // LESS THAN Operation
-template <typename Rep1, Radix Rad1>
-CUDF_HOST_DEVICE inline bool operator<(fixed_point<Rep1, Rad1> const& lhs,
-                                       fixed_point<Rep1, Rad1> const& rhs)
+template <typename Rep1, Radix Rad1, overflow_tracking Track1>
+CUDF_HOST_DEVICE inline bool operator<(fixed_point<Rep1, Rad1, Track1> const& lhs,
+                                       fixed_point<Rep1, Rad1, Track1> const& rhs)
 {
   auto const scale = cuda::std::min(lhs._scale, rhs._scale);
   return lhs.rescaled(scale)._value < rhs.rescaled(scale)._value;
 }
 
 // GREATER THAN Operation
-template <typename Rep1, Radix Rad1>
-CUDF_HOST_DEVICE inline bool operator>(fixed_point<Rep1, Rad1> const& lhs,
-                                       fixed_point<Rep1, Rad1> const& rhs)
+template <typename Rep1, Radix Rad1, overflow_tracking Track1>
+CUDF_HOST_DEVICE inline bool operator>(fixed_point<Rep1, Rad1, Track1> const& lhs,
+                                       fixed_point<Rep1, Rad1, Track1> const& rhs)
 {
   auto const scale = cuda::std::min(lhs._scale, rhs._scale);
   return lhs.rescaled(scale)._value > rhs.rescaled(scale)._value;
 }
 
 // MODULO OPERATION
-template <typename Rep1, Radix Rad1>
-CUDF_HOST_DEVICE inline fixed_point<Rep1, Rad1> operator%(fixed_point<Rep1, Rad1> const& lhs,
-                                                          fixed_point<Rep1, Rad1> const& rhs)
+template <typename Rep1, Radix Rad1, overflow_tracking Track1>
+CUDF_HOST_DEVICE inline fixed_point<Rep1, Rad1, Track1> operator%(
+  fixed_point<Rep1, Rad1, Track1> const& lhs, fixed_point<Rep1, Rad1, Track1> const& rhs)
 {
   auto const scale     = cuda::std::min(lhs._scale, rhs._scale);
-  auto const remainder = lhs.rescaled(scale)._value % rhs.rescaled(scale)._value;
-  return fixed_point<Rep1, Rad1>{scaled_integer<Rep1>{remainder, scale}};
+  auto const lhs_r     = lhs.rescaled(scale);
+  auto const rhs_r     = rhs.rescaled(scale);
+  auto const remainder = lhs_r._value % rhs_r._value;
+  auto result          = fixed_point<Rep1, Rad1, Track1>{scaled_integer<Rep1>{remainder, scale}};
+  if constexpr (Track1 == overflow_tracking::on) {
+    result._overflow.value = lhs_r._overflow.value || rhs_r._overflow.value;
+  }
+  return result;
 }
 
 using decimal32  = fixed_point<int32_t, Radix::BASE_10>;     ///<  32-bit decimal fixed point
 using decimal64  = fixed_point<int64_t, Radix::BASE_10>;     ///<  64-bit decimal fixed point
 using decimal128 = fixed_point<__int128_t, Radix::BASE_10>;  ///< 128-bit decimal fixed point
 
+// -----------------------------------------------------------------------------
+// Overflow-tracking aliases
+// -----------------------------------------------------------------------------
+// These instantiate the same `fixed_point` class template with `Track == on`
+// and so participate in every operator overload above. They are wired into the
+// runtime type system as `type_id::DECIMAL{32,64,128}_SAFE`, which means they
+// flow through `binary_operation`, `transform`, scans and any code path that
+// dispatches via `cudf::type_dispatcher`. The on-device storage of a
+// `column<decimal*_safe>` is still the raw signed integer (see
+// `cudf::device_storage_type_t<>`); the sticky bit is purely a value-type
+// concept used inside element-wise kernels.
+//
+// Aggregations whose update step bypasses the value-level operators (e.g.
+// atomic adds on the raw integer storage in
+// `cudf::detail::atomic_add(&target.element<DeviceTarget>(...), ...)`) will
+// **not** carry the sticky bit through. Use the existing
+// `aggregation::SUM_WITH_OVERFLOW` pattern in
+// `cpp/include/cudf/detail/aggregation/device_aggregators.cuh` for groupby and
+// reduce overflow detection.
+
+/// 32-bit decimal fixed point with sticky overflow tracking
+using decimal32_safe = fixed_point<int32_t, Radix::BASE_10, overflow_tracking::on>;
+/// 64-bit decimal fixed point with sticky overflow tracking
+using decimal64_safe = fixed_point<int64_t, Radix::BASE_10, overflow_tracking::on>;
+/// 128-bit decimal fixed point with sticky overflow tracking
+using decimal128_safe = fixed_point<__int128_t, Radix::BASE_10, overflow_tracking::on>;
+
 /** @} */  // end of group
 }  // namespace CUDF_EXPORT numeric
diff --git a/cpp/include/cudf/types.hpp b/cpp/include/cudf/types.hpp
index 659a7779601..3eae70c6f06 100644
--- a/cpp/include/cudf/types.hpp
+++ b/cpp/include/cudf/types.hpp
@@ -219,6 +219,9 @@ enum class type_id : int32_t {
   DECIMAL64,               ///< Fixed-point type with int64_t
   DECIMAL128,              ///< Fixed-point type with __int128_t
   STRUCT,                  ///< Struct elements
+  DECIMAL32_SAFE,   ///< Fixed-point type with int32_t and sticky overflow tracking
+  DECIMAL64_SAFE,   ///< Fixed-point type with int64_t and sticky overflow tracking
+  DECIMAL128_SAFE,  ///< Fixed-point type with __int128_t and sticky overflow tracking
   // `NUM_TYPE_IDS` must be last!
   NUM_TYPE_IDS  ///< Total number of type ids
 };
@@ -310,7 +313,9 @@ class data_type {
    */
   explicit data_type(type_id id, int32_t scale) : _id{id}, _fixed_point_scale{scale}
   {
-    assert(id == type_id::DECIMAL32 || id == type_id::DECIMAL64 || id == type_id::DECIMAL128);
+    assert(id == type_id::DECIMAL32 || id == type_id::DECIMAL64 || id == type_id::DECIMAL128 ||
+           id == type_id::DECIMAL32_SAFE || id == type_id::DECIMAL64_SAFE ||
+           id == type_id::DECIMAL128_SAFE);
   }
 
   /**
diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp
index 39d8b5a615e..ea69d1ec63b 100644
--- a/cpp/include/cudf/utilities/traits.hpp
+++ b/cpp/include/cudf/utilities/traits.hpp
@@ -441,9 +441,21 @@ CUDF_HOST_DEVICE constexpr inline bool is_fixed_point()
   return cuda::std::is_same_v<numeric::decimal32, T> ||
          cuda::std::is_same_v<numeric::decimal64, T> ||
          cuda::std::is_same_v<numeric::decimal128, T> ||
+         cuda::std::is_same_v<numeric::decimal32_safe, T> ||
+         cuda::std::is_same_v<numeric::decimal64_safe, T> ||
+         cuda::std::is_same_v<numeric::decimal128_safe, T> ||
          cuda::std::is_same_v<numeric::fixed_point<int32_t, numeric::Radix::BASE_2>, T> ||
          cuda::std::is_same_v<numeric::fixed_point<int64_t, numeric::Radix::BASE_2>, T> ||
-         cuda::std::is_same_v<numeric::fixed_point<__int128_t, numeric::Radix::BASE_2>, T>;
+         cuda::std::is_same_v<numeric::fixed_point<__int128_t, numeric::Radix::BASE_2>, T> ||
+         cuda::std::is_same_v<
+           numeric::fixed_point<int32_t, numeric::Radix::BASE_2, numeric::overflow_tracking::on>,
+           T> ||
+         cuda::std::is_same_v<
+           numeric::fixed_point<int64_t, numeric::Radix::BASE_2, numeric::overflow_tracking::on>,
+           T> ||
+         cuda::std::is_same_v<
+           numeric::fixed_point<__int128_t, numeric::Radix::BASE_2, numeric::overflow_tracking::on>,
+           T>;
 }
 
 /**
diff --git a/cpp/include/cudf/utilities/type_dispatcher.hpp b/cpp/include/cudf/utilities/type_dispatcher.hpp
index 24e41ba6fc4..2c782b12d74 100644
--- a/cpp/include/cudf/utilities/type_dispatcher.hpp
+++ b/cpp/include/cudf/utilities/type_dispatcher.hpp
@@ -97,9 +97,12 @@ using id_to_type = typename id_to_type_impl<Id>::type;
 // clang-format off
 template <typename T>
 using device_storage_type_t =
-  std::conditional_t<std::is_same_v<numeric::decimal32,  T>, int32_t,
-  std::conditional_t<std::is_same_v<numeric::decimal64,  T>, int64_t,
-  std::conditional_t<std::is_same_v<numeric::decimal128, T>, __int128_t, T>>>;
+  std::conditional_t<std::is_same_v<numeric::decimal32,       T>, int32_t,
+  std::conditional_t<std::is_same_v<numeric::decimal64,       T>, int64_t,
+  std::conditional_t<std::is_same_v<numeric::decimal128,      T>, __int128_t,
+  std::conditional_t<std::is_same_v<numeric::decimal32_safe,  T>, int32_t,
+  std::conditional_t<std::is_same_v<numeric::decimal64_safe,  T>, int64_t,
+  std::conditional_t<std::is_same_v<numeric::decimal128_safe, T>, __int128_t, T>>>>>>;
 // clang-format on
 
 /**
@@ -176,6 +179,9 @@ CUDF_TYPE_MAPPING(cudf::list_view, type_id::LIST)
 CUDF_TYPE_MAPPING(numeric::decimal32, type_id::DECIMAL32)
 CUDF_TYPE_MAPPING(numeric::decimal64, type_id::DECIMAL64)
 CUDF_TYPE_MAPPING(numeric::decimal128, type_id::DECIMAL128)
+CUDF_TYPE_MAPPING(numeric::decimal32_safe, type_id::DECIMAL32_SAFE)
+CUDF_TYPE_MAPPING(numeric::decimal64_safe, type_id::DECIMAL64_SAFE)
+CUDF_TYPE_MAPPING(numeric::decimal128_safe, type_id::DECIMAL128_SAFE)
 CUDF_TYPE_MAPPING(cudf::struct_view, type_id::STRUCT)
 
 /**
@@ -206,7 +212,11 @@ constexpr bool type_id_matches_device_storage_type(type_id id)
 {
   return (id == type_id::DECIMAL32 && std::is_same_v<T, int32_t>) ||
          (id == type_id::DECIMAL64 && std::is_same_v<T, int64_t>) ||
-         (id == type_id::DECIMAL128 && std::is_same_v<T, __int128_t>) || id == type_to_id<T>();
+         (id == type_id::DECIMAL128 && std::is_same_v<T, __int128_t>) ||
+         (id == type_id::DECIMAL32_SAFE && std::is_same_v<T, int32_t>) ||
+         (id == type_id::DECIMAL64_SAFE && std::is_same_v<T, int64_t>) ||
+         (id == type_id::DECIMAL128_SAFE && std::is_same_v<T, __int128_t>) ||
+         id == type_to_id<T>();
 }
 
 /**
@@ -285,6 +295,27 @@ struct type_to_scalar_type_impl<numeric::decimal128> {
   using ScalarDeviceType = cudf::fixed_point_scalar_device_view<numeric::decimal128>;
 };
 
+// Scalar specializations for the overflow-tracking decimal aliases. The scalar
+// storage shares the underlying integer representation; the wrapper only adds
+// the sticky overflow bit at the value-type layer.
+template <>
+struct type_to_scalar_type_impl<numeric::decimal32_safe> {
+  using ScalarType       = cudf::fixed_point_scalar<numeric::decimal32_safe>;
+  using ScalarDeviceType = cudf::fixed_point_scalar_device_view<numeric::decimal32_safe>;
+};
+
+template <>
+struct type_to_scalar_type_impl<numeric::decimal64_safe> {
+  using ScalarType       = cudf::fixed_point_scalar<numeric::decimal64_safe>;
+  using ScalarDeviceType = cudf::fixed_point_scalar_device_view<numeric::decimal64_safe>;
+};
+
+template <>
+struct type_to_scalar_type_impl<numeric::decimal128_safe> {
+  using ScalarType       = cudf::fixed_point_scalar<numeric::decimal128_safe>;
+  using ScalarDeviceType = cudf::fixed_point_scalar_device_view<numeric::decimal128_safe>;
+};
+
 template <>  // TODO: this is a temporary solution for make_pair_iterator
 struct type_to_scalar_type_impl<cudf::dictionary32> {
   using ScalarType       = cudf::numeric_scalar<int32_t>;
@@ -548,6 +579,15 @@ CUDF_HOST_DEVICE __forceinline__ constexpr decltype(auto) type_dispatcher(cudf::
     case type_id::DECIMAL128:
       return f.template operator()<typename IdTypeMap<type_id::DECIMAL128>::type>(
         std::forward<Ts>(args)...);
+    case type_id::DECIMAL32_SAFE:
+      return f.template operator()<typename IdTypeMap<type_id::DECIMAL32_SAFE>::type>(
+        std::forward<Ts>(args)...);
+    case type_id::DECIMAL64_SAFE:
+      return f.template operator()<typename IdTypeMap<type_id::DECIMAL64_SAFE>::type>(
+        std::forward<Ts>(args)...);
+    case type_id::DECIMAL128_SAFE:
+      return f.template operator()<typename IdTypeMap<type_id::DECIMAL128_SAFE>::type>(
+        std::forward<Ts>(args)...);
     case type_id::STRUCT:
       return f.template operator()<typename IdTypeMap<type_id::STRUCT>::type>(
         std::forward<Ts>(args)...);
diff --git a/cpp/src/scalar/scalar.cpp b/cpp/src/scalar/scalar.cpp
index 89a15566124..2d8f0135349 100644
--- a/cpp/src/scalar/scalar.cpp
+++ b/cpp/src/scalar/scalar.cpp
@@ -193,6 +193,9 @@ typename fixed_point_scalar<T>::rep_type const* fixed_point_scalar<T>::data() co
 template class fixed_point_scalar<numeric::decimal32>;
 template class fixed_point_scalar<numeric::decimal64>;
 template class fixed_point_scalar<numeric::decimal128>;
+template class fixed_point_scalar<numeric::decimal32_safe>;
+template class fixed_point_scalar<numeric::decimal64_safe>;
+template class fixed_point_scalar<numeric::decimal128_safe>;
 
 namespace CUDF_HIDDEN detail {
 
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index 68cde65c57b..3875c100c63 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -258,6 +258,7 @@ ConfigureTest(CLAMP_TEST replace/clamp_test.cpp)
 # ##################################################################################################
 # * fixed_point tests -----------------------------------------------------------------------------
 ConfigureTest(FIXED_POINT_TEST fixed_point/fixed_point_tests.cpp fixed_point/fixed_point_tests.cu)
+ConfigureTest(FIXED_POINT_OVERFLOW_TEST fixed_point/fixed_point_overflow_tests.cpp)
 
 # ##################################################################################################
 # * unary tests -----------------------------------------------------------------------------------
diff --git a/cpp/tests/fixed_point/fixed_point_overflow_tests.cpp b/cpp/tests/fixed_point/fixed_point_overflow_tests.cpp
new file mode 100644
index 00000000000..31779a341c6
--- /dev/null
+++ b/cpp/tests/fixed_point/fixed_point_overflow_tests.cpp
@@ -0,0 +1,284 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/testing_main.hpp>
+
+#include <cudf/binaryop.hpp>
+#include <cudf/fixed_point/conv.hpp>
+#include <cudf/fixed_point/fixed_point.hpp>
+#include <cudf/utilities/traits.hpp>
+#include <cudf/utilities/type_dispatcher.hpp>
+
+#include <limits>
+#include <type_traits>
+
+using namespace numeric;
+
+struct FixedPointOverflowTest : cudf::test::BaseFixture {};  // fixture shared by every test below
+
+// ---------------------------------------------------------------------------
+// Layout / ABI: enabling tracking must not perturb the non-tracking layout.
+// ---------------------------------------------------------------------------
+
+// Size references for the historical (non-tracking) `fixed_point` storage. The
+// non-tracking variants must collapse the `[[no_unique_address]]` member to zero
+// bytes so that their sizes match these exactly.
+namespace {
+struct ref_layout32 {
+  int32_t rep;
+  scale_type scale;
+};
+struct ref_layout64 {
+  int64_t rep;
+  scale_type scale;
+};
+struct ref_layout128 {
+  __int128_t rep;
+  scale_type scale;
+};
+}  // namespace
+
+TEST_F(FixedPointOverflowTest, NonTrackingLayoutUnchanged)
+{
+  // The whole point of `[[no_unique_address]]` + the empty `_no_overflow_flag`
+  // helper is that the historical decimal{32,64,128} layout is preserved.
+  static_assert(sizeof(decimal32) == sizeof(ref_layout32));
+  static_assert(sizeof(decimal64) == sizeof(ref_layout64));
+  static_assert(sizeof(decimal128) == sizeof(ref_layout128));
+
+  // The tracking variants carry an extra bool that may land in padding, so they
+  // are only required to be at least as large as their non-tracking counterparts.
+  static_assert(sizeof(decimal32_safe) >= sizeof(decimal32));
+  static_assert(sizeof(decimal64_safe) >= sizeof(decimal64));
+  static_assert(sizeof(decimal128_safe) >= sizeof(decimal128));
+}
+
+// ---------------------------------------------------------------------------
+// `overflow_occurred()` is only callable on tracking variants.
+// ---------------------------------------------------------------------------
+
+template <class T, class = void>
+struct has_overflow_occurred : std::bool_constant<false> {};  // default: member is not callable
+
+template <class T>  // chosen only when `t.overflow_occurred()` is well-formed for a const `t`
+struct has_overflow_occurred<T, std::void_t<decltype(std::declval<const T&>().overflow_occurred())>>
+  : std::bool_constant<true> {};
+
+TEST_F(FixedPointOverflowTest, OverflowOccurredOnlyOnTrackingTypes)
+{
+  static_assert(!has_overflow_occurred<decimal32>{});  // non-tracking: accessor not callable
+  static_assert(!has_overflow_occurred<decimal64>{});
+  static_assert(!has_overflow_occurred<decimal128>{});
+
+  static_assert(has_overflow_occurred<decimal32_safe>{});  // tracking: accessor callable
+  static_assert(has_overflow_occurred<decimal64_safe>{});
+  static_assert(has_overflow_occurred<decimal128_safe>{});
+}
+
+// ---------------------------------------------------------------------------
+// `is_fixed_point<T>()` recognizes the new aliases.
+// ---------------------------------------------------------------------------
+
+TEST_F(FixedPointOverflowTest, IsFixedPointRecognizesSafeAliases)
+{
+  static_assert(cudf::is_fixed_point<decimal32_safe>());  // checked at compile time, per alias
+  static_assert(cudf::is_fixed_point<decimal64_safe>());
+  static_assert(cudf::is_fixed_point<decimal128_safe>());
+}
+
+// ---------------------------------------------------------------------------
+// type_id / type_dispatcher round-trip for the new aliases.
+// ---------------------------------------------------------------------------
+
+TEST_F(FixedPointOverflowTest, TypeIdMappingRoundTrip)
+{
+  // type -> id: each safe alias maps to its dedicated `*_SAFE` enumerator.
+  EXPECT_EQ(cudf::type_id::DECIMAL32_SAFE, cudf::type_to_id<decimal32_safe>());
+  EXPECT_EQ(cudf::type_id::DECIMAL64_SAFE, cudf::type_to_id<decimal64_safe>());
+  EXPECT_EQ(cudf::type_id::DECIMAL128_SAFE, cudf::type_to_id<decimal128_safe>());
+
+  // id -> type: the reverse mapping must yield the very same alias back.
+  static_assert(std::is_same_v<cudf::id_to_type<cudf::type_id::DECIMAL32_SAFE>, decimal32_safe>);
+  static_assert(std::is_same_v<cudf::id_to_type<cudf::type_id::DECIMAL64_SAFE>, decimal64_safe>);
+  static_assert(std::is_same_v<cudf::id_to_type<cudf::type_id::DECIMAL128_SAFE>, decimal128_safe>);
+
+  // Storage check: the device-side representation remains the plain integer
+  // type, i.e. the safe wrapper is purely a value-level notion. That is what
+  // lets a column of decimal*_safe stay an ordinary int{32,64,128} column.
+  // The assertions below pin that mapping through `device_storage_type_t`.
+  static_assert(std::is_same_v<cudf::device_storage_type_t<decimal32_safe>, int32_t>);
+  static_assert(std::is_same_v<cudf::device_storage_type_t<decimal64_safe>, int64_t>);
+  static_assert(std::is_same_v<cudf::device_storage_type_t<decimal128_safe>, __int128_t>);
+}
+
+// ---------------------------------------------------------------------------
+// Sticky-flag propagation through the value-level operators.
+// ---------------------------------------------------------------------------
+
+TEST_F(FixedPointOverflowTest, AdditionTracksOverflow)
+{
+  // In-range operands: 1 + 2 must leave the flag clear.
+  decimal64_safe const one{scaled_integer<int64_t>{1, scale_type{0}}};
+  decimal64_safe const two{scaled_integer<int64_t>{2, scale_type{0}}};
+  EXPECT_FALSE((one + two).overflow_occurred());
+
+  // (INT64_MAX - 100) + 200 does not fit in int64_t, so the flag must be raised.
+  auto constexpr almost_max = std::numeric_limits<int64_t>::max() - 100;
+  decimal64_safe const big{scaled_integer<int64_t>{almost_max, scale_type{0}}};
+  decimal64_safe const bump{scaled_integer<int64_t>{200, scale_type{0}}};
+  EXPECT_TRUE((big + bump).overflow_occurred());
+}
+
+TEST_F(FixedPointOverflowTest, SubtractionTracksOverflow)
+{
+  // (INT64_MIN + 100) - 200 falls below the int64_t range, so the flag must be set.
+  auto constexpr almost_min = std::numeric_limits<int64_t>::min() + 100;
+  decimal64_safe const minuend{scaled_integer<int64_t>{almost_min, scale_type{0}}};
+  decimal64_safe const subtrahend{scaled_integer<int64_t>{200, scale_type{0}}};
+
+  EXPECT_TRUE((minuend - subtrahend).overflow_occurred());
+}
+
+TEST_F(FixedPointOverflowTest, MultiplicationTracksOverflow)
+{
+  // 10^12 * 10^12 = 10^24 exceeds the int64_t range, so the flag must be raised.
+  decimal64_safe const huge_lhs{scaled_integer<int64_t>{1'000'000'000'000LL, scale_type{0}}};
+  decimal64_safe const huge_rhs{scaled_integer<int64_t>{1'000'000'000'000LL, scale_type{0}}};
+  EXPECT_TRUE((huge_lhs * huge_rhs).overflow_occurred());
+
+  // 2 * 3 is comfortably in range, so the flag must stay clear.
+  decimal64_safe const two{scaled_integer<int64_t>{2, scale_type{0}}};
+  decimal64_safe const three{scaled_integer<int64_t>{3, scale_type{0}}};
+  EXPECT_FALSE((two * three).overflow_occurred());
+}
+
+TEST_F(FixedPointOverflowTest, DivisionTracksOverflow)
+{
+  // Dividing the most negative int64_t by -1 is the classic signed-division
+  // overflow: the true quotient is one past INT64_MAX.
+  auto constexpr lowest = std::numeric_limits<int64_t>::min();
+  decimal64_safe const dividend{scaled_integer<int64_t>{lowest, scale_type{0}}};
+  decimal64_safe const divisor{scaled_integer<int64_t>{-1, scale_type{0}}};
+  EXPECT_TRUE((dividend / divisor).overflow_occurred());
+}
+
+TEST_F(FixedPointOverflowTest, FlagIsSticky)
+{
+  // A set overflow flag must survive every later operation in a chain, even
+  // when those later operations stay in range themselves.
+  decimal64_safe const big_lhs{scaled_integer<int64_t>{1'000'000'000'000LL, scale_type{0}}};
+  decimal64_safe const big_rhs{scaled_integer<int64_t>{1'000'000'000'000LL, scale_type{0}}};
+  decimal64_safe const zero{scaled_integer<int64_t>{0, scale_type{0}}};
+
+  auto const tainted   = big_lhs * big_rhs;  // 10^24 does not fit: the flag gets set
+  auto const after_add = tainted + zero;     // in-range add, flag carried by `tainted`
+  EXPECT_TRUE(after_add.overflow_occurred());
+
+  auto const after_sub = after_add - zero;  // in-range subtract, flag still carried
+  EXPECT_TRUE(after_sub.overflow_occurred());
+}
+
+TEST_F(FixedPointOverflowTest, RescaledShiftOverflowSetsFlag)
+{
+  // Start from half of INT64_MAX stored at scale 0.
+  auto constexpr half_max = std::numeric_limits<int64_t>::max() / 2;
+  decimal64_safe const base{scaled_integer<int64_t>{half_max, scale_type{0}}};
+
+  // Moving to a sufficiently negative scale multiplies by a power of 10, which
+  // pushes the value out of range.
+  EXPECT_TRUE(base.rescaled(scale_type{-3}).overflow_occurred());
+
+  // Rescaling to the scale the value already has must not raise the flag.
+  EXPECT_FALSE(base.rescaled(scale_type{0}).overflow_occurred());
+}
+
+TEST_F(FixedPointOverflowTest, ConstructorShiftOverflowSetsFlag)
+{
+  // A constructor shift that leaves the `Rep` range must flag the new object right away.
+  auto constexpr half_max = std::numeric_limits<int64_t>::max() / 2;
+  decimal64_safe const shifted_out{half_max, scale_type{-3}};
+  EXPECT_TRUE(shifted_out.overflow_occurred());
+
+  decimal64_safe const in_range{int64_t{42}, scale_type{0}};
+  EXPECT_FALSE(in_range.overflow_occurred());
+}
+
+// ---------------------------------------------------------------------------
+// Floating <-> fixed conversions: `decimal*_safe` must surface overflow.
+// ---------------------------------------------------------------------------
+
+TEST_F(FixedPointOverflowTest, ConvertFloatingToDecimal32SafeDetectsPositiveOverflow)
+{
+  // 1e20 cannot fit into int32 at scale 0: expect the flag plus a value pinned at INT32_MAX.
+  auto const fixed = cudf::convert_floating_to_fixed<decimal32_safe>(1e20, scale_type{0});
+
+  EXPECT_TRUE(fixed.overflow_occurred());
+  EXPECT_EQ(fixed.value(), std::numeric_limits<int32_t>::max());
+}
+
+TEST_F(FixedPointOverflowTest, ConvertFloatingToDecimal32SafeDetectsNegativeOverflow)
+{
+  auto const fixed = cudf::convert_floating_to_fixed<decimal32_safe>(-1e20, scale_type{0});
+  EXPECT_TRUE(fixed.overflow_occurred());  // -1e20 lies below the int32 range at scale 0
+  EXPECT_EQ(fixed.value(), std::numeric_limits<int32_t>::min());
+}
+
+TEST_F(FixedPointOverflowTest, ConvertFloatingToDecimal64SafeDetectsPositiveOverflowViaScale)
+{
+  // A modest input can still overflow through the scale: scale -19 means a
+  // factor of 10^19 in the decimal rep, and the value is expected at INT64_MAX.
+  auto const fixed = cudf::convert_floating_to_fixed<decimal64_safe>(1.0, scale_type{-19});
+  EXPECT_TRUE(fixed.overflow_occurred());
+  EXPECT_EQ(fixed.value(), std::numeric_limits<int64_t>::max());
+}
+
+TEST_F(FixedPointOverflowTest, ConvertFloatingToDecimal64SafeNoOverflow)
+{
+  auto const fixed = cudf::convert_floating_to_fixed<decimal64_safe>(123.456, scale_type{-3});
+  EXPECT_FALSE(fixed.overflow_occurred());     // in range: flag stays clear
+  EXPECT_EQ(fixed.value(), int64_t{123456});  // 123.456 at scale -3 is stored as 123456
+}
+
+// ---------------------------------------------------------------------------
+// Mixed-Track operations should NOT compile. Verified at compile time below
+// via SFINAE; if the static_assert chain is wrong the test file itself fails
+// to build (which is the desired behavior).
+// ---------------------------------------------------------------------------
+
+template <class Lhs, class Rhs, class = void>
+struct addable : std::bool_constant<false> {};  // default: `Lhs + Rhs` is ill-formed
+
+template <class Lhs, class Rhs>  // picked only when `Lhs + Rhs` is a valid expression
+struct addable<Lhs, Rhs, std::void_t<decltype(std::declval<Lhs>() + std::declval<Rhs>())>>
+  : std::bool_constant<true> {};
+
+static_assert(addable<decimal64, decimal64>{});            // both non-tracking: allowed
+static_assert(addable<decimal64_safe, decimal64_safe>{});  // both tracking: allowed
+static_assert(!addable<decimal64, decimal64_safe>{},
+              "Mixed-Track addition must not be allowed; choose a single tracking mode.");
+
+// ---------------------------------------------------------------------------
+// Type-id smoke test: a `cudf::data_type` built from `DECIMAL64_SAFE` must be
+// classified as fixed-point and hand the same id back. No column is built and
+// `cudf::binary_operation` is not called here. The on-device storage is still
+// int64, so the per-element sticky bit is not preserved in a column (see the
+// docstring on `overflow_occurred()`); this test only validates wiring.
+// ---------------------------------------------------------------------------
+
+TEST_F(FixedPointOverflowTest, BinaryOpOnSafeColumnTypeIdRoundTrip)
+{
+  // Only the type id is under test, so no columns are constructed; the scale
+  // of 0 is arbitrary.
+  auto const safe_type = cudf::data_type{cudf::type_id::DECIMAL64_SAFE, 0};
+  EXPECT_TRUE(cudf::is_fixed_point(safe_type));
+  EXPECT_EQ(cudf::type_id::DECIMAL64_SAFE, safe_type.id());
+}
+
+// `ConfigureTest` builds this executable from this file alone, so the program
+// entry point is defined here via the macro from <cudf_test/testing_main.hpp>.
+// NOTE(review): assumes ConfigureTest does not link gtest_main -- confirm.
+CUDF_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu
index a3a86d2b119..db38de76653 100644
--- a/cpp/tests/utilities/column_utilities.cu
+++ b/cpp/tests/utilities/column_utilities.cu
@@ -951,6 +951,12 @@ template std::pair<thrust::host_vector<numeric::decimal64>, std::vector<bitmask_
   column_view c);
 template std::pair<thrust::host_vector<numeric::decimal128>, std::vector<bitmask_type>> to_host(
   column_view c);
+template std::pair<thrust::host_vector<numeric::decimal32_safe>, std::vector<bitmask_type>>
+to_host(column_view c);
+template std::pair<thrust::host_vector<numeric::decimal64_safe>, std::vector<bitmask_type>>
+to_host(column_view c);
+template std::pair<thrust::host_vector<numeric::decimal128_safe>, std::vector<bitmask_type>>
+to_host(column_view c);
 
 namespace {
 struct strings_to_host_fn {