From e14b54db474be8a411075c9064bbe9d1b0d2132b Mon Sep 17 00:00:00 2001
From: Michael Tautschnig <tautschn@amazon.com>
Date: Mon, 27 Apr 2026 19:11:24 +0000
Subject: [PATCH] Fix float-to-integer conversion for wide destination types
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

float_bvt::to_integer did not extend the fraction to dest_width
before shifting, causing incorrect results when dest_width exceeds
the fraction width (the shift distance went negative for large
exponents). The fix mirrors the extend-then-shift pattern already
used by float_utilst::to_integer.

The PR carries over a latent constraint shared between both encodings:
the shift distance is built in a bit-vector of width spec.e, so
`dest_width - 1` must be representable in that signed/unsigned
bit width. For the integer types C currently supports this holds —
float (spec.e = 8) supports dest_width up to 128 (i.e. up to
__int128); double (spec.e = 11) supports dest_width up to 1024.
That assumption is now an explicit PRECONDITION on both
float_bvt::to_integer and float_utilst::to_integer, with matching
'keep in sync' comments. Widening the subtraction type to support
even wider integer destinations (and pulling the integer-conversion
logic out into a single helper shared between the two IR layers)
is a sensible follow-up.

Regression test exercises the bug across the parameter space: int
(at and above 2^24), unsigned int (including a 2^31 case where
unsigned-vs-signed semantics matter), long long from float and
double sources (the 64-bit destination cases that were the most
common practical breakage), and __int128 from both float and
double sources where supported. Without the fix, eight of these
eleven assertions fail on develop.

Co-authored-by: Kiro <kiro-agent@users.noreply.github.com>
---
 regression/cbmc/Float-to-int-wide/main.c      | 98 +++++++++++++++++++
 regression/cbmc/Float-to-int-wide/test.desc   | 10 ++
 .../cbmc/Float-to-int-wide/test_smt.desc      | 10 ++
 src/solvers/floatbv/float_bv.cpp              | 36 ++++++-
 src/solvers/floatbv/float_utils.cpp           | 16 +++
 5 files changed, 168 insertions(+), 2 deletions(-)
 create mode 100644 regression/cbmc/Float-to-int-wide/main.c
 create mode 100644 regression/cbmc/Float-to-int-wide/test.desc
 create mode 100644 regression/cbmc/Float-to-int-wide/test_smt.desc

diff --git a/regression/cbmc/Float-to-int-wide/main.c b/regression/cbmc/Float-to-int-wide/main.c
new file mode 100644
index 00000000000..58781cc7244
--- /dev/null
+++ b/regression/cbmc/Float-to-int-wide/main.c
@@ -0,0 +1,98 @@
+// Test float-to-integer (and double-to-integer) conversion for values
+// that need more bits than the source's fraction width.
+//
+// Bug: float_bvt::to_integer did not extend the fraction to the
+// destination width before shifting, so for any integer destination
+// wider than the source's fraction the shift distance went negative
+// for sufficiently large exponents and the conversion produced
+// undefined results. float (24 fraction bits incl. hidden) was
+// affected for any 32-bit-or-wider integer destination; double (53
+// fraction bits) was affected for any 64-bit-or-wider integer
+// destination.
+
+#include <assert.h>
+
+extern int __CPROVER_rounding_mode;
+
+int main()
+{
+  // Test 1: large positive float -> int.
+  float a;
+  __CPROVER_assume(a == 1000000.0f);
+  __CPROVER_rounding_mode = 3; // ROUND_TO_ZERO (required for C semantics)
+  int r1 = (int)a;
+  assert(r1 == 1000000);
+
+  // Test 2: value at fraction-width boundary (2^24, exactly representable).
+  float b;
+  __CPROVER_assume(b == 16777216.0f);
+  int r2 = (int)b;
+  assert(r2 == 16777216);
+
+  // Test 3: value near INT_MAX, well past the fraction width.
+  float c;
+  __CPROVER_assume(c == 2.0e9f);
+  int r3 = (int)c;
+  assert(r3 == 2000000000);
+
+  // Test 4: negative large value.
+  float d;
+  __CPROVER_assume(d == -1000000.0f);
+  int r4 = (int)d;
+  assert(r4 == -1000000);
+
+  // Test 5: float -> unsigned int, exact value with exponent > fraction
+  // width.
+  float e1;
+  __CPROVER_assume(e1 == 16777216.0f); // 2^24, exactly representable
+  unsigned int r5 = (unsigned int)e1;
+  assert(r5 == 16777216u);
+
+  // Test 6: float -> unsigned int, value at 2^31 (exactly representable
+  // as float, above INT_MAX so the unsigned-vs-signed branch matters).
+  float e2;
+  __CPROVER_assume(e2 == (float)(1U << 31));
+  unsigned int r6 = (unsigned int)e2;
+  assert(r6 == (1U << 31));
+
+  // Test 7: float -> long long. dest_width = 64, fraction_width = 24,
+  // so the fix's extension path is exercised.
+  float ll1;
+  __CPROVER_assume(ll1 == (float)(1LL << 30));
+  long long r7 = (long long)ll1;
+  assert(r7 == (1LL << 30));
+
+  // Test 8: double -> long long. dest_width = 64, fraction_width = 53,
+  // exercises a different effective_width than tests 7.
+  double d1;
+  __CPROVER_assume(d1 == (double)(1LL << 40));
+  long long r8 = (long long)d1;
+  assert(r8 == (1LL << 40));
+
+  // Test 9: double -> long long, value above 2^53 where double can no
+  // longer represent every integer; check an exactly-representable one.
+  double d2;
+  __CPROVER_assume(d2 == (double)(1LL << 60));
+  long long r9 = (long long)d2;
+  assert(r9 == (1LL << 60));
+
+#if defined(__SIZEOF_INT128__)
+  // Test 10: float -> __int128. dest_width = 128 sits at the documented
+  // boundary `(1 << (spec.e - 1)) = 128` for single-precision sources
+  // (PRECONDITION in float_bvt::to_integer / float_utilst::to_integer),
+  // so this exercises the widest currently-supported destination.
+  float fi128;
+  __CPROVER_assume(fi128 == (float)(1LL << 30));
+  __int128 r10 = (__int128)fi128;
+  assert(r10 == (__int128)(1LL << 30));
+
+  // Test 11: double -> __int128. dest_width = 128, well within the
+  // double precondition boundary `(1 << 10) = 1024`.
+  double di128;
+  __CPROVER_assume(di128 == (double)(1LL << 60));
+  __int128 r11 = (__int128)di128;
+  assert(r11 == (__int128)(1LL << 60));
+#endif
+
+  return 0;
+}
diff --git a/regression/cbmc/Float-to-int-wide/test.desc b/regression/cbmc/Float-to-int-wide/test.desc
new file mode 100644
index 00000000000..dd1abf1f8c3
--- /dev/null
+++ b/regression/cbmc/Float-to-int-wide/test.desc
@@ -0,0 +1,10 @@
+CORE no-new-smt
+main.c
+--floatbv
+^EXIT=0$
+^SIGNAL=0$
+^VERIFICATION SUCCESSFUL$
+--
+^warning: ignoring
+--
+Float-to-integer conversion for values needing more bits than the fraction.
diff --git a/regression/cbmc/Float-to-int-wide/test_smt.desc b/regression/cbmc/Float-to-int-wide/test_smt.desc
new file mode 100644
index 00000000000..d1737879040
--- /dev/null
+++ b/regression/cbmc/Float-to-int-wide/test_smt.desc
@@ -0,0 +1,10 @@
+CORE smt-backend no-new-smt
+main.c
+--z3
+^EXIT=0$
+^SIGNAL=0$
+^VERIFICATION SUCCESSFUL$
+--
+^warning: ignoring
+--
+Float-to-integer conversion for values needing more bits than the fraction (SMT path).
diff --git a/src/solvers/floatbv/float_bv.cpp b/src/solvers/floatbv/float_bv.cpp
index ee657069fe6..713d29a3384 100644
--- a/src/solvers/floatbv/float_bv.cpp
+++ b/src/solvers/floatbv/float_bv.cpp
@@ -381,6 +381,21 @@ exprt float_bvt::to_integer(
   const exprt &rm,
   const ieee_float_spect &spec)
 {
+  // Keep in sync with float_utilst::to_integer — the body below is a
+  // line-for-line translation of that function between the exprt and
+  // literalt/bvt APIs.
+  //
+  // The shift distance is built in `signedbv_typet(spec.e)`, so
+  // `effective_width - 1` (which equals `dest_width - 1` when
+  // `dest_width > fraction_width`) must be representable in that
+  // signed bit width. For all C integer types currently supported
+  // this holds: float (spec.e = 8) supports `dest_width <= 128` (i.e.
+  // up to `__int128`); double (spec.e = 11) supports `dest_width <=
+  // 1024`. If support for wider integer destinations is ever added,
+  // the subtraction type would need to be widened — see the matching
+  // PRECONDITION in float_utilst::to_integer.
+  PRECONDITION(dest_width <= (std::size_t{1} << (spec.e - 1)));
+
   const unbiased_floatt unpacked=unpack(src, spec);
 
   rounding_mode_bitst rounding_mode_bits(rm);
@@ -388,10 +403,27 @@ exprt float_bvt::to_integer(
   // Right now hard-wired to round-to-zero, which is
   // the usual case in ANSI-C.
 
+  const std::size_t fraction_width =
+    to_unsignedbv_type(unpacked.fraction.type()).get_width();
+
+  // Extend the fraction to dest_width if needed, padding with zeros
+  // at the LSB end (like float_utilst does).
+  exprt fraction = unpacked.fraction;
+  std::size_t effective_width = fraction_width;
+
+  if(dest_width > fraction_width)
+  {
+    effective_width = dest_width;
+    fraction = concatenation_exprt(
+      fraction,
+      from_integer(0, unsignedbv_typet(dest_width - fraction_width)),
+      unsignedbv_typet(effective_width));
+  }
+
   // if the exponent is positive, shift right
-  exprt offset=from_integer(spec.f, signedbv_typet(spec.e));
+  exprt offset = from_integer(effective_width - 1, signedbv_typet(spec.e));
   const minus_exprt distance(offset, unpacked.exponent);
-  const lshr_exprt shift_result(unpacked.fraction, distance);
+  const lshr_exprt shift_result(fraction, distance);
 
   // if the exponent is negative, we have zero anyways
   exprt result=shift_result;
diff --git a/src/solvers/floatbv/float_utils.cpp b/src/solvers/floatbv/float_utils.cpp
index ba72bb09b21..8112cbaf29c 100644
--- a/src/solvers/floatbv/float_utils.cpp
+++ b/src/solvers/floatbv/float_utils.cpp
@@ -91,6 +91,22 @@ bvt float_utilst::to_integer(
   // The following is the usual case in ANSI-C, and we optimize for that.
   PRECONDITION(rounding_mode_bits.round_to_zero.is_true());
 
+  // Keep in sync with float_bvt::to_integer — the body below is a
+  // line-for-line translation of that function between the
+  // literalt/bvt and exprt APIs.
+  //
+  // The shift distance is built in a bit-vector of size
+  // `unpacked.exponent.size()` (equal to `spec.e` after unpacking),
+  // so `fraction.size() - 1` (which equals `dest_width - 1` when
+  // `dest_width > fraction_width`) must be representable in that
+  // unsigned bit width. For all C integer types currently supported
+  // this holds: float (spec.e = 8) supports `dest_width <= 128` (i.e.
+  // up to `__int128`); double (spec.e = 11) supports `dest_width <=
+  // 1024`. If support for wider integer destinations is ever added,
+  // the subtraction width would need to be widened — see the matching
+  // PRECONDITION in float_bvt::to_integer.
+  PRECONDITION(dest_width <= (std::size_t{1} << (spec.e - 1)));
+
   const unbiased_floatt unpacked = unpack(src);
 
   bvt fraction = unpacked.fraction;