From e14b54db474be8a411075c9064bbe9d1b0d2132b Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Mon, 27 Apr 2026 19:11:24 +0000 Subject: [PATCH] Fix float-to-integer conversion for wide destination types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit float_bvt::to_integer did not extend the fraction to dest_width before shifting, causing incorrect results when dest_width exceeds the fraction width (the shift distance went negative for large exponents). The fix mirrors the extend-then-shift pattern already used by float_utilst::to_integer. The PR carries over a latent constraint shared between both encodings: the shift distance is built in a bit-vector of width spec.e, so `dest_width - 1` must be representable in that signed/unsigned bit width. For the integer types C currently supports this holds — float (spec.e = 8) supports dest_width up to 128 (i.e. up to __int128); double (spec.e = 11) supports dest_width up to 1024. That assumption is now an explicit PRECONDITION on both float_bvt::to_integer and float_utilst::to_integer, with matching 'keep in sync' comments. Widening the subtraction type to support even wider integer destinations (and pulling the integer-conversion logic out into a single helper shared between the two IR layers) is a sensible follow-up. Regression test exercises the bug across the parameter space: int (at and above 2^24), unsigned int (including a 2^31 case where unsigned-vs-signed semantics matter), long long from float and double sources (the 64-bit destination cases that were the most common practical breakage), and __int128 from both float and double sources where supported. Without the fix, eight of these eleven assertions fail on develop. Co-authored-by: Kiro --- regression/cbmc/Float-to-int-wide/main.c | 98 +++++++++++++++++++ regression/cbmc/Float-to-int-wide/test.desc | 10 ++ .../cbmc/Float-to-int-wide/test_smt.desc | 10 ++ src/solvers/floatbv/float_bv.cpp | 36 ++++++- src/solvers/floatbv/float_utils.cpp | 16 +++ 5 files changed, 168 insertions(+), 2 deletions(-) create mode 100644 regression/cbmc/Float-to-int-wide/main.c create mode 100644 regression/cbmc/Float-to-int-wide/test.desc create mode 100644 regression/cbmc/Float-to-int-wide/test_smt.desc diff --git a/regression/cbmc/Float-to-int-wide/main.c b/regression/cbmc/Float-to-int-wide/main.c new file mode 100644 index 00000000000..58781cc7244 --- /dev/null +++ b/regression/cbmc/Float-to-int-wide/main.c @@ -0,0 +1,98 @@ +// Test float-to-integer (and double-to-integer) conversion for values +// that need more bits than the source's fraction width. +// +// Bug: float_bvt::to_integer did not extend the fraction to the +// destination width before shifting, so for any integer destination +// wider than the source's fraction the shift distance went negative +// for sufficiently large exponents and the conversion produced +// undefined results. float (24 fraction bits incl. hidden) was +// affected for any 32-bit-or-wider integer destination; double (53 +// fraction bits) was affected for any 64-bit-or-wider integer +// destination. + +#include + +extern int __CPROVER_rounding_mode; + +int main() +{ + // Test 1: large positive float -> int. + float a; + __CPROVER_assume(a == 1000000.0f); + __CPROVER_rounding_mode = 3; // ROUND_TO_ZERO (required for C semantics) + int r1 = (int)a; + assert(r1 == 1000000); + + // Test 2: value at fraction-width boundary (2^24, exactly representable). + float b; + __CPROVER_assume(b == 16777216.0f); + int r2 = (int)b; + assert(r2 == 16777216); + + // Test 3: value near INT_MAX, well past the fraction width. + float c; + __CPROVER_assume(c == 2.0e9f); + int r3 = (int)c; + assert(r3 == 2000000000); + + // Test 4: negative large value. + float d; + __CPROVER_assume(d == -1000000.0f); + int r4 = (int)d; + assert(r4 == -1000000); + + // Test 5: float -> unsigned int, exact value with exponent > fraction + // width. + float e1; + __CPROVER_assume(e1 == 16777216.0f); // 2^24, exactly representable + unsigned int r5 = (unsigned int)e1; + assert(r5 == 16777216u); + + // Test 6: float -> unsigned int, value at 2^31 (exactly representable + // as float, above INT_MAX so the unsigned-vs-signed branch matters). + float e2; + __CPROVER_assume(e2 == (float)(1U << 31)); + unsigned int r6 = (unsigned int)e2; + assert(r6 == (1U << 31)); + + // Test 7: float -> long long. dest_width = 64, fraction_width = 24, + // so the fix's extension path is exercised. + float ll1; + __CPROVER_assume(ll1 == (float)(1LL << 30)); + long long r7 = (long long)ll1; + assert(r7 == (1LL << 30)); + + // Test 8: double -> long long. dest_width = 64, fraction_width = 53, + // exercises a different effective_width than tests 7. + double d1; + __CPROVER_assume(d1 == (double)(1LL << 40)); + long long r8 = (long long)d1; + assert(r8 == (1LL << 40)); + + // Test 9: double -> long long, value above 2^53 where double can no + // longer represent every integer; check an exactly-representable one. + double d2; + __CPROVER_assume(d2 == (double)(1LL << 60)); + long long r9 = (long long)d2; + assert(r9 == (1LL << 60)); + +#if defined(__SIZEOF_INT128__) + // Test 10: float -> __int128. dest_width = 128 sits at the documented + // boundary `(1 << (spec.e - 1)) = 128` for single-precision sources + // (PRECONDITION in float_bvt::to_integer / float_utilst::to_integer), + // so this exercises the widest currently-supported destination. + float fi128; + __CPROVER_assume(fi128 == (float)(1LL << 30)); + __int128 r10 = (__int128)fi128; + assert(r10 == (__int128)(1LL << 30)); + + // Test 11: double -> __int128. dest_width = 128, well within the + // double precondition boundary `(1 << 10) = 1024`. + double di128; + __CPROVER_assume(di128 == (double)(1LL << 60)); + __int128 r11 = (__int128)di128; + assert(r11 == (__int128)(1LL << 60)); +#endif + + return 0; +} diff --git a/regression/cbmc/Float-to-int-wide/test.desc b/regression/cbmc/Float-to-int-wide/test.desc new file mode 100644 index 00000000000..dd1abf1f8c3 --- /dev/null +++ b/regression/cbmc/Float-to-int-wide/test.desc @@ -0,0 +1,10 @@ +CORE no-new-smt +main.c +--floatbv +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +-- +Float-to-integer conversion for values needing more bits than the fraction. diff --git a/regression/cbmc/Float-to-int-wide/test_smt.desc b/regression/cbmc/Float-to-int-wide/test_smt.desc new file mode 100644 index 00000000000..d1737879040 --- /dev/null +++ b/regression/cbmc/Float-to-int-wide/test_smt.desc @@ -0,0 +1,10 @@ +CORE smt-backend no-new-smt +main.c +--z3 +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +-- +Float-to-integer conversion for values needing more bits than the fraction (SMT path). diff --git a/src/solvers/floatbv/float_bv.cpp b/src/solvers/floatbv/float_bv.cpp index ee657069fe6..713d29a3384 100644 --- a/src/solvers/floatbv/float_bv.cpp +++ b/src/solvers/floatbv/float_bv.cpp @@ -381,6 +381,21 @@ exprt float_bvt::to_integer( const exprt &rm, const ieee_float_spect &spec) { + // Keep in sync with float_utilst::to_integer — the body below is a + // line-for-line translation of that function between the exprt and + // literalt/bvt APIs. + // + // The shift distance is built in `signedbv_typet(spec.e)`, so + // `effective_width - 1` (which equals `dest_width - 1` when + // `dest_width > fraction_width`) must be representable in that + // signed bit width. For all C integer types currently supported + // this holds: float (spec.e = 8) supports `dest_width <= 128` (i.e. + // up to `__int128`); double (spec.e = 11) supports `dest_width <= + // 1024`. If support for wider integer destinations is ever added, + // the subtraction type would need to be widened — see the matching + // PRECONDITION in float_utilst::to_integer. + PRECONDITION(dest_width <= (std::size_t{1} << (spec.e - 1))); + const unbiased_floatt unpacked=unpack(src, spec); rounding_mode_bitst rounding_mode_bits(rm); @@ -388,10 +403,27 @@ exprt float_bvt::to_integer( // Right now hard-wired to round-to-zero, which is // the usual case in ANSI-C. + const std::size_t fraction_width = + to_unsignedbv_type(unpacked.fraction.type()).get_width(); + + // Extend the fraction to dest_width if needed, padding with zeros + // at the LSB end (like float_utilst does). + exprt fraction = unpacked.fraction; + std::size_t effective_width = fraction_width; + + if(dest_width > fraction_width) + { + effective_width = dest_width; + fraction = concatenation_exprt( + fraction, + from_integer(0, unsignedbv_typet(dest_width - fraction_width)), + unsignedbv_typet(effective_width)); + } + // if the exponent is positive, shift right - exprt offset=from_integer(spec.f, signedbv_typet(spec.e)); + exprt offset = from_integer(effective_width - 1, signedbv_typet(spec.e)); const minus_exprt distance(offset, unpacked.exponent); - const lshr_exprt shift_result(unpacked.fraction, distance); + const lshr_exprt shift_result(fraction, distance); // if the exponent is negative, we have zero anyways exprt result=shift_result; diff --git a/src/solvers/floatbv/float_utils.cpp b/src/solvers/floatbv/float_utils.cpp index ba72bb09b21..8112cbaf29c 100644 --- a/src/solvers/floatbv/float_utils.cpp +++ b/src/solvers/floatbv/float_utils.cpp @@ -91,6 +91,22 @@ bvt float_utilst::to_integer( // The following is the usual case in ANSI-C, and we optimize for that. PRECONDITION(rounding_mode_bits.round_to_zero.is_true()); + // Keep in sync with float_bvt::to_integer — the body below is a + // line-for-line translation of that function between the + // literalt/bvt and exprt APIs. + // + // The shift distance is built in a bit-vector of size + // `unpacked.exponent.size()` (equal to `spec.e` after unpacking), + // so `fraction.size() - 1` (which equals `dest_width - 1` when + // `dest_width > fraction_width`) must be representable in that + // unsigned bit width. For all C integer types currently supported + // this holds: float (spec.e = 8) supports `dest_width <= 128` (i.e. + // up to `__int128`); double (spec.e = 11) supports `dest_width <= + // 1024`. If support for wider integer destinations is ever added, + // the subtraction width would need to be widened — see the matching + // PRECONDITION in float_bvt::to_integer. + PRECONDITION(dest_width <= (std::size_t{1} << (spec.e - 1))); + const unbiased_floatt unpacked = unpack(src); bvt fraction = unpacked.fraction;