diff --git a/docs/http.md b/docs/http.md index ce03b22470e..a738b7084e9 100644 --- a/docs/http.md +++ b/docs/http.md @@ -137,7 +137,7 @@ possible. In that case, only the `code` field will be returned. - `code` if the request was successful `Ok` otherwise see the service dependent and general status codes. - `waypoints` array of `Waypoint` objects sorted by distance to the input coordinate. Each object has at least the following additional properties: - - `nodes`: Array of OpenStreetMap node ids. + - `nodes`: Array of OpenStreetMap node ids. Each id is a 64-bit unsigned integer (encoded as a JSON number for the `json` format, and as `ulong` for the `flatbuffers` format). #### Example Requests @@ -700,7 +700,7 @@ Annotation of the whole route leg with fine-grained information about each segme - `distance`: The distance, in meters, between each pair of coordinates - `duration`: The duration between each pair of coordinates, in seconds. Does not include the duration of any turns. - `datasources`: The index of the data source for the speed between each pair of coordinates. `0` is the default profile, other values are supplied via `--segment-speed-file` to `osrm-contract` or `osrm-customize`. String-like names are in the `metadata.datasource_names` array. -- `nodes`: The OSM node ID for each coordinate along the route, excluding the first/last user-supplied coordinates +- `nodes`: Array of OpenStreetMap node ids for each coordinate along the route (excluding the first/last user-supplied coordinates). Each id is a 64-bit unsigned integer (encoded as a JSON number for the `json` format, and as `ulong` for the `flatbuffers` format). Clients consuming flatbuffers should treat these values as 64-bit integers (JS bindings expose them as BigInt). - `weight`: The weights between each pair of coordinates. Does not include any turn costs. - `speed`: Convenience field, calculation of `distance / duration` rounded to one decimal place - `metadata`: Metadata related to other annotations @@ -1094,7 +1094,7 @@ Almost the same as `json` StepManeuver object. The following properties differ: ### Annotation object -Exactly the same as `json` annotation object. +Almost the same as the `json` annotation object. Note: on the flatbuffers wire the `Annotation.nodes` field is a `[ulong]` (64-bit unsigned integers), and the generated JavaScript flatbuffers bindings will expose these values as BigInt / BigUint64Array. Clients that consume flatbuffers should handle 64-bit node ids accordingly. ### Position object diff --git a/features/support/fbresult_generated.js b/features/support/fbresult_generated.js index 33030f75142..816cf5abeae 100644 --- a/features/support/fbresult_generated.js +++ b/features/support/fbresult_generated.js @@ -110,7 +110,7 @@ var Annotation = class _Annotation { } nodes(index) { const offset = this.bb.__offset(this.bb_pos, 10); - return offset ? this.bb.readUint32(this.bb.__vector(this.bb_pos + offset) + index * 4) : 0; + return offset ? this.bb.readUint64(this.bb.__vector(this.bb_pos + offset) + index * 8) : BigInt(0); } nodesLength() { const offset = this.bb.__offset(this.bb_pos, 10); @@ -118,7 +118,7 @@ var Annotation = class _Annotation { } nodesArray() { const offset = this.bb.__offset(this.bb_pos, 10); - return offset ? new Uint32Array(this.bb.bytes().buffer, this.bb.bytes().byteOffset + this.bb.__vector(this.bb_pos + offset), this.bb.__vector_len(this.bb_pos + offset)) : null; + return offset ? new BigUint64Array(this.bb.bytes().buffer, this.bb.bytes().byteOffset + this.bb.__vector(this.bb_pos + offset), this.bb.__vector_len(this.bb_pos + offset)) : null; } weight(index) { const offset = this.bb.__offset(this.bb_pos, 12); @@ -194,14 +194,14 @@ var Annotation = class _Annotation { builder.addFieldOffset(3, nodesOffset, 0); } static createNodesVector(builder, data) { - builder.startVector(4, data.length, 4); + builder.startVector(8, data.length, 8); for (let i = data.length - 1; i >= 0; i--) { - builder.addInt32(data[i]); + builder.addInt64(BigInt(data[i])); } return builder.endVector(); } static startNodesVector(builder, numElems) { - builder.startVector(4, numElems, 4); + builder.startVector(8, numElems, 8); } static addWeight(builder, weightOffset) { builder.addFieldOffset(4, weightOffset, 0); diff --git a/include/engine/api/flatbuffers/route.fbs b/include/engine/api/flatbuffers/route.fbs index bbdd5fcd21c..e00ce9a2052 100644 --- a/include/engine/api/flatbuffers/route.fbs +++ b/include/engine/api/flatbuffers/route.fbs @@ -9,7 +9,7 @@ table Annotation { distance: [uint]; duration: [uint]; datasources: [uint]; - nodes: [uint]; + nodes: [ulong]; weight: [uint]; speed: [float]; metadata: Metadata; diff --git a/include/engine/api/route_api.hpp b/include/engine/api/route_api.hpp index ab8d18aefee..12d5bdbebb1 100644 --- a/include/engine/api/route_api.hpp +++ b/include/engine/api/route_api.hpp @@ -522,7 +522,7 @@ class RouteAPI : public BaseAPI [](const guidance::LegGeometry::Annotation &anno) { return anno.datasource; }); } - std::vector nodes; + std::vector nodes; if (requested_annotations & RouteParameters::AnnotationsType::Nodes) { nodes.reserve(leg_geometry.node_ids.size()); diff --git a/include/extractor/files.hpp b/include/extractor/files.hpp index ce94c9906e7..358bbdbd436 100644 --- a/include/extractor/files.hpp +++ b/include/extractor/files.hpp @@ -3,6 +3,7 @@ #include "extractor/edge_based_edge.hpp" #include "extractor/node_data_container.hpp" +#include "extractor/packed_osm_ids.hpp" #include "extractor/profile_properties.hpp" #include "extractor/query_node.hpp" #include "extractor/serialization.hpp" @@ -453,6 +454,7 @@ void readRawNBGraph(const std::filesystem::path &path, { coordinates[index].lon = current_node.lon; coordinates[index].lat = current_node.lat; + checkPackedOSMNodeIdFits(current_node.node_id); osm_node_ids.push_back(current_node.node_id); index++; }; diff --git a/include/extractor/packed_osm_ids.hpp b/include/extractor/packed_osm_ids.hpp index ee1c793dc5b..e30cfc67d63 100644 --- a/include/extractor/packed_osm_ids.hpp +++ b/include/extractor/packed_osm_ids.hpp @@ -1,19 +1,48 @@ #ifndef OSRM_EXTRACTOR_PACKED_OSM_IDS_HPP #define OSRM_EXTRACTOR_PACKED_OSM_IDS_HPP +#include "util/exception.hpp" #include "util/packed_vector.hpp" #include "util/typedefs.hpp" +#include + namespace osrm::extractor { namespace detail { +// Bit width for packed OSM node IDs. Real-world OSM node IDs already exceed +// 2^33 and grow over time, so 34 bits is no longer sufficient and would +// silently truncate IDs >= 2^34 when written into PackedVector storage. +// 40 bits supports IDs up to ~1.1 * 10^12, providing decades of headroom. +// See https://github.com/Project-OSRM/osrm-backend/issues/7069 +inline constexpr std::size_t PACKED_OSM_ID_BITS = 40; + template -using PackedOSMIDs = util::detail::PackedVector; +using PackedOSMIDs = util::detail::PackedVector; } // namespace detail using PackedOSMIDsView = detail::PackedOSMIDs; using PackedOSMIDs = detail::PackedOSMIDs; + +// Maximum OSM node ID that can be stored in PackedOSMIDs without truncation. +inline constexpr std::uint64_t MAX_PACKED_OSM_NODE_ID = + (std::uint64_t{1} << detail::PACKED_OSM_ID_BITS) - 1; + +// Throws util::exception if `id` is too large to fit into PackedOSMIDs. +// Use this at every call site that pushes OSM node IDs into PackedOSMIDs to +// fail loudly during extraction rather than silently truncating IDs. +inline void checkPackedOSMNodeIdFits(const OSMNodeID id) +{ + if (static_cast(id) > MAX_PACKED_OSM_NODE_ID) + { + throw util::exception("OSM node ID " + std::to_string(static_cast(id)) + + " exceeds the " + std::to_string(detail::PACKED_OSM_ID_BITS) + + "-bit packed storage limit (" + + std::to_string(MAX_PACKED_OSM_NODE_ID) + + "). Increase PACKED_OSM_ID_BITS in packed_osm_ids.hpp."); + } +} } // namespace osrm::extractor #endif diff --git a/src/extractor/extractor.cpp b/src/extractor/extractor.cpp index 1d2b5209f5b..81bedc5d2d0 100644 --- a/src/extractor/extractor.cpp +++ b/src/extractor/extractor.cpp @@ -632,6 +632,7 @@ Extractor::ParsedOSMData Extractor::ParseOSMData(ScriptingEnvironment &scripting const auto ¤t_node = extraction_containers.used_nodes[index]; osm_coordinates[index].lon = current_node.lon; osm_coordinates[index].lat = current_node.lat; + checkPackedOSMNodeIdFits(current_node.node_id); osm_node_ids.push_back(current_node.node_id); } diff --git a/src/util/fingerprint.cpp b/src/util/fingerprint.cpp index 50e0f973acc..3b6853d244b 100644 --- a/src/util/fingerprint.cpp +++ b/src/util/fingerprint.cpp @@ -24,7 +24,8 @@ FingerPrint FingerPrint::GetValid() // 4 chars, 'O','S','R','N' - note the N instead of M, v1 of the fingerprint // used M, so we add one and use N to indicate the newer fingerprint magic number. // Bump this value if the fingerprint format ever changes. - fingerprint.magic_number = {{'O', 'S', 'R', 'N'}}; + // Changed to force incompatibility for updated packed storage layout (packed_osm_ids.hpp). + fingerprint.magic_number = {{'O', 'S', 'R', 'O'}}; fingerprint.major_version = OSRM_VERSION_MAJOR; fingerprint.minor_version = OSRM_VERSION_MINOR; fingerprint.patch_version = OSRM_VERSION_PATCH; diff --git a/unit_tests/library/fb_annotation_nodes.cpp b/unit_tests/library/fb_annotation_nodes.cpp new file mode 100644 index 00000000000..8ae37d2b66c --- /dev/null +++ b/unit_tests/library/fb_annotation_nodes.cpp @@ -0,0 +1,81 @@ +#include + +#include +#include + +#include "engine/api/flatbuffers/fbresult_generated.h" +#include "extractor/packed_osm_ids.hpp" + +#include + +BOOST_AUTO_TEST_CASE(fb_annotation_nodes_64bit_roundtrip) +{ + using namespace osrm; + using namespace osrm::engine::api; + + flatbuffers::FlatBufferBuilder builder; + + const std::vector inputs = { + 0ull, + 1ull, + (1ull << 33), // 2^33 + (1ull << 34), // 2^34, the old truncation boundary + (1ull << 34) + 1, // just above 2^34 + 20'000'000'000ull, // realistic projected OSM node ID + (1ull << 35), // 2^35 + extractor::MAX_PACKED_OSM_NODE_ID - 1, // just below the new limit + extractor::MAX_PACKED_OSM_NODE_ID, // exactly at the new limit + }; + + // Create flatbuffers vector of uint64_t nodes + auto nodes_vector = builder.CreateVector(inputs); + + // Build an Annotation with the nodes vector + fbresult::AnnotationBuilder annotation_builder(builder); + annotation_builder.add_nodes(nodes_vector); + auto annotation_off = annotation_builder.Finish(); + + // Create a Leg that contains the annotation + fbresult::LegBuilder leg_builder(builder); + leg_builder.add_annotations(annotation_off); + auto leg_off = leg_builder.Finish(); + + // Create a RouteObject that contains the leg + std::vector> legs_vec = {leg_off}; + auto legs_vector = builder.CreateVector(legs_vec); + + fbresult::RouteObjectBuilder route_builder(builder); + route_builder.add_legs(legs_vector); + auto route_off = route_builder.Finish(); + + // Create FBResult root with the route + std::vector> routes_vec = {route_off}; + auto routes_vector = builder.CreateVector(routes_vec); + + fbresult::FBResultBuilder result_builder(builder); + result_builder.add_routes(routes_vector); + auto result_off = result_builder.Finish(); + builder.Finish(result_off); + + // Read back and verify + auto fb = fbresult::GetFBResult(builder.GetBufferPointer()); + BOOST_REQUIRE(fb); + auto routes = fb->routes(); + BOOST_REQUIRE(routes); + auto route = routes->Get(0); + BOOST_REQUIRE(route); + auto legs = route->legs(); + BOOST_REQUIRE(legs); + auto leg = legs->Get(0); + BOOST_REQUIRE(leg); + auto annotation = leg->annotations(); + BOOST_REQUIRE(annotation); + auto nodes = annotation->nodes(); + BOOST_REQUIRE(nodes); + + BOOST_CHECK_EQUAL(nodes->size(), inputs.size()); + for (std::size_t i = 0; i < inputs.size(); ++i) + { + BOOST_CHECK_EQUAL(static_cast(nodes->Get(i)), inputs[i]); + } +} diff --git a/unit_tests/util/packed_vector.cpp b/unit_tests/util/packed_vector.cpp index 9fc4c5a3c38..e4b5201dc70 100644 --- a/unit_tests/util/packed_vector.cpp +++ b/unit_tests/util/packed_vector.cpp @@ -1,6 +1,9 @@ #include "util/packed_vector.hpp" #include "util/typedefs.hpp" +#include "extractor/packed_osm_ids.hpp" +#include "util/exception.hpp" + #include "common/range_tools.hpp" #include @@ -202,4 +205,49 @@ BOOST_AUTO_TEST_CASE(packed_vector_33bit_continious) } } +// Regression test for https://github.com/Project-OSRM/osrm-backend/issues/7069 +// OSM node IDs above 2^34 must round-trip through PackedOSMIDs without +// truncation. The previous 34-bit packing silently masked these values. +BOOST_AUTO_TEST_CASE(packed_osm_ids_above_2pow34_roundtrip) +{ + osrm::extractor::PackedOSMIDs ids; + + const std::vector inputs = { + 0ull, + 1ull, + (1ull << 33), // 2^33 + (1ull << 34), // 2^34, the old truncation boundary + (1ull << 34) + 1, // just above 2^34 + 20'000'000'000ull, // realistic projected OSM node ID + (1ull << 35), // 2^35 + osrm::extractor::MAX_PACKED_OSM_NODE_ID - 1, // just below the new limit + osrm::extractor::MAX_PACKED_OSM_NODE_ID, // exactly at the new limit + }; + + for (auto raw : inputs) + { + ids.push_back(OSMNodeID{raw}); + } + + for (std::size_t i = 0; i < inputs.size(); ++i) + { + const OSMNodeID id_val = ids[i]; + BOOST_CHECK_EQUAL(static_cast(id_val), inputs[i]); + } +} + +// Verify that the overflow guard fires for IDs above the packed-storage limit +// instead of silently masking them on push_back. +BOOST_AUTO_TEST_CASE(packed_osm_ids_overflow_guard_throws) +{ + const OSMNodeID just_above_limit{osrm::extractor::MAX_PACKED_OSM_NODE_ID + 1}; + BOOST_CHECK_THROW(osrm::extractor::checkPackedOSMNodeIdFits(just_above_limit), + osrm::util::exception); + + // Sanity: values at and below the limit must not throw. + BOOST_CHECK_NO_THROW(osrm::extractor::checkPackedOSMNodeIdFits( + OSMNodeID{osrm::extractor::MAX_PACKED_OSM_NODE_ID})); + BOOST_CHECK_NO_THROW(osrm::extractor::checkPackedOSMNodeIdFits(OSMNodeID{1ull << 34})); +} + BOOST_AUTO_TEST_SUITE_END()