Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,15 +88,12 @@ of the latest hardware. Roaring bitmaps are already available on a variety of pl
# Requirements

- Linux, macOS, FreeBSD, Windows (MSYS2 and Microsoft Visual Studio).
- We test the library with ARM, x64/x86 and POWER processors. We only support little endian systems (big endian systems are vanishingly rare).
- We test the library with ARM, x64/x86 and POWER processors. We support big endian systems.
- Recent C compiler supporting the C11 standard (GCC 7 or better, LLVM 8 or better (clang), Xcode 11 or better, Microsoft Visual Studio 2022 or better, Intel oneAPI Compiler 2023.2 or better), there is also an optional C++ class that requires a C++ compiler supporting the C++11 standard. We support [Fil-C, the memory-safe C/C++ compiler](https://fil-c.org).
- CMake (to contribute to the project, users can rely on amalgamation/unity builds if they do not wish to use CMake).
- The CMake system assumes that git is available.
- Under x64 systems, the library provides runtime dispatch so that optimized functions are called based on the detected CPU features. It works with GCC, clang (version 9 and up) and Visual Studio (2017 and up). Other systems (e.g., ARM) do not need runtime dispatch.

Hardly anyone has access to an actual big-endian system. Nevertheless,
We support big-endian systems such as IBM s390x through emulators---except for
IO serialization which is only supported on little-endian systems (see [issue 423](https://github.com/RoaringBitmap/CRoaring/issues/423)).


# Quick Start
Expand Down
2 changes: 1 addition & 1 deletion cpp/roaring/roaring.hh
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ class Roaring {
return api::roaring_bitmap_remove_range_closed(&roaring, min, max);
}

/**
/**
* Keep only values in the half-open interval [min, max).
* Equivalent to two consecutive removeRange calls.
*/
Expand Down
10 changes: 8 additions & 2 deletions cpp/roaring/roaring64map.hh
Original file line number Diff line number Diff line change
Expand Up @@ -1141,13 +1141,15 @@ class Roaring64Map {
const char *orig = buf;
// push map size
uint64_t map_size = roarings.size();
std::memcpy(buf, &map_size, sizeof(uint64_t));
uint64_t map_size_le = croaring_htole64(map_size);
std::memcpy(buf, &map_size_le, sizeof(uint64_t));
buf += sizeof(uint64_t);
std::for_each(roarings.cbegin(), roarings.cend(),
[&buf, portable](
const std::pair<const uint32_t, Roaring> &map_entry) {
// push map key
std::memcpy(buf, &map_entry.first, sizeof(uint32_t));
uint32_t key_le = croaring_htole32(map_entry.first);
std::memcpy(buf, &key_le, sizeof(uint32_t));
// ^-- Note: `*((uint32_t*)buf) = map_entry.first;` is
// undefined

Expand Down Expand Up @@ -1175,11 +1177,13 @@ class Roaring64Map {
// get map size
uint64_t map_size;
std::memcpy(&map_size, buf, sizeof(uint64_t));
map_size = croaring_letoh64(map_size);
buf += sizeof(uint64_t);
for (uint64_t lcv = 0; lcv < map_size; lcv++) {
// get map key
uint32_t key;
std::memcpy(&key, buf, sizeof(uint32_t));
key = croaring_letoh32(key);
// ^-- Note: `uint32_t key = *((uint32_t*)buf);` is undefined

buf += sizeof(uint32_t);
Expand Down Expand Up @@ -1209,6 +1213,7 @@ class Roaring64Map {
}
uint64_t map_size;
std::memcpy(&map_size, buf, sizeof(uint64_t));
map_size = croaring_letoh64(map_size);
buf += sizeof(uint64_t);
maxbytes -= sizeof(uint64_t);
for (uint64_t lcv = 0; lcv < map_size; lcv++) {
Expand All @@ -1217,6 +1222,7 @@ class Roaring64Map {
}
uint32_t key;
std::memcpy(&key, buf, sizeof(uint32_t));
key = croaring_letoh32(key);
// ^-- Note: `uint32_t key = *((uint32_t*)buf);` is undefined

buf += sizeof(uint32_t);
Expand Down
53 changes: 51 additions & 2 deletions include/roaring/portability.h
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,55 @@ static inline int roaring_hamming(uint64_t x) {
#define croaring_be64toh(x) croaring_htobe64(x)
// End of host <-> big endian conversion.

// Host <-> little-endian conversion helpers.
//
// The CRoaring "portable" serialization format (and the regular
// roaring_bitmap_serialize / Roaring64Map::write formats which build on it)
// is defined to be little-endian on the wire. Code that reads or writes
// multi-byte integers to such buffers must convert between host and
// little-endian byte order. On little-endian hosts these are no-ops; on
// big-endian hosts they swap bytes.
//
// The "frozen" format is intentionally non-portable and uses native byte
// order; it must not use these helpers.
#if CROARING_IS_BIG_ENDIAN

// Reverses the byte order of a 16-bit value (0xAABB -> 0xBBAA).
static inline uint16_t croaring_bswap16(uint16_t x) {
    // x is promoted to int before shifting; the casts drop the excess bits.
    const uint16_t low_to_high = (uint16_t)(x << 8);
    const uint16_t high_to_low = (uint16_t)(x >> 8);
    return (uint16_t)(low_to_high | high_to_low);
}

// Reverses the byte order of a 32-bit value (0xAABBCCDD -> 0xDDCCBBAA).
static inline uint32_t croaring_bswap32(uint32_t x) {
    // Shift first, then mask: the outermost bytes need no mask because the
    // shift itself discards everything else.
    const uint32_t byte0_to_3 = x << 24;
    const uint32_t byte1_to_2 = (x << 8) & 0x00FF0000U;
    const uint32_t byte2_to_1 = (x >> 8) & 0x0000FF00U;
    const uint32_t byte3_to_0 = x >> 24;
    return byte0_to_3 | byte1_to_2 | byte2_to_1 | byte3_to_0;
}

// Reverses the byte order of a 64-bit value
// (0x0102030405060708 -> 0x0807060504030201).
static inline uint64_t croaring_bswap64(uint64_t x) {
    // Stage 1: exchange the two bytes inside every 16-bit lane.
    x = ((x & 0x00FF00FF00FF00FFULL) << 8) | ((x >> 8) & 0x00FF00FF00FF00FFULL);
    // Stage 2: exchange the two 16-bit lanes inside every 32-bit half.
    x = ((x & 0x0000FFFF0000FFFFULL) << 16) |
        ((x >> 16) & 0x0000FFFF0000FFFFULL);
    // Stage 3: exchange the two 32-bit halves.
    return (x << 32) | (x >> 32);
}

// Big-endian host: producing a little-endian value means reversing the bytes.
#define croaring_htole16(x) croaring_bswap16(x)
#define croaring_htole32(x) croaring_bswap32(x)
#define croaring_htole64(x) croaring_bswap64(x)

#else // CROARING_IS_BIG_ENDIAN

// Not a big-endian host: the value is passed through unchanged (no-op).
#define croaring_htole16(x) (x)
#define croaring_htole32(x) (x)
#define croaring_htole64(x) (x)

#endif // CROARING_IS_BIG_ENDIAN

// Little-endian -> host reuses the host -> little-endian helpers: the
// conversion is either a byte swap or the identity, and each undoes itself.
#define croaring_letoh16(x) croaring_htole16(x)
#define croaring_letoh32(x) croaring_htole32(x)
#define croaring_letoh64(x) croaring_htole64(x)

// Defines for the possible CROARING atomic implementations
#define CROARING_ATOMIC_IMPL_NONE 1
#define CROARING_ATOMIC_IMPL_CPP 2
Expand All @@ -477,13 +526,13 @@ static inline int roaring_hamming(uint64_t x) {
#define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_CPP
#endif //__has_include(<atomic>)
#else
// We lack __has_include to check:
// We lack __has_include to check:
#define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_CPP
#endif //__has_include
#elif __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_ATOMICS__)
#define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_C
#elif CROARING_REGULAR_VISUAL_STUDIO
// https://www.technetworkhub.com/c11-atomics-in-visual-studio-2022-version-17/
// https://www.technetworkhub.com/c11-atomics-in-visual-studio-2022-version-17/
#define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_C_WINDOWS
#endif
#endif // !defined(CROARING_ATOMIC_IMPL)
Expand Down
33 changes: 6 additions & 27 deletions include/roaring/roaring.h
Original file line number Diff line number Diff line change
Expand Up @@ -636,10 +636,6 @@ size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r);
*
* Returns how many bytes written, should be `roaring_bitmap_size_in_bytes(r)`.
*
* This function is endian-sensitive. If you have a big-endian system (e.g., a
* mainframe IBM s390x), the data format is going to be big-endian and not
* compatible with little-endian systems.
*
* When serializing data to a file, we recommend that you also use
* checksums so that, at deserialization, you can be confident
* that you are recovering the correct data.
Expand All @@ -652,10 +648,6 @@ size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf);
* (See `roaring_bitmap_portable_deserialize()` if you want a format that's
* compatible with Java and Go implementations).
*
* This function is endian-sensitive. If you have a big-endian system (e.g., a
* mainframe IBM s390x), the data format is going to be big-endian and not
* compatible with little-endian systems.
*
* The returned pointer may be NULL in case of errors.
*/
roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf);
Expand All @@ -666,10 +658,6 @@ roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf);
* (See `roaring_bitmap_portable_deserialize_safe()` if you want a format that's
* compatible with Java and Go implementations).
*
* This function is endian-sensitive. If you have a big-endian system (e.g., a
* mainframe IBM s390x), the data format is going to be big-endian and not
* compatible with little-endian systems.
*
* The difference with `roaring_bitmap_deserialize()` is that this function
* checks that the input buffer is a valid bitmap. If the buffer is too small,
* NULL is returned.
Expand Down Expand Up @@ -705,10 +693,6 @@ size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r);
* This is meant to be compatible with the Java and Go versions:
* https://github.com/RoaringBitmap/RoaringFormatSpec
*
* This function is endian-sensitive. If you have a big-endian system (e.g., a
* mainframe IBM s390x), the data format is going to be big-endian and not
* compatible with little-endian systems.
*
* The returned pointer may be NULL in case of errors.
*/
roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf);
Expand Down Expand Up @@ -742,10 +726,6 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf);
* corresponds to the serialized bitmap. The CRoaring library does not provide
* checksumming.
*
* This function is endian-sensitive. If you have a big-endian system (e.g., a
* mainframe IBM s390x), the data format is going to be big-endian and not
* compatible with little-endian systems.
*
* The returned pointer may be NULL in case of errors.
*/
roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf,
Expand All @@ -769,7 +749,8 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf,
*
* This function is endian-sensitive. If you have a big-endian system (e.g., a
* mainframe IBM s390x), the data format is going to be big-endian and not
* compatible with little-endian systems.
* compatible with little-endian systems. It is not a bug, it is by design,
* since the format imitates C memory layout of roaring_bitmap_t.
*
* The returned pointer may be NULL in case of errors.
*/
Expand Down Expand Up @@ -803,10 +784,6 @@ size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r);
* This is meant to be compatible with the Java and Go versions:
* https://github.com/RoaringBitmap/RoaringFormatSpec
*
* This function is endian-sensitive. If you have a big-endian system (e.g., a
* mainframe IBM s390x), the data format is going to be big-endian and not
* compatible with little-endian systems.
*
* When serializing data to a file, we recommend that you also use
* checksums so that, at deserialization, you can be confident
* that you are recovering the correct data.
Expand Down Expand Up @@ -843,7 +820,8 @@ size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *r);
*
* This function is endian-sensitive. If you have a big-endian system (e.g., a
* mainframe IBM s390x), the data format is going to be big-endian and not
* compatible with little-endian systems.
* compatible with little-endian systems. This is not a bug, it is by design,
* since the format imitates C memory layout of roaring_bitmap_t.
*
* When serializing data to a file, we recommend that you also use
* checksums so that, at deserialization, you can be confident
Expand All @@ -864,7 +842,8 @@ void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf);
*
* This function is endian-sensitive. If you have a big-endian system (e.g., a
* mainframe IBM s390x), the data format is going to be big-endian and not
* compatible with little-endian systems.
* compatible with little-endian systems. This is not a bug, it is by design,
* since the format imitates C memory layout of roaring_bitmap_t.
*/
const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf,
size_t length);
Expand Down
14 changes: 4 additions & 10 deletions include/roaring/roaring64.h
Original file line number Diff line number Diff line change
Expand Up @@ -583,10 +583,6 @@ size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r);
* This is meant to be compatible with other languages:
* https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations
*
* This function is endian-sensitive. If you have a big-endian system (e.g., a
* mainframe IBM s390x), the data format is going to be big-endian and not
* compatible with little-endian systems.
*
* When serializing data to a file, we recommend that you also use
* checksums so that, at deserialization, you can be confident
* that you are recovering the correct data.
Expand Down Expand Up @@ -631,10 +627,6 @@ size_t roaring64_bitmap_portable_deserialize_size(const char *buf,
* We also recommend that you use checksums to check that serialized data
* corresponds to the serialized bitmap. The CRoaring library does not provide
* checksumming.
*
* This function is endian-sensitive. If you have a big-endian system (e.g., a
* mainframe IBM s390x), the data format is going to be big-endian and not
* compatible with little-endian systems.
*/
roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe(const char *buf,
size_t maxbytes);
Expand Down Expand Up @@ -663,7 +655,8 @@ size_t roaring64_bitmap_frozen_size_in_bytes(const roaring64_bitmap_t *r);
*
* This function is endian-sensitive. If you have a big-endian system (e.g., a
* mainframe IBM s390x), the data format is going to be big-endian and not
* compatible with little-endian systems.
* compatible with little-endian systems. This is not a bug, it is by design,
* since the format imitates C memory layout of roaring64_bitmap_t.
*/
size_t roaring64_bitmap_frozen_serialize(const roaring64_bitmap_t *r,
char *buf);
Expand All @@ -681,7 +674,8 @@ size_t roaring64_bitmap_frozen_serialize(const roaring64_bitmap_t *r,
*
* This function is endian-sensitive. If you have a big-endian system (e.g., a
* mainframe IBM s390x), the data format is going to be big-endian and not
* compatible with little-endian systems.
* compatible with little-endian systems. This is not a bug, it is by design,
* since the format imitates C memory layout of roaring64_bitmap_t.
*/
roaring64_bitmap_t *roaring64_bitmap_frozen_view(const char *buf,
size_t maxbytes);
Expand Down
15 changes: 15 additions & 0 deletions src/containers/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,14 @@ int32_t array_container_number_of_runs(const array_container_t *ac) {
*
*/
int32_t array_container_write(const array_container_t *container, char *buf) {
#if CROARING_IS_BIG_ENDIAN
    // Big-endian host: convert each 16-bit value to little-endian before
    // copying it out, advancing a cursor through the output buffer.
    char *cursor = buf;
    for (int32_t idx = 0; idx < container->cardinality; ++idx) {
        const uint16_t value_le = croaring_htole16(container->array[idx]);
        memcpy(cursor, &value_le, sizeof(uint16_t));
        cursor += sizeof(uint16_t);
    }
#else
    // Otherwise the in-memory array is copied out verbatim in one call.
    const size_t payload_bytes = container->cardinality * sizeof(uint16_t);
    memcpy(buf, container->array, payload_bytes);
#endif
    return array_container_size_in_bytes(container);
}

Expand Down Expand Up @@ -543,7 +550,15 @@ int32_t array_container_read(int32_t cardinality, array_container_t *container,
array_container_grow(container, cardinality, false);
}
container->cardinality = cardinality;
#if CROARING_IS_BIG_ENDIAN
for (int32_t i = 0; i < cardinality; ++i) {
uint16_t v_le;
memcpy(&v_le, buf + i * sizeof(uint16_t), sizeof(uint16_t));
container->array[i] = croaring_letoh16(v_le);
}
#else
memcpy(container->array, buf, container->cardinality * sizeof(uint16_t));
#endif

return array_container_size_in_bytes(container);
}
Expand Down
15 changes: 15 additions & 0 deletions src/containers/bitset.c
Original file line number Diff line number Diff line change
Expand Up @@ -1048,15 +1048,30 @@ int bitset_container_number_of_runs(bitset_container_t *bc) {

// Copies the container's BITSET_CONTAINER_SIZE_IN_WORDS 64-bit words into
// `buf` and returns bitset_container_size_in_bytes(container).
int32_t bitset_container_write(const bitset_container_t *container,
                               char *buf) {
#if CROARING_IS_BIG_ENDIAN
    // Big-endian host: convert every word to little-endian on the way out.
    char *cursor = buf;
    for (int32_t word_idx = 0; word_idx < BITSET_CONTAINER_SIZE_IN_WORDS;
         ++word_idx) {
        const uint64_t word_le = croaring_htole64(container->words[word_idx]);
        memcpy(cursor, &word_le, sizeof(uint64_t));
        cursor += sizeof(uint64_t);
    }
#else
    // Otherwise the word array is copied out verbatim in one call.
    const size_t payload_bytes =
        BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
    memcpy(buf, container->words, payload_bytes);
#endif
    return bitset_container_size_in_bytes(container);
}


// Fills `container->words` from `buf` (BITSET_CONTAINER_SIZE_IN_WORDS 64-bit
// words) and stores the caller-supplied `cardinality` as-is; it is not
// recomputed from the bits. Returns bitset_container_size_in_bytes(container).
int32_t bitset_container_read(int32_t cardinality, bitset_container_t *container,
                              const char *buf) {
    container->cardinality = cardinality;
#if CROARING_IS_BIG_ENDIAN
    // Big-endian host: each little-endian word is converted to host order.
    const char *cursor = buf;
    for (int32_t word_idx = 0; word_idx < BITSET_CONTAINER_SIZE_IN_WORDS;
         ++word_idx) {
        uint64_t word_le;
        memcpy(&word_le, cursor, sizeof(uint64_t));
        container->words[word_idx] = croaring_letoh64(word_le);
        cursor += sizeof(uint64_t);
    }
#else
    // Otherwise the buffer is copied into the word array in one call.
    const size_t payload_bytes =
        BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
    memcpy(container->words, buf, payload_bytes);
#endif
    return bitset_container_size_in_bytes(container);
}

Expand Down
28 changes: 26 additions & 2 deletions src/containers/run.c
Original file line number Diff line number Diff line change
Expand Up @@ -717,9 +717,21 @@ bool run_container_validate(const run_container_t *run, const char **reason) {

/**
 * Writes `container` to `buf` as a 16-bit run count followed by `n_runs`
 * (value, length) pairs of 16-bit integers. Every integer goes through the
 * host-to-little-endian helpers, so the output is little-endian.
 *
 * Returns run_container_size_in_bytes(container).
 * NOTE(review): `buf` is presumably required to hold at least that many
 * bytes; no bounds check is performed here -- confirm against callers.
 */
int32_t run_container_write(const run_container_t *container, char *buf) {
    // The header stores the run count as 16 bits. It is written exactly once,
    // already converted; the earlier native-order store was immediately
    // overwritten and has been dropped.
    uint16_t cast_16 = container->n_runs;
    uint16_t n_runs_le = croaring_htole16(cast_16);
    memcpy(buf, &n_runs_le, sizeof(uint16_t));
#if CROARING_IS_BIG_ENDIAN
    // Big-endian host: convert each field of each run individually.
    char *out = buf + sizeof(uint16_t);
    for (int32_t i = 0; i < container->n_runs; ++i) {
        uint16_t v_le = croaring_htole16(container->runs[i].value);
        uint16_t l_le = croaring_htole16(container->runs[i].length);
        memcpy(out, &v_le, sizeof(uint16_t));
        memcpy(out + sizeof(uint16_t), &l_le, sizeof(uint16_t));
        out += sizeof(rle16_t);
    }
#else
    // Otherwise the run array is copied out verbatim after the header.
    memcpy(buf + sizeof(uint16_t), container->runs,
           container->n_runs * sizeof(rle16_t));
#endif
    return run_container_size_in_bytes(container);
}

Expand All @@ -728,12 +740,24 @@ int32_t run_container_read(int32_t cardinality, run_container_t *container,
(void)cardinality;
uint16_t cast_16;
memcpy(&cast_16, buf, sizeof(uint16_t));
container->n_runs = cast_16;
container->n_runs = croaring_letoh16(cast_16);
if (container->n_runs > container->capacity)
run_container_grow(container, container->n_runs, false);
if (container->n_runs > 0) {
#if CROARING_IS_BIG_ENDIAN
const char *in = buf + sizeof(uint16_t);
for (int32_t i = 0; i < container->n_runs; ++i) {
uint16_t v_le, l_le;
memcpy(&v_le, in, sizeof(uint16_t));
memcpy(&l_le, in + sizeof(uint16_t), sizeof(uint16_t));
container->runs[i].value = croaring_letoh16(v_le);
container->runs[i].length = croaring_letoh16(l_le);
in += sizeof(rle16_t);
}
#else
memcpy(container->runs, buf + sizeof(uint16_t),
container->n_runs * sizeof(rle16_t));
#endif
}
return run_container_size_in_bytes(container);
}
Expand Down
Loading
Loading