Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ tests/config.h
# CMake
build

# clangd
.cache/clangd/


#############BEGIN VISUAL STUDIO############
Expand Down
33 changes: 33 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -805,6 +805,39 @@ You should be aware that a conventional bitset (`bitset_t *`) may use much more
memory than a Roaring bitmap in some cases. You should run benchmarks to determine
whether the conversion to a bitset has performance benefits in your case.


# Convert to boolean array (C)
Comment thread
RinChanNOWWW marked this conversation as resolved.
Outdated

This example shows how to convert a range of a Roaring bitmap to a boolean array using `roaring_bitmap_range_bool_array`:

```c
roaring_bitmap_t *r1 = roaring_bitmap_create();
for (uint32_t i = 100; i < 100000; i+= 1 + (i%5)) {
roaring_bitmap_add(r1, i);
}
for (uint32_t i = 100000; i < 500000; i+= 100) {
roaring_bitmap_add(r1, i);
}
roaring_bitmap_add_range(r1, 500000, 600000);

// Convert a range to boolean array
uint32_t range_start = 50; // First value of the range (included)
uint32_t range_end = 1000; // End of the range (the value 1000 is not included)
bool *bool_array = malloc((range_end - range_start) * sizeof(bool));

// Convert range to boolean array
roaring_bitmap_range_bool_array(r1, range_start, range_end, bool_array);
Comment thread
RinChanNOWWW marked this conversation as resolved.
Outdated

// The bool_array now contains true/false for the values in [range_start, range_end):
// bool_array[i] is true if the value (range_start + i) is present in the bitmap

// you must free the memory:
free(bool_array);
roaring_bitmap_free(r1);
```

This function stores each element's presence in a single byte as a boolean value, which can be useful when you need to work with boolean arrays directly for a specific range of the bitmap.

# Example (C++)


Expand Down
2 changes: 1 addition & 1 deletion benchmarks/bitset_container_benchmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include "benchmark.h"
#include "random.h"

#define DIV_CEIL_64K(denom) (((1 << 16) + ((denom)-1)) / (denom))
#define DIV_CEIL_64K(denom) (((1 << 16) + ((denom) - 1)) / (denom))
Comment thread
RinChanNOWWW marked this conversation as resolved.
Outdated

const int repeat = 500;

Expand Down
15 changes: 15 additions & 0 deletions include/roaring/containers/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,21 @@ inline int array_container_index_equalorlarger(const array_container_t *arr,
}
}

/**
* Reads values from the array container into a boolean buffer.
*
* @param ac The array container to read from
* @param it Iterator state (index into the array)
* @param buf Boolean buffer to write to
* @param max_value Stop reading when reaching this value. If it is null, read
* the whole container.
* @param value_out Output parameter for the next value
* @return true if there are more values to read, false otherwise
*/
bool array_container_iterator_read_into_bool(
const array_container_t *ac, struct roaring_container_iterator_s *it,
bool *buf, const uint16_t *max_value, uint16_t *value_out);

/*
* Adds all values in range [min,max] using hint:
* nvals_less is the number of array values less than $min
Expand Down
15 changes: 15 additions & 0 deletions include/roaring/containers/bitset.h
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,21 @@ int bitset_container_get_index(const bitset_container_t *container, uint16_t x);
int bitset_container_index_equalorlarger(const bitset_container_t *container,
uint16_t x);

/**
* Reads values from the bitset container into a boolean buffer.
*
* @param bc The bitset container to read from
* @param it Iterator state (index into the bitset)
* @param buf Boolean buffer to write to
* @param max_value Stop reading when reaching this value. If it is null, read
* the whole container.
* @param value_out Output parameter for the next value
* @return true if there are more values to read, false otherwise
*/
bool bitset_container_iterator_read_into_bool(
const bitset_container_t *bc, struct roaring_container_iterator_s *it,
bool *buf, const uint16_t *max_value, uint16_t *value_out);

#ifdef __cplusplus
}
}
Expand Down
20 changes: 20 additions & 0 deletions include/roaring/containers/containers.h
Original file line number Diff line number Diff line change
Expand Up @@ -2477,6 +2477,26 @@ bool container_iterator_read_into_uint64(const container_t *c, uint8_t typecode,
uint32_t count, uint32_t *consumed,
uint16_t *value_out);

/**
* Reads entries from the container up to and including the last entry whose
* value is strictly smaller than `*max_value` (`*max_value` itself is
* excluded), and sets the corresponding positions in `buf` to true. If
* `max_value` is null, then all remaining entries are read.
*
* The `buf` array is filled starting from index 0, which corresponds to the
* initial iterator position `it`. For subsequent iterator positions `it_new`,
* set `buf[it_new->current_value - it->current_value]` to true.
*
* Returns true and sets `value_out` if a value is present after reading the
* entries.
*
* The initial `it` should have a value.
*/
bool container_iterator_read_into_bool(const container_t *c, uint8_t typecode,
Comment thread
RinChanNOWWW marked this conversation as resolved.
roaring_container_iterator_t *it,
bool *buf, const uint16_t *max_value,
uint16_t *value_out);

/**
* Skips the next `skip_count` entries in the container iterator. Returns true
* and sets `value_out` if a value is present after skipping. Returns false if
Expand Down
17 changes: 16 additions & 1 deletion include/roaring/containers/run.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ typedef struct rle16_s rle16_t;

#ifdef __cplusplus
#define CROARING_MAKE_RLE16(val, len) \
{ (uint16_t)(val), (uint16_t)(len) } // no tagged structs until c++20
{(uint16_t)(val), (uint16_t)(len)} // no tagged structs until c++20
#else
#define CROARING_MAKE_RLE16(val, len) \
(rle16_t) { .value = (uint16_t)(val), .length = (uint16_t)(len) }
Expand Down Expand Up @@ -709,6 +709,21 @@ static inline void run_container_remove_range(run_container_t *run,
}
}

/**
* Reads values from the run container into a boolean buffer.
*
* @param rc The run container to read from
* @param it Iterator state (index into the runs array)
* @param buf Boolean buffer to write to
* @param max_value Stop reading when reaching this value. If it is null, read
* the whole container.
* @param value_out Output parameter for the current/next value
* @return true if there are more values to read, false otherwise
*/
bool run_container_iterator_read_into_bool(
const run_container_t *rc, struct roaring_container_iterator_s *it,
bool *buf, const uint16_t *max_value, uint16_t *value_out);

#ifdef __cplusplus
}
}
Expand Down
33 changes: 33 additions & 0 deletions include/roaring/roaring.h
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,24 @@ void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans);
*/
bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t *bitset);

/**
* Convert the bitmap within the range [range_start, range_end) to a dense bool
Comment thread
RinChanNOWWW marked this conversation as resolved.
Outdated
* array and output in `ans`.
*
* For each index `i` in [0, range_end - range_start) of the output array,
* `ans[i]` is set to true if the value `range_start + i` is present in the
* bitmap, and false otherwise.
Comment thread
RinChanNOWWW marked this conversation as resolved.
Outdated
*
* The caller is responsible for ensuring that enough memory is allocated, e.g.
*
* ans = malloc((range_end - range_start) * sizeof(bool));
*
* For more control, see `roaring_uint32_iterator_move_equalorlarger` and
* `roaring_uint32_iterator_read_into_bool`.
*/
void roaring_bitmap_range_bool_array(const roaring_bitmap_t *r,
uint32_t range_start, uint32_t range_end,
bool *ans);
/**
* Convert the bitmap to a sorted array from `offset` by `limit`, output in
* `ans`.
Expand Down Expand Up @@ -1208,6 +1226,21 @@ CROARING_DEPRECATED static inline void roaring_free_uint32_iterator(
uint32_t roaring_uint32_iterator_read(roaring_uint32_iterator_t *it,
uint32_t *buf, uint32_t count);

/**
* Reads values until the last value that is strictly smaller than `max_value`
* and records them in the bool array `buf`.
*
* This function follows the usual iterator semantics and can be used together
* with the other iterator functions.
*
* Let `it1` be the state of the iterator on entry (it must have a value). For
* every value `v` that is read, `buf[v - it1.current_value]` is set to true;
* other positions are not written, so they remain false if `buf` was
* zero-initialized.
* - after the function returns, the iterator is positioned at the next element
*   (the first value that is not smaller than `max_value`), if there is one
*/
void roaring_uint32_iterator_read_into_bool(roaring_uint32_iterator_t *it,
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The function roaring_bitmap_range_bool_array(r1, range_start, range_end, bool_array) is fine and easy to understand, but I don't understand the use case here, and I don't understand from the description what it does.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I presume that the user is expected to have an iterator that points at some initial value, but I don't understand how they would do it cleanly and what the purpose is.

Copy link
Copy Markdown
Author

@RinChanNOWWW RinChanNOWWW Dec 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function is used to iterate the giving it until max_value. When the function returns, the status of final it will be:

  • it->has_value == false
  • or
  • it->has_value == true && it->current_value >= max_value && prev(it)->current_value < max_value

There is a diagram to show what this function does:

                                     final_it(8)
                             it(4)  max_value(8)
                               │       │        
                               ▼       ▼        
               Values:   1 2 3 4 5 6 7 8 9      
               Roaring:    x   x     x x x      
 The result bool array:       [1 0 0 1]         
Size of the bool array: 4      ▲                
                               │                
                      Start of the bool array   

I will improve the comments and make it more clear.

Comment thread
RinChanNOWWW marked this conversation as resolved.
Outdated
bool *buf, uint32_t max_value);
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The use of max_value feels odd, I would expect an API like this to take a count of booleans, e.g. to be consistent with roaring_uint32_iterator_read. Is there a reason to prefer taking a max_value here? It makes it less obvious for the caller to ensure they provide the right amount of space for the buffer.

Copy link
Copy Markdown
Author

@RinChanNOWWW RinChanNOWWW Jan 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I use max_value here because I think the meaning is a little different with the meaning of count of roaring_uint32_iterator_read. roaring_uint32_iterator_read will actually iterate count elements in the roaring bitmap while how many elements should be iterated is unknown, and we need to iterate the iterator until it->value >= max_value.


/** DEPRECATED, use `roaring_uint32_iterator_read`. */
CROARING_DEPRECATED static inline uint32_t roaring_read_uint32_iterator(
roaring_uint32_iterator_t *it, uint32_t *buf, uint32_t count) {
Expand Down
15 changes: 15 additions & 0 deletions microbenchmarks/bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,21 @@ struct to_array64 {
auto ToArray64 = BasicBench<to_array64>;
BENCHMARK(ToArray64);

// Benchmark body for roaring_bitmap_range_bool_array: expands a range of every
// loaded bitmap into the shared `array_buffer_bool` buffer.
struct to_array_bool {
    // Returns a checksum built from the first slot of the buffer after each
    // conversion (presumably so the compiler cannot discard the calls).
    static uint64_t run() {
        uint64_t checksum = 0;
        for (size_t idx = 0; idx < count; ++idx) {
            // NOTE(review): the range end passed here is the bitmap's
            // cardinality, not its maximum value + 1, so only the values in
            // [0, cardinality) are converted - confirm this is intended.
            const uint64_t cardinality =
                roaring_bitmap_get_cardinality(bitmaps[idx]);
            roaring_bitmap_range_bool_array(bitmaps[idx], 0, cardinality,
                                            array_buffer_bool);
            checksum += array_buffer_bool[0];
        }
        return checksum;
    }
};
auto ToArrayBool = BasicBench<to_array_bool>;
BENCHMARK(ToArrayBool);

struct iterate_all {
static uint64_t run() {
uint64_t marker = 0;
Expand Down
2 changes: 2 additions & 0 deletions microbenchmarks/bench.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ roaring64_bitmap_t **bitmaps64 = NULL;
Roaring64Map **bitmaps64cpp = NULL;
uint32_t *array_buffer;
uint64_t *array_buffer64;
bool *array_buffer_bool;
uint32_t maxvalue = 0;
uint32_t maxcard = 0;

Expand Down Expand Up @@ -200,6 +201,7 @@ static roaring_bitmap_t **create_all_bitmaps(size_t *howmany,
}
array_buffer = (uint32_t *)malloc(maxcard * sizeof(uint32_t));
array_buffer64 = (uint64_t *)malloc(maxcard * sizeof(uint64_t));
array_buffer_bool = (bool *)malloc(maxvalue + 1);
return answer;
}

Expand Down
28 changes: 28 additions & 0 deletions src/containers/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,34 @@ bool array_container_iterate64(const array_container_t *cont, uint32_t base,
return true;
}

/*
 * Marks in `buf` the values of the array container, starting at the iterator
 * position `it->index` and stopping before the first value that is not
 * smaller than `*max_value` (or at the end of the container when `max_value`
 * is NULL).
 *
 * `buf` is indexed relative to the value found at the initial iterator
 * position: a value `v` is recorded as `buf[v - first_value] = true`. Only
 * `true` is ever written. `it->index` is advanced past every value read.
 *
 * Returns true and stores the next value in `*value_out` when the container
 * still holds a value after reading; returns false when it is exhausted.
 */
CROARING_ALLOW_UNALIGNED
bool array_container_iterator_read_into_bool(const array_container_t *ac,
                                             roaring_container_iterator_t *it,
                                             bool *buf,
                                             const uint16_t *max_value,
                                             uint16_t *value_out) {
    const int32_t start = it->index;

    if (max_value != NULL) {
        for (; it->index < ac->cardinality; it->index++) {
            const uint16_t value = ac->array[it->index];
            if (value >= *max_value) {
                // First value outside the requested range: report it and
                // leave the iterator positioned on it.
                *value_out = value;
                return true;
            }
            // The base value is read inside the loop so that nothing is
            // dereferenced when the iterator is already at the end.
            buf[value - ac->array[start]] = true;
        }
        return false;
    }

    // No upper bound: consume the rest of the container.
    // TODO: SIMD optimization
    for (; it->index < ac->cardinality; it->index++) {
        buf[ac->array[it->index] - ac->array[start]] = true;
    }
    return false;
}

#ifdef __cplusplus
}
}
Expand Down
47 changes: 47 additions & 0 deletions src/containers/bitset.c
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,53 @@ bool bitset_container_intersect(const bitset_container_t *src_1,
return false;
}

/*
 * Marks in `buf` the values of the bitset container, starting at the iterator
 * position `it->index` and stopping before the first value that is not
 * smaller than `*max_value` (or at the end of the container when `max_value`
 * is NULL).
 *
 * `buf` is indexed relative to the initial iterator position: a value `v` is
 * recorded as `buf[v - it->index] = true`. Only `true` is ever written.
 *
 * @param bc         bitset container to read from
 * @param it         iterator state; `it->index` is the value to start from
 * @param buf        output buffer, indexed relative to the starting value
 * @param max_value  exclusive upper bound, or NULL to read the whole container
 * @param value_out  receives the next value when true is returned
 * @return true if the container holds a value >= `*max_value`; in that case
 *         `*value_out` and `it->index` are set to the first such value.
 *         Returns false when the container is exhausted.
 */
CROARING_ALLOW_UNALIGNED
bool bitset_container_iterator_read_into_bool(const bitset_container_t *bc,
                                              roaring_container_iterator_t *it,
                                              bool *buf,
                                              const uint16_t *max_value,
                                              uint16_t *value_out) {
    // Words with an index strictly below `bound_wordindex` only contain
    // values smaller than the bound, so they can be copied without a
    // per-value comparison.
    uint32_t bound_wordindex = BITSET_CONTAINER_SIZE_IN_WORDS;
    if (max_value != NULL) {
        bound_wordindex = *max_value / 64;
        assert(bound_wordindex < BITSET_CONTAINER_SIZE_IN_WORDS);
    }

    const uint16_t initial_value = (uint16_t)it->index;
    uint32_t wordindex = it->index / 64;
    // Discard the bits that precede the iterator position in the first word.
    uint64_t word = bc->words[wordindex] & (UINT64_MAX << (it->index % 64));

    while (wordindex < bound_wordindex) {
        // TODO: SIMD optimization
        while (word != 0) {
            const uint16_t value =
                (uint16_t)(wordindex * 64 + roaring_trailing_zeroes(word));
            buf[value - initial_value] = true;
            word &= word - 1;  // clear the lowest set bit
        }
        wordindex++;
        if (wordindex < BITSET_CONTAINER_SIZE_IN_WORDS) {
            word = bc->words[wordindex];
        }
    }

    // Without a bound every word has been processed: the container is drained.
    if (max_value == NULL) {
        return false;
    }

    // Scan the word that contains the bound and, if no value >= *max_value is
    // found there, keep scanning the following words. Stopping after the
    // boundary word would wrongly report the container as drained while
    // larger values are still present.
    for (;;) {
        while (word != 0) {
            const uint16_t value =
                (uint16_t)(wordindex * 64 + roaring_trailing_zeroes(word));
            if (value >= *max_value) {
                *value_out = value;
                it->index = value;
                return true;
            }
            buf[value - initial_value] = true;
            word &= word - 1;  // clear the lowest set bit
        }
        wordindex++;
        if (wordindex >= BITSET_CONTAINER_SIZE_IN_WORDS) {
            return false;  // no value left in the container
        }
        word = bc->words[wordindex];
    }
}

#if CROARING_IS_X64
#ifndef CROARING_WORDS_IN_AVX2_REG
#define CROARING_WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t)
Expand Down
22 changes: 22 additions & 0 deletions src/containers/containers.c
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,28 @@ bool container_iterator_read_into_uint64(const container_t *c, uint8_t typecode,
}
}

/*
 * Type-dispatching front end: forwards the request to the
 * `*_container_iterator_read_into_bool` implementation that matches the
 * container type and returns its result unchanged.
 */
bool container_iterator_read_into_bool(const container_t *c, uint8_t typecode,
                                       roaring_container_iterator_t *it,
                                       bool *buf, const uint16_t *max_value,
                                       uint16_t *value_out) {
    // `typecode` is passed by address, presumably so that the helper can
    // replace it with the type of the underlying container.
    c = container_unwrap_shared(c, &typecode);

    if (typecode == BITSET_CONTAINER_TYPE) {
        return bitset_container_iterator_read_into_bool(
            const_CAST_bitset(c), it, buf, max_value, value_out);
    }
    if (typecode == ARRAY_CONTAINER_TYPE) {
        return array_container_iterator_read_into_bool(
            const_CAST_array(c), it, buf, max_value, value_out);
    }
    if (typecode == RUN_CONTAINER_TYPE) {
        return run_container_iterator_read_into_bool(
            const_CAST_run(c), it, buf, max_value, value_out);
    }

    // Any other typecode is a programming error.
    assert(false);
    roaring_unreachable;
    return false;
}

bool container_iterator_skip(const container_t *c, uint8_t typecode,
roaring_container_iterator_t *it,
uint32_t skip_count, uint32_t *consumed_count,
Expand Down
Loading