Skip to content
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
295 changes: 279 additions & 16 deletions test/src/unit-capi-consolidation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
*
* The MIT License
*
* @copyright Copyright (c) 2017-2021 TileDB Inc.
* @copyright Copyright (c) 2017-2025 TileDB Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -145,6 +145,11 @@ struct ConsolidationFx {
void get_array_meta_files_dense(std::vector<std::string>& files);
void get_array_meta_vac_files_dense(std::vector<std::string>& files);
void get_vac_files(std::vector<std::string>& files, bool dense = true);
void write_and_consolidate_fragments(
const char* array_name,
uint64_t num_small_cells,
uint64_t long_string_length,
uint64_t consolidation_budget);

// Used to get the number of directories or files of another directory
struct get_num_struct {
Expand Down Expand Up @@ -7587,27 +7592,285 @@ TEST_CASE_METHOD(
}
}

/**
* Helper method which attempts to validate fragment consolidation by writing
* `num_small_cells` small cells before writing one large cell of length
* `long_string_length`. Consolidation will succeed up to some value of
* `long_string_length`, and fail after by disrespecting the memory budget.
*
* @param array_name The name of the array.
* @param num_small_cells The number of small cells to consolidate.
* @param long_string_length The length of the long string to write.
* @param consolidation_budget The total budget to set for consolidation.
*/
void ConsolidationFx::write_and_consolidate_fragments(
const char* array_name,
uint64_t num_small_cells,
uint64_t long_string_length,
uint64_t consolidation_budget) {
std::string words[8] = {
"foo", "bar", "apple", "orange", "banana", "red", "yellow", "blue"};

tiledb_config_t* cfg;
tiledb_error_t* err = nullptr;
int rc = tiledb_config_alloc(&cfg, &err);
REQUIRE(rc == TILEDB_OK);
REQUIRE(err == nullptr);

// Create array
tiledb_dimension_t* dim;
uint64_t tile_extent = std::max(num_small_cells, long_string_length);
uint64_t dim_domain[] = {0, tile_extent};
rc = tiledb_dimension_alloc(
ctx_, "dim", TILEDB_UINT64, &dim_domain, &tile_extent, &dim);
CHECK(rc == TILEDB_OK);
Comment thread
bekadavis9 marked this conversation as resolved.
Outdated
tiledb_domain_t* domain;
rc = tiledb_domain_alloc(ctx_, &domain);
CHECK(rc == TILEDB_OK);
rc = tiledb_domain_add_dimension(ctx_, domain, dim);
CHECK(rc == TILEDB_OK);
tiledb_attribute_t* attr;
rc = tiledb_attribute_alloc(ctx_, "attr", TILEDB_CHAR, &attr);
CHECK(rc == TILEDB_OK);
rc = set_attribute_compression_filter(ctx_, attr, TILEDB_FILTER_GZIP, -1);
CHECK(rc == TILEDB_OK);
rc = tiledb_attribute_set_cell_val_num(ctx_, attr, TILEDB_VAR_NUM);
CHECK(rc == TILEDB_OK);
tiledb_array_schema_t* array_schema;
rc = tiledb_array_schema_alloc(ctx_, TILEDB_SPARSE, &array_schema);
CHECK(rc == TILEDB_OK);
rc = tiledb_array_schema_set_cell_order(ctx_, array_schema, TILEDB_ROW_MAJOR);
CHECK(rc == TILEDB_OK);
rc = tiledb_array_schema_set_tile_order(ctx_, array_schema, TILEDB_ROW_MAJOR);
CHECK(rc == TILEDB_OK);
rc = tiledb_array_schema_set_capacity(ctx_, array_schema, 2);
CHECK(rc == TILEDB_OK);
rc = tiledb_array_schema_set_domain(ctx_, array_schema, domain);
CHECK(rc == TILEDB_OK);
rc = tiledb_array_schema_add_attribute(ctx_, array_schema, attr);
CHECK(rc == TILEDB_OK);
rc = tiledb_array_schema_check(ctx_, array_schema);
CHECK(rc == TILEDB_OK);

if (encryption_type_ != TILEDB_NO_ENCRYPTION) {
std::string encryption_type_string =
encryption_type_str((tiledb::sm::EncryptionType)encryption_type_);
rc = tiledb_config_set(
cfg, "sm.encryption_type", encryption_type_string.c_str(), &err);
REQUIRE(err == nullptr);
rc = tiledb_config_set(cfg, "sm.encryption_key", encryption_key_, &err);
REQUIRE(rc == TILEDB_OK);
REQUIRE(err == nullptr);
// Do not remove the array when recreating context to set the new config
vfs_test_setup_.update_config(cfg);
ctx_ = vfs_test_setup_.ctx_c;
vfs_ = vfs_test_setup_.vfs_c;
}
rc = tiledb_array_create(ctx_, array_name, array_schema);
REQUIRE(rc == TILEDB_OK);
tiledb_attribute_free(&attr);
tiledb_dimension_free(&dim);
tiledb_domain_free(&domain);
tiledb_array_schema_free(&array_schema);

// Prepare to write small cells to the array
std::string test_str = "";
std::vector<uint64_t> offsets;
offsets.reserve(num_small_cells);
std::vector<uint64_t> coords;
coords.reserve(num_small_cells);
offsets.push_back(0);
for (uint64_t i = 0; i < num_small_cells; i++) {
std::string word = words[i % 8];
test_str += word;
coords.push_back(i + 1);
if (i != num_small_cells - 1) {
offsets.push_back(offsets[i] + word.length());
}
}
std::vector<char> test_vec(test_str.begin(), test_str.end());
uint64_t values_size = test_vec.size();
uint64_t offsets_size = sizeof(uint64_t) * offsets.size();
uint64_t coords_size = sizeof(uint64_t) * coords.size();

// Write small cells to the array
tiledb_array_t* array;
rc = tiledb_array_alloc(ctx_, array_name, &array);
CHECK(rc == TILEDB_OK);
rc = tiledb_array_open(ctx_, array, TILEDB_WRITE);
REQUIRE(rc == TILEDB_OK);
tiledb_query_t* query;
rc = tiledb_query_alloc(ctx_, array, TILEDB_WRITE, &query);
CHECK(rc == TILEDB_OK);
rc = tiledb_query_set_layout(ctx_, query, TILEDB_GLOBAL_ORDER);
CHECK(rc == TILEDB_OK);
rc = tiledb_query_set_data_buffer(
ctx_, query, "attr", test_str.data(), &values_size);
CHECK(rc == TILEDB_OK);
rc = tiledb_query_set_offsets_buffer(
ctx_, query, "attr", offsets.data(), &offsets_size);
CHECK(rc == TILEDB_OK);
rc = tiledb_query_set_data_buffer(
ctx_, query, "dim", coords.data(), &coords_size);
CHECK(rc == TILEDB_OK);
rc = tiledb_query_submit_and_finalize(ctx_, query);
CHECK(rc == TILEDB_OK);
rc = tiledb_array_close(ctx_, array);
CHECK(rc == TILEDB_OK);
tiledb_array_free(&array);
tiledb_query_free(&query);

// Prepare to write long string to the array
const std::string test_chars = "abcdefghijklmnopqrstuvwxyz";
std::random_device rd;
Comment thread
bekadavis9 marked this conversation as resolved.
Outdated
std::mt19937 gen(rd());
std::uniform_int_distribution<size_t> dist(0, test_chars.length() - 1);
std::vector<char> long_string;
for (uint64_t i = 0; i < long_string_length; i++) {
long_string.emplace_back(test_chars[dist(gen)]);
}
uint64_t str_size = long_string.size();
uint64_t offset = 0;
uint64_t offset_size = sizeof(uint64_t);
uint64_t coord = coords.back();
uint64_t coord_size = sizeof(uint64_t);

// Write long string to the array
rc = tiledb_array_alloc(ctx_, array_name, &array);
CHECK(rc == TILEDB_OK);
rc = tiledb_array_open(ctx_, array, TILEDB_WRITE);
CHECK(rc == TILEDB_OK);
rc = tiledb_query_alloc(ctx_, array, TILEDB_WRITE, &query);
CHECK(rc == TILEDB_OK);
rc = tiledb_query_set_layout(ctx_, query, TILEDB_GLOBAL_ORDER);
CHECK(rc == TILEDB_OK);
rc = tiledb_query_set_data_buffer(
ctx_, query, "attr", long_string.data(), &str_size);
CHECK(rc == TILEDB_OK);
rc = tiledb_query_set_offsets_buffer(
ctx_, query, "attr", &offset, &offset_size);
CHECK(rc == TILEDB_OK);
rc = tiledb_query_set_data_buffer(ctx_, query, "dim", &coord, &coord_size);
CHECK(rc == TILEDB_OK);
rc = tiledb_query_submit_and_finalize(ctx_, query);
CHECK(rc == TILEDB_OK);
rc = tiledb_array_close(ctx_, array);
CHECK(rc == TILEDB_OK);
tiledb_array_free(&array);
tiledb_query_free(&query);

// Consolidate
rc = tiledb_config_set(
cfg,
"sm.mem.total_budget",
std::to_string(consolidation_budget).c_str(),
&err);
REQUIRE(rc == TILEDB_OK);
REQUIRE(err == nullptr);
rc = tiledb_config_set(cfg, "sm.consolidation.step_min_frags", "2", &err);
REQUIRE(rc == TILEDB_OK);
REQUIRE(err == nullptr);
tiledb_array_consolidate(ctx_, array_name, cfg);
Comment thread
bekadavis9 marked this conversation as resolved.
Outdated
tiledb_config_free(&cfg);
Comment thread
bekadavis9 marked this conversation as resolved.
}

/**
 * Runs `write_and_consolidate_fragments` with several combinations of
 * small-cell count, long-string length and memory budget. Each section sets
 * its own parameters; sections that expect consolidation to fail record a
 * substring that must appear in the last error message on `ctx_`.
 */
TEST_CASE_METHOD(
    ConsolidationFx,
    "C API: Test sparse fragment consolidation",
    "[capi][consolidation][fragment][sparse][non-rest]") {
  const char* array_name = "fragment_consolidation_array";
  remove_array(array_name);

  uint64_t num_small_cells = 10000;
  uint64_t long_string_length = 10000;
  uint64_t consolidation_budget = 10000000;
  // Left empty by the success section; no error check is performed then.
  std::string expected_error_msg;

  SECTION(
      "Success: "
      "num small cells = 10000, "
      "long string length = 10000, "
      "consolidation_budget = 10000000 ") {
    num_small_cells = 10000;
    long_string_length = 10000;
    consolidation_budget = 10000000;
    write_and_consolidate_fragments(
        array_name, num_small_cells, long_string_length, consolidation_budget);
  }

  // Err: SparseGlobalOrderReader: Unable to copy one slab with current
  // budget/buffers
  SECTION(
      "Error after buffer growth: "
      "num small cells = 10000, "
      "long string length = 5000000, "
      "consolidation_budget = 10000000 ") {
    expected_error_msg = " Unable to copy one slab with current budget/buffers";
    num_small_cells = 10000;
    long_string_length = 5000000;
    consolidation_budget = 10000000;
    write_and_consolidate_fragments(
        array_name, num_small_cells, long_string_length, consolidation_budget);
  }

  // Error: FragmentMetadata: Cannot load R-tree; Insufficient memory budget;
  // Needed 888952 but only had 98395 from budget 499999
  SECTION(
      "Error attempting to load R-tree: "
      "num small cells = 100000, "
      "long string length = 100000, "
      "consolidation_budget = 10000000 ") {
    expected_error_msg = "Cannot load R-tree; Insufficient memory budget";
    num_small_cells = 100000;
    long_string_length = 100000;
    consolidation_budget = 10000000;
    write_and_consolidate_fragments(
        array_name, num_small_cells, long_string_length, consolidation_budget);
  }

  // SparseGlobalOrderReader: Cannot load tile offsets, computed size (16800) is
  // larger than available memory (10459), increase memory budget.
  // Total budget for array data (24999).
  SECTION(
      "Error loading tile offsets: "
      "num small cells = 1000, "
      "long string length = 1000, "
      "consolidation_budget = 500000 ") {
    expected_error_msg = "Cannot load tile offsets";

    num_small_cells = 1000;
    long_string_length = 1000;
    consolidation_budget = 500000;
    write_and_consolidate_fragments(
        array_name, num_small_cells, long_string_length, consolidation_budget);
  }

  // FragmentConsolidator: Consolidation read 0 cells; no progress can be made
  // without disrespecting the memory budget.
  // -> get above error if num_small_cells is too large
  SECTION(
      "Error after buffer growth: "
      "num small cells = 2, "
      "long string length = 20000, "
      "consolidation_budget = 50000 ") {
    expected_error_msg = "Consolidation read 0 cells";

    num_small_cells = 2;
    long_string_length = 20000;
    consolidation_budget = 50000;
    write_and_consolidate_fragments(
        array_name, num_small_cells, long_string_length, consolidation_budget);
  }

  if (!expected_error_msg.empty()) {
    tiledb_error_t* err = nullptr;
    tiledb_ctx_get_last_error(ctx_, &err);
    // A missing error must fail the test rather than crash in strstr below.
    CHECK(err != nullptr);
    if (err != nullptr) {
      const char* actual_error_msg = nullptr;
      tiledb_error_message(err, &actual_error_msg);
      CHECK(actual_error_msg != nullptr);
      if (actual_error_msg != nullptr) {
        CHECK(strstr(actual_error_msg, expected_error_msg.c_str()) != nullptr);
      }
      tiledb_error_free(&err);
    }
  }

  remove_array(array_name);
}

TEST_CASE_METHOD(
Expand Down
2 changes: 1 addition & 1 deletion test/support/src/error_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
do { \
auto rc = (thing); \
auto maybe_err = tiledb::test::error_if_any(ctx, rc); \
ASSERTER(!maybe_err.has_value()); \
ASSERTER(maybe_err == std::optional<std::string>{}); \
} while (0)

namespace tiledb::test {
Expand Down
Loading
Loading