Fix guf_dict_init_with_capacity kv_idx_cap calculation

This commit is contained in:
jun 2025-03-29 17:32:13 +01:00
parent ae7814fe7c
commit f3e184da73
2 changed files with 17 additions and 5 deletions

View File

@ -278,12 +278,11 @@ GUF_DICT_KWRDS GUF_DICT_NAME *GUF_CAT(GUF_DICT_NAME, _try_init_with_capacity)(GU
const size_t desired_idx_cap = (size_t)guf_min_f64(kv_elem_capacity * 1.0 / GUF_DICT_MAX_LOAD_FACTOR, MAX_IDX_CAP); const size_t desired_idx_cap = (size_t)guf_min_f64(kv_elem_capacity * 1.0 / GUF_DICT_MAX_LOAD_FACTOR, MAX_IDX_CAP);
// Capacities must be powers of two. // Capacities must be powers of two.
size_t kv_idx_cap = 1; size_t kv_idx_cap = 1;
while ((kv_idx_cap <= MAX_IDX_CAP / 2) && ((kv_idx_cap << 1) <= desired_idx_cap)) { while ((kv_idx_cap <= MAX_IDX_CAP / 2) && (kv_idx_cap <= desired_idx_cap)) {
kv_idx_cap <<= 1; kv_idx_cap <<= 1;
} }
GUF_ASSERT_RELEASE(kv_idx_cap >= (size_t)ht->kv_elems.capacity && kv_idx_cap <= MAX_IDX_CAP);
GUF_ASSERT_RELEASE(guf_is_pow2_size_t(kv_idx_cap)); GUF_ASSERT_RELEASE(guf_is_pow2_size_t(kv_idx_cap));
GUF_ASSERT_RELEASE(kv_idx_cap >= (size_t)ht->kv_elems.capacity && kv_idx_cap <= MAX_IDX_CAP);
const size_t num_bytes = kv_idx_cap * sizeof(GUF_DICT_KV_META_T); const size_t num_bytes = kv_idx_cap * sizeof(GUF_DICT_KV_META_T);
GUF_ASSERT_RELEASE(!guf_mul_is_overflow_size_t(kv_idx_cap, sizeof(GUF_DICT_KV_META_T)) && num_bytes <= GUF_ALLOC_MAX_BYTES(GUF_DICT_KV_META_T)); GUF_ASSERT_RELEASE(!guf_mul_is_overflow_size_t(kv_idx_cap, sizeof(GUF_DICT_KV_META_T)) && num_bytes <= GUF_ALLOC_MAX_BYTES(GUF_DICT_KV_META_T));

View File

@ -18,11 +18,15 @@ private:
dbuf_char text_buf {}; dbuf_char text_buf {};
std::vector<char> text_vec {}; std::vector<char> text_vec {};
void insert_lookup() void insert_lookup(std::optional<ptrdiff_t> inital_dict_cap = {})
{ {
std::unordered_map<std::string_view, int32_t> word_cnt_map {}; std::unordered_map<std::string_view, int32_t> word_cnt_map {};
dict_sv_i32 word_cnt_dict {}; dict_sv_i32 word_cnt_dict {};
if (inital_dict_cap) {
dict_sv_i32_init_with_capacity(&word_cnt_dict, &guf_allocator_libc, inital_dict_cap.value());
} else {
dict_sv_i32_init(&word_cnt_dict, &guf_allocator_libc); dict_sv_i32_init(&word_cnt_dict, &guf_allocator_libc);
}
dbuf_str_view delims = dbuf_str_view_new(&guf_allocator_libc); dbuf_str_view delims = dbuf_str_view_new(&guf_allocator_libc);
for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_WHITESPACE); ++i) { for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_WHITESPACE); ++i) {
@ -347,11 +351,18 @@ public:
if (TEST_CHECK(load_file(TEST_DATA_DIR "/utf8-test.txt"))) { if (TEST_CHECK(load_file(TEST_DATA_DIR "/utf8-test.txt"))) {
insert_lookup(); insert_lookup();
for (ptrdiff_t i = 0; i <= 64; ++i) {
insert_lookup(i);
}
insert_lookup(512);
insert_lookup(1997);
insert_lookup(1999);
} }
free_file(); free_file();
if (TEST_CHECK(load_file(TEST_DATA_DIR "/bartleby.txt"))) { if (TEST_CHECK(load_file(TEST_DATA_DIR "/bartleby.txt"))) {
insert_lookup(); insert_lookup();
insert_lookup(201);
} }
free_file(); free_file();
@ -360,3 +371,5 @@ public:
return passed; return passed;
} }
}; };