Add dict_erase tests

This commit is contained in:
jun 2025-03-03 16:14:13 +01:00
parent a41ff868f2
commit 8e6ffcdc70
3 changed files with 196 additions and 27 deletions

View File

@ -152,6 +152,7 @@ GUF_DICT_KWRDS ptrdiff_t GUF_CAT(GUF_DICT_NAME, _iter_to_idx)(const GUF_DICT_NAM
GUF_DICT_KWRDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _find_val_if)(GUF_DICT_NAME *ht, GUF_CAT(GUF_DICT_NAME, _iter) begin, GUF_CAT(GUF_DICT_NAME, _iter) end, bool (*predicate)(const GUF_DICT_VAL_T *)); GUF_DICT_KWRDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _find_val_if)(GUF_DICT_NAME *ht, GUF_CAT(GUF_DICT_NAME, _iter) begin, GUF_CAT(GUF_DICT_NAME, _iter) end, bool (*predicate)(const GUF_DICT_VAL_T *));
#endif #endif
GUF_DICT_KWRDS bool GUF_CAT(GUF_DICT_NAME, _debug_valid_size)(const GUF_DICT_NAME *ht);
// #define GUF_DICT_IMPL /* DEBUGGGGGGGGG */ // #define GUF_DICT_IMPL /* DEBUGGGGGGGGG */
@ -159,6 +160,17 @@ GUF_DICT_KWRDS ptrdiff_t GUF_CAT(GUF_DICT_NAME, _iter_to_idx)(const GUF_DICT_NAM
#include "guf_assert.h" #include "guf_assert.h"
// Debug helper: walks all kv_indices_cap slots of ht->kv_indices and counts the
// occupied ones (kv_idx is neither GUF_DICT_KV_IDX_NULL nor GUF_DICT_KV_IDX_TOMBSTONE).
// Returns true if that count equals ht->kv_elems.size.
GUF_DICT_KWRDS bool GUF_CAT(GUF_DICT_NAME, _debug_valid_size)(const GUF_DICT_NAME *ht)
{
    ptrdiff_t num_occupied = 0;
    for (ptrdiff_t slot = 0; slot < ht->kv_indices_cap; ++slot) {
        if (ht->kv_indices[slot].kv_idx == GUF_DICT_KV_IDX_NULL) {
            continue; // Empty slot.
        }
        if (ht->kv_indices[slot].kv_idx == GUF_DICT_KV_IDX_TOMBSTONE) {
            continue; // Erased slot.
        }
        num_occupied += 1;
    }
    return num_occupied == ht->kv_elems.size;
}
static inline void GUF_CAT(GUF_DICT_KV_NAME, _free)(GUF_DICT_KV_NAME *kv, void *ctx) static inline void GUF_CAT(GUF_DICT_KV_NAME, _free)(GUF_DICT_KV_NAME *kv, void *ctx)
{ {
(void)ctx; (void)ctx;
@ -278,7 +290,7 @@ static size_t GUF_CAT(GUF_DICT_NAME, _find_idx)(GUF_DICT_NAME *ht, const GUF_DIC
const size_t start_idx = idx; const size_t start_idx = idx;
size_t first_tombstone_idx = SIZE_MAX; size_t first_tombstone_idx = SIZE_MAX;
size_t probe_len = 0; size_t probe_len = 0;
// size_t seen_occupied = 0; // This allows us to bail out early once we visited every non-null/non-tombstone kv_idx.
do { do {
if (ht->kv_indices[idx].kv_idx == GUF_DICT_KV_IDX_NULL) { // 1.) Empty. if (ht->kv_indices[idx].kv_idx == GUF_DICT_KV_IDX_NULL) { // 1.) Empty.
if (first_tombstone_idx != SIZE_MAX) { if (first_tombstone_idx != SIZE_MAX) {
@ -293,15 +305,17 @@ static size_t GUF_CAT(GUF_DICT_NAME, _find_idx)(GUF_DICT_NAME *ht, const GUF_DIC
first_tombstone_idx = idx; first_tombstone_idx = idx;
} }
goto probe; goto probe;
} else if (hash == ht->kv_indices[idx].key_hash && GUF_DICT_KEY_T_EQ(key, &GUF_CAT(GUF_DICT_KV_DBUF, _at)(&ht->kv_elems, ht->kv_indices[idx].kv_idx)->key)) { // 3.) Key already exists. } else if (hash == ht->kv_indices[idx].key_hash && GUF_DICT_KEY_T_EQ(key, &(GUF_CAT(GUF_DICT_KV_DBUF, _at)(&ht->kv_elems, ht->kv_indices[idx].kv_idx)->key))) { // 3.) Key already exists.
ht->max_probelen = GUF_MAX((ptrdiff_t)probe_len, ht->max_probelen); ht->max_probelen = GUF_MAX((ptrdiff_t)probe_len, ht->max_probelen);
*key_exists = true; *key_exists = true;
return idx; return idx;
} else { // 4.) Have to probe due to hash-collision (idx is already occupied, but not by the key). } else { // 4.) Have to probe due to hash-collision/tombstone.
probe: probe:
++probe_len; ++probe_len;
// NOTE: Add the probe_offset to start_idx and not to idx. // if (ht->kv_indices[idx].kv_idx != GUF_DICT_KV_IDX_NULL && ht->kv_indices[idx].kv_idx != GUF_DICT_KV_IDX_TOMBSTONE) {
idx = GUF_MOD_CAP(start_idx + GUF_CAT(GUF_DICT_NAME, _probe_offset)(probe_len)); // ++seen_occupied; // && seen_occupied <= (size_t)ht->kv_elems.size
// }
idx = GUF_MOD_CAP(start_idx + GUF_CAT(GUF_DICT_NAME, _probe_offset)(probe_len)); // NOTE: Add probe_offset to start_idx and not to idx.
GUF_ASSERT((ptrdiff_t)probe_len <= (ht->kv_elems.size + ht->num_tombstones)); GUF_ASSERT((ptrdiff_t)probe_len <= (ht->kv_elems.size + ht->num_tombstones));
} }
} while (idx != start_idx && probe_len < (size_t)ht->kv_indices_cap); } while (idx != start_idx && probe_len < (size_t)ht->kv_indices_cap);
@ -317,19 +331,10 @@ static size_t GUF_CAT(GUF_DICT_NAME, _find_idx)(GUF_DICT_NAME *ht, const GUF_DIC
#undef GUF_MOD_CAP #undef GUF_MOD_CAP
} }
GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, GUF_DICT_KEY_T *key, GUF_DICT_VAL_T *val, guf_cpy_opt key_opt, guf_cpy_opt val_opt, guf_err *err)
static void GUF_CAT(GUF_DICT_NAME, _try_grow_if_necessary)(GUF_DICT_NAME *ht, guf_err *err)
{ {
GUF_ASSERT_RELEASE(GUF_CAT(GUF_DICT_NAME, _valid)(ht)); GUF_ASSERT_RELEASE(GUF_CAT(GUF_DICT_NAME, _valid)(ht));
if (!key || !val) {
guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, GUF_ERR_MSG("in function dict_try_insert: key or val argument is NULL"));
return;
}
if ((size_t)ht->kv_elems.size == GUF_DICT_MAX_SIZE) {
guf_err_set_or_panic(err, GUF_ERR_INT_OVERFLOW, GUF_ERR_MSG("in function dict_try_insert: dict has reached its max size (UINT64_MAX - 2 or UINT32_MAX - 2)"));
return;
}
#ifdef GUF_DICT_PROBE_LINEAR #ifdef GUF_DICT_PROBE_LINEAR
const double MAX_LOAD_FAC = 0.6; const double MAX_LOAD_FAC = 0.6;
#else #else
@ -343,7 +348,7 @@ GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, GUF_D
if (ht->kv_indices_cap == 0) { // 1.a) Allocate initial kv-index-buffer. if (ht->kv_indices_cap == 0) { // 1.a) Allocate initial kv-index-buffer.
GUF_DICT_KV_META_T *new_kv_indices = allocator->alloc(KV_META_START_CAP * sizeof(GUF_DICT_KV_META_T), allocator->ctx); GUF_DICT_KV_META_T *new_kv_indices = allocator->alloc(KV_META_START_CAP * sizeof(GUF_DICT_KV_META_T), allocator->ctx);
if (new_kv_indices == NULL) { if (new_kv_indices == NULL) {
guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, GUF_ERR_MSG("in function dict_try_insert: Initial allocation failed")); guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, GUF_ERR_MSG("in function dict_try_grow: Initial allocation failed"));
return; return;
} }
ht->kv_indices = new_kv_indices; ht->kv_indices = new_kv_indices;
@ -352,7 +357,7 @@ GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, GUF_D
new_kv_indices[i].kv_idx = GUF_DICT_KV_IDX_NULL; new_kv_indices[i].kv_idx = GUF_DICT_KV_IDX_NULL;
new_kv_indices[i].key_hash = 0; new_kv_indices[i].key_hash = 0;
} }
} else if (GUF_CAT(GUF_DICT_NAME, _load_factor)(ht) > MAX_LOAD_FAC) { // 1.b) Grow kv-index-buffer. } else if (GUF_CAT(GUF_DICT_NAME, _load_factor)(ht) > MAX_LOAD_FAC) { // 1.b) Grow kv-index-buffer if necessary.
GUF_ASSERT(ht->kv_indices); GUF_ASSERT(ht->kv_indices);
const ptrdiff_t old_size = ht->kv_indices_cap * sizeof(GUF_DICT_KV_META_T); const ptrdiff_t old_size = ht->kv_indices_cap * sizeof(GUF_DICT_KV_META_T);
ptrdiff_t new_size = 0; ptrdiff_t new_size = 0;
@ -377,7 +382,7 @@ GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, GUF_D
ht->kv_indices[i].key_hash = 0; ht->kv_indices[i].key_hash = 0;
} }
for (ptrdiff_t kv_idx = 0; kv_idx < ht->kv_elems.size; ++kv_idx) { for (ptrdiff_t kv_idx = 0; kv_idx < ht->kv_elems.size; ++kv_idx) { // Re-insert keys.
const GUF_DICT_KV_NAME *kv = GUF_CAT(GUF_DICT_KV_DBUF, _at)(&ht->kv_elems, kv_idx); const GUF_DICT_KV_NAME *kv = GUF_CAT(GUF_DICT_KV_DBUF, _at)(&ht->kv_elems, kv_idx);
GUF_ASSERT(kv); GUF_ASSERT(kv);
bool key_exists = false; bool key_exists = false;
@ -389,11 +394,33 @@ GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, GUF_D
} }
} }
guf_err_set_if_not_null(err, GUF_ERR_NONE);
GUF_ASSERT(GUF_CAT(GUF_DICT_NAME, _load_factor)(ht) <= MAX_LOAD_FAC); GUF_ASSERT(GUF_CAT(GUF_DICT_NAME, _load_factor)(ht) <= MAX_LOAD_FAC);
}
// 2.) Insert new key-value pair.
GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, GUF_DICT_KEY_T *key, GUF_DICT_VAL_T *val, guf_cpy_opt key_opt, guf_cpy_opt val_opt, guf_err *err)
{
GUF_ASSERT_RELEASE(GUF_CAT(GUF_DICT_NAME, _valid)(ht));
if (!key || !val) {
guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, GUF_ERR_MSG("in function dict_try_insert: key or val argument is NULL"));
return;
}
if ((size_t)ht->kv_elems.size == GUF_DICT_MAX_SIZE) {
guf_err_set_or_panic(err, GUF_ERR_INT_OVERFLOW, GUF_ERR_MSG("in function dict_try_insert: dict has reached its max size (UINT64_MAX - 2 or UINT32_MAX - 2)"));
return;
}
// 1.) Grow kv-index-buffer if necessary (or make the initial allocation).
GUF_CAT(GUF_DICT_NAME, _try_grow_if_necessary)(ht, err);
if (err != NULL && *err != GUF_ERR_NONE) {
guf_err_set_or_panic(err, *err, GUF_ERR_MSG("in function dict_try_insert: try_grow failed."));
return;
}
GUF_ASSERT_RELEASE(ht->kv_indices_cap > ht->kv_elems.size); GUF_ASSERT_RELEASE(ht->kv_indices_cap > ht->kv_elems.size);
// 2.) Insert new key-value pair.
bool key_exists = false; bool key_exists = false;
size_t idx = GUF_CAT(GUF_DICT_NAME, _find_idx)(ht, key, &key_exists); size_t idx = GUF_CAT(GUF_DICT_NAME, _find_idx)(ht, key, &key_exists);
if (key_exists) { if (key_exists) {
@ -463,6 +490,16 @@ GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, GUF_D
GUF_DICT_KV_NAME kv = {.key = key_cpy, .val = val_cpy}; GUF_DICT_KV_NAME kv = {.key = key_cpy, .val = val_cpy};
GUF_CAT(GUF_DICT_KV_DBUF, _try_push_val)(&ht->kv_elems, kv, err); GUF_CAT(GUF_DICT_KV_DBUF, _try_push_val)(&ht->kv_elems, kv, err);
if (err && *err != GUF_ERR_NONE) { // Insertion failed.
GUF_ASSERT(*err != GUF_ERR_IDX_RANGE && *err != GUF_ERR_INVALID_ARG);
#ifdef GUF_DICT_KEY_T_FREE
GUF_DICT_KEY_T_FREE(&kv.key, NULL);
#endif
#ifdef GUF_DICT_VAL_T_FREE
GUF_DICT_VAL_T_FREE(&kv.val, NULL);
#endif
}
} }
GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _insert)(GUF_DICT_NAME *ht, GUF_DICT_KEY_T *key, GUF_DICT_VAL_T *val, guf_cpy_opt key_opt, guf_cpy_opt val_opt) GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _insert)(GUF_DICT_NAME *ht, GUF_DICT_KEY_T *key, GUF_DICT_VAL_T *val, guf_cpy_opt key_opt, guf_cpy_opt val_opt)
@ -517,6 +554,11 @@ GUF_DICT_KWRDS bool GUF_CAT(GUF_DICT_NAME, _contains)(GUF_DICT_NAME *ht, const G
} }
bool key_exists = false; bool key_exists = false;
const size_t idx = GUF_CAT(GUF_DICT_NAME, _find_idx)(ht, key, &key_exists); const size_t idx = GUF_CAT(GUF_DICT_NAME, _find_idx)(ht, key, &key_exists);
if (key_exists) {
GUF_ASSERT(idx != SIZE_MAX);
GUF_ASSERT(ht->kv_indices[idx].kv_idx != GUF_DICT_KV_IDX_TOMBSTONE);
GUF_ASSERT(ht->kv_indices[idx].kv_idx != GUF_DICT_KV_IDX_NULL);
}
(void)idx; (void)idx;
return key_exists; return key_exists;
} }
@ -540,13 +582,16 @@ GUF_DICT_KWRDS bool GUF_CAT(GUF_DICT_NAME, _erase)(GUF_DICT_NAME *ht, const GUF_
if (!key_exists) { if (!key_exists) {
return false; return false;
} }
GUF_ASSERT((ptrdiff_t)idx < ht->kv_indices_cap); GUF_ASSERT((ptrdiff_t)idx < ht->kv_indices_cap);
const size_t kv_idx = ht->kv_indices[idx].kv_idx; const size_t kv_idx = ht->kv_indices[idx].kv_idx;
GUF_ASSERT(kv_idx < (size_t)ht->kv_elems.size);
ht->kv_indices[idx].kv_idx = GUF_DICT_KV_IDX_TOMBSTONE; ht->kv_indices[idx].kv_idx = GUF_DICT_KV_IDX_TOMBSTONE;
ht->kv_indices[idx].key_hash = 0; ht->kv_indices[idx].key_hash = 0;
ht->num_tombstones += 1;
GUF_DICT_KV_NAME *kv = GUF_CAT(GUF_DICT_KV_DBUF, _at)(&ht->kv_elems, kv_idx); GUF_DICT_KV_NAME *kv = GUF_CAT(GUF_DICT_KV_DBUF, _at)(&ht->kv_elems, kv_idx);
GUF_ASSERT(kv); GUF_ASSERT(kv);
@ -556,16 +601,45 @@ GUF_DICT_KWRDS bool GUF_CAT(GUF_DICT_NAME, _erase)(GUF_DICT_NAME *ht, const GUF_
// 1.) Switch kv_elem. // 1.) Switch kv_elem.
GUF_DICT_KV_NAME *last_kv = GUF_CAT(GUF_DICT_KV_DBUF, _back)(&ht->kv_elems); GUF_DICT_KV_NAME *last_kv = GUF_CAT(GUF_DICT_KV_DBUF, _back)(&ht->kv_elems);
GUF_ASSERT(last_kv); GUF_ASSERT(last_kv);
GUF_ASSERT(kv != last_kv);
*kv = *last_kv; *kv = *last_kv;
GUF_ASSERT(!GUF_DICT_KEY_T_EQ(key, &last_kv->key));
// 2.) Update kv_index. // 2.) Update kv_index.
bool last_key_exists = false; bool last_key_exists = false;
const size_t last_idx = GUF_CAT(GUF_DICT_NAME, _find_idx)(ht, &kv->key, &last_key_exists); const size_t last_idx = GUF_CAT(GUF_DICT_NAME, _find_idx)(ht, &last_kv->key, &last_key_exists);
GUF_ASSERT(last_idx != idx);
GUF_ASSERT(last_key_exists && (ptrdiff_t)last_idx < ht->kv_indices_cap); GUF_ASSERT(last_key_exists && (ptrdiff_t)last_idx < ht->kv_indices_cap);
GUF_ASSERT((ptrdiff_t)ht->kv_indices[last_idx].kv_idx == ht->kv_elems.size - 1);
GUF_ASSERT(ht->kv_indices[last_idx].kv_idx != GUF_DICT_KV_IDX_TOMBSTONE && ht->kv_indices[last_idx].kv_idx != GUF_DICT_KV_IDX_NULL);
ht->kv_indices[last_idx].kv_idx = kv_idx; ht->kv_indices[last_idx].kv_idx = kv_idx;
} }
ht->kv_elems.size--; ht->kv_elems.size -= 1;
ht->num_tombstones++;
GUF_ASSERT(ht->kv_elems.size >= 0);
GUF_ASSERT(ht->num_tombstones <= ht->kv_indices_cap);
GUF_ASSERT(!GUF_CAT(GUF_DICT_NAME, _contains)(ht, key));
if (ht->kv_elems.size == 0 && ht->num_tombstones > 0) { // Optimisation: We can delete all tombstones here.
ptrdiff_t del_tombstone_cnt = 0;
for (ptrdiff_t i = 0; i < ht->kv_indices_cap && del_tombstone_cnt < ht->num_tombstones; ++i) {
GUF_ASSERT(ht->kv_indices[i].kv_idx == GUF_DICT_KV_IDX_TOMBSTONE || ht->kv_indices[i].kv_idx == GUF_DICT_KV_IDX_NULL);
if (ht->kv_indices[i].kv_idx == GUF_DICT_KV_IDX_TOMBSTONE) {
ht->kv_indices[i].kv_idx = GUF_DICT_KV_IDX_NULL;
ht->kv_indices[i].key_hash = 0;
++del_tombstone_cnt;
} else {
GUF_ASSERT(ht->kv_indices[i].kv_idx == GUF_DICT_KV_IDX_NULL);
}
}
GUF_ASSERT(del_tombstone_cnt == ht->num_tombstones);
ht->num_tombstones = 0;
}
return true; return true;
} }

View File

@ -33,6 +33,7 @@ struct DictSvToIntTest : public Test
guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_COMMON_PUNCT[i]), .str = GUF_UTF8_COMMON_PUNCT[i]}; guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_COMMON_PUNCT[i]), .str = GUF_UTF8_COMMON_PUNCT[i]};
dbuf_str_view_push_val(&delims, d); dbuf_str_view_push_val(&delims, d);
} }
guf_str_view input_str = {.str = text_buf.data, .len = text_buf.size}; guf_str_view input_str = {.str = text_buf.data, .len = text_buf.size};
guf_str_view tok; guf_str_view tok;
while ((tok = guf_str_next_tok(&input_str, delims.data, delims.size, NULL, -1)).len) { while ((tok = guf_str_next_tok(&input_str, delims.data, delims.size, NULL, -1)).len) {
@ -53,10 +54,12 @@ struct DictSvToIntTest : public Test
} }
// printf("tok_len: %td ", tok.len); // printf("tok_len: %td ", tok.len);
// printf("'%.*s'\n", (int)tok.len, tok.str); // printf("'%.*s'\n", (int)tok.len, tok.str);
TEST_CHECK(dict_sv_i32_debug_valid_size(&word_cnt_dict));
} }
dbuf_str_view_free(&delims, NULL); dbuf_str_view_free(&delims, NULL);
TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == std::ssize(word_cnt_map)); TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == std::ssize(word_cnt_map));
TEST_CHECK(dict_sv_i32_debug_valid_size(&word_cnt_dict));
for (const auto & [word, cnt] : word_cnt_map ) { for (const auto & [word, cnt] : word_cnt_map ) {
guf_str_view sv = {.str = word.data(), .len = (ptrdiff_t)word.size()}; guf_str_view sv = {.str = word.data(), .len = (ptrdiff_t)word.size()};
@ -79,10 +82,101 @@ struct DictSvToIntTest : public Test
} }
TEST_CHECK(i == dict_sv_i32_size(&word_cnt_dict)); TEST_CHECK(i == dict_sv_i32_size(&word_cnt_dict));
TEST_CHECK(i == std::ssize(word_cnt_map)); TEST_CHECK(i == std::ssize(word_cnt_map));
TEST_CHECK(dict_sv_i32_debug_valid_size(&word_cnt_dict));
std::cout << "load fac: " << dict_sv_i32_load_factor(&word_cnt_dict) << ", cap: " << word_cnt_dict.kv_indices_cap << "\n"; std::cout << "load fac: " << dict_sv_i32_load_factor(&word_cnt_dict) << ", cap: " << word_cnt_dict.kv_indices_cap << "\n";
std::cout << "size: " << dict_sv_i32_size(&word_cnt_dict) << ", max probelen: " << word_cnt_dict.max_probelen << "\n"; std::cout << "size: " << dict_sv_i32_size(&word_cnt_dict) << ", max probelen: " << word_cnt_dict.max_probelen << "\n";
const double load_fac_before_erase = dict_sv_i32_load_factor(&word_cnt_dict);
const ptrdiff_t size_before_erase = dict_sv_i32_size(&word_cnt_dict);
ptrdiff_t num_del = 0;
while (dict_sv_i32_size(&word_cnt_dict) > size_before_erase / 2) {
dict_sv_i32_kv *kv = NULL;
if (num_del % 2) {
dict_sv_i32_iter it = dict_sv_i32_begin(&word_cnt_dict);
GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, it));
kv = it.ptr;
} else {
dict_sv_i32_iter rit = dict_sv_i32_rbegin(&word_cnt_dict);
GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, rit));
kv = rit.ptr;
}
GUF_ASSERT_RELEASE(kv);
const guf_str_view key = kv->key;
const bool del_success = dict_sv_i32_erase(&word_cnt_dict, &key);
TEST_CHECK(del_success);
TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key));
std::string_view sv(key.str, (size_t)key.len);
if (TEST_CHECK(word_cnt_map.contains(sv))) {
word_cnt_map.erase(sv);
}
TEST_CHECK(!word_cnt_map.contains(sv));
if (del_success) {
++num_del;
}
}
TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) >= 0);
TEST_CHECK(size_before_erase - num_del == dict_sv_i32_size(&word_cnt_dict));
TEST_CHECK(std::ssize(word_cnt_map) == dict_sv_i32_size(&word_cnt_dict));
if (dict_sv_i32_size(&word_cnt_dict) != 0) {
TEST_CHECK(load_fac_before_erase == dict_sv_i32_load_factor(&word_cnt_dict));
} else {
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0);
}
if (dict_sv_i32_size(&word_cnt_dict) >= 4) {
dict_sv_i32_kv_dbuf_iter it = dict_sv_i32_begin(&word_cnt_dict);
it = dict_sv_i32_iter_next(&word_cnt_dict, it, 1);
GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, it));
guf_str_view key = it.ptr->key;
bool del_success = dict_sv_i32_erase(&word_cnt_dict, &key);
TEST_CHECK(del_success);
TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key));
std::string_view sv(key.str, (size_t)key.len);
if (TEST_CHECK(word_cnt_map.contains(sv))) {
word_cnt_map.erase(sv);
}
it = dict_sv_i32_rbegin(&word_cnt_dict);
it = dict_sv_i32_iter_next(&word_cnt_dict, it, 1);
GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, it));
key = it.ptr->key;
del_success = dict_sv_i32_erase(&word_cnt_dict, &key);
TEST_CHECK(del_success);
TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key));
sv = std::string_view(key.str, (size_t)key.len);
if (TEST_CHECK(word_cnt_map.contains(sv))) {
word_cnt_map.erase(sv);
}
}
TEST_CHECK(std::ssize(word_cnt_map) == dict_sv_i32_size(&word_cnt_dict));
i = 0;
GUF_CNT_FOREACH(&word_cnt_dict, dict_sv_i32, kv_it) {
const dict_sv_i32_kv *kv = kv_it.ptr;
if (TEST_CHECK(kv)) {
const int32_t cnt = kv->val;
const std::string_view sv(kv->key.str, (size_t)kv->key.len);
if (TEST_CHECK(word_cnt_map.contains(sv))) {
TEST_CHECK(word_cnt_map.at(sv) == cnt);
}
++i;
}
}
TEST_CHECK(i == word_cnt_dict.kv_elems.size);
TEST_CHECK(i == std::ssize(word_cnt_map));
dict_sv_i32_free(&word_cnt_dict, NULL); dict_sv_i32_free(&word_cnt_dict, NULL);
bool dbuf_null = !word_cnt_dict.kv_elems.data && !word_cnt_dict.kv_elems.allocator && !word_cnt_dict.kv_elems.capacity && !word_cnt_dict.kv_elems.size; bool dbuf_null = !word_cnt_dict.kv_elems.data && !word_cnt_dict.kv_elems.allocator && !word_cnt_dict.kv_elems.capacity && !word_cnt_dict.kv_elems.size;
TEST_CHECK(dbuf_null && !word_cnt_dict.kv_indices && !word_cnt_dict.kv_indices_cap && !word_cnt_dict.max_probelen && !word_cnt_dict.num_tombstones); TEST_CHECK(dbuf_null && !word_cnt_dict.kv_indices && !word_cnt_dict.kv_indices_cap && !word_cnt_dict.max_probelen && !word_cnt_dict.num_tombstones);

View File

@ -3,3 +3,4 @@
- unicode normalisation - unicode normalisation
- fix 32-bit dict (and add 32/64 bit defs in common.h) - fix 32-bit dict (and add 32/64 bit defs in common.h)
- guf_dict: allow manual resize (and possibly resize if load fac gets too high after erase)