#include "test_dict.hpp"

// NOTE(review): the names of the two system headers originally included here
// were lost in the whitespace-mangled source. The list below is reconstructed
// from what the live code in this file uses -- confirm against the original
// (the commented-out std::cout lines would additionally need <iostream>).
#include <cstdio>
#include <cstring>
#include <iterator>
#include <optional>
#include <string>
#include <string_view>
#include <unordered_map>

extern "C"
{
    #include "guf_alloc_libc.h"
    #include "guf_str.h"
    #include "impls/dict_impl.h"
    #include "impls/dbuf_impl.h"
}

/*
    DictSvToIntTest:
*/

/// Runs insert_lookup() against both text fixtures with several initial
/// capacities, then checks that the allocation tracker reports no leak.
void DictSvToIntTest::run()
{
    if (done) {
        return;
    }

    push_check_name("insert_lookup(\"utf8-test.txt\")");
    if (TEST_CHECK(load_file(TEST_DATA_DIR "/utf8-test.txt"))) {
        insert_lookup();
        for (ptrdiff_t i = 0; i <= 64; ++i) {
            insert_lookup(i);
        }
        insert_lookup(512);
        insert_lookup(1997);
        insert_lookup(1999);
    }
    free_file();
    pop_check_name();

    push_check_name("insert_lookup(\"bartleby.txt\")");
    if (TEST_CHECK(load_file(TEST_DATA_DIR "/bartleby.txt"))) {
        insert_lookup();
        insert_lookup(201);
    }
    free_file();
    pop_check_name();

    //guf_alloc_tracker_print(&allocator_ctx.tracker, NULL);
    TEST_CHECK(!guf_alloc_tracker_found_leak(&allocator_ctx.tracker));
}

/// Word-count test over the currently loaded text_buf.
///
/// Tokenises text_buf on the whitespace/punctuation delimiter tables and counts
/// every token in three containers at once: a dict_sv_i32 (string-view keys),
/// a dict_str_i32 (owning guf_str keys) and a std::unordered_map used as the
/// reference. The containers are then cross-checked, entries are erased in
/// several patterns, and tombstone / load-factor bookkeeping is verified.
///
/// @param inital_dict_cap  If set, both dicts are created with this initial
///                         capacity; otherwise they are default-initialised.
// NOTE(review): the template arguments of std::optional and std::unordered_map
// were stripped from the mangled source; <ptrdiff_t> and
// <std::string_view, int32_t> are reconstructed from usage -- confirm the
// parameter type against the declaration in test_dict.hpp.
void DictSvToIntTest::insert_lookup(std::optional<ptrdiff_t> inital_dict_cap)
{
    // Keys are views into text_buf, which outlives this function (see run()).
    std::unordered_map<std::string_view, int32_t> word_cnt_map {};
    dict_sv_i32 word_cnt_dict {};
    dict_str_i32 word_cnt_dict_str {};

    if (inital_dict_cap) {
        dict_sv_i32_init_with_capacity(&word_cnt_dict, &allocator, inital_dict_cap.value());
        dict_str_i32_init_with_capacity(&word_cnt_dict_str, &allocator, inital_dict_cap.value());
    } else {
        dict_sv_i32_init(&word_cnt_dict, &allocator);
        dict_str_i32_init(&word_cnt_dict_str, &allocator);
    }

    // Collect all delimiters (whitespace first, then common punctuation).
    dbuf_str_view delims = dbuf_str_view_new(&allocator);
    for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_WHITESPACE); ++i) {
        guf_str_view d = {.str = GUF_UTF8_WHITESPACE[i], .len = static_cast<ptrdiff_t>(strlen(GUF_UTF8_WHITESPACE[i]))};
        dbuf_str_view_push_val(&delims, d);
    }
    for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_COMMON_PUNCT); ++i) {
        guf_str_view d = {.str = GUF_UTF8_COMMON_PUNCT[i], .len = static_cast<ptrdiff_t>(strlen(GUF_UTF8_COMMON_PUNCT[i]))};
        dbuf_str_view_push_val(&delims, d);
    }

    // Insert / count phase: every token goes into all three containers.
    guf_str_tok_state tok_state = guf_str_tok_state_new(guf_str_view{.str = text_buf.data, .len = text_buf.size}, delims.data, delims.size, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST);
    while (guf_str_tok_next(&tok_state, true)) {
        guf_str_view tok = tok_state.cur_tok;
        // if (tok.len <= 0) {
        //     continue;
        // }
        std::string_view sv(tok.str, static_cast<size_t>(tok.len));
        //std::cout << sv << std::string_view(tok_state.cur_delim.str, tok_state.cur_delim.len);
        TEST_CHECK(dict_sv_i32_contains(&word_cnt_dict, &tok) == word_cnt_map.contains(sv));
        if (!dict_sv_i32_contains(&word_cnt_dict, &tok)) {
            dict_sv_i32_insert_val_arg(&word_cnt_dict, tok, 1, GUF_CPY_VALUE, GUF_CPY_VALUE);
            word_cnt_map.insert({sv, 1});
            if (TEST_CHECK(!dict_str_i32_contains_val_arg(&word_cnt_dict_str, guf_str_new_readonly(tok)))) {
                dict_str_i32_insert_val_arg(&word_cnt_dict_str, guf_str_new(tok, &allocator), 1, GUF_CPY_MOVE, GUF_CPY_VALUE);
            }
        } else {
            int32_t *cnt = dict_sv_i32_at_val_arg(&word_cnt_dict, tok);
            if (TEST_CHECK(cnt)) {
                *cnt += 1;
            }
            int32_t *cnt_2 = dict_str_i32_at_val_arg(&word_cnt_dict_str, guf_str_new_readonly(tok));
            if (TEST_CHECK(cnt_2)) {
                *cnt_2 += 1;
            }
            // else {
            //     std::cout << "tok: " << std::string_view{tok.str, (size_t)tok.len} << "\n";
            // }
            word_cnt_map.at(sv) += 1;
        }
        // printf("tok_len: %td ", tok.len);
        // printf("'%.*s'\n", (int)tok.len, tok.str);
        TEST_CHECK(dict_sv_i32_debug_valid_size(&word_cnt_dict));
        TEST_CHECK(dict_str_i32_debug_valid_size(&word_cnt_dict_str));
    }
    dbuf_str_view_free(&delims, NULL);

    TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == std::ssize(word_cnt_map));
    TEST_CHECK(dict_sv_i32_debug_valid_size(&word_cnt_dict));

    TEST_CHECK(dict_str_i32_size(&word_cnt_dict_str) == std::ssize(word_cnt_map));
    TEST_CHECK(dict_str_i32_debug_valid_size(&word_cnt_dict_str));

    // Reference map -> dicts: every word must be found with the same count.
    for (const auto & [word, cnt] : word_cnt_map) {
        guf_str_view sv = {.str = word.data(), .len = static_cast<ptrdiff_t>(word.size())};
        int32_t *res = dict_sv_i32_at(&word_cnt_dict, &sv);
        int32_t *res2 = dict_str_i32_at_val_arg(&word_cnt_dict_str, guf_str_new_readonly(sv));
        TEST_CHECK(res && *res == cnt);
        TEST_CHECK(res2 && *res2 == cnt);
    }

    // Dicts -> reference map: iterate each dict and look every entry up.
    ptrdiff_t i = 0;
    GUF_CNT_FOREACH(&word_cnt_dict, dict_sv_i32, kv_it) {
        const dict_sv_i32_kv *kv = kv_it.ptr;
        if (TEST_CHECK(kv)) {
            const int32_t cnt = kv->val;
            const std::string_view sv(kv->key.str, static_cast<size_t>(kv->key.len));
            if (TEST_CHECK(word_cnt_map.contains(sv))) {
                TEST_CHECK(word_cnt_map.at(sv) == cnt);
            }
        }
        ++i;
    }
    TEST_CHECK(i == dict_sv_i32_size(&word_cnt_dict));
    TEST_CHECK(i == std::ssize(word_cnt_map));
    TEST_CHECK(dict_sv_i32_debug_valid_size(&word_cnt_dict));

    i = 0;
    GUF_CNT_FOREACH(&word_cnt_dict_str, dict_str_i32, kv_it) {
        const dict_str_i32_kv *kv = kv_it.ptr;
        if (TEST_CHECK(kv)) {
            const int32_t cnt = kv->val;
            const std::string_view sv(guf_str_const_cstr(&kv->key), static_cast<size_t>(guf_str_len(&kv->key)));
            // std::cout << sv << "\n";
            if (TEST_CHECK(word_cnt_map.contains(sv))) {
                TEST_CHECK(word_cnt_map.at(sv) == cnt);
            }
        }
        ++i;
    }
    TEST_CHECK(i == dict_str_i32_size(&word_cnt_dict_str));
    TEST_CHECK(i == std::ssize(word_cnt_map));
    TEST_CHECK(dict_str_i32_debug_valid_size(&word_cnt_dict_str));

    // std::cout << "load fac: " << dict_sv_i32_load_factor(&word_cnt_dict) << ", cap: " << word_cnt_dict.kv_indices_cap << " elem cap: " << word_cnt_dict.kv_elems.capacity << "\n";
    // std::cout << "size: " << dict_sv_i32_size(&word_cnt_dict) << ", max probelen: " << word_cnt_dict.max_probelen << "\n";
    // std::cout << "mem usage: " << dict_sv_i32_memory_usage(&word_cnt_dict) << "\n";

    // Erase tests: remove half of the entries, alternating between the last
    // and the first entry in iteration order.
    const double load_fac_before_erase = dict_sv_i32_load_factor(&word_cnt_dict);
    const ptrdiff_t size_before_erase = dict_sv_i32_size(&word_cnt_dict);
    ptrdiff_t num_del = 0;
    while (dict_sv_i32_size(&word_cnt_dict) > size_before_erase / 2) {
        dict_sv_i32_kv *kv = NULL;
        if (num_del % 2) {
            dict_sv_i32_iter it = dict_sv_i32_begin(&word_cnt_dict);
            GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, it));
            kv = it.ptr;
        } else {
            dict_sv_i32_iter rit = dict_sv_i32_rbegin(&word_cnt_dict);
            GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, rit));
            kv = rit.ptr;
        }
        GUF_ASSERT_RELEASE(kv);

        // Copy the key view before erasing; the kv slot is not used afterwards.
        const guf_str_view key = kv->key;

        const bool del_success = dict_sv_i32_erase(&word_cnt_dict, &key);
        TEST_CHECK(del_success);
        TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key));

        std::string_view sv(key.str, static_cast<size_t>(key.len));
        if (TEST_CHECK(word_cnt_map.contains(sv))) {
            word_cnt_map.erase(sv);
        }
        TEST_CHECK(!word_cnt_map.contains(sv));

        if (!del_success) {
            // A failed erase changes neither the size nor num_del, so the same
            // key would be picked forever. Stop; the checks below report it.
            break;
        }
        ++num_del;
    }
    TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) >= 0);
    TEST_CHECK(size_before_erase - num_del == dict_sv_i32_size(&word_cnt_dict));
    TEST_CHECK(std::ssize(word_cnt_map) == dict_sv_i32_size(&word_cnt_dict));

    // The test expects erasing to leave the load factor untouched as long as
    // entries remain, and to reset it to zero once the dict is empty.
    if (dict_sv_i32_size(&word_cnt_dict) != 0) {
        TEST_CHECK(load_fac_before_erase == dict_sv_i32_load_factor(&word_cnt_dict));
    } else {
        TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0);
    }

    // Erase the second and the second-to-last entry in iteration order.
    if (dict_sv_i32_size(&word_cnt_dict) >= 4) {
        dict_sv_i32_kv_dbuf_iter it = dict_sv_i32_begin(&word_cnt_dict);
        it = dict_sv_i32_iter_next(&word_cnt_dict, it, 1);
        GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, it));

        guf_str_view key = it.ptr->key;

        bool del_success = dict_sv_i32_erase(&word_cnt_dict, &key);
        TEST_CHECK(del_success);
        TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key));

        std::string_view sv(key.str, static_cast<size_t>(key.len));
        if (TEST_CHECK(word_cnt_map.contains(sv))) {
            word_cnt_map.erase(sv);
        }

        it = dict_sv_i32_rbegin(&word_cnt_dict);
        it = dict_sv_i32_iter_next(&word_cnt_dict, it, 1);
        GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, it));
        key = it.ptr->key;

        del_success = dict_sv_i32_erase(&word_cnt_dict, &key);
        TEST_CHECK(del_success);
        TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key));

        sv = std::string_view(key.str, static_cast<size_t>(key.len));
        if (TEST_CHECK(word_cnt_map.contains(sv))) {
            word_cnt_map.erase(sv);
        }
    }
    TEST_CHECK(std::ssize(word_cnt_map) == dict_sv_i32_size(&word_cnt_dict));

    // The surviving entries must still agree with the reference map.
    i = 0;
    GUF_CNT_FOREACH(&word_cnt_dict, dict_sv_i32, kv_it) {
        const dict_sv_i32_kv *kv = kv_it.ptr;
        if (TEST_CHECK(kv)) {
            const int32_t cnt = kv->val;
            const std::string_view sv(kv->key.str, static_cast<size_t>(kv->key.len));
            if (TEST_CHECK(word_cnt_map.contains(sv))) {
                TEST_CHECK(word_cnt_map.at(sv) == cnt);
            }
            ++i;
        }
    }
    TEST_CHECK(i == word_cnt_dict.kv_elems.size);
    TEST_CHECK(i == std::ssize(word_cnt_map));

    // Erase everything that is left, always taking the first entry.
    while (dict_sv_i32_size(&word_cnt_dict) > 0) {
        const ptrdiff_t size_before_round = dict_sv_i32_size(&word_cnt_dict);

        const dict_sv_i32_iter beg = dict_sv_i32_begin(&word_cnt_dict);
        if (TEST_CHECK(!dict_sv_i32_iter_is_end(&word_cnt_dict, beg))) {
            const guf_str_view key = beg.ptr->key;
            if (TEST_CHECK(dict_sv_i32_contains(&word_cnt_dict, &key))) {
                const bool del_success = dict_sv_i32_erase(&word_cnt_dict, &key);
                TEST_CHECK(del_success);
                TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key));
            }
            const std::string_view sv(key.str, static_cast<size_t>(key.len));
            if (TEST_CHECK(word_cnt_map.contains(sv))) {
                word_cnt_map.erase(sv);
            }
        }

        if (dict_sv_i32_size(&word_cnt_dict) >= size_before_round) {
            // Nothing was removed this round; bail out instead of looping
            // forever. The emptiness check below reports the failure.
            break;
        }
    }
    TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == 0 && word_cnt_map.size() == 0);
    TEST_CHECK(word_cnt_dict.num_tombstones == 0);
    TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0);

    // Refill the (now empty) dict with five fixed entries.
    dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej"), 64, GUF_CPY_VALUE, GUF_CPY_VALUE);
    dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!"), 128, GUF_CPY_VALUE, GUF_CPY_VALUE);
    dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum"), 256, GUF_CPY_VALUE, GUF_CPY_VALUE);
    dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med"), 512, GUF_CPY_VALUE, GUF_CPY_VALUE);
    dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi."), 1024, GUF_CPY_VALUE, GUF_CPY_VALUE);
    TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == 5);

    int32_t *val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej"));
    TEST_CHECK(val && *val == 64);
    val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum"));
    TEST_CHECK(val && *val == 256);
    val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi."));
    TEST_CHECK(val && *val == 1024);
    val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!"));
    TEST_CHECK(val && *val == 128);
    val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med"));
    TEST_CHECK(val && *val == 512);

    // The element buffer is expected to hold the entries in insertion order.
    TEST_CHECK(word_cnt_dict.kv_elems.size == 5);
    TEST_CHECK(word_cnt_dict.kv_elems.data[0].val == 64 && std::strcmp(word_cnt_dict.kv_elems.data[0].key.str, "Hej") == 0);
    TEST_CHECK(word_cnt_dict.kv_elems.data[1].val == 128 && std::strcmp(word_cnt_dict.kv_elems.data[1].key.str, "verden!") == 0);
    TEST_CHECK(word_cnt_dict.kv_elems.data[2].val == 256 && std::strcmp(word_cnt_dict.kv_elems.data[2].key.str, "Flødeskum") == 0);
    TEST_CHECK(word_cnt_dict.kv_elems.data[3].val == 512 && std::strcmp(word_cnt_dict.kv_elems.data[3].key.str, "med") == 0);
    TEST_CHECK(word_cnt_dict.kv_elems.data[4].val == 1024 && std::strcmp(word_cnt_dict.kv_elems.data[4].key.str, "Faxe Kondi.") == 0);

    // Tombstone bookkeeping: erase a key, then re-insert/erase it many times.
    // The test expects the tombstone count to track `del`, and the load factor
    // and index capacity to stay at their starting values throughout.
    const double load_fac_beg = dict_sv_i32_load_factor(&word_cnt_dict);
    const ptrdiff_t cap_begin = word_cnt_dict.kv_indices_cap;
    ptrdiff_t del = 0;

    // Counter updates are kept outside TEST_CHECK so the bookkeeping does not
    // depend on how the macro evaluates its argument.
    const auto erase_then_churn = [&](guf_str_view key, int32_t reinsert_val, ptrdiff_t rounds) {
        const bool first_erase_ok = dict_sv_i32_erase_val_arg(&word_cnt_dict, key);
        TEST_CHECK(first_erase_ok);
        ++del;
        TEST_CHECK(word_cnt_dict.num_tombstones == del);
        TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);

        for (ptrdiff_t n = 0; n < rounds; ++n) {
            dict_sv_i32_insert_val_arg(&word_cnt_dict, key, reinsert_val, GUF_CPY_VALUE, GUF_CPY_VALUE);
            --del;
            TEST_CHECK(word_cnt_dict.num_tombstones == del);
            TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);

            const bool erase_ok = dict_sv_i32_erase_val_arg(&word_cnt_dict, key);
            TEST_CHECK(erase_ok);
            ++del;
            TEST_CHECK(word_cnt_dict.num_tombstones == del);
            TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
        }
        TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin);
    };

    erase_then_churn(GUF_CSTR_TO_VIEW_CPP("Hej"), 64, cap_begin + 128);
    erase_then_churn(GUF_CSTR_TO_VIEW_CPP("Faxe Kondi."), 128, 256);
    erase_then_churn(GUF_CSTR_TO_VIEW_CPP("med"), 256, 512 + cap_begin);
    erase_then_churn(GUF_CSTR_TO_VIEW_CPP("Flødeskum"), 512, 71);

    // Erasing the last live entry: the test expects tombstones and load factor
    // to drop to zero, and to return to zero after every re-insert/erase.
    TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!")));
    TEST_CHECK(word_cnt_dict.num_tombstones == 0);
    TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0);
    for (ptrdiff_t n = 0; n < 201; ++n) {
        dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!"), 128, GUF_CPY_VALUE, GUF_CPY_VALUE);
        TEST_CHECK(word_cnt_dict.num_tombstones == 0);
        TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) > 0);
        TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!")));
        TEST_CHECK(word_cnt_dict.num_tombstones == 0);
        TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0);
    }
    TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin);
    TEST_CHECK(word_cnt_dict.kv_elems.size == 0);
    TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == 0);

    // Deep-copy check: insert a long key with GUF_CPY_DEEP, then modify the
    // source string and verify the dict entry is still found under the
    // original (unmodified) text.
    std::string str;
    for (size_t c = 0; c < GUF_STR_SSO_BUF_CAP * 4; ++c) {
        str += c % 2 ? "AAA" : "aaa";
    }
    guf_str str_cpy = guf_str_new(guf_str_view{.str = str.data(), .len = static_cast<ptrdiff_t>(str.size())}, &allocator);
    dict_str_i32_insert_val_arg(&word_cnt_dict_str, str_cpy, 42, GUF_CPY_DEEP, GUF_CPY_VALUE);
    int32_t *foo = dict_str_i32_at_val_arg(&word_cnt_dict_str, guf_str_new_readonly(guf_str_view_from_str(&str_cpy)));
    if (TEST_CHECK(foo)) {
        TEST_CHECK(*foo == 42);
    }
    guf_str_append(&str_cpy, GUF_CSTR_LIT_TO_VIEW_CPP("Foobar"));
    int32_t *foo2 = dict_str_i32_at_val_arg(&word_cnt_dict_str, guf_str_new_readonly(guf_str_view{.str = str.data(), .len = static_cast<ptrdiff_t>(str.size())}));
    if (TEST_CHECK(foo2)) {
        TEST_CHECK(*foo2 == 42);
    }
    guf_str_free(&str_cpy, NULL);

    dict_sv_i32_free(&word_cnt_dict, NULL);
    dict_str_i32_free(&word_cnt_dict_str, NULL);

    // After free, every field of the string-view dict is expected to be zeroed.
    bool dbuf_null = !word_cnt_dict.kv_elems.data && !word_cnt_dict.kv_elems.allocator && !word_cnt_dict.kv_elems.capacity && !word_cnt_dict.kv_elems.size;
    TEST_CHECK(dbuf_null && !word_cnt_dict.kv_indices && !word_cnt_dict.kv_indices_cap && !word_cnt_dict.max_probelen && !word_cnt_dict.num_tombstones);
}

/// Reads the file `fname` byte by byte into both text_buf and text_vec.
///
/// Aborts via GUF_ASSERT_RELEASE if the file cannot be opened.
///
/// @param fname  Path of the text file to load.
/// @return true if no read error occurred and both buffers ended up with the
///         same number of bytes.
bool DictSvToIntTest::load_file(const char *fname)
{
    FILE *in_file = fopen(fname, "r");
    GUF_ASSERT_RELEASE(in_file);

    dbuf_char_init(&text_buf, 128, &allocator);

    int c = EOF;
    while ((c = fgetc(in_file)) != EOF) {
        dbuf_char_push_val(&text_buf, static_cast<char>(c));
        text_vec.push_back(static_cast<char>(c));
    }
    // fgetc returns EOF for read errors as well as end-of-file; tell them
    // apart so a truncated read does not pass silently.
    const bool read_ok = (ferror(in_file) == 0);
    fclose(in_file);

    // dbuf_char_insert_val(&text_buf, '\xC0', 1);
    // text_vec.insert(text_vec.cbegin() + 1, '\xC0');

    const bool sizes_match = TEST_CHECK(std::ssize(text_vec) == text_buf.size);
    return TEST_CHECK(read_ok) && sizes_match;
}

/// Releases the loaded text: frees text_buf, resets it to a zeroed state and
/// empties text_vec.
void DictSvToIntTest::free_file()
{
    dbuf_char_free(&text_buf, NULL);
    text_buf = {};
    text_vec.clear();
}