#pragma once #include #include #include "test.hpp" extern "C" { #include "guf_alloc_libc.h" #include "guf_dict_impl.h" #include "guf_str.h" } struct DictSvToIntTest : public Test { DictSvToIntTest(const std::string& name) : Test(name) {}; private: dbuf_char text_buf {}; std::vector text_vec {}; void insert_lookup() { std::unordered_map word_cnt_map {}; dict_sv_i32 word_cnt_dict {}; dict_sv_i32_init(&word_cnt_dict, &guf_allocator_libc); dbuf_str_view delims = dbuf_str_view_new(&guf_allocator_libc); for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_WHITESPACE); ++i) { guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_WHITESPACE[i]), .str = GUF_UTF8_WHITESPACE[i]}; dbuf_str_view_push_val(&delims, d); } for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_COMMON_PUNCT); ++i) { guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_COMMON_PUNCT[i]), .str = GUF_UTF8_COMMON_PUNCT[i]}; dbuf_str_view_push_val(&delims, d); } guf_str_view input_str = {.str = text_buf.data, .len = text_buf.size}; guf_str_view tok; while ((tok = guf_str_next_tok(&input_str, delims.data, delims.size, NULL, -1)).len) { if (tok.len <= 0) { continue; } std::string_view sv(tok.str, tok.len); TEST_CHECK(dict_sv_i32_contains(&word_cnt_dict, &tok) == word_cnt_map.contains(sv)); if (!dict_sv_i32_contains(&word_cnt_dict, &tok)) { dict_sv_i32_insert_val_arg(&word_cnt_dict, tok, 1, GUF_CPY_VALUE, GUF_CPY_VALUE); word_cnt_map.insert({sv, 1}); } else { int32_t *cnt = dict_sv_i32_at_val_arg(&word_cnt_dict, tok); if (TEST_CHECK(cnt)) { *cnt += 1; } word_cnt_map.at(sv) += 1; } // printf("tok_len: %td ", tok.len); // printf("'%.*s'\n", (int)tok.len, tok.str); TEST_CHECK(dict_sv_i32_debug_valid_size(&word_cnt_dict)); } dbuf_str_view_free(&delims, NULL); TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == std::ssize(word_cnt_map)); TEST_CHECK(dict_sv_i32_debug_valid_size(&word_cnt_dict)); for (const auto & [word, cnt] : word_cnt_map ) { guf_str_view sv = {.str = word.data(), .len = (ptrdiff_t)word.size()}; int32_t *res = dict_sv_i32_at(&word_cnt_dict, &sv); TEST_CHECK(res && *res == cnt); } ptrdiff_t i = 0; GUF_CNT_FOREACH(&word_cnt_dict, dict_sv_i32, kv_it) { const dict_sv_i32_kv *kv = kv_it.ptr; if (TEST_CHECK(kv)) { const int32_t cnt = kv->val; // printf("%.*s: %d\n", (int)kv->key.len, kv->key.str, cnt); const std::string_view sv(kv->key.str, kv->key.len); if (TEST_CHECK(word_cnt_map.contains(sv))) { TEST_CHECK(word_cnt_map.at(sv) == cnt); } } ++i; } TEST_CHECK(i == dict_sv_i32_size(&word_cnt_dict)); TEST_CHECK(i == std::ssize(word_cnt_map)); TEST_CHECK(dict_sv_i32_debug_valid_size(&word_cnt_dict)); // std::cout << "load fac: " << dict_sv_i32_load_factor(&word_cnt_dict) << ", cap: " << word_cnt_dict.kv_indices_cap << "\n"; // std::cout << "size: " << dict_sv_i32_size(&word_cnt_dict) << ", max probelen: " << word_cnt_dict.max_probelen << "\n"; // Erase tests: const double load_fac_before_erase = dict_sv_i32_load_factor(&word_cnt_dict); const ptrdiff_t size_before_erase = dict_sv_i32_size(&word_cnt_dict); ptrdiff_t num_del = 0; while (dict_sv_i32_size(&word_cnt_dict) > size_before_erase / 2) { dict_sv_i32_kv *kv = NULL; if (num_del % 2) { dict_sv_i32_iter it = dict_sv_i32_begin(&word_cnt_dict); GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, it)); kv = it.ptr; } else { dict_sv_i32_iter rit = dict_sv_i32_rbegin(&word_cnt_dict); GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, rit)); kv = rit.ptr; } GUF_ASSERT_RELEASE(kv); const guf_str_view key = kv->key; const bool del_success = dict_sv_i32_erase(&word_cnt_dict, &key); TEST_CHECK(del_success); TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key)); std::string_view sv(key.str, (size_t)key.len); if (TEST_CHECK(word_cnt_map.contains(sv))) { word_cnt_map.erase(sv); } TEST_CHECK(!word_cnt_map.contains(sv)); if (del_success) { ++num_del; } } TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) >= 0); TEST_CHECK(size_before_erase - num_del == dict_sv_i32_size(&word_cnt_dict)); TEST_CHECK(std::ssize(word_cnt_map) == dict_sv_i32_size(&word_cnt_dict)); if (dict_sv_i32_size(&word_cnt_dict) != 0) { TEST_CHECK(load_fac_before_erase == dict_sv_i32_load_factor(&word_cnt_dict)); } else { TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0); } if (dict_sv_i32_size(&word_cnt_dict) >= 4) { dict_sv_i32_kv_dbuf_iter it = dict_sv_i32_begin(&word_cnt_dict); it = dict_sv_i32_iter_next(&word_cnt_dict, it, 1); GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, it)); guf_str_view key = it.ptr->key; bool del_success = dict_sv_i32_erase(&word_cnt_dict, &key); TEST_CHECK(del_success); TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key)); std::string_view sv(key.str, (size_t)key.len); if (TEST_CHECK(word_cnt_map.contains(sv))) { word_cnt_map.erase(sv); } it = dict_sv_i32_rbegin(&word_cnt_dict); it = dict_sv_i32_iter_next(&word_cnt_dict, it, 1); GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, it)); key = it.ptr->key; del_success = dict_sv_i32_erase(&word_cnt_dict, &key); TEST_CHECK(del_success); TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key)); sv = std::string_view(key.str, (size_t)key.len); if (TEST_CHECK(word_cnt_map.contains(sv))) { word_cnt_map.erase(sv); } } TEST_CHECK(std::ssize(word_cnt_map) == dict_sv_i32_size(&word_cnt_dict)); i = 0; GUF_CNT_FOREACH(&word_cnt_dict, dict_sv_i32, kv_it) { const dict_sv_i32_kv *kv = kv_it.ptr; if (TEST_CHECK(kv)) { const int32_t cnt = kv->val; const std::string_view sv(kv->key.str, (size_t)kv->key.len); if (TEST_CHECK(word_cnt_map.contains(sv))) { TEST_CHECK(word_cnt_map.at(sv) == cnt); } ++i; } } TEST_CHECK(i == word_cnt_dict.kv_elems.size); TEST_CHECK(i == std::ssize(word_cnt_map)); while (dict_sv_i32_size(&word_cnt_dict) > 0) { const dict_sv_i32_iter beg = dict_sv_i32_begin(&word_cnt_dict); if (TEST_CHECK(!dict_sv_i32_iter_is_end(&word_cnt_dict, beg))) { const guf_str_view key = beg.ptr->key; if (TEST_CHECK(dict_sv_i32_contains(&word_cnt_dict, &key))) { const bool del_success = dict_sv_i32_erase(&word_cnt_dict, &key); TEST_CHECK(del_success); TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key)); } const std::string_view sv(key.str, (size_t)key.len); if (TEST_CHECK(word_cnt_map.contains(sv))) { word_cnt_map.erase(sv); } } } TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == 0 && word_cnt_map.size() == 0); TEST_CHECK(word_cnt_dict.num_tombstones == 0); TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0); dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej"), (size_t)64, GUF_CPY_VALUE, GUF_CPY_VALUE); dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!"), (size_t)128, GUF_CPY_VALUE, GUF_CPY_VALUE); dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum"), (size_t)256, GUF_CPY_VALUE, GUF_CPY_VALUE); dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med"), (size_t)512, GUF_CPY_VALUE, GUF_CPY_VALUE); dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi."), (size_t)1024, GUF_CPY_VALUE, GUF_CPY_VALUE); TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == 5); int32_t *val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej")); TEST_CHECK(val && *val == 64); val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum")); TEST_CHECK(val && *val == 256); val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi.")); TEST_CHECK(val && *val == 1024); val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!")); TEST_CHECK(val && *val == 128); val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med")); TEST_CHECK(val && *val == 512); TEST_CHECK(word_cnt_dict.kv_elems.size == 5); TEST_CHECK(word_cnt_dict.kv_elems.data[0].val == 64 && std::strcmp(word_cnt_dict.kv_elems.data[0].key.str, "Hej") == 0); TEST_CHECK(word_cnt_dict.kv_elems.data[1].val == 128 && std::strcmp(word_cnt_dict.kv_elems.data[1].key.str, "verden!") == 0); TEST_CHECK(word_cnt_dict.kv_elems.data[2].val == 256 && std::strcmp(word_cnt_dict.kv_elems.data[2].key.str, "Flødeskum") == 0); TEST_CHECK(word_cnt_dict.kv_elems.data[3].val == 512 && std::strcmp(word_cnt_dict.kv_elems.data[3].key.str, "med") == 0); TEST_CHECK(word_cnt_dict.kv_elems.data[4].val == 1024 && std::strcmp(word_cnt_dict.kv_elems.data[4].key.str, "Faxe Kondi.") == 0); const double load_fac_beg = dict_sv_i32_load_factor(&word_cnt_dict); const ptrdiff_t cap_begin = word_cnt_dict.kv_indices_cap; ptrdiff_t del = 0; TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej"))); TEST_CHECK(word_cnt_dict.num_tombstones == ++del); TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); for (ptrdiff_t n = 0; n < cap_begin + 128; ++n) { dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej"), 64, GUF_CPY_VALUE, GUF_CPY_VALUE); TEST_CHECK(word_cnt_dict.num_tombstones == --del); TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej"))); TEST_CHECK(word_cnt_dict.num_tombstones == ++del); TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); } TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin); TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi."))); TEST_CHECK(word_cnt_dict.num_tombstones == ++del); TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); for (ptrdiff_t n = 0; n < 256; ++n) { dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi."), 128, GUF_CPY_VALUE, GUF_CPY_VALUE); TEST_CHECK(word_cnt_dict.num_tombstones == --del); TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi."))); TEST_CHECK(word_cnt_dict.num_tombstones == ++del); TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); } TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin); TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med"))); TEST_CHECK(word_cnt_dict.num_tombstones == ++del); TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); for (ptrdiff_t n = 0; n < 512 + cap_begin; ++n) { dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med"), 256, GUF_CPY_VALUE, GUF_CPY_VALUE); TEST_CHECK(word_cnt_dict.num_tombstones == --del); TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med"))); TEST_CHECK(word_cnt_dict.num_tombstones == ++del); TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); } TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin); TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum"))); TEST_CHECK(word_cnt_dict.num_tombstones == ++del); TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); for (ptrdiff_t n = 0; n < 71; ++n) { dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum"), 512, GUF_CPY_VALUE, GUF_CPY_VALUE); TEST_CHECK(word_cnt_dict.num_tombstones == --del); TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum"))); TEST_CHECK(word_cnt_dict.num_tombstones == ++del); TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); } TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin); TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!"))); TEST_CHECK(word_cnt_dict.num_tombstones == 0); TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0); for (ptrdiff_t n = 0; n < 201; ++n) { dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!"), 128, GUF_CPY_VALUE, GUF_CPY_VALUE); TEST_CHECK(word_cnt_dict.num_tombstones == 0); TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) > 0); TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!"))); TEST_CHECK(word_cnt_dict.num_tombstones == 0); TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0); } TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin); TEST_CHECK(word_cnt_dict.kv_elems.size == 0); TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == 0); dict_sv_i32_free(&word_cnt_dict, NULL); bool dbuf_null = !word_cnt_dict.kv_elems.data && !word_cnt_dict.kv_elems.allocator && !word_cnt_dict.kv_elems.capacity && !word_cnt_dict.kv_elems.size; TEST_CHECK(dbuf_null && !word_cnt_dict.kv_indices && !word_cnt_dict.kv_indices_cap && !word_cnt_dict.max_probelen && !word_cnt_dict.num_tombstones); } bool load_file(const char *fname) { FILE *in_file {nullptr}; if (!in_file) { in_file = fopen(fname, "r"); } GUF_ASSERT_RELEASE(in_file); dbuf_char_init(&text_buf, 128, &guf_allocator_libc); int c = EOF; while ((c = fgetc(in_file)) != EOF) { dbuf_char_push_val(&text_buf, (char)c); text_vec.push_back((char)c); } fclose(in_file); // dbuf_char_insert_val(&text_buf, '\xC0', 1); // text_vec.insert(text_vec.cbegin() + 1, '\xC0'); return TEST_CHECK(std::ssize(text_vec) == text_buf.size); } void free_file() { dbuf_char_free(&text_buf, NULL); text_buf = {}; text_vec.clear(); } public: bool run() override { if (done) { return passed; } if (TEST_CHECK(load_file(TEST_DATA_DIR "/utf8-test.txt"))) { insert_lookup(); } free_file(); if (TEST_CHECK(load_file(TEST_DATA_DIR "/bartleby.txt"))) { insert_lookup(); } free_file(); passed = (num_failed_checks == 0); done = true; return passed; } };