365 lines
16 KiB
C++
365 lines
16 KiB
C++
#pragma once
|
|
#include <unordered_map>
|
|
#include <cstring>
|
|
#include "test.hpp"
|
|
|
|
extern "C"
|
|
{
|
|
#include "guf_alloc_libc.h"
|
|
#include "guf_dict_impl.h"
|
|
#include "guf_str.h"
|
|
}
|
|
|
|
struct DictSvToIntTest : public Test
|
|
{
|
|
DictSvToIntTest(const std::string& name) : Test(name) {};
|
|
|
|
private:
|
|
|
|
dbuf_char text_buf {};
|
|
std::vector<char> text_vec {};
|
|
|
|
void insert_lookup()
|
|
{
|
|
std::unordered_map<std::string_view, int32_t> word_cnt_map {};
|
|
dict_sv_i32 word_cnt_dict {};
|
|
dict_sv_i32_init(&word_cnt_dict, &guf_allocator_libc);
|
|
|
|
dbuf_str_view delims = dbuf_str_view_new(&guf_allocator_libc);
|
|
for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_WHITESPACE); ++i) {
|
|
guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_WHITESPACE[i]), .str = GUF_UTF8_WHITESPACE[i]};
|
|
dbuf_str_view_push_val(&delims, d);
|
|
}
|
|
for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_COMMON_PUNCT); ++i) {
|
|
guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_COMMON_PUNCT[i]), .str = GUF_UTF8_COMMON_PUNCT[i]};
|
|
dbuf_str_view_push_val(&delims, d);
|
|
}
|
|
|
|
guf_str_view input_str = {.str = text_buf.data, .len = text_buf.size};
|
|
guf_str_view tok;
|
|
while ((tok = guf_str_next_tok(&input_str, delims.data, delims.size, NULL, -1)).len) {
|
|
if (tok.len <= 0) {
|
|
continue;
|
|
}
|
|
std::string_view sv(tok.str, tok.len);
|
|
TEST_CHECK(dict_sv_i32_contains(&word_cnt_dict, &tok) == word_cnt_map.contains(sv));
|
|
if (!dict_sv_i32_contains(&word_cnt_dict, &tok)) {
|
|
dict_sv_i32_insert_val_arg(&word_cnt_dict, tok, 1, GUF_CPY_VALUE, GUF_CPY_VALUE);
|
|
word_cnt_map.insert({sv, 1});
|
|
} else {
|
|
int32_t *cnt = dict_sv_i32_at_val_arg(&word_cnt_dict, tok);
|
|
if (TEST_CHECK(cnt)) {
|
|
*cnt += 1;
|
|
}
|
|
word_cnt_map.at(sv) += 1;
|
|
}
|
|
// printf("tok_len: %td ", tok.len);
|
|
// printf("'%.*s'\n", (int)tok.len, tok.str);
|
|
TEST_CHECK(dict_sv_i32_debug_valid_size(&word_cnt_dict));
|
|
}
|
|
dbuf_str_view_free(&delims, NULL);
|
|
|
|
TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == std::ssize(word_cnt_map));
|
|
TEST_CHECK(dict_sv_i32_debug_valid_size(&word_cnt_dict));
|
|
|
|
for (const auto & [word, cnt] : word_cnt_map ) {
|
|
guf_str_view sv = {.str = word.data(), .len = (ptrdiff_t)word.size()};
|
|
int32_t *res = dict_sv_i32_at(&word_cnt_dict, &sv);
|
|
TEST_CHECK(res && *res == cnt);
|
|
}
|
|
|
|
ptrdiff_t i = 0;
|
|
GUF_CNT_FOREACH(&word_cnt_dict, dict_sv_i32, kv_it) {
|
|
const dict_sv_i32_kv *kv = kv_it.ptr;
|
|
if (TEST_CHECK(kv)) {
|
|
const int32_t cnt = kv->val;
|
|
// printf("%.*s: %d\n", (int)kv->key.len, kv->key.str, cnt);
|
|
const std::string_view sv(kv->key.str, kv->key.len);
|
|
if (TEST_CHECK(word_cnt_map.contains(sv))) {
|
|
TEST_CHECK(word_cnt_map.at(sv) == cnt);
|
|
}
|
|
}
|
|
++i;
|
|
}
|
|
TEST_CHECK(i == dict_sv_i32_size(&word_cnt_dict));
|
|
TEST_CHECK(i == std::ssize(word_cnt_map));
|
|
TEST_CHECK(dict_sv_i32_debug_valid_size(&word_cnt_dict));
|
|
|
|
// std::cout << "load fac: " << dict_sv_i32_load_factor(&word_cnt_dict) << ", cap: " << word_cnt_dict.kv_indices_cap << "\n";
|
|
// std::cout << "size: " << dict_sv_i32_size(&word_cnt_dict) << ", max probelen: " << word_cnt_dict.max_probelen << "\n";
|
|
|
|
// Erase tests:
|
|
|
|
const double load_fac_before_erase = dict_sv_i32_load_factor(&word_cnt_dict);
|
|
const ptrdiff_t size_before_erase = dict_sv_i32_size(&word_cnt_dict);
|
|
ptrdiff_t num_del = 0;
|
|
while (dict_sv_i32_size(&word_cnt_dict) > size_before_erase / 2) {
|
|
dict_sv_i32_kv *kv = NULL;
|
|
if (num_del % 2) {
|
|
dict_sv_i32_iter it = dict_sv_i32_begin(&word_cnt_dict);
|
|
GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, it));
|
|
kv = it.ptr;
|
|
} else {
|
|
dict_sv_i32_iter rit = dict_sv_i32_rbegin(&word_cnt_dict);
|
|
GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, rit));
|
|
kv = rit.ptr;
|
|
}
|
|
GUF_ASSERT_RELEASE(kv);
|
|
|
|
const guf_str_view key = kv->key;
|
|
|
|
const bool del_success = dict_sv_i32_erase(&word_cnt_dict, &key);
|
|
TEST_CHECK(del_success);
|
|
TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key));
|
|
|
|
std::string_view sv(key.str, (size_t)key.len);
|
|
if (TEST_CHECK(word_cnt_map.contains(sv))) {
|
|
word_cnt_map.erase(sv);
|
|
}
|
|
TEST_CHECK(!word_cnt_map.contains(sv));
|
|
|
|
if (del_success) {
|
|
++num_del;
|
|
}
|
|
}
|
|
TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) >= 0);
|
|
TEST_CHECK(size_before_erase - num_del == dict_sv_i32_size(&word_cnt_dict));
|
|
TEST_CHECK(std::ssize(word_cnt_map) == dict_sv_i32_size(&word_cnt_dict));
|
|
|
|
if (dict_sv_i32_size(&word_cnt_dict) != 0) {
|
|
TEST_CHECK(load_fac_before_erase == dict_sv_i32_load_factor(&word_cnt_dict));
|
|
} else {
|
|
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0);
|
|
}
|
|
|
|
if (dict_sv_i32_size(&word_cnt_dict) >= 4) {
|
|
dict_sv_i32_kv_dbuf_iter it = dict_sv_i32_begin(&word_cnt_dict);
|
|
it = dict_sv_i32_iter_next(&word_cnt_dict, it, 1);
|
|
GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, it));
|
|
|
|
guf_str_view key = it.ptr->key;
|
|
|
|
bool del_success = dict_sv_i32_erase(&word_cnt_dict, &key);
|
|
TEST_CHECK(del_success);
|
|
TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key));
|
|
|
|
std::string_view sv(key.str, (size_t)key.len);
|
|
if (TEST_CHECK(word_cnt_map.contains(sv))) {
|
|
word_cnt_map.erase(sv);
|
|
}
|
|
|
|
it = dict_sv_i32_rbegin(&word_cnt_dict);
|
|
it = dict_sv_i32_iter_next(&word_cnt_dict, it, 1);
|
|
GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, it));
|
|
key = it.ptr->key;
|
|
|
|
del_success = dict_sv_i32_erase(&word_cnt_dict, &key);
|
|
TEST_CHECK(del_success);
|
|
TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key));
|
|
|
|
sv = std::string_view(key.str, (size_t)key.len);
|
|
if (TEST_CHECK(word_cnt_map.contains(sv))) {
|
|
word_cnt_map.erase(sv);
|
|
}
|
|
}
|
|
TEST_CHECK(std::ssize(word_cnt_map) == dict_sv_i32_size(&word_cnt_dict));
|
|
|
|
i = 0;
|
|
GUF_CNT_FOREACH(&word_cnt_dict, dict_sv_i32, kv_it) {
|
|
const dict_sv_i32_kv *kv = kv_it.ptr;
|
|
if (TEST_CHECK(kv)) {
|
|
const int32_t cnt = kv->val;
|
|
const std::string_view sv(kv->key.str, (size_t)kv->key.len);
|
|
if (TEST_CHECK(word_cnt_map.contains(sv))) {
|
|
TEST_CHECK(word_cnt_map.at(sv) == cnt);
|
|
}
|
|
++i;
|
|
}
|
|
}
|
|
TEST_CHECK(i == word_cnt_dict.kv_elems.size);
|
|
TEST_CHECK(i == std::ssize(word_cnt_map));
|
|
|
|
while (dict_sv_i32_size(&word_cnt_dict) > 0) {
|
|
const dict_sv_i32_iter beg = dict_sv_i32_begin(&word_cnt_dict);
|
|
if (TEST_CHECK(!dict_sv_i32_iter_is_end(&word_cnt_dict, beg))) {
|
|
const guf_str_view key = beg.ptr->key;
|
|
if (TEST_CHECK(dict_sv_i32_contains(&word_cnt_dict, &key))) {
|
|
const bool del_success = dict_sv_i32_erase(&word_cnt_dict, &key);
|
|
TEST_CHECK(del_success);
|
|
TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key));
|
|
}
|
|
const std::string_view sv(key.str, (size_t)key.len);
|
|
if (TEST_CHECK(word_cnt_map.contains(sv))) {
|
|
word_cnt_map.erase(sv);
|
|
}
|
|
}
|
|
}
|
|
TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == 0 && word_cnt_map.size() == 0);
|
|
TEST_CHECK(word_cnt_dict.num_tombstones == 0);
|
|
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0);
|
|
|
|
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej"), (size_t)64, GUF_CPY_VALUE, GUF_CPY_VALUE);
|
|
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!"), (size_t)128, GUF_CPY_VALUE, GUF_CPY_VALUE);
|
|
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum"), (size_t)256, GUF_CPY_VALUE, GUF_CPY_VALUE);
|
|
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med"), (size_t)512, GUF_CPY_VALUE, GUF_CPY_VALUE);
|
|
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi."), (size_t)1024, GUF_CPY_VALUE, GUF_CPY_VALUE);
|
|
|
|
TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == 5);
|
|
|
|
int32_t *val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej"));
|
|
TEST_CHECK(val && *val == 64);
|
|
val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum"));
|
|
TEST_CHECK(val && *val == 256);
|
|
val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi."));
|
|
TEST_CHECK(val && *val == 1024);
|
|
val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!"));
|
|
TEST_CHECK(val && *val == 128);
|
|
val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med"));
|
|
TEST_CHECK(val && *val == 512);
|
|
|
|
TEST_CHECK(word_cnt_dict.kv_elems.size == 5);
|
|
|
|
TEST_CHECK(word_cnt_dict.kv_elems.data[0].val == 64 && std::strcmp(word_cnt_dict.kv_elems.data[0].key.str, "Hej") == 0);
|
|
TEST_CHECK(word_cnt_dict.kv_elems.data[1].val == 128 && std::strcmp(word_cnt_dict.kv_elems.data[1].key.str, "verden!") == 0);
|
|
TEST_CHECK(word_cnt_dict.kv_elems.data[2].val == 256 && std::strcmp(word_cnt_dict.kv_elems.data[2].key.str, "Flødeskum") == 0);
|
|
TEST_CHECK(word_cnt_dict.kv_elems.data[3].val == 512 && std::strcmp(word_cnt_dict.kv_elems.data[3].key.str, "med") == 0);
|
|
TEST_CHECK(word_cnt_dict.kv_elems.data[4].val == 1024 && std::strcmp(word_cnt_dict.kv_elems.data[4].key.str, "Faxe Kondi.") == 0);
|
|
|
|
const double load_fac_beg = dict_sv_i32_load_factor(&word_cnt_dict);
|
|
const ptrdiff_t cap_begin = word_cnt_dict.kv_indices_cap;
|
|
ptrdiff_t del = 0;
|
|
|
|
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej")));
|
|
TEST_CHECK(word_cnt_dict.num_tombstones == ++del);
|
|
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
|
|
for (ptrdiff_t n = 0; n < cap_begin + 128; ++n) {
|
|
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej"), 64, GUF_CPY_VALUE, GUF_CPY_VALUE);
|
|
TEST_CHECK(word_cnt_dict.num_tombstones == --del);
|
|
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
|
|
|
|
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej")));
|
|
TEST_CHECK(word_cnt_dict.num_tombstones == ++del);
|
|
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
|
|
}
|
|
TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin);
|
|
|
|
|
|
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi.")));
|
|
TEST_CHECK(word_cnt_dict.num_tombstones == ++del);
|
|
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
|
|
for (ptrdiff_t n = 0; n < 256; ++n) {
|
|
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi."), 128, GUF_CPY_VALUE, GUF_CPY_VALUE);
|
|
TEST_CHECK(word_cnt_dict.num_tombstones == --del);
|
|
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
|
|
|
|
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi.")));
|
|
TEST_CHECK(word_cnt_dict.num_tombstones == ++del);
|
|
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
|
|
}
|
|
TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin);
|
|
|
|
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med")));
|
|
TEST_CHECK(word_cnt_dict.num_tombstones == ++del);
|
|
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
|
|
for (ptrdiff_t n = 0; n < 512 + cap_begin; ++n) {
|
|
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med"), 256, GUF_CPY_VALUE, GUF_CPY_VALUE);
|
|
TEST_CHECK(word_cnt_dict.num_tombstones == --del);
|
|
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
|
|
|
|
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med")));
|
|
TEST_CHECK(word_cnt_dict.num_tombstones == ++del);
|
|
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
|
|
}
|
|
TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin);
|
|
|
|
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum")));
|
|
TEST_CHECK(word_cnt_dict.num_tombstones == ++del);
|
|
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
|
|
for (ptrdiff_t n = 0; n < 71; ++n) {
|
|
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum"), 512, GUF_CPY_VALUE, GUF_CPY_VALUE);
|
|
TEST_CHECK(word_cnt_dict.num_tombstones == --del);
|
|
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
|
|
|
|
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum")));
|
|
TEST_CHECK(word_cnt_dict.num_tombstones == ++del);
|
|
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
|
|
}
|
|
TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin);
|
|
|
|
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!")));
|
|
TEST_CHECK(word_cnt_dict.num_tombstones == 0);
|
|
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0);
|
|
for (ptrdiff_t n = 0; n < 201; ++n) {
|
|
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!"), 128, GUF_CPY_VALUE, GUF_CPY_VALUE);
|
|
TEST_CHECK(word_cnt_dict.num_tombstones == 0);
|
|
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) > 0);
|
|
|
|
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!")));
|
|
TEST_CHECK(word_cnt_dict.num_tombstones == 0);
|
|
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0);
|
|
}
|
|
TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin);
|
|
|
|
TEST_CHECK(word_cnt_dict.kv_elems.size == 0);
|
|
TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == 0);
|
|
|
|
dict_sv_i32_free(&word_cnt_dict, NULL);
|
|
bool dbuf_null = !word_cnt_dict.kv_elems.data && !word_cnt_dict.kv_elems.allocator && !word_cnt_dict.kv_elems.capacity && !word_cnt_dict.kv_elems.size;
|
|
TEST_CHECK(dbuf_null && !word_cnt_dict.kv_indices && !word_cnt_dict.kv_indices_cap && !word_cnt_dict.max_probelen && !word_cnt_dict.num_tombstones);
|
|
}
|
|
|
|
bool load_file(const char *fname)
|
|
{
|
|
FILE *in_file {nullptr};
|
|
if (!in_file) {
|
|
in_file = fopen(fname, "r");
|
|
}
|
|
|
|
GUF_ASSERT_RELEASE(in_file);
|
|
|
|
dbuf_char_init(&text_buf, 128, &guf_allocator_libc);
|
|
|
|
int c = EOF;
|
|
while ((c = fgetc(in_file)) != EOF) {
|
|
dbuf_char_push_val(&text_buf, (char)c);
|
|
text_vec.push_back((char)c);
|
|
}
|
|
fclose(in_file);
|
|
|
|
// dbuf_char_insert_val(&text_buf, '\xC0', 1);
|
|
// text_vec.insert(text_vec.cbegin() + 1, '\xC0');
|
|
|
|
return TEST_CHECK(std::ssize(text_vec) == text_buf.size);
|
|
}
|
|
|
|
void free_file()
|
|
{
|
|
dbuf_char_free(&text_buf, NULL);
|
|
text_buf = {};
|
|
text_vec.clear();
|
|
}
|
|
|
|
public:
|
|
|
|
bool run() override
|
|
{
|
|
if (done) {
|
|
return passed;
|
|
}
|
|
|
|
if (TEST_CHECK(load_file(TEST_DATA_DIR "/utf8-test.txt"))) {
|
|
insert_lookup();
|
|
}
|
|
free_file();
|
|
|
|
if (TEST_CHECK(load_file(TEST_DATA_DIR "/bartleby.txt"))) {
|
|
insert_lookup();
|
|
}
|
|
free_file();
|
|
|
|
passed = (num_failed_checks == 0);
|
|
done = true;
|
|
return passed;
|
|
}
|
|
};
|