libguf/src/test/test_dict.hpp
2025-03-02 18:25:54 +01:00

100 lines
2.9 KiB
C++

#pragma once
#include <unordered_map>
#include "test.hpp"
extern "C"
{
#include "guf_alloc_libc.h"
#include "guf_dict_impl.h"
#include "guf_str.h"
}
struct DictCstrToIntTest : public Test
{
DictCstrToIntTest(const std::string& name) : Test(name) {};
private:
dbuf_char text_buf {};
std::vector<char> text_vec {};
void insert_lookup()
{
std::unordered_map<std::string, int> word_cnt_map {};
dict_cstr_int word_cnt_dict {};
dict_cstr_int_init(&word_cnt_dict, &guf_allocator_libc);
dbuf_str_view delims = dbuf_str_view_new(&guf_allocator_libc);
for (size_t i = 0; i < GUF_STATIC_BUF_SIZE(GUF_UTF8_WHITESPACE); ++i) {
guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_WHITESPACE[i]), .str = GUF_UTF8_WHITESPACE[i]};
dbuf_str_view_push_val(&delims, d);
}
for (size_t i = 0; i < GUF_STATIC_BUF_SIZE(GUF_UTF8_COMMON_PUNCT); ++i) {
guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_COMMON_PUNCT[i]), .str = GUF_UTF8_COMMON_PUNCT[i]};
dbuf_str_view_push_val(&delims, d);
}
guf_str_view input_str = {.str = text_buf.data, .len = text_buf.size};
guf_str_view tok;
while ((tok = guf_str_next_tok(&input_str, delims.data, delims.size, NULL, -1)).len) {
// printf("tok_len: %td ", tok.len);
// printf("'%.*s'\n", (int)tok.len, tok.str);
}
dbuf_str_view_free(&delims, NULL);
dict_cstr_int_free(&word_cnt_dict, NULL);
bool dbuf_null = !word_cnt_dict.kv_elems.data && !word_cnt_dict.kv_elems.allocator && !word_cnt_dict.kv_elems.capacity && !word_cnt_dict.kv_elems.size;
TEST_CHECK(!dbuf_null && !word_cnt_dict.kv_indices && !word_cnt_dict.kv_indices_cap && !word_cnt_dict.max_probelen && !word_cnt_dict.num_tombstones);
}
bool load_file()
{
FILE *in_file {nullptr};
if (!in_file) {
in_file = fopen(TEST_DATA_DIR "/utf8-test.txt", "r");
}
if (!in_file) {
return false;
}
dbuf_char_init(&text_buf, 128, &guf_allocator_libc);
int c = EOF;
while ((c = fgetc(in_file)) != EOF) {
dbuf_char_push_val(&text_buf, (char)c);
text_vec.push_back((char)c);
}
fclose(in_file);
// dbuf_char_insert_val(&text_buf, '\xC0', 1);
// text_vec.insert(text_vec.cbegin() + 1, '\xC0');
return TEST_CHECK(std::ssize(text_vec) == text_buf.size);
}
public:
bool run() override
{
if (done) {
return passed;
}
if (!TEST_CHECK(load_file())) {
goto end;
}
insert_lookup();
end:
dbuf_char_free(&text_buf, NULL);
text_buf = {};
passed = (num_failed_checks == 0);
done = true;
return passed;
}
};