117 lines
3.7 KiB
C++
117 lines
3.7 KiB
C++
#pragma once
|
|
#include <unordered_map>
|
|
#include "test.hpp"
|
|
|
|
extern "C"
|
|
{
|
|
#include "guf_alloc_libc.h"
|
|
#include "guf_dict_impl.h"
|
|
#include "guf_utf8.h"
|
|
#include "guf_str.h"
|
|
}
|
|
|
|
struct DictCstrToIntTest : public Test
|
|
{
|
|
|
|
DictCstrToIntTest(const std::string& name) : Test(name) {};
|
|
|
|
private:
|
|
|
|
dbuf_char text_buf {};
|
|
std::vector<char> text_vec {};
|
|
|
|
void insert_lookup()
|
|
{
|
|
std::unordered_map<std::string, int> word_cnt_map {};
|
|
dict_cstr_int word_cnt_dict {};
|
|
dict_cstr_int_init(&word_cnt_dict, &guf_allocator_libc);
|
|
|
|
dbuf_str_view delims = dbuf_str_view_new(&guf_allocator_libc);
|
|
for (size_t i = 0; i < GUF_STATIC_BUF_SIZE(guf_utf8_whitespace); ++i) {
|
|
guf_str_view d = {.len = (ptrdiff_t)strlen(guf_utf8_whitespace[i]), .str = guf_utf8_whitespace[i]};
|
|
dbuf_str_view_push_val(&delims, d);
|
|
}
|
|
for (size_t i = 0; i < GUF_STATIC_BUF_SIZE(guf_utf8_punctuation); ++i) {
|
|
guf_str_view d = {.len = (ptrdiff_t)strlen(guf_utf8_punctuation[i]), .str = guf_utf8_punctuation[i]};
|
|
dbuf_str_view_push_val(&delims, d);
|
|
}
|
|
guf_str_view input_str = {.str = text_buf.data, .len = text_buf.size};
|
|
guf_str_view tok;
|
|
while ((tok = guf_str_next_tok(&input_str, delims.data, delims.size, NULL, -1)).len) {
|
|
// printf("tok_len: %td ", tok.len);
|
|
printf("'%.*s'\n", (int)tok.len, tok.str);
|
|
}
|
|
dbuf_str_view_free(&delims, NULL);
|
|
|
|
// ptrdiff_t valid_chars = 0, invalid_chars = 0, bytes = 0;
|
|
// guf_utf8_char ch = {};
|
|
// for (guf_utf8_stat stat = guf_utf8_char_next(&ch, &input_str); stat != GUF_UTF8_READ_DONE; stat = guf_utf8_char_next(&ch, &input_str)) {
|
|
// if (stat == GUF_UTF8_READ_VALID) {
|
|
// ++valid_chars;
|
|
// printf("%s", ch.bytes);
|
|
// } else {
|
|
// ++invalid_chars;
|
|
// printf("::INVALID_UTF8_CHAR::");
|
|
// }
|
|
// bytes += guf_utf8_char_num_bytes(&ch);
|
|
// }
|
|
// TEST_CHECK(input_str.len == 0 && input_str.str == NULL);
|
|
// printf("\nread %td bytes\n", bytes);
|
|
// printf("read %td valid and %td invalid utf-8 characters\n", valid_chars, invalid_chars);
|
|
|
|
dict_cstr_int_free(&word_cnt_dict, NULL);
|
|
bool dbuf_null = !word_cnt_dict.kv_elems.data && !word_cnt_dict.kv_elems.allocator && !word_cnt_dict.kv_elems.capacity && !word_cnt_dict.kv_elems.size;
|
|
TEST_CHECK(!dbuf_null && !word_cnt_dict.kv_indices && !word_cnt_dict.kv_indices_cap && !word_cnt_dict.max_probelen && !word_cnt_dict.num_tombstones);
|
|
}
|
|
|
|
bool load_file()
|
|
{
|
|
FILE *in_file {nullptr};
|
|
if (!in_file) {
|
|
in_file = fopen(TEST_DATA_DIR "/utf8-test.txt", "r");
|
|
}
|
|
|
|
if (!in_file) {
|
|
return false;
|
|
}
|
|
|
|
dbuf_char_init(&text_buf, 128, &guf_allocator_libc);
|
|
|
|
int c = EOF;
|
|
while ((c = fgetc(in_file)) != EOF) {
|
|
dbuf_char_push_val(&text_buf, (char)c);
|
|
text_vec.push_back((char)c);
|
|
}
|
|
fclose(in_file);
|
|
|
|
// dbuf_char_insert_val(&text_buf, '\xC0', 1);
|
|
// text_vec.insert(text_vec.cbegin() + 1, '\xC0');
|
|
|
|
return TEST_CHECK(std::ssize(text_vec) == text_buf.size);
|
|
}
|
|
|
|
public:
|
|
|
|
bool run() override
|
|
{
|
|
if (done) {
|
|
return passed;
|
|
}
|
|
|
|
if (!TEST_CHECK(load_file())) {
|
|
goto end;
|
|
}
|
|
|
|
insert_lookup();
|
|
|
|
end:
|
|
dbuf_char_free(&text_buf, NULL);
|
|
text_buf = {};
|
|
|
|
passed = (num_failed_checks == 0);
|
|
done = true;
|
|
|
|
return passed;
|
|
}
|
|
};
|