Add guf_tok test

jun 2025-05-09 15:12:07 +02:00
parent 864bd70ece
commit c4b68d5ad2
2 changed files with 58 additions and 0 deletions


@@ -177,6 +177,30 @@ private:
return result;
}
// Helper: wraps the C guf_str_tok API. Splits sv_in on any of the given delimiters and
// returns the tokens (and, if preserve_delims is set, the delimiters) as string_views.
std::vector<std::string_view> get_toks(std::string_view sv_in, const std::vector<std::string_view>& delims_in, bool preserve_delims = false, guf_str_tok_delim_opt opt = GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST)
{
    const guf_str_view sv = guf_str_view{.len = (ptrdiff_t)sv_in.size(), .str = sv_in.data()};
    std::vector<guf_str_view> delims;
    for (const auto delim : delims_in) {
        delims.push_back(guf_str_view{.len = (ptrdiff_t)delim.size(), .str = delim.data()});
    }
    guf_str_tok_state tok_state = guf_str_tok_state_new(sv, delims.data(), std::ssize(delims), opt);
    std::vector<std::string_view> toks_out;
    while (guf_str_tok_next(&tok_state, preserve_delims)) {
        if (tok_state.cur_tok.len > 0) {
            toks_out.push_back(std::string_view{tok_state.cur_tok.str, (size_t)tok_state.cur_tok.len});
        }
        if (preserve_delims && tok_state.cur_delim.len > 0) {
            toks_out.push_back(std::string_view{tok_state.cur_delim.str, (size_t)tok_state.cur_delim.len});
        }
    }
    // The tokenizer must have consumed the whole input, and its own counters
    // must agree with the number of tokens we collected.
    TEST_CHECK(tok_state.done);
    const ptrdiff_t num_toks = preserve_delims ? tok_state.num_delims_read + tok_state.num_toks_read : tok_state.num_toks_read;
    TEST_CHECK(num_toks == std::ssize(toks_out));
    return toks_out;
}
public:
bool run()
@@ -313,6 +337,38 @@
TEST_CHECK(split.at(0) == "At 3 a.m. during FULL-moon " && split.at(1) == " Next to the public-library's -STOP sign " && split.at(2) == "I like trains, FULL-STO Poo");
}
std::vector<std::string_view> tok_words = {"hello", "world", "cat", "vertex", "normal", "pizza", "running", "mouse", "playing", "adjacent"};
std::vector<std::string_view> delims = {",", " ", "\n", "\t", "\r"};
// Build ", "-joined word lists of increasing length (with and without a trailing
// delimiter) and check that tokenizing them recovers exactly the original words.
for (int is_trailing = 0; is_trailing < 2; ++is_trailing) {
    for (ptrdiff_t num_words = 1; num_words < std::ssize(tok_words); ++num_words) {
        std::string str = "";
        for (ptrdiff_t j = 0; j < num_words; ++j) {
            str += tok_words.at(j);
            if (j < num_words - 1 || is_trailing) {
                str += ", ";
            }
        }
        std::vector<std::string_view> toks = get_toks(std::string_view{str}, delims, false, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST);
        if (TEST_CHECK(std::ssize(toks) == num_words)) {
            for (ptrdiff_t i = 0; i < num_words; ++i) {
                TEST_CHECK(toks.at(i) == tok_words.at(i));
            }
        }
    }
}
// Multi-character delimiters with longest-match selection, tokenized once while
// preserving the delimiters in the output and once while discarding them.
std::string_view tok_str = "<stats>age: 28, occupation: NULL, crayons_eaten: 256 </stats>";
delims = {"<stats>", "</stats>", ":", ",", " ", "\t", "<stats", "<", ">", "</"};
auto tok_result = get_toks(tok_str, delims, true, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST);
TEST_CHECK(tok_result.size() == 19);
TEST_CHECK(tok_result.at(18) == "</stats>" && tok_result.at(0) == "<stats>" && tok_result.at(1) == "age" && tok_result.at(2) == ":" && tok_result.at(3) == " " && tok_result.at(4) == "28");
tok_result = get_toks(tok_str, delims, false, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST);
TEST_CHECK(tok_result.size() == 6);
TEST_CHECK(tok_result.at(0) == "age" && tok_result.at(1) == "28" && tok_result.at(2) == "occupation" && tok_result.at(3) == "NULL" &&
           tok_result.at(4) == "crayons_eaten" && tok_result.at(5) == "256");
done = true;
passed = (num_failed_checks == 0);
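
For reference, a minimal standalone sketch of driving the C tokenizer API that the get_toks helper above wraps. The header name "guf_str.h" and the demo strings are assumptions; the type, function, and enum names are the ones exercised by the test code in this commit.

// Usage sketch (not part of this commit): tokenize a string with the guf_str_tok C API.
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include "guf_str.h" /* assumed header providing guf_str_view and guf_str_tok_* */

int main(void)
{
    const char* input = "hello, world, cat";
    const guf_str_view sv = {.len = (ptrdiff_t)strlen(input), .str = input};
    guf_str_view delims[] = {
        {.len = 1, .str = ","},
        {.len = 1, .str = " "},
    };
    guf_str_tok_state st = guf_str_tok_state_new(sv, delims, 2, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST);
    while (guf_str_tok_next(&st, false)) {                     /* false: do not emit the delimiters */
        if (st.cur_tok.len > 0) {
            printf("%.*s\n", (int)st.cur_tok.len, st.cur_tok.str); /* prints hello, world, cat */
        }
    }
    /* st.done is set once the whole input has been consumed; st.num_toks_read
       counts the tokens produced, as checked by the helper above. */
    return 0;
}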


@@ -1,5 +1,7 @@
- sort: add cpp #ifdef to remove restrict from declaration (see the sketch after this list)
- separate impl and headers from tests (for compile perf)
- tests for guf_dict with GUF_DICT_64_BIT_IDX (and also hash32/hash64); maybe pass kv_type to insert to avoid copy
- dict elems shrink to fit; allow to pass GUF_DBUF_USE_GROWTH_FAC_ONE_POINT_FIVE; start capacity (for elems and kv_indices?)
- dict: if load factor is high due to mostly tombstones, just try rehashing without resizing first?
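
For the first item above (the restrict #ifdef), the usual pattern looks roughly like the sketch below; the macro name GUF_RESTRICT and the example prototype are hypothetical placeholders, not taken from the repository.

/* Sketch only: `restrict` is a C keyword that does not exist in C++, so a header
   shared between the two languages typically hides it behind a macro. */
#include <stddef.h>

#ifdef __cplusplus
    #define GUF_RESTRICT
#else
    #define GUF_RESTRICT restrict
#endif

/* Hypothetical declaration showing where the macro would replace `restrict`. */
void guf_sort(void* GUF_RESTRICT elems, ptrdiff_t num_elems, ptrdiff_t elem_size);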