From c4b68d5ad2952ea7b3586d19c7ac89cc62bab170 Mon Sep 17 00:00:00 2001
From: jun <83899451+zeichensystem@users.noreply.github.com>
Date: Fri, 9 May 2025 15:12:07 +0200
Subject: [PATCH] Add guf_tok test

---
 src/test/test_str.hpp | 56 +++++++++++++++++++++++++++++++++++++++++++
 todo.txt              |  2 ++
 2 files changed, 58 insertions(+)

diff --git a/src/test/test_str.hpp b/src/test/test_str.hpp
index 0cf08d8..ae37079 100644
--- a/src/test/test_str.hpp
+++ b/src/test/test_str.hpp
@@ -177,6 +177,30 @@ private:
 
         return result;
     }
+
+    std::vector<std::string_view> get_toks(std::string_view sv_in, const std::vector<std::string_view>& delims_in, bool preserve_delims = false, guf_str_tok_delim_opt opt = GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST)
+    {
+        const guf_str_view sv = guf_str_view{.len = (ptrdiff_t)sv_in.size(), .str = sv_in.data()};
+        std::vector<guf_str_view> delims;
+        for (const auto delim : delims_in) {
+            delims.push_back(guf_str_view{.len = (ptrdiff_t)delim.size(), .str = delim.data()});
+        }
+        guf_str_tok_state tok_state = guf_str_tok_state_new(sv, delims.data(), std::ssize(delims), opt);
+
+        std::vector<std::string_view> toks_out;
+        while (guf_str_tok_next(&tok_state, preserve_delims)) {
+            if (tok_state.cur_tok.len > 0) {
+                toks_out.push_back( std::string_view{tok_state.cur_tok.str, (size_t)tok_state.cur_tok.len});
+            }
+            if (preserve_delims && tok_state.cur_delim.len > 0) {
+                toks_out.push_back( std::string_view{tok_state.cur_delim.str, (size_t)tok_state.cur_delim.len});
+            }
+        }
+        TEST_CHECK(tok_state.done);
+        const ptrdiff_t num_toks = preserve_delims ? tok_state.num_delims_read + tok_state.num_toks_read : tok_state.num_toks_read;
+        TEST_CHECK(num_toks == std::ssize(toks_out));
+        return toks_out;
+    }
 
 public:
     bool run()
@@ -313,6 +337,38 @@ bool run()
             TEST_CHECK(split.at(0) == "At 3 a.m. \nduring FULL-moon " && split.at(1) == " Next to the public-library's -STOP sign " &&
                        split.at(2) == "I like trains, FULL-STO Poo");
         }
+        std::vector<std::string_view> tok_words = {"hello", "world", "cat", "vertex", "normal", "pizza", "running", "mouse", "playing", "adjacent"};
+        std::vector<std::string_view> delims = {",", " ", "\n", "\t", "\r"};
+
+        for (int is_trailing = 0; is_trailing < 2; ++is_trailing) {
+            for (ptrdiff_t num_words = 1; num_words < std::ssize(tok_words); ++num_words) {
+                std::string str = "";
+                for (ptrdiff_t j = 0; j < num_words; ++j) {
+                    str += tok_words.at(j);
+                    if (j < num_words - 1 || is_trailing) {
+                        str += ", ";
+                    }
+                }
+                std::vector<std::string_view> toks = get_toks(std::string_view{str}, delims, false, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST);
+                if (TEST_CHECK(std::ssize(toks) == num_words)) {
+                    for (ptrdiff_t i = 0; i < num_words; ++i) {
+                        TEST_CHECK(toks.at(i) == tok_words.at(i));
+                    }
+                }
+            }
+        }
+
+        std::string_view tok_str = "<age>: 28, <occupation>: NULL, <crayons_eaten>: 256 "; // NOTE(review): '<'/'>' chars were stripped from this literal in transit; reconstructed -- confirm against original
+        delims = {"<", "<<", ":", ",", " ", "\t", ">>", "<>"}; // NOTE(review): angle-bracket delimiters reconstructed -- confirm against original
+        std::vector<std::string_view> tok_result = get_toks(tok_str, delims, true, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST);
+        TEST_CHECK(tok_result.size() == 20); // NOTE(review): original size assertion lost in transit; 20 derived from tok_str/delims above -- confirm
+        TEST_CHECK(tok_result.at(0) == "<" && tok_result.at(1) == "age>" && tok_result.at(2) == ":" && tok_result.at(3) == " " && tok_result.at(4) == "28");
+
+        tok_result = get_toks(tok_str, delims, false, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST);
+        TEST_CHECK(tok_result.size() == 6);
+        TEST_CHECK(tok_result.at(0) == "age>" && tok_result.at(1) == "28" && tok_result.at(2) == "occupation>" && tok_result.at(3) == "NULL" &&
+                   tok_result.at(4) == "crayons_eaten>" && tok_result.at(5) == "256");
+
         done = true;
 
         passed = (num_failed_checks == 0);
diff --git a/todo.txt b/todo.txt
index 79eca7f..67731ab 100644
--- a/todo.txt
+++ b/todo.txt
@@ -1,5 +1,7 @@
 - sort: add cpp #ifdef to remove restrict from declaration
 
+- separate impl and headers from tests (for compile perf)
+
 - tests for guf_dict with GUF_DICT_64_BIT_IDX (and also hash32/hash64); maybe pass kv_type to insert to avoid copy
 - dict elems shrink to fit; allow to pass GUF_DBUF_USE_GROWTH_FAC_ONE_POINT_FIVE; start capacity (for elems and kv_indices?)
 - dict: if load factor is high due to mostly tombstones, just try rehashing without resizing first?