From c4f3616b23e0060a22c0ec929ded071f4756344d Mon Sep 17 00:00:00 2001 From: jun <83899451+zeichensystem@users.noreply.github.com> Date: Tue, 13 May 2025 17:36:08 +0200 Subject: [PATCH] Refactor tests --- CMakeLists.txt | 4 +- src/test/test.cpp | 16 +- src/test/test.hpp | 14 +- src/test/test_dbuf.cpp | 523 ++++++++++++++++++++++++++++++++++++++++ src/test/test_dbuf.hpp | 527 +---------------------------------------- src/test/test_dict.cpp | 368 ++++++++++++++++++++++++++++ src/test/test_dict.hpp | 369 +---------------------------- src/test/test_str.cpp | 376 +++++++++++++++++++++++++++++ src/test/test_str.hpp | 378 +---------------------------- src/test/test_utf8.cpp | 388 ++++++++++++++++++++++++++++++ src/test/test_utf8.hpp | 387 +----------------------------- 11 files changed, 1712 insertions(+), 1638 deletions(-) create mode 100644 src/test/test_dbuf.cpp create mode 100644 src/test/test_dict.cpp create mode 100644 src/test/test_str.cpp create mode 100644 src/test/test_utf8.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 9677ebf..672c22d 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ endif () if (NOT DEFINED MSVC) set(WARNING_FLAGS_C -Wall -Wextra -Wpedantic -Wvla -Wshadow -Wundef -Wmisleading-indentation -Wnull-dereference -Wswitch-default -Wconversion -Wno-sign-conversion -Wdouble-promotion -Wno-unused-function) - set(WARNING_FLAGS_CXX -Wall -Wextra -Wpedantic -Wvla -Wshadow -Wundef -Wmisleading-indentation -Wnull-dereference -Wshadow -Wundef -Wstrict-overflow=5 -Wsign-promo -Wcast-align -Wcast-qual -Woverloaded-virtual -Wredundant-decls -Wctor-dtor-privacy -Wdisabled-optimization -Wformat=2 -Winit-self -Wswitch-default -Wno-unused-function) + set(WARNING_FLAGS_CXX -Wall -Wextra -Wpedantic -Wvla -Wshadow -Wundef -Wmisleading-indentation -Wnull-dereference -Wshadow -Wundef -Wstrict-overflow=5 -Wsign-promo -Wcast-align -Wcast-qual -Woverloaded-virtual -Wredundant-decls -Wctor-dtor-privacy -Wdisabled-optimization -Wformat=2 -Winit-self -Wswitch-default -Wno-unused-function) set(DBG_FLAGS -fsanitize=undefined,address -g3 -glldb -Og) else () set(WARNING_FLAGS_C /W4) @@ -30,7 +30,7 @@ endif () add_executable(libguf_example src/test/example.c src/test/impls/str_impl.c src/test/impls/dict_impl.c src/test/impls/linalg_impl.c) target_include_directories(libguf_example PRIVATE src src/test) -add_executable(libguf_test src/test/test.cpp src/test/impls/init_impl.c src/test/impls/dbuf_impl.c src/test/impls/str_impl.c src/test/impls/dict_impl.c src/test/impls/rand_impl.c src/test/impls/sort_impl.c src/test/impls/linalg_impl.c) +add_executable(libguf_test src/test/test.cpp src/test/test_dbuf.cpp src/test/test_dict.cpp src/test/test_str.cpp src/test/test_utf8.cpp src/test/impls/init_impl.c src/test/impls/dbuf_impl.c src/test/impls/str_impl.c src/test/impls/dict_impl.c src/test/impls/rand_impl.c src/test/impls/sort_impl.c src/test/impls/linalg_impl.c) target_include_directories(libguf_test PRIVATE src src/test) set_target_properties(libguf_example libguf_test PROPERTIES DEBUG_POSTFIX ${CMAKE_DEBUG_POSTFIX}) diff --git a/src/test/test.cpp b/src/test/test.cpp index 5a4b6a5..634f6f1 100644 --- a/src/test/test.cpp +++ b/src/test/test.cpp @@ -3,19 +3,19 @@ #include #include -extern "C" { - #include "guf_assert.h" - #include "guf_math.h" -} - #include "test_dbuf.hpp" #include "test_dict.hpp" #include "test_utf8.hpp" #include "test_str.hpp" +extern "C" +{ + #include "guf_assert.h" + #include "guf_math.h" +} -std::unordered_set> g_tests {}; +static std::unordered_set> g_tests {}; -void init_tests() +static void init_tests() { std::unique_ptr test = std::make_unique("DbufIntTest"); GUF_ASSERT_RELEASE(test.get()); @@ -70,4 +70,4 @@ int main() } return passed_all ? EXIT_SUCCESS : EXIT_FAILURE; -} \ No newline at end of file +} diff --git a/src/test/test.hpp b/src/test/test.hpp index c4528c5..e776aa0 100644 --- a/src/test/test.hpp +++ b/src/test/test.hpp @@ -7,19 +7,19 @@ #include #include #include - -#include "guf_common.h" +extern "C" { + #include "guf_common.h" + #include "guf_assert.h" +} #define TEST_CHECK(COND) (check((COND), GUF_STRINGIFY(COND), __LINE__, __FILE__)) struct Test { private: - std::chrono::steady_clock::time_point time_start, time_end; protected: - std::stack check_name_stack; std::string full_check_name = ""; @@ -50,7 +50,6 @@ protected: } public: - const std::string name {}; std::chrono::duration runtime_ms {0}; bool passed {false}, done {false}; @@ -64,7 +63,7 @@ public: return num_passed_checks + num_failed_checks; } - virtual bool run() = 0; + virtual void run() = 0; void before_run() { @@ -73,6 +72,9 @@ public: void after_run() { + done = true; + passed = (num_failed_checks == 0); + time_end = std::chrono::high_resolution_clock::now(); runtime_ms = std::chrono::duration_cast(time_end - time_start); } diff --git a/src/test/test_dbuf.cpp b/src/test/test_dbuf.cpp new file mode 100644 index 0000000..7a089a8 --- /dev/null +++ b/src/test/test_dbuf.cpp @@ -0,0 +1,523 @@ +#include "test_dbuf.hpp" + +extern "C" +{ + #include "guf_alloc_libc.h" + #include "impls/dbuf_impl.h" +} + +/* + DbufIntTest +*/ + +void DbufIntTest::run() +{ + if (done) { + return; + } + + dbuf_int dbuf {}; + dbuf_int_init(&dbuf, 0, &guf_allocator_libc); + + push_check_name("test_push"); + + test_push(&dbuf, 256); + test_push(&dbuf, 128); + test_push(&dbuf, 17); + TEST_CHECK(dbuf.size == (256 + 128 + 17)); + + dbuf_int_free(&dbuf, NULL); + TEST_CHECK(dbuf.size == 0 && dbuf.capacity == 0 && dbuf.data == NULL); + + dbuf_int_init(&dbuf, 24, &guf_allocator_libc); + TEST_CHECK(dbuf.size == 0 && dbuf.capacity == 24 && dbuf.data); + + test_push(&dbuf, 365); + test_push(&dbuf, 4); + test_push(&dbuf, 25); + test_push(&dbuf, 64); + TEST_CHECK(dbuf.size == (365 + 4 + 25 + 64)); + + dbuf_int_free(&dbuf, NULL); + TEST_CHECK(dbuf.size == 0 && dbuf.capacity == 0 && dbuf.data == NULL); + + pop_check_name(); + + push_check_name("insert_remove"); + + for (int n = 0; n <= 128; ++n) { + test_insert_remove(n); + } + test_insert_remove(400); + test_insert_remove(401); + test_insert_remove(512); + test_insert_remove(513); + test_insert_remove(601); + test_insert_remove(2048); + test_insert_remove(2049); + + pop_check_name(); +} + +std::vector DbufIntTest::dbuf_to_vec(dbuf_int *dbuf) +{ + std::vector vec; + GUF_CNT_FOREACH(dbuf, dbuf_int, it) { + vec.push_back(*it.ptr); + } + return vec; +} + +void DbufIntTest::test_push(dbuf_int *dbuf, int n) +{ + std::vector vec = dbuf_to_vec(dbuf); + + TEST_CHECK(std::ssize(vec) == dbuf->size); + + for (int i = 0; i < n; ++i) { + dbuf_int_push_val(dbuf, i); + vec.push_back(i); + TEST_CHECK(*dbuf_int_back(dbuf) == vec.back()); + } + + ptrdiff_t i = 0; + GUF_CNT_FOREACH(dbuf, dbuf_int, it) { + TEST_CHECK(*it.ptr == vec.at(i++)); + } + TEST_CHECK(i == dbuf->size); + + i = dbuf->size - 1; + GUF_CNT_FOREACH_REVERSE(dbuf, dbuf_int, rit) { + TEST_CHECK(*rit.ptr == vec.at(i--)); + } + TEST_CHECK(i == -1); + +} + +void DbufIntTest::test_insert_remove(int n) +{ + dbuf_int dbuf = {}; + dbuf_int_init(&dbuf, 0, &guf_allocator_libc); + std::vector vec = dbuf_to_vec(&dbuf); + + guf_err err = GUF_ERR_NONE; + dbuf_int_try_erase(&dbuf, 0, &err); + TEST_CHECK(err == GUF_ERR_IDX_RANGE); + + err = GUF_ERR_NONE; + dbuf_int_try_erase(&dbuf, 12, &err); + TEST_CHECK(err == GUF_ERR_IDX_RANGE); + + err = GUF_ERR_NONE; + dbuf_int_try_front(&dbuf, &err); + TEST_CHECK(err == GUF_ERR_IDX_RANGE); + + err = GUF_ERR_NONE; + dbuf_int_try_back(&dbuf, &err); + TEST_CHECK(err == GUF_ERR_IDX_RANGE); + + err = GUF_ERR_NONE; + dbuf_int_try_at(&dbuf, 0, &err); + TEST_CHECK(err == GUF_ERR_IDX_RANGE); + + for (int i = 0; i < n; ++i) { + dbuf_int_insert_val(&dbuf, i, i); + dbuf_int_insert_val(&dbuf, i * 2, 0); + dbuf_int_insert_val(&dbuf, i * 4, dbuf.size); + + vec.insert(vec.begin() + i, i); + vec.insert(vec.begin(), i * 2); + vec.insert(vec.end(), i * 4); + } + TEST_CHECK(std::ssize(vec) == dbuf.size); + + // Iterate + dbuf_int_iter it_dbuf = dbuf_int_begin(&dbuf); + std::vector::const_iterator it_vec = vec.begin(); + while (!dbuf_int_iter_is_end(&dbuf, it_dbuf) && it_vec != vec.end()) { + TEST_CHECK(*it_dbuf.ptr == *it_vec); + it_dbuf = dbuf_int_iter_next(&dbuf, it_dbuf, 1); + std::advance(it_vec, 1); + } + TEST_CHECK(dbuf_int_iter_is_end(&dbuf, it_dbuf) && it_vec == vec.end()); + + // Step iterate. + it_dbuf = dbuf_int_begin(&dbuf); + it_vec = vec.begin(); + while (!dbuf_int_iter_is_end(&dbuf, it_dbuf) && it_vec != vec.end()) { + TEST_CHECK(*it_dbuf.ptr == *it_vec); + it_dbuf = dbuf_int_iter_next(&dbuf, it_dbuf, 7); + + if (dbuf_int_iter_is_end(&dbuf, it_dbuf)) { + it_vec = vec.end(); + } else { + std::advance(it_vec, 7); + } + } + TEST_CHECK(dbuf_int_iter_is_end(&dbuf, it_dbuf) && it_vec == vec.end()); + + // Reverse iterate. + dbuf_int_iter rit_dbuf = dbuf_int_rbegin(&dbuf); + std::vector::const_reverse_iterator rit_vec = vec.crbegin(); + while (!dbuf_int_iter_is_end(&dbuf, rit_dbuf) && rit_vec != vec.crend()) { + TEST_CHECK(*rit_dbuf.ptr == *rit_vec); + rit_dbuf = dbuf_int_iter_next(&dbuf, rit_dbuf, 1); + std::advance(rit_vec, 1); + } + TEST_CHECK(dbuf_int_iter_is_end(&dbuf, rit_dbuf) && rit_vec == vec.rend()); + + // Reverse iterate step. + rit_dbuf = dbuf_int_rbegin(&dbuf); + rit_vec = vec.crbegin(); + while (!dbuf_int_iter_is_end(&dbuf, rit_dbuf) && rit_vec != vec.crend()) { + TEST_CHECK(*rit_dbuf.ptr == *rit_vec); + rit_dbuf = dbuf_int_iter_next(&dbuf, rit_dbuf, 4); + if (dbuf_int_iter_is_end(&dbuf, rit_dbuf)) { + rit_vec = vec.rend(); + } else { + std::advance(rit_vec, 4); + } + } + TEST_CHECK(dbuf_int_iter_is_end(&dbuf, rit_dbuf) && rit_vec == vec.rend()); + + TEST_CHECK(dbuf.size == std::ssize(vec)); + + for (ptrdiff_t i = 0; i < dbuf.size; i += 8) { + dbuf_int_erase(&dbuf, i); + dbuf_int_erase(&dbuf, 0); + dbuf_int_pop(&dbuf); + + vec.erase(vec.begin() + i); + vec.erase(vec.begin() + 0); + vec.pop_back(); + } + + TEST_CHECK(dbuf.size == std::ssize(vec)); + + for (ptrdiff_t i = 0; i < dbuf.size; i += 8) { + TEST_CHECK(*dbuf_int_at(&dbuf, i) == vec.at(i)); + } + + const ptrdiff_t size = dbuf.size; + for (ptrdiff_t i = 0; i < size; ++i) { + int a = dbuf_int_pop_move(&dbuf); + int b = vec.back(); + TEST_CHECK(a == b); + vec.pop_back(); + } + TEST_CHECK(dbuf.size == 0 && vec.size() == 0); + + dbuf_int_free(&dbuf, NULL); + TEST_CHECK(dbuf.size == 0 && dbuf.capacity == 0 && !dbuf.data); +} + + +/* + DbufCstringTest +*/ + +void DbufCstringTest::run() +{ + if (done) { + return; + } + + push_check_name("push_insert_erase"); + + for (int i = 1; i <= 32; ++i) { + test_push_insert_erase(i); + test_push_insert_erase(i, i - 1); + test_push_insert_erase(i, i + 1); + test_push_insert_erase(i, i); + test_push_insert_erase(i, i / 2); + } + test_push_insert_erase(2048); + test_push_insert_erase(2048, 11); + + dbuf_heap_cstr str_dbuf = {}; + dbuf_heap_cstr_init(&str_dbuf, 0, &guf_allocator_libc); + std::vector str_vec {}; + + for (int i = 0; i < 512; ++i) { + char buf[128]; + memset(buf, '\0', GUF_ARR_SIZE(buf)); + snprintf(buf, GUF_ARR_SIZE(buf), "This is a pretty guf string (number %d)", i); + guf_cstr_heap str = buf; + dbuf_heap_cstr_push(&str_dbuf, &str, GUF_CPY_DEEP); + str_vec.push_back(std::string{buf}); + } + for (int i = 0; i < str_dbuf.size + 16; ++i) { + test_iter(str_vec, &str_dbuf, i); + } + + dbuf_heap_cstr_free(&str_dbuf, NULL); + TEST_CHECK(str_dbuf.size == 0 && str_dbuf.capacity == 0 && !str_dbuf.data); + + pop_check_name(); + + push_check_name("find"); + test_find(); + test_find(3); + test_find(42); + test_find(129); + pop_check_name(); +} + +void DbufCstringTest::test_iter(std::vector& str_vec, dbuf_heap_cstr *str_dbuf, int step) +{ + GUF_ASSERT_RELEASE(str_dbuf); + if (step <= 0) { + step = 1; + } + + ptrdiff_t i = 0; + GUF_CNT_FOREACH(str_dbuf, dbuf_heap_cstr, it) { + char *str = *it.ptr; + TEST_CHECK(str_vec.at(i) == str); + ++i; + } + TEST_CHECK(i == str_dbuf->size); + + i = str_dbuf->size - 1; + GUF_CNT_FOREACH_REVERSE(str_dbuf, dbuf_heap_cstr, rit) { + char *str = *rit.ptr; + TEST_CHECK(str_vec.at(i) == str); + --i; + } + TEST_CHECK(i == -1); + + dbuf_heap_cstr_iter it_dbuf = dbuf_heap_cstr_begin(str_dbuf); + std::vector::iterator it_vec = str_vec.begin(); + while (!dbuf_heap_cstr_iter_is_end(str_dbuf, it_dbuf)) { + TEST_CHECK(it_vec != str_vec.end()); + TEST_CHECK(*it_vec == *it_dbuf.ptr); + it_dbuf = dbuf_heap_cstr_iter_next(str_dbuf, it_dbuf, step); + if (!dbuf_heap_cstr_iter_is_end(str_dbuf, it_dbuf)) { + std::advance(it_vec, step); + } else { + it_vec = str_vec.end(); + } + } + TEST_CHECK(dbuf_heap_cstr_iter_is_end(str_dbuf, it_dbuf) && it_vec == str_vec.end()); + + dbuf_heap_cstr_iter rit_dbuf = dbuf_heap_cstr_rbegin(str_dbuf); + std::vector::reverse_iterator rit_vec = str_vec.rbegin(); + while (!dbuf_heap_cstr_iter_is_end(str_dbuf, rit_dbuf)) { + TEST_CHECK(rit_vec != str_vec.rend()); + TEST_CHECK(*rit_vec == *rit_dbuf.ptr); + rit_dbuf = dbuf_heap_cstr_iter_next(str_dbuf, rit_dbuf, step); + if (!dbuf_heap_cstr_iter_is_end(str_dbuf, rit_dbuf)) { + std::advance(rit_vec, step); + } else { + rit_vec = str_vec.rend(); + } + } + TEST_CHECK(dbuf_heap_cstr_iter_is_end(str_dbuf, rit_dbuf) && rit_vec == str_vec.rend()); + + for (i = 0; i < str_dbuf->size; ++i) { + char *str = *dbuf_heap_cstr_at(str_dbuf, i); + TEST_CHECK(str_vec.at(i) == str); + } +} + +void DbufCstringTest::test_push_insert_erase(int n, ptrdiff_t start_cap) +{ + std::vector str_vec; + dbuf_heap_cstr str_dbuf {}; + dbuf_heap_cstr_init(&str_dbuf, start_cap, &guf_allocator_libc); + + for (int i = 0; i < n; ++i) { + constexpr int BUF_SZ = 128; + char buf[BUF_SZ]; + memset(buf, '\0', BUF_SZ); + snprintf(buf, BUF_SZ, "This is string number %d", i); + guf_cstr_heap str = buf; + + dbuf_heap_cstr_push(&str_dbuf, &str, GUF_CPY_DEEP); + dbuf_heap_cstr_push_val_cpy(&str_dbuf, str); + char *heap_buf = strdup("Move me plz"); + dbuf_heap_cstr_push(&str_dbuf, &heap_buf, GUF_CPY_MOVE); + TEST_CHECK(heap_buf == NULL); + + TEST_CHECK(strncmp(*dbuf_heap_cstr_back(&str_dbuf), "Move me plz", BUF_SZ) == 0); + TEST_CHECK(strncmp(*dbuf_heap_cstr_at(&str_dbuf, str_dbuf.size - 2), buf, BUF_SZ) == 0); + TEST_CHECK(strncmp(*dbuf_heap_cstr_at(&str_dbuf, str_dbuf.size - 3), buf, BUF_SZ) == 0); + + str_vec.push_back(std::string{buf}); + str_vec.push_back(std::string{buf}); + str_vec.emplace_back("Move me plz"); + } + + TEST_CHECK(str_dbuf.size == std::ssize(str_vec)); + TEST_CHECK(str_dbuf.size == 3 * n); + + for (int i = 1; i <= 8; ++i) { + test_iter(str_vec, &str_dbuf, i); + } + test_iter(str_vec, &str_dbuf, str_dbuf.size); + test_iter(str_vec, &str_dbuf, str_dbuf.size - 1); + test_iter(str_vec, &str_dbuf, str_dbuf.size + 1); + + for (ptrdiff_t i = 0; i < str_dbuf.size; ++i) { + TEST_CHECK(str_vec.at(i) == *dbuf_heap_cstr_at(&str_dbuf, i)); + } + + // Insert front. + for (ptrdiff_t i = 0; i < 16; ++i) { + char str[] = "front"; + dbuf_heap_cstr_insert_val_cpy(&str_dbuf, str, 0); + str_vec.insert(str_vec.begin(), std::string{str}); + } + TEST_CHECK(std::ssize(str_vec) == str_dbuf.size); + for (ptrdiff_t i = 0; i < str_dbuf.size; ++i) { + TEST_CHECK(str_vec.at(i) == *dbuf_heap_cstr_at(&str_dbuf, i)); + } + // Insert back. + for (ptrdiff_t i = 0; i < 16; ++i) { + char str[] = "front"; + dbuf_heap_cstr_insert_val_cpy(&str_dbuf, str, str_dbuf.size); + str_vec.insert(str_vec.end(), std::string{str}); + } + TEST_CHECK(std::ssize(str_vec) == str_dbuf.size); + for (ptrdiff_t i = 0; i < str_dbuf.size; ++i) { + TEST_CHECK(str_vec.at(i) == *dbuf_heap_cstr_at(&str_dbuf, i)); + } + + // Insert at i. + char str[] = "guf"; + dbuf_heap_cstr_insert_val_cpy(&str_dbuf, str, str_dbuf.size / 2); + str_vec.insert(str_vec.begin() + str_vec.size() / 2, str); + dbuf_heap_cstr_insert_val_cpy(&str_dbuf, str, str_dbuf.size / 4); + str_vec.insert(str_vec.begin() + str_vec.size() / 4, str); + dbuf_heap_cstr_insert_val_cpy(&str_dbuf, str, 1); + str_vec.insert(str_vec.begin() + 1, str); + dbuf_heap_cstr_insert_val_cpy(&str_dbuf, str, str_dbuf.size - 1); + str_vec.insert(str_vec.begin() + (str_vec.size() - 1), str); + + for (ptrdiff_t i = 0; i < str_dbuf.size; ++i) { + TEST_CHECK(str_vec.at(i) == *dbuf_heap_cstr_at(&str_dbuf, i)); + } + + guf_err err = GUF_ERR_NONE; + dbuf_heap_cstr_try_insert_val_cpy(&str_dbuf, str, str_dbuf.size + 1, &err); + TEST_CHECK(err == GUF_ERR_IDX_RANGE); + + err = GUF_ERR_NONE; + dbuf_heap_cstr_try_insert_val_cpy(&str_dbuf, str, -1, &err); + TEST_CHECK(err == GUF_ERR_IDX_RANGE); + + err = GUF_ERR_NONE; + dbuf_heap_cstr_try_insert_val_cpy(&str_dbuf, str, str_dbuf.size + 2, &err); + TEST_CHECK(err == GUF_ERR_IDX_RANGE); + + TEST_CHECK(std::ssize(str_vec) == str_dbuf.size); + for (ptrdiff_t i = 0; i < str_dbuf.size; ++i) { + TEST_CHECK(str_vec.at(i) == *dbuf_heap_cstr_at(&str_dbuf, i)); + } + + if (str_dbuf.size) { + dbuf_heap_cstr_erase(&str_dbuf, str_dbuf.size - 1); + str_vec.erase(str_vec.end() - 1); + } + + ptrdiff_t to_rem = 8; + while (str_dbuf.size && to_rem--) { + dbuf_heap_cstr_erase(&str_dbuf, 0); + str_vec.erase(str_vec.begin()); + TEST_CHECK(std::ssize(str_vec) == str_dbuf.size); + if (str_dbuf.size) { + dbuf_heap_cstr_pop(&str_dbuf); + str_vec.pop_back(); + TEST_CHECK(std::ssize(str_vec) == str_dbuf.size); + } + if (str_dbuf.size) { + dbuf_heap_cstr_erase(&str_dbuf, str_dbuf.size / 2); + str_vec.erase(str_vec.begin() + (str_vec.size() / 2)); + TEST_CHECK(std::ssize(str_vec) == str_dbuf.size); + } + } + + dbuf_heap_cstr_free(&str_dbuf, NULL); + TEST_CHECK(str_dbuf.size == 0 && str_dbuf.capacity == 0 && !str_dbuf.data); +} + +void DbufCstringTest::test_find(int n) +{ + if (n < 2) { + n = 2; + } + std::vector str_vec {}; + + dbuf_heap_cstr str_dbuf = {}; + dbuf_heap_cstr_init(&str_dbuf, 0, &guf_allocator_libc); + + for (int i = 0; i < n; ++i) { + constexpr int BUF_SZ = 128; + char buf[BUF_SZ]; + memset(buf, '\0', BUF_SZ); + snprintf(buf, BUF_SZ, "String number %d", i); + + dbuf_heap_cstr_push_val_cpy(&str_dbuf, buf); + str_vec.push_back(buf); + } + char *move_me = strdup("Moved string"); + dbuf_heap_cstr_push(&str_dbuf, &move_me, GUF_CPY_MOVE); + GUF_ASSERT_RELEASE(move_me == NULL); + str_vec.emplace_back("Moved string"); + + TEST_CHECK(std::ssize(str_vec) == str_dbuf.size); + + for (ptrdiff_t i = 0; i < str_dbuf.size; ++i) { + char *needle = *dbuf_heap_cstr_at(&str_dbuf, i); + TEST_CHECK(str_vec.at(i) == needle); + + TEST_CHECK(dbuf_heap_cstr_contains_val(&str_dbuf, needle)); + + dbuf_heap_cstr_iter fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), dbuf_heap_cstr_end(&str_dbuf), needle); + TEST_CHECK(!dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it)); + TEST_CHECK(std::find(str_vec.cbegin(), str_vec.cend(), needle) != str_vec.end()); + + dbuf_heap_cstr_iter begin = dbuf_heap_cstr_iter_next(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), i); + dbuf_heap_cstr_iter end = dbuf_heap_cstr_end(&str_dbuf); + fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, begin, end, needle); + TEST_CHECK(!dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it)); + TEST_CHECK(std::find(str_vec.cbegin() + i, str_vec.cend(), needle) != str_vec.end()); + + begin = dbuf_heap_cstr_iter_next(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), i + 1); + end = dbuf_heap_cstr_end(&str_dbuf); + fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, begin, end, needle); + TEST_CHECK(dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it)); + TEST_CHECK(std::find(str_vec.cbegin() + i + 1, str_vec.cend(), needle) == str_vec.end()); + + // Reverse. + fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_rbegin(&str_dbuf), dbuf_heap_cstr_rend(&str_dbuf), needle); + TEST_CHECK(!dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it)); + TEST_CHECK(std::find(str_vec.crbegin(), str_vec.crend(), needle) != str_vec.rend()); + } + + char needle[] = "Definitely not inside"; + dbuf_heap_cstr_iter fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), dbuf_heap_cstr_end(&str_dbuf), needle); + TEST_CHECK(dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it)); + TEST_CHECK(std::find(str_vec.cbegin(), str_vec.cend(), needle) == str_vec.end()); + + fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_rbegin(&str_dbuf), dbuf_heap_cstr_rend(&str_dbuf), needle); + TEST_CHECK(dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it)); + TEST_CHECK(std::find(str_vec.crbegin(), str_vec.crend(), needle) == str_vec.rend()); + + char *needle2 = *dbuf_heap_cstr_at(&str_dbuf, 0); + fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_iter_next(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), 1), dbuf_heap_cstr_end(&str_dbuf), needle2); + TEST_CHECK(dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it)); + TEST_CHECK(std::find(str_vec.cbegin() + 1, str_vec.cend(), needle2) == str_vec.end()); + + needle2 = *dbuf_heap_cstr_back(&str_dbuf); + fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_iter_next(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), 1), dbuf_heap_cstr_iter_next(&str_dbuf, dbuf_heap_cstr_end(&str_dbuf), -1), needle2); + TEST_CHECK(dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it)); + TEST_CHECK(std::find(str_vec.begin(), str_vec.end() - 1, needle2) == (str_vec.end() - 1)); + + needle2 = *dbuf_heap_cstr_at(&str_dbuf, 0); + fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), dbuf_heap_cstr_begin(&str_dbuf), needle2); + TEST_CHECK(dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it)); + TEST_CHECK(std::find(str_vec.cbegin(), str_vec.cbegin(), needle2) == str_vec.cbegin()); + + dbuf_heap_cstr_free(&str_dbuf, NULL); +} diff --git a/src/test/test_dbuf.hpp b/src/test/test_dbuf.hpp index 14432da..2eb3db7 100644 --- a/src/test/test_dbuf.hpp +++ b/src/test/test_dbuf.hpp @@ -11,529 +11,22 @@ extern "C" struct DbufIntTest : public Test { DbufIntTest(const std::string& name) : Test(name) {}; + void run() override; - private: - - std::vector dbuf_to_vec(dbuf_int *dbuf) - { - std::vector vec; - GUF_CNT_FOREACH(dbuf, dbuf_int, it) { - vec.push_back(*it.ptr); - } - return vec; - } - - void test_push(dbuf_int *dbuf, int n) - { - std::vector vec = dbuf_to_vec(dbuf); - - TEST_CHECK(std::ssize(vec) == dbuf->size); - - for (int i = 0; i < n; ++i) { - dbuf_int_push_val(dbuf, i); - vec.push_back(i); - TEST_CHECK(*dbuf_int_back(dbuf) == vec.back()); - } - - ptrdiff_t i = 0; - GUF_CNT_FOREACH(dbuf, dbuf_int, it) { - TEST_CHECK(*it.ptr == vec.at(i++)); - } - TEST_CHECK(i == dbuf->size); - - i = dbuf->size - 1; - GUF_CNT_FOREACH_REVERSE(dbuf, dbuf_int, rit) { - TEST_CHECK(*rit.ptr == vec.at(i--)); - } - TEST_CHECK(i == -1); - - } - - void test_insert_remove(int n) - { - dbuf_int dbuf = {}; - dbuf_int_init(&dbuf, 0, &guf_allocator_libc); - std::vector vec = dbuf_to_vec(&dbuf); - - guf_err err = GUF_ERR_NONE; - dbuf_int_try_erase(&dbuf, 0, &err); - TEST_CHECK(err == GUF_ERR_IDX_RANGE); - - err = GUF_ERR_NONE; - dbuf_int_try_erase(&dbuf, 12, &err); - TEST_CHECK(err == GUF_ERR_IDX_RANGE); - - err = GUF_ERR_NONE; - dbuf_int_try_front(&dbuf, &err); - TEST_CHECK(err == GUF_ERR_IDX_RANGE); - - err = GUF_ERR_NONE; - dbuf_int_try_back(&dbuf, &err); - TEST_CHECK(err == GUF_ERR_IDX_RANGE); - - err = GUF_ERR_NONE; - dbuf_int_try_at(&dbuf, 0, &err); - TEST_CHECK(err == GUF_ERR_IDX_RANGE); - - for (int i = 0; i < n; ++i) { - dbuf_int_insert_val(&dbuf, i, i); - dbuf_int_insert_val(&dbuf, i * 2, 0); - dbuf_int_insert_val(&dbuf, i * 4, dbuf.size); - - vec.insert(vec.begin() + i, i); - vec.insert(vec.begin(), i * 2); - vec.insert(vec.end(), i * 4); - } - TEST_CHECK(std::ssize(vec) == dbuf.size); - - // Iterate - dbuf_int_iter it_dbuf = dbuf_int_begin(&dbuf); - std::vector::const_iterator it_vec = vec.begin(); - while (!dbuf_int_iter_is_end(&dbuf, it_dbuf) && it_vec != vec.end()) { - TEST_CHECK(*it_dbuf.ptr == *it_vec); - it_dbuf = dbuf_int_iter_next(&dbuf, it_dbuf, 1); - std::advance(it_vec, 1); - } - TEST_CHECK(dbuf_int_iter_is_end(&dbuf, it_dbuf) && it_vec == vec.end()); - - // Step iterate. - it_dbuf = dbuf_int_begin(&dbuf); - it_vec = vec.begin(); - while (!dbuf_int_iter_is_end(&dbuf, it_dbuf) && it_vec != vec.end()) { - TEST_CHECK(*it_dbuf.ptr == *it_vec); - it_dbuf = dbuf_int_iter_next(&dbuf, it_dbuf, 7); - - if (dbuf_int_iter_is_end(&dbuf, it_dbuf)) { - it_vec = vec.end(); - } else { - std::advance(it_vec, 7); - } - } - TEST_CHECK(dbuf_int_iter_is_end(&dbuf, it_dbuf) && it_vec == vec.end()); - - // Reverse iterate. - dbuf_int_iter rit_dbuf = dbuf_int_rbegin(&dbuf); - std::vector::const_reverse_iterator rit_vec = vec.crbegin(); - while (!dbuf_int_iter_is_end(&dbuf, rit_dbuf) && rit_vec != vec.crend()) { - TEST_CHECK(*rit_dbuf.ptr == *rit_vec); - rit_dbuf = dbuf_int_iter_next(&dbuf, rit_dbuf, 1); - std::advance(rit_vec, 1); - } - TEST_CHECK(dbuf_int_iter_is_end(&dbuf, rit_dbuf) && rit_vec == vec.rend()); - - // Reverse iterate step. - rit_dbuf = dbuf_int_rbegin(&dbuf); - rit_vec = vec.crbegin(); - while (!dbuf_int_iter_is_end(&dbuf, rit_dbuf) && rit_vec != vec.crend()) { - TEST_CHECK(*rit_dbuf.ptr == *rit_vec); - rit_dbuf = dbuf_int_iter_next(&dbuf, rit_dbuf, 4); - if (dbuf_int_iter_is_end(&dbuf, rit_dbuf)) { - rit_vec = vec.rend(); - } else { - std::advance(rit_vec, 4); - } - } - TEST_CHECK(dbuf_int_iter_is_end(&dbuf, rit_dbuf) && rit_vec == vec.rend()); - - TEST_CHECK(dbuf.size == std::ssize(vec)); - - for (ptrdiff_t i = 0; i < dbuf.size; i += 8) { - dbuf_int_erase(&dbuf, i); - dbuf_int_erase(&dbuf, 0); - dbuf_int_pop(&dbuf); - - vec.erase(vec.begin() + i); - vec.erase(vec.begin() + 0); - vec.pop_back(); - } - - TEST_CHECK(dbuf.size == std::ssize(vec)); - - for (ptrdiff_t i = 0; i < dbuf.size; i += 8) { - TEST_CHECK(*dbuf_int_at(&dbuf, i) == vec.at(i)); - } - - const ptrdiff_t size = dbuf.size; - for (ptrdiff_t i = 0; i < size; ++i) { - int a = dbuf_int_pop_move(&dbuf); - int b = vec.back(); - TEST_CHECK(a == b); - vec.pop_back(); - } - TEST_CHECK(dbuf.size == 0 && vec.size() == 0); - - dbuf_int_free(&dbuf, NULL); - TEST_CHECK(dbuf.size == 0 && dbuf.capacity == 0 && !dbuf.data); - } - - public: - - bool run() override - { - if (done) { - return passed; - } - - dbuf_int dbuf {}; - dbuf_int_init(&dbuf, 0, &guf_allocator_libc); - - push_check_name("test_push"); - - test_push(&dbuf, 256); - test_push(&dbuf, 128); - test_push(&dbuf, 17); - TEST_CHECK(dbuf.size == (256 + 128 + 17)); - - dbuf_int_free(&dbuf, NULL); - TEST_CHECK(dbuf.size == 0 && dbuf.capacity == 0 && dbuf.data == NULL); - - dbuf_int_init(&dbuf, 24, &guf_allocator_libc); - TEST_CHECK(dbuf.size == 0 && dbuf.capacity == 24 && dbuf.data); - - test_push(&dbuf, 365); - test_push(&dbuf, 4); - test_push(&dbuf, 25); - test_push(&dbuf, 64); - TEST_CHECK(dbuf.size == (365 + 4 + 25 + 64)); - - dbuf_int_free(&dbuf, NULL); - TEST_CHECK(dbuf.size == 0 && dbuf.capacity == 0 && dbuf.data == NULL); - - pop_check_name(); - - push_check_name("insert_remove"); - - for (int n = 0; n <= 128; ++n) { - test_insert_remove(n); - } - test_insert_remove(400); - test_insert_remove(401); - test_insert_remove(512); - test_insert_remove(513); - test_insert_remove(601); - test_insert_remove(2048); - test_insert_remove(2049); - - pop_check_name(); - - done = true; - passed = (num_failed_checks == 0); - return passed; - } +private: + std::vector dbuf_to_vec(dbuf_int *dbuf); + void test_push(dbuf_int *dbuf, int n); + void test_insert_remove(int n); }; struct DbufCstringTest : public Test { DbufCstringTest(std::string name) : Test(name) {}; + void run() override; - private: - - void test_iter(std::vector& str_vec, dbuf_heap_cstr *str_dbuf, int step = 1) - { - GUF_ASSERT_RELEASE(str_dbuf); - if (step <= 0) { - step = 1; - } - - ptrdiff_t i = 0; - GUF_CNT_FOREACH(str_dbuf, dbuf_heap_cstr, it) { - char *str = *it.ptr; - TEST_CHECK(str_vec.at(i) == str); - ++i; - } - TEST_CHECK(i == str_dbuf->size); - - i = str_dbuf->size - 1; - GUF_CNT_FOREACH_REVERSE(str_dbuf, dbuf_heap_cstr, rit) { - char *str = *rit.ptr; - TEST_CHECK(str_vec.at(i) == str); - --i; - } - TEST_CHECK(i == -1); - - dbuf_heap_cstr_iter it_dbuf = dbuf_heap_cstr_begin(str_dbuf); - std::vector::iterator it_vec = str_vec.begin(); - while (!dbuf_heap_cstr_iter_is_end(str_dbuf, it_dbuf)) { - TEST_CHECK(it_vec != str_vec.end()); - TEST_CHECK(*it_vec == *it_dbuf.ptr); - it_dbuf = dbuf_heap_cstr_iter_next(str_dbuf, it_dbuf, step); - if (!dbuf_heap_cstr_iter_is_end(str_dbuf, it_dbuf)) { - std::advance(it_vec, step); - } else { - it_vec = str_vec.end(); - } - } - TEST_CHECK(dbuf_heap_cstr_iter_is_end(str_dbuf, it_dbuf) && it_vec == str_vec.end()); - - dbuf_heap_cstr_iter rit_dbuf = dbuf_heap_cstr_rbegin(str_dbuf); - std::vector::reverse_iterator rit_vec = str_vec.rbegin(); - while (!dbuf_heap_cstr_iter_is_end(str_dbuf, rit_dbuf)) { - TEST_CHECK(rit_vec != str_vec.rend()); - TEST_CHECK(*rit_vec == *rit_dbuf.ptr); - rit_dbuf = dbuf_heap_cstr_iter_next(str_dbuf, rit_dbuf, step); - if (!dbuf_heap_cstr_iter_is_end(str_dbuf, rit_dbuf)) { - std::advance(rit_vec, step); - } else { - rit_vec = str_vec.rend(); - } - } - TEST_CHECK(dbuf_heap_cstr_iter_is_end(str_dbuf, rit_dbuf) && rit_vec == str_vec.rend()); - - for (i = 0; i < str_dbuf->size; ++i) { - char *str = *dbuf_heap_cstr_at(str_dbuf, i); - TEST_CHECK(str_vec.at(i) == str); - } - } - - void test_push_insert_erase(int n, ptrdiff_t start_cap = 0) - { - std::vector str_vec; - dbuf_heap_cstr str_dbuf {}; - dbuf_heap_cstr_init(&str_dbuf, start_cap, &guf_allocator_libc); - - for (int i = 0; i < n; ++i) { - constexpr int BUF_SZ = 128; - char buf[BUF_SZ]; - memset(buf, '\0', BUF_SZ); - snprintf(buf, BUF_SZ, "This is string number %d", i); - guf_cstr_heap str = buf; - - dbuf_heap_cstr_push(&str_dbuf, &str, GUF_CPY_DEEP); - dbuf_heap_cstr_push_val_cpy(&str_dbuf, str); - char *heap_buf = strdup("Move me plz"); - dbuf_heap_cstr_push(&str_dbuf, &heap_buf, GUF_CPY_MOVE); - TEST_CHECK(heap_buf == NULL); - - TEST_CHECK(strncmp(*dbuf_heap_cstr_back(&str_dbuf), "Move me plz", BUF_SZ) == 0); - TEST_CHECK(strncmp(*dbuf_heap_cstr_at(&str_dbuf, str_dbuf.size - 2), buf, BUF_SZ) == 0); - TEST_CHECK(strncmp(*dbuf_heap_cstr_at(&str_dbuf, str_dbuf.size - 3), buf, BUF_SZ) == 0); - - str_vec.push_back(std::string{buf}); - str_vec.push_back(std::string{buf}); - str_vec.emplace_back("Move me plz"); - } - - TEST_CHECK(str_dbuf.size == std::ssize(str_vec)); - TEST_CHECK(str_dbuf.size == 3 * n); - - for (int i = 1; i <= 8; ++i) { - test_iter(str_vec, &str_dbuf, i); - } - test_iter(str_vec, &str_dbuf, str_dbuf.size); - test_iter(str_vec, &str_dbuf, str_dbuf.size - 1); - test_iter(str_vec, &str_dbuf, str_dbuf.size + 1); - - for (ptrdiff_t i = 0; i < str_dbuf.size; ++i) { - TEST_CHECK(str_vec.at(i) == *dbuf_heap_cstr_at(&str_dbuf, i)); - } - - // Insert front. - for (ptrdiff_t i = 0; i < 16; ++i) { - char str[] = "front"; - dbuf_heap_cstr_insert_val_cpy(&str_dbuf, str, 0); - str_vec.insert(str_vec.begin(), std::string{str}); - } - TEST_CHECK(std::ssize(str_vec) == str_dbuf.size); - for (ptrdiff_t i = 0; i < str_dbuf.size; ++i) { - TEST_CHECK(str_vec.at(i) == *dbuf_heap_cstr_at(&str_dbuf, i)); - } - // Insert back. - for (ptrdiff_t i = 0; i < 16; ++i) { - char str[] = "front"; - dbuf_heap_cstr_insert_val_cpy(&str_dbuf, str, str_dbuf.size); - str_vec.insert(str_vec.end(), std::string{str}); - } - TEST_CHECK(std::ssize(str_vec) == str_dbuf.size); - for (ptrdiff_t i = 0; i < str_dbuf.size; ++i) { - TEST_CHECK(str_vec.at(i) == *dbuf_heap_cstr_at(&str_dbuf, i)); - } - - // Insert at i. - char str[] = "guf"; - dbuf_heap_cstr_insert_val_cpy(&str_dbuf, str, str_dbuf.size / 2); - str_vec.insert(str_vec.begin() + str_vec.size() / 2, str); - dbuf_heap_cstr_insert_val_cpy(&str_dbuf, str, str_dbuf.size / 4); - str_vec.insert(str_vec.begin() + str_vec.size() / 4, str); - dbuf_heap_cstr_insert_val_cpy(&str_dbuf, str, 1); - str_vec.insert(str_vec.begin() + 1, str); - dbuf_heap_cstr_insert_val_cpy(&str_dbuf, str, str_dbuf.size - 1); - str_vec.insert(str_vec.begin() + (str_vec.size() - 1), str); - - for (ptrdiff_t i = 0; i < str_dbuf.size; ++i) { - TEST_CHECK(str_vec.at(i) == *dbuf_heap_cstr_at(&str_dbuf, i)); - } - - guf_err err = GUF_ERR_NONE; - dbuf_heap_cstr_try_insert_val_cpy(&str_dbuf, str, str_dbuf.size + 1, &err); - TEST_CHECK(err == GUF_ERR_IDX_RANGE); - - err = GUF_ERR_NONE; - dbuf_heap_cstr_try_insert_val_cpy(&str_dbuf, str, -1, &err); - TEST_CHECK(err == GUF_ERR_IDX_RANGE); - - err = GUF_ERR_NONE; - dbuf_heap_cstr_try_insert_val_cpy(&str_dbuf, str, str_dbuf.size + 2, &err); - TEST_CHECK(err == GUF_ERR_IDX_RANGE); - - TEST_CHECK(std::ssize(str_vec) == str_dbuf.size); - for (ptrdiff_t i = 0; i < str_dbuf.size; ++i) { - TEST_CHECK(str_vec.at(i) == *dbuf_heap_cstr_at(&str_dbuf, i)); - } - - if (str_dbuf.size) { - dbuf_heap_cstr_erase(&str_dbuf, str_dbuf.size - 1); - str_vec.erase(str_vec.end() - 1); - } - - ptrdiff_t to_rem = 8; - while (str_dbuf.size && to_rem--) { - dbuf_heap_cstr_erase(&str_dbuf, 0); - str_vec.erase(str_vec.begin()); - TEST_CHECK(std::ssize(str_vec) == str_dbuf.size); - if (str_dbuf.size) { - dbuf_heap_cstr_pop(&str_dbuf); - str_vec.pop_back(); - TEST_CHECK(std::ssize(str_vec) == str_dbuf.size); - } - if (str_dbuf.size) { - dbuf_heap_cstr_erase(&str_dbuf, str_dbuf.size / 2); - str_vec.erase(str_vec.begin() + (str_vec.size() / 2)); - TEST_CHECK(std::ssize(str_vec) == str_dbuf.size); - } - } - - dbuf_heap_cstr_free(&str_dbuf, NULL); - TEST_CHECK(str_dbuf.size == 0 && str_dbuf.capacity == 0 && !str_dbuf.data); - } - - void test_find(int n = 32) - { - if (n < 2) { - n = 2; - } - std::vector str_vec {}; - - dbuf_heap_cstr str_dbuf = {}; - dbuf_heap_cstr_init(&str_dbuf, 0, &guf_allocator_libc); - - for (int i = 0; i < n; ++i) { - constexpr int BUF_SZ = 128; - char buf[BUF_SZ]; - memset(buf, '\0', BUF_SZ); - snprintf(buf, BUF_SZ, "String number %d", i); - - dbuf_heap_cstr_push_val_cpy(&str_dbuf, buf); - str_vec.push_back(buf); - } - char *move_me = strdup("Moved string"); - dbuf_heap_cstr_push(&str_dbuf, &move_me, GUF_CPY_MOVE); - GUF_ASSERT_RELEASE(move_me == NULL); - str_vec.emplace_back("Moved string"); - - TEST_CHECK(std::ssize(str_vec) == str_dbuf.size); - - for (ptrdiff_t i = 0; i < str_dbuf.size; ++i) { - char *needle = *dbuf_heap_cstr_at(&str_dbuf, i); - TEST_CHECK(str_vec.at(i) == needle); - - TEST_CHECK(dbuf_heap_cstr_contains_val(&str_dbuf, needle)); - - dbuf_heap_cstr_iter fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), dbuf_heap_cstr_end(&str_dbuf), needle); - TEST_CHECK(!dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it)); - TEST_CHECK(std::find(str_vec.cbegin(), str_vec.cend(), needle) != str_vec.end()); - - dbuf_heap_cstr_iter begin = dbuf_heap_cstr_iter_next(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), i); - dbuf_heap_cstr_iter end = dbuf_heap_cstr_end(&str_dbuf); - fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, begin, end, needle); - TEST_CHECK(!dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it)); - TEST_CHECK(std::find(str_vec.cbegin() + i, str_vec.cend(), needle) != str_vec.end()); - - begin = dbuf_heap_cstr_iter_next(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), i + 1); - end = dbuf_heap_cstr_end(&str_dbuf); - fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, begin, end, needle); - TEST_CHECK(dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it)); - TEST_CHECK(std::find(str_vec.cbegin() + i + 1, str_vec.cend(), needle) == str_vec.end()); - - // Reverse. - fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_rbegin(&str_dbuf), dbuf_heap_cstr_rend(&str_dbuf), needle); - TEST_CHECK(!dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it)); - TEST_CHECK(std::find(str_vec.crbegin(), str_vec.crend(), needle) != str_vec.rend()); - } - - char needle[] = "Definitely not inside"; - dbuf_heap_cstr_iter fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), dbuf_heap_cstr_end(&str_dbuf), needle); - TEST_CHECK(dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it)); - TEST_CHECK(std::find(str_vec.cbegin(), str_vec.cend(), needle) == str_vec.end()); - - fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_rbegin(&str_dbuf), dbuf_heap_cstr_rend(&str_dbuf), needle); - TEST_CHECK(dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it)); - TEST_CHECK(std::find(str_vec.crbegin(), str_vec.crend(), needle) == str_vec.rend()); - - char *needle2 = *dbuf_heap_cstr_at(&str_dbuf, 0); - fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_iter_next(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), 1), dbuf_heap_cstr_end(&str_dbuf), needle2); - TEST_CHECK(dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it)); - TEST_CHECK(std::find(str_vec.cbegin() + 1, str_vec.cend(), needle2) == str_vec.end()); - - needle2 = *dbuf_heap_cstr_back(&str_dbuf); - fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_iter_next(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), 1), dbuf_heap_cstr_iter_next(&str_dbuf, dbuf_heap_cstr_end(&str_dbuf), -1), needle2); - TEST_CHECK(dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it)); - TEST_CHECK(std::find(str_vec.begin(), str_vec.end() - 1, needle2) == (str_vec.end() - 1)); - - needle2 = *dbuf_heap_cstr_at(&str_dbuf, 0); - fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), dbuf_heap_cstr_begin(&str_dbuf), needle2); - TEST_CHECK(dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it)); - TEST_CHECK(std::find(str_vec.cbegin(), str_vec.cbegin(), needle2) == str_vec.cbegin()); - - dbuf_heap_cstr_free(&str_dbuf, NULL); - } - - public: - - bool run() - { - push_check_name("push_insert_erase"); - - for (int i = 1; i <= 32; ++i) { - test_push_insert_erase(i); - test_push_insert_erase(i, i - 1); - test_push_insert_erase(i, i + 1); - test_push_insert_erase(i, i); - test_push_insert_erase(i, i / 2); - } - test_push_insert_erase(2048); - test_push_insert_erase(2048, 11); - - dbuf_heap_cstr str_dbuf = {}; - dbuf_heap_cstr_init(&str_dbuf, 0, &guf_allocator_libc); - std::vector str_vec {}; - - for (int i = 0; i < 512; ++i) { - char buf[128]; - memset(buf, '\0', GUF_ARR_SIZE(buf)); - snprintf(buf, GUF_ARR_SIZE(buf), "This is a pretty guf string (number %d)", i); - guf_cstr_heap str = buf; - dbuf_heap_cstr_push(&str_dbuf, &str, GUF_CPY_DEEP); - str_vec.push_back(std::string{buf}); - } - for (int i = 0; i < str_dbuf.size + 16; ++i) { - test_iter(str_vec, &str_dbuf, i); - } - - dbuf_heap_cstr_free(&str_dbuf, NULL); - TEST_CHECK(str_dbuf.size == 0 && str_dbuf.capacity == 0 && !str_dbuf.data); - - pop_check_name(); - - push_check_name("find"); - test_find(); - test_find(3); - test_find(42); - test_find(129); - pop_check_name(); - - done = true; - passed = (num_failed_checks == 0); - return passed; - } +private: + void test_iter(std::vector& str_vec, dbuf_heap_cstr *str_dbuf, int step = 1); + void test_push_insert_erase(int n, ptrdiff_t start_cap = 0); + void test_find(int n = 32); }; diff --git a/src/test/test_dict.cpp b/src/test/test_dict.cpp new file mode 100644 index 0000000..530f7c1 --- /dev/null +++ b/src/test/test_dict.cpp @@ -0,0 +1,368 @@ +#include "test_dict.hpp" + +#include +#include +extern "C" +{ + #include "guf_alloc_libc.h" + #include "guf_str.h" + #include "impls/dict_impl.h" + #include "impls/dbuf_impl.h" +} + +/* + DictSvToIntTest: +*/ + +void DictSvToIntTest::run() +{ + if (done) { + return; + } + + push_check_name("insert_lookup(\"utf8-test.txt\")"); + if (TEST_CHECK(load_file(TEST_DATA_DIR "/utf8-test.txt"))) { + insert_lookup(); + for (ptrdiff_t i = 0; i <= 64; ++i) { + insert_lookup(i); + } + insert_lookup(512); + insert_lookup(1997); + insert_lookup(1999); + } + free_file(); + pop_check_name(); + + push_check_name("insert_lookup(\"bartleby.txt\")"); + if (TEST_CHECK(load_file(TEST_DATA_DIR "/bartleby.txt"))) { + insert_lookup(); + insert_lookup(201); + } + free_file(); + pop_check_name(); +} + +void DictSvToIntTest::insert_lookup(std::optional inital_dict_cap) +{ + std::unordered_map word_cnt_map {}; + dict_sv_i32 word_cnt_dict {}; + if (inital_dict_cap) { + dict_sv_i32_init_with_capacity(&word_cnt_dict, &guf_allocator_libc, inital_dict_cap.value()); + } else { + dict_sv_i32_init(&word_cnt_dict, &guf_allocator_libc); + } + + dbuf_str_view delims = dbuf_str_view_new(&guf_allocator_libc); + for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_WHITESPACE); ++i) { + guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_WHITESPACE[i]), .str = GUF_UTF8_WHITESPACE[i]}; + dbuf_str_view_push_val(&delims, d); + } + for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_COMMON_PUNCT); ++i) { + guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_COMMON_PUNCT[i]), .str = GUF_UTF8_COMMON_PUNCT[i]}; + dbuf_str_view_push_val(&delims, d); + } + + guf_str_tok_state tok_state = guf_str_tok_state_new(guf_str_view{.str = text_buf.data, .len = text_buf.size}, delims.data, delims.size, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST); + while (guf_str_tok_next(&tok_state, true)) { + guf_str_view tok = tok_state.cur_tok; + // if (tok.len <= 0) { + // continue; + // } + std::string_view sv(tok.str , tok.len); + //std::cout << sv << std::string_view(tok_state.cur_delim.str, tok_state.cur_delim.len); + TEST_CHECK(dict_sv_i32_contains(&word_cnt_dict, &tok) == word_cnt_map.contains(sv)); + if (!dict_sv_i32_contains(&word_cnt_dict, &tok)) { + dict_sv_i32_insert_val_arg(&word_cnt_dict, tok, 1, GUF_CPY_VALUE, GUF_CPY_VALUE); + word_cnt_map.insert({sv, 1}); + } else { + int32_t *cnt = dict_sv_i32_at_val_arg(&word_cnt_dict, tok); + if (TEST_CHECK(cnt)) { + *cnt += 1; + } + word_cnt_map.at(sv) += 1; + } + // printf("tok_len: %td ", tok.len); + // printf("'%.*s'\n", (int)tok.len, tok.str); + TEST_CHECK(dict_sv_i32_debug_valid_size(&word_cnt_dict)); + } + dbuf_str_view_free(&delims, NULL); + + TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == std::ssize(word_cnt_map)); + TEST_CHECK(dict_sv_i32_debug_valid_size(&word_cnt_dict)); + + for (const auto & [word, cnt] : word_cnt_map ) { + guf_str_view sv = {.str = word.data(), .len = (ptrdiff_t)word.size()}; + int32_t *res = dict_sv_i32_at(&word_cnt_dict, &sv); + TEST_CHECK(res && *res == cnt); + } + + ptrdiff_t i = 0; + GUF_CNT_FOREACH(&word_cnt_dict, dict_sv_i32, kv_it) { + const dict_sv_i32_kv *kv = kv_it.ptr; + if (TEST_CHECK(kv)) { + const int32_t cnt = kv->val; + // printf("%.*s: %d\n", (int)kv->key.len, kv->key.str, cnt); + const std::string_view sv(kv->key.str, kv->key.len); + if (TEST_CHECK(word_cnt_map.contains(sv))) { + TEST_CHECK(word_cnt_map.at(sv) == cnt); + } + } + ++i; + } + TEST_CHECK(i == dict_sv_i32_size(&word_cnt_dict)); + TEST_CHECK(i == std::ssize(word_cnt_map)); + TEST_CHECK(dict_sv_i32_debug_valid_size(&word_cnt_dict)); + + // std::cout << "load fac: " << dict_sv_i32_load_factor(&word_cnt_dict) << ", cap: " << word_cnt_dict.kv_indices_cap << " elem cap: " << word_cnt_dict.kv_elems.capacity << "\n"; + // std::cout << "size: " << dict_sv_i32_size(&word_cnt_dict) << ", max probelen: " << word_cnt_dict.max_probelen << "\n"; + // std::cout << "mem usage: " << dict_sv_i32_memory_usage(&word_cnt_dict) << "\n"; + + // Erase tests: + const double load_fac_before_erase = dict_sv_i32_load_factor(&word_cnt_dict); + const ptrdiff_t size_before_erase = dict_sv_i32_size(&word_cnt_dict); + ptrdiff_t num_del = 0; + while (dict_sv_i32_size(&word_cnt_dict) > size_before_erase / 2) { + dict_sv_i32_kv *kv = NULL; + if (num_del % 2) { + dict_sv_i32_iter it = dict_sv_i32_begin(&word_cnt_dict); + GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, it)); + kv = it.ptr; + } else { + dict_sv_i32_iter rit = dict_sv_i32_rbegin(&word_cnt_dict); + GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, rit)); + kv = rit.ptr; + } + GUF_ASSERT_RELEASE(kv); + + const guf_str_view key = kv->key; + + const bool del_success = dict_sv_i32_erase(&word_cnt_dict, &key); + TEST_CHECK(del_success); + TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key)); + + std::string_view sv(key.str, (size_t)key.len); + if (TEST_CHECK(word_cnt_map.contains(sv))) { + word_cnt_map.erase(sv); + } + TEST_CHECK(!word_cnt_map.contains(sv)); + + if (del_success) { + ++num_del; + } + } + TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) >= 0); + TEST_CHECK(size_before_erase - num_del == dict_sv_i32_size(&word_cnt_dict)); + TEST_CHECK(std::ssize(word_cnt_map) == dict_sv_i32_size(&word_cnt_dict)); + + if (dict_sv_i32_size(&word_cnt_dict) != 0) { + TEST_CHECK(load_fac_before_erase == dict_sv_i32_load_factor(&word_cnt_dict)); + } else { + TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0); + } + + if (dict_sv_i32_size(&word_cnt_dict) >= 4) { + dict_sv_i32_kv_dbuf_iter it = dict_sv_i32_begin(&word_cnt_dict); + it = dict_sv_i32_iter_next(&word_cnt_dict, it, 1); + GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, it)); + + guf_str_view key = it.ptr->key; + + bool del_success = dict_sv_i32_erase(&word_cnt_dict, &key); + TEST_CHECK(del_success); + TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key)); + + std::string_view sv(key.str, (size_t)key.len); + if (TEST_CHECK(word_cnt_map.contains(sv))) { + word_cnt_map.erase(sv); + } + + it = dict_sv_i32_rbegin(&word_cnt_dict); + it = dict_sv_i32_iter_next(&word_cnt_dict, it, 1); + GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, it)); + key = it.ptr->key; + + del_success = dict_sv_i32_erase(&word_cnt_dict, &key); + TEST_CHECK(del_success); + TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key)); + + sv = std::string_view(key.str, (size_t)key.len); + if (TEST_CHECK(word_cnt_map.contains(sv))) { + word_cnt_map.erase(sv); + } + } + TEST_CHECK(std::ssize(word_cnt_map) == dict_sv_i32_size(&word_cnt_dict)); + + i = 0; + GUF_CNT_FOREACH(&word_cnt_dict, dict_sv_i32, kv_it) { + const dict_sv_i32_kv *kv = kv_it.ptr; + if (TEST_CHECK(kv)) { + const int32_t cnt = kv->val; + const std::string_view sv(kv->key.str, (size_t)kv->key.len); + if (TEST_CHECK(word_cnt_map.contains(sv))) { + TEST_CHECK(word_cnt_map.at(sv) == cnt); + } + ++i; + } + } + TEST_CHECK(i == word_cnt_dict.kv_elems.size); + TEST_CHECK(i == std::ssize(word_cnt_map)); + + while (dict_sv_i32_size(&word_cnt_dict) > 0) { + const dict_sv_i32_iter beg = dict_sv_i32_begin(&word_cnt_dict); + if (TEST_CHECK(!dict_sv_i32_iter_is_end(&word_cnt_dict, beg))) { + const guf_str_view key = beg.ptr->key; + if (TEST_CHECK(dict_sv_i32_contains(&word_cnt_dict, &key))) { + const bool del_success = dict_sv_i32_erase(&word_cnt_dict, &key); + TEST_CHECK(del_success); + TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key)); + } + const std::string_view sv(key.str, (size_t)key.len); + if (TEST_CHECK(word_cnt_map.contains(sv))) { + word_cnt_map.erase(sv); + } + } + } + TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == 0 && word_cnt_map.size() == 0); + TEST_CHECK(word_cnt_dict.num_tombstones == 0); + TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0); + + dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej"), (size_t)64, GUF_CPY_VALUE, GUF_CPY_VALUE); + dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!"), (size_t)128, GUF_CPY_VALUE, GUF_CPY_VALUE); + dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum"), (size_t)256, GUF_CPY_VALUE, GUF_CPY_VALUE); + dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med"), (size_t)512, GUF_CPY_VALUE, GUF_CPY_VALUE); + dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi."), (size_t)1024, GUF_CPY_VALUE, GUF_CPY_VALUE); + + TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == 5); + + int32_t *val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej")); + TEST_CHECK(val && *val == 64); + val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum")); + TEST_CHECK(val && *val == 256); + val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi.")); + TEST_CHECK(val && *val == 1024); + val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!")); + TEST_CHECK(val && *val == 128); + val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med")); + TEST_CHECK(val && *val == 512); + + TEST_CHECK(word_cnt_dict.kv_elems.size == 5); + + TEST_CHECK(word_cnt_dict.kv_elems.data[0].val == 64 && std::strcmp(word_cnt_dict.kv_elems.data[0].key.str, "Hej") == 0); + TEST_CHECK(word_cnt_dict.kv_elems.data[1].val == 128 && std::strcmp(word_cnt_dict.kv_elems.data[1].key.str, "verden!") == 0); + TEST_CHECK(word_cnt_dict.kv_elems.data[2].val == 256 && std::strcmp(word_cnt_dict.kv_elems.data[2].key.str, "Flødeskum") == 0); + TEST_CHECK(word_cnt_dict.kv_elems.data[3].val == 512 && std::strcmp(word_cnt_dict.kv_elems.data[3].key.str, "med") == 0); + TEST_CHECK(word_cnt_dict.kv_elems.data[4].val == 1024 && std::strcmp(word_cnt_dict.kv_elems.data[4].key.str, "Faxe Kondi.") == 0); + + const double load_fac_beg = dict_sv_i32_load_factor(&word_cnt_dict); + const ptrdiff_t cap_begin = word_cnt_dict.kv_indices_cap; + ptrdiff_t del = 0; + + TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej"))); + TEST_CHECK(word_cnt_dict.num_tombstones == ++del); + TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); + for (ptrdiff_t n = 0; n < cap_begin + 128; ++n) { + dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej"), 64, GUF_CPY_VALUE, GUF_CPY_VALUE); + TEST_CHECK(word_cnt_dict.num_tombstones == --del); + TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); + + TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej"))); + TEST_CHECK(word_cnt_dict.num_tombstones == ++del); + TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); + } + TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin); + + + TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi."))); + TEST_CHECK(word_cnt_dict.num_tombstones == ++del); + TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); + for (ptrdiff_t n = 0; n < 256; ++n) { + dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi."), 128, GUF_CPY_VALUE, GUF_CPY_VALUE); + TEST_CHECK(word_cnt_dict.num_tombstones == --del); + TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); + + TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi."))); + TEST_CHECK(word_cnt_dict.num_tombstones == ++del); + TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); + } + TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin); + + TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med"))); + TEST_CHECK(word_cnt_dict.num_tombstones == ++del); + TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); + for (ptrdiff_t n = 0; n < 512 + cap_begin; ++n) { + dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med"), 256, GUF_CPY_VALUE, GUF_CPY_VALUE); + TEST_CHECK(word_cnt_dict.num_tombstones == --del); + TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); + + TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med"))); + TEST_CHECK(word_cnt_dict.num_tombstones == ++del); + TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); + } + TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin); + + TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum"))); + TEST_CHECK(word_cnt_dict.num_tombstones == ++del); + TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); + for (ptrdiff_t n = 0; n < 71; ++n) { + dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum"), 512, GUF_CPY_VALUE, GUF_CPY_VALUE); + TEST_CHECK(word_cnt_dict.num_tombstones == --del); + TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); + + TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum"))); + TEST_CHECK(word_cnt_dict.num_tombstones == ++del); + TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); + } + TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin); + + TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!"))); + TEST_CHECK(word_cnt_dict.num_tombstones == 0); + TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0); + for (ptrdiff_t n = 0; n < 201; ++n) { + dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!"), 128, GUF_CPY_VALUE, GUF_CPY_VALUE); + TEST_CHECK(word_cnt_dict.num_tombstones == 0); + TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) > 0); + + TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!"))); + TEST_CHECK(word_cnt_dict.num_tombstones == 0); + TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0); + } + TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin); + + TEST_CHECK(word_cnt_dict.kv_elems.size == 0); + TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == 0); + + dict_sv_i32_free(&word_cnt_dict, NULL); + bool dbuf_null = !word_cnt_dict.kv_elems.data && !word_cnt_dict.kv_elems.allocator && !word_cnt_dict.kv_elems.capacity && !word_cnt_dict.kv_elems.size; + TEST_CHECK(dbuf_null && !word_cnt_dict.kv_indices && !word_cnt_dict.kv_indices_cap && !word_cnt_dict.max_probelen && !word_cnt_dict.num_tombstones); +} + +bool DictSvToIntTest::load_file(const char *fname) +{ + FILE *in_file {nullptr}; + if (!in_file) { + in_file = fopen(fname, "r"); + } + + GUF_ASSERT_RELEASE(in_file); + + dbuf_char_init(&text_buf, 128, &guf_allocator_libc); + + int c = EOF; + while ((c = fgetc(in_file)) != EOF) { + dbuf_char_push_val(&text_buf, (char)c); + text_vec.push_back((char)c); + } + fclose(in_file); + + // dbuf_char_insert_val(&text_buf, '\xC0', 1); + // text_vec.insert(text_vec.cbegin() + 1, '\xC0'); + + return TEST_CHECK(std::ssize(text_vec) == text_buf.size); +} + +void DictSvToIntTest::free_file() +{ + dbuf_char_free(&text_buf, NULL); + text_buf = {}; + text_vec.clear(); +} diff --git a/src/test/test_dict.hpp b/src/test/test_dict.hpp index fb158c5..a1da0cd 100644 --- a/src/test/test_dict.hpp +++ b/src/test/test_dict.hpp @@ -1,376 +1,25 @@ #pragma once -#include -#include +#include +#include +#include #include "test.hpp" extern "C" { - #include "guf_alloc_libc.h" - #include "guf_str.h" - #include "impls/dict_impl.h" + #include "impls/dbuf_impl.h" } struct DictSvToIntTest : public Test { DictSvToIntTest(const std::string& name) : Test(name) {}; + void run() override; private: dbuf_char text_buf {}; std::vector text_vec {}; + + bool load_file(const char *fname); + void free_file(); - void insert_lookup(std::optional inital_dict_cap = {}) - { - std::unordered_map word_cnt_map {}; - dict_sv_i32 word_cnt_dict {}; - if (inital_dict_cap) { - dict_sv_i32_init_with_capacity(&word_cnt_dict, &guf_allocator_libc, inital_dict_cap.value()); - } else { - dict_sv_i32_init(&word_cnt_dict, &guf_allocator_libc); - } - - dbuf_str_view delims = dbuf_str_view_new(&guf_allocator_libc); - for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_WHITESPACE); ++i) { - guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_WHITESPACE[i]), .str = GUF_UTF8_WHITESPACE[i]}; - dbuf_str_view_push_val(&delims, d); - } - for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_COMMON_PUNCT); ++i) { - guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_COMMON_PUNCT[i]), .str = GUF_UTF8_COMMON_PUNCT[i]}; - dbuf_str_view_push_val(&delims, d); - } - - guf_str_tok_state tok_state = guf_str_tok_state_new(guf_str_view{.str = text_buf.data, .len = text_buf.size}, delims.data, delims.size, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST); - while (guf_str_tok_next(&tok_state, true)) { - guf_str_view tok = tok_state.cur_tok; - // if (tok.len <= 0) { - // continue; - // } - std::string_view sv(tok.str , tok.len); - //std::cout << sv << std::string_view(tok_state.cur_delim.str, tok_state.cur_delim.len); - TEST_CHECK(dict_sv_i32_contains(&word_cnt_dict, &tok) == word_cnt_map.contains(sv)); - if (!dict_sv_i32_contains(&word_cnt_dict, &tok)) { - dict_sv_i32_insert_val_arg(&word_cnt_dict, tok, 1, GUF_CPY_VALUE, GUF_CPY_VALUE); - word_cnt_map.insert({sv, 1}); - } else { - int32_t *cnt = dict_sv_i32_at_val_arg(&word_cnt_dict, tok); - if (TEST_CHECK(cnt)) { - *cnt += 1; - } - word_cnt_map.at(sv) += 1; - } - // printf("tok_len: %td ", tok.len); - // printf("'%.*s'\n", (int)tok.len, tok.str); - TEST_CHECK(dict_sv_i32_debug_valid_size(&word_cnt_dict)); - } - dbuf_str_view_free(&delims, NULL); - - TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == std::ssize(word_cnt_map)); - TEST_CHECK(dict_sv_i32_debug_valid_size(&word_cnt_dict)); - - for (const auto & [word, cnt] : word_cnt_map ) { - guf_str_view sv = {.str = word.data(), .len = (ptrdiff_t)word.size()}; - int32_t *res = dict_sv_i32_at(&word_cnt_dict, &sv); - TEST_CHECK(res && *res == cnt); - } - - ptrdiff_t i = 0; - GUF_CNT_FOREACH(&word_cnt_dict, dict_sv_i32, kv_it) { - const dict_sv_i32_kv *kv = kv_it.ptr; - if (TEST_CHECK(kv)) { - const int32_t cnt = kv->val; - // printf("%.*s: %d\n", (int)kv->key.len, kv->key.str, cnt); - const std::string_view sv(kv->key.str, kv->key.len); - if (TEST_CHECK(word_cnt_map.contains(sv))) { - TEST_CHECK(word_cnt_map.at(sv) == cnt); - } - } - ++i; - } - TEST_CHECK(i == dict_sv_i32_size(&word_cnt_dict)); - TEST_CHECK(i == std::ssize(word_cnt_map)); - TEST_CHECK(dict_sv_i32_debug_valid_size(&word_cnt_dict)); - - // std::cout << "load fac: " << dict_sv_i32_load_factor(&word_cnt_dict) << ", cap: " << word_cnt_dict.kv_indices_cap << " elem cap: " << word_cnt_dict.kv_elems.capacity << "\n"; - // std::cout << "size: " << dict_sv_i32_size(&word_cnt_dict) << ", max probelen: " << word_cnt_dict.max_probelen << "\n"; - // std::cout << "mem usage: " << dict_sv_i32_memory_usage(&word_cnt_dict) << "\n"; - - // Erase tests: - const double load_fac_before_erase = dict_sv_i32_load_factor(&word_cnt_dict); - const ptrdiff_t size_before_erase = dict_sv_i32_size(&word_cnt_dict); - ptrdiff_t num_del = 0; - while (dict_sv_i32_size(&word_cnt_dict) > size_before_erase / 2) { - dict_sv_i32_kv *kv = NULL; - if (num_del % 2) { - dict_sv_i32_iter it = dict_sv_i32_begin(&word_cnt_dict); - GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, it)); - kv = it.ptr; - } else { - dict_sv_i32_iter rit = dict_sv_i32_rbegin(&word_cnt_dict); - GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, rit)); - kv = rit.ptr; - } - GUF_ASSERT_RELEASE(kv); - - const guf_str_view key = kv->key; - - const bool del_success = dict_sv_i32_erase(&word_cnt_dict, &key); - TEST_CHECK(del_success); - TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key)); - - std::string_view sv(key.str, (size_t)key.len); - if (TEST_CHECK(word_cnt_map.contains(sv))) { - word_cnt_map.erase(sv); - } - TEST_CHECK(!word_cnt_map.contains(sv)); - - if (del_success) { - ++num_del; - } - } - TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) >= 0); - TEST_CHECK(size_before_erase - num_del == dict_sv_i32_size(&word_cnt_dict)); - TEST_CHECK(std::ssize(word_cnt_map) == dict_sv_i32_size(&word_cnt_dict)); - - if (dict_sv_i32_size(&word_cnt_dict) != 0) { - TEST_CHECK(load_fac_before_erase == dict_sv_i32_load_factor(&word_cnt_dict)); - } else { - TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0); - } - - if (dict_sv_i32_size(&word_cnt_dict) >= 4) { - dict_sv_i32_kv_dbuf_iter it = dict_sv_i32_begin(&word_cnt_dict); - it = dict_sv_i32_iter_next(&word_cnt_dict, it, 1); - GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, it)); - - guf_str_view key = it.ptr->key; - - bool del_success = dict_sv_i32_erase(&word_cnt_dict, &key); - TEST_CHECK(del_success); - TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key)); - - std::string_view sv(key.str, (size_t)key.len); - if (TEST_CHECK(word_cnt_map.contains(sv))) { - word_cnt_map.erase(sv); - } - - it = dict_sv_i32_rbegin(&word_cnt_dict); - it = dict_sv_i32_iter_next(&word_cnt_dict, it, 1); - GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, it)); - key = it.ptr->key; - - del_success = dict_sv_i32_erase(&word_cnt_dict, &key); - TEST_CHECK(del_success); - TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key)); - - sv = std::string_view(key.str, (size_t)key.len); - if (TEST_CHECK(word_cnt_map.contains(sv))) { - word_cnt_map.erase(sv); - } - } - TEST_CHECK(std::ssize(word_cnt_map) == dict_sv_i32_size(&word_cnt_dict)); - - i = 0; - GUF_CNT_FOREACH(&word_cnt_dict, dict_sv_i32, kv_it) { - const dict_sv_i32_kv *kv = kv_it.ptr; - if (TEST_CHECK(kv)) { - const int32_t cnt = kv->val; - const std::string_view sv(kv->key.str, (size_t)kv->key.len); - if (TEST_CHECK(word_cnt_map.contains(sv))) { - TEST_CHECK(word_cnt_map.at(sv) == cnt); - } - ++i; - } - } - TEST_CHECK(i == word_cnt_dict.kv_elems.size); - TEST_CHECK(i == std::ssize(word_cnt_map)); - - while (dict_sv_i32_size(&word_cnt_dict) > 0) { - const dict_sv_i32_iter beg = dict_sv_i32_begin(&word_cnt_dict); - if (TEST_CHECK(!dict_sv_i32_iter_is_end(&word_cnt_dict, beg))) { - const guf_str_view key = beg.ptr->key; - if (TEST_CHECK(dict_sv_i32_contains(&word_cnt_dict, &key))) { - const bool del_success = dict_sv_i32_erase(&word_cnt_dict, &key); - TEST_CHECK(del_success); - TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key)); - } - const std::string_view sv(key.str, (size_t)key.len); - if (TEST_CHECK(word_cnt_map.contains(sv))) { - word_cnt_map.erase(sv); - } - } - } - TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == 0 && word_cnt_map.size() == 0); - TEST_CHECK(word_cnt_dict.num_tombstones == 0); - TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0); - - dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej"), (size_t)64, GUF_CPY_VALUE, GUF_CPY_VALUE); - dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!"), (size_t)128, GUF_CPY_VALUE, GUF_CPY_VALUE); - dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum"), (size_t)256, GUF_CPY_VALUE, GUF_CPY_VALUE); - dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med"), (size_t)512, GUF_CPY_VALUE, GUF_CPY_VALUE); - dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi."), (size_t)1024, GUF_CPY_VALUE, GUF_CPY_VALUE); - - TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == 5); - - int32_t *val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej")); - TEST_CHECK(val && *val == 64); - val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum")); - TEST_CHECK(val && *val == 256); - val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi.")); - TEST_CHECK(val && *val == 1024); - val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!")); - TEST_CHECK(val && *val == 128); - val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med")); - TEST_CHECK(val && *val == 512); - - TEST_CHECK(word_cnt_dict.kv_elems.size == 5); - - TEST_CHECK(word_cnt_dict.kv_elems.data[0].val == 64 && std::strcmp(word_cnt_dict.kv_elems.data[0].key.str, "Hej") == 0); - TEST_CHECK(word_cnt_dict.kv_elems.data[1].val == 128 && std::strcmp(word_cnt_dict.kv_elems.data[1].key.str, "verden!") == 0); - TEST_CHECK(word_cnt_dict.kv_elems.data[2].val == 256 && std::strcmp(word_cnt_dict.kv_elems.data[2].key.str, "Flødeskum") == 0); - TEST_CHECK(word_cnt_dict.kv_elems.data[3].val == 512 && std::strcmp(word_cnt_dict.kv_elems.data[3].key.str, "med") == 0); - TEST_CHECK(word_cnt_dict.kv_elems.data[4].val == 1024 && std::strcmp(word_cnt_dict.kv_elems.data[4].key.str, "Faxe Kondi.") == 0); - - const double load_fac_beg = dict_sv_i32_load_factor(&word_cnt_dict); - const ptrdiff_t cap_begin = word_cnt_dict.kv_indices_cap; - ptrdiff_t del = 0; - - TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej"))); - TEST_CHECK(word_cnt_dict.num_tombstones == ++del); - TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); - for (ptrdiff_t n = 0; n < cap_begin + 128; ++n) { - dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej"), 64, GUF_CPY_VALUE, GUF_CPY_VALUE); - TEST_CHECK(word_cnt_dict.num_tombstones == --del); - TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); - - TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej"))); - TEST_CHECK(word_cnt_dict.num_tombstones == ++del); - TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); - } - TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin); - - - TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi."))); - TEST_CHECK(word_cnt_dict.num_tombstones == ++del); - TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); - for (ptrdiff_t n = 0; n < 256; ++n) { - dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi."), 128, GUF_CPY_VALUE, GUF_CPY_VALUE); - TEST_CHECK(word_cnt_dict.num_tombstones == --del); - TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); - - TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi."))); - TEST_CHECK(word_cnt_dict.num_tombstones == ++del); - TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); - } - TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin); - - TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med"))); - TEST_CHECK(word_cnt_dict.num_tombstones == ++del); - TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); - for (ptrdiff_t n = 0; n < 512 + cap_begin; ++n) { - dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med"), 256, GUF_CPY_VALUE, GUF_CPY_VALUE); - TEST_CHECK(word_cnt_dict.num_tombstones == --del); - TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); - - TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med"))); - TEST_CHECK(word_cnt_dict.num_tombstones == ++del); - TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); - } - TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin); - - TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum"))); - TEST_CHECK(word_cnt_dict.num_tombstones == ++del); - TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); - for (ptrdiff_t n = 0; n < 71; ++n) { - dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum"), 512, GUF_CPY_VALUE, GUF_CPY_VALUE); - TEST_CHECK(word_cnt_dict.num_tombstones == --del); - TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); - - TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum"))); - TEST_CHECK(word_cnt_dict.num_tombstones == ++del); - TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg); - } - TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin); - - TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!"))); - TEST_CHECK(word_cnt_dict.num_tombstones == 0); - TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0); - for (ptrdiff_t n = 0; n < 201; ++n) { - dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!"), 128, GUF_CPY_VALUE, GUF_CPY_VALUE); - TEST_CHECK(word_cnt_dict.num_tombstones == 0); - TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) > 0); - - TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!"))); - TEST_CHECK(word_cnt_dict.num_tombstones == 0); - TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0); - } - TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin); - - TEST_CHECK(word_cnt_dict.kv_elems.size == 0); - TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == 0); - - dict_sv_i32_free(&word_cnt_dict, NULL); - bool dbuf_null = !word_cnt_dict.kv_elems.data && !word_cnt_dict.kv_elems.allocator && !word_cnt_dict.kv_elems.capacity && !word_cnt_dict.kv_elems.size; - TEST_CHECK(dbuf_null && !word_cnt_dict.kv_indices && !word_cnt_dict.kv_indices_cap && !word_cnt_dict.max_probelen && !word_cnt_dict.num_tombstones); - } - - bool load_file(const char *fname) - { - FILE *in_file {nullptr}; - if (!in_file) { - in_file = fopen(fname, "r"); - } - - GUF_ASSERT_RELEASE(in_file); - - dbuf_char_init(&text_buf, 128, &guf_allocator_libc); - - int c = EOF; - while ((c = fgetc(in_file)) != EOF) { - dbuf_char_push_val(&text_buf, (char)c); - text_vec.push_back((char)c); - } - fclose(in_file); - - // dbuf_char_insert_val(&text_buf, '\xC0', 1); - // text_vec.insert(text_vec.cbegin() + 1, '\xC0'); - - return TEST_CHECK(std::ssize(text_vec) == text_buf.size); - } - - void free_file() - { - dbuf_char_free(&text_buf, NULL); - text_buf = {}; - text_vec.clear(); - } - -public: - bool run() override - { - if (done) { - return passed; - } - - if (TEST_CHECK(load_file(TEST_DATA_DIR "/utf8-test.txt"))) { - insert_lookup(); - for (ptrdiff_t i = 0; i <= 64; ++i) { - insert_lookup(i); - } - insert_lookup(512); - insert_lookup(1997); - insert_lookup(1999); - } - free_file(); - - if (TEST_CHECK(load_file(TEST_DATA_DIR "/bartleby.txt"))) { - insert_lookup(); - insert_lookup(201); - } - free_file(); - - passed = (num_failed_checks == 0); - done = true; - return passed; - } + void insert_lookup(std::optional inital_dict_cap = {}); }; - - diff --git a/src/test/test_str.cpp b/src/test/test_str.cpp new file mode 100644 index 0000000..25b83a8 --- /dev/null +++ b/src/test/test_str.cpp @@ -0,0 +1,376 @@ +#include "test_str.hpp" +extern "C" +{ + #include "guf_alloc_libc.h" +} + +/* + StrTest: +*/ + +void StrTest::run() +{ + if (done) { + return; + } + + const std::vector words = { + "", + "\0", + "Hello", + "Othell\0o", + "f\0\0", + "\0", + "0", + "a", + "ab", + "🌈 waow a rainboge!", + "orange cat(1) :3", + "xes yag", + "Hello, world! This is a pretty darn long string I'd say...", + "I want to eat crayons. I crave crayons because they are tasty, and everybody telling me crayons are not edible must be either lying or dumb. I like trains. 42 is a number. 3.14159265... is not a rational number, and it is called pi. I ate some pie (it was a crayon pie).", + std::string(32, 'a'), + std::string(64, 'b'), + std::string(1024, 'a'), + std::string(2048, 'a'), + std::string(4096, 'a'), + std::string(5001, 'a'), + std::string(7121, 'a'), + std::string(2000, 'a'), + std::string(GUF_STR_SSO_BUF_CAP, 'a'), + std::string(GUF_STR_SSO_BUF_CAP - 1, 'a'), + std::string(GUF_STR_SSO_BUF_CAP + 1, 'a'), + std::string(GUF_STR_SSO_BUF_CAP - 2, 'a'), + std::string(GUF_STR_SSO_BUF_CAP + 2, 'a'), + std::string(GUF_STR_SSO_BUF_CAP - 3, 'a'), + std::string(GUF_STR_SSO_BUF_CAP + 3, 'a'), + std::string(GUF_STR_SSO_BUF_CAP * 2, 'a'), + std::string(GUF_STR_SSO_BUF_CAP * 3, 'a'), + std::string(GUF_STR_SSO_BUF_CAP * 4, 'a'), + std::string(GUF_STR_SSO_BUF_CAP * 5, 'a'), + std::string(GUF_STR_SSO_BUF_CAP * 6, 'a'), + std::string(GUF_STR_SSO_BUF_CAP * 7, 'a'), + }; + + + push_check_name("init_empy"); + test_init_empty(); + pop_check_name(); + + push_check_name("append_char"); + for (const auto& word : words) { + test_init_free(word); + test_append_char(word); + test_append_char(word, true); + } + pop_check_name(); + + push_check_name("append_str"); + for (size_t i = 0; i < words.size(); ++i) { + const auto& w1 = words.at(i); + append_str(w1, w1); + append_str(w1, w1); + for (size_t j = i + 1; j < words.size(); ++j) { + const auto& w2 = words.at(j); + append_str(w1, w2); + append_str(w2, w1); + } + } + pop_check_name(); + + push_check_name("test_popsplit"); + std::vector split = test_popsplit("1997-04-01", "-"); + if (TEST_CHECK(split.size() == 3)) { + TEST_CHECK(split.at(0) == "1997" && split.at(1) == "04" && split.at(2) == "01"); + } + split = test_popsplit("1997-04-01-", "-"); + if (TEST_CHECK(split.size() == 3)) { + TEST_CHECK(split.at(0) == "1997" && split.at(1) == "04" && split.at(2) == "01"); + } + + split = test_popsplit("2025/05/08", "/"); + if (TEST_CHECK(split.size() == 3)) { + TEST_CHECK(split.at(0) == "2025" && split.at(1) == "05" && split.at(2) == "08"); + } + split = test_popsplit("2025/05/08/", "/"); + if (TEST_CHECK(split.size() == 3)) { + TEST_CHECK(split.at(0) == "2025" && split.at(1) == "05" && split.at(2) == "08"); + } + split = test_popsplit("2025/05/08//", "/"); + if (TEST_CHECK(split.size() == 4)) { + TEST_CHECK(split.at(0) == "2025" && split.at(1) == "05" && split.at(2) == "08" && split.at(3) == ""); + } + + split = test_popsplit("/2025/05/08", "/"); + if (TEST_CHECK(split.size() == 4)) { + TEST_CHECK(split.at(0) == "" && split.at(1) == "2025" && split.at(2) == "05" && split.at(3) == "08"); + } + split = test_popsplit("//2025/05/08", "/"); + if (TEST_CHECK(split.size() == 5)) { + TEST_CHECK(split.at(0) == "" && split.at(1) == "" && split.at(2) == "2025" && split.at(3) == "05" && split.at(4) == "08"); + } + + split = test_popsplit("I eat formidable crayons, oof, for real", "foo"); + if (TEST_CHECK(split.size() == 1)) { + TEST_CHECK(split.at(0) == "I eat formidable crayons, oof, for real"); + } + + split = test_popsplit("Hej <<", "<<"); + if (TEST_CHECK(split.size() == 1)) { + TEST_CHECK(split.at(0) == "Hej "); + } + split = test_popsplit("Hej << verden", "<<"); + if (TEST_CHECK(split.size() == 2)) { + TEST_CHECK(split.at(0) == "Hej " && split.at(1) == " verden"); + } + split = test_popsplit("<< Hej << verden", "<<"); + if (TEST_CHECK(split.size() == 3)) { + TEST_CHECK(split.at(0) == "" && split.at(1) == " Hej " && split.at(2) == " verden"); + } + split = test_popsplit("<< Hej << verden <<< foo<>", "<<"); + if (TEST_CHECK(split.size() == 4)) { + TEST_CHECK(split.at(0) == "" && split.at(1) == " Hej " && split.at(2) == " verden " && split.at(3) == "< foo<>"); + } + + split = test_popsplit("I eat tofu", ""); + if (TEST_CHECK(split.size() == 1)) { + TEST_CHECK(split.at(0) == "I eat tofu"); + } + + split = test_popsplit("At 3 a.m. during FULL-moon FULL-STOP Next to the public-library's -STOP sign FULL-STOP", "FULL-STOP"); + if (TEST_CHECK(split.size() == 2)) { + TEST_CHECK(split.at(0) == "At 3 a.m. during FULL-moon " && split.at(1) == " Next to the public-library's -STOP sign "); + } + split = test_popsplit("At 3 a.m. during FULL-moon FULL-STOP Next to the public-library's -STOP sign FULL-STOPI like trains, FULL-STO", "FULL-STOP"); + if (TEST_CHECK(split.size() == 3)) { + TEST_CHECK(split.at(0) == "At 3 a.m. during FULL-moon " && split.at(1) == " Next to the public-library's -STOP sign " && split.at(2) == "I like trains, FULL-STO"); + } + split = test_popsplit("At 3 a.m. during FULL-moon FULL-STOP Next to the public-library's -STOP sign FULL-STOPI like trains, FULL-STO Poo", "FULL-STOP"); + if (TEST_CHECK(split.size() == 3)) { + TEST_CHECK(split.at(0) == "At 3 a.m. during FULL-moon " && split.at(1) == " Next to the public-library's -STOP sign " && split.at(2) == "I like trains, FULL-STO Poo"); + } + pop_check_name(); + + push_check_name("get_toks"); + std::vector tok_words = {"hello", "world", "cat", "vertex", "normal", "pizza", "running", "mouse", "playing", "adjacent"}; + std::vector delims = {",", " ", "\n", "\t", "\r"}; + + for (int is_trailing = 0; is_trailing < 2; ++is_trailing) { + for (ptrdiff_t num_words = 1; num_words < std::ssize(tok_words); ++num_words) { + std::string str = ""; + for (ptrdiff_t j = 0; j < num_words; ++j) { + str += tok_words.at(j); + if (j < num_words - 1 || is_trailing) { + str += ", "; + } + } + std::vector toks = get_toks(std::string_view{str}, delims, false, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST); + if (TEST_CHECK(std::ssize(toks) == num_words)) { + for (ptrdiff_t i = 0; i < num_words; ++i) { + TEST_CHECK(toks.at(i) == tok_words.at(i)); + } + } + } + } + + std::string_view tok_str = "age: 28, occupation: NULL, crayons_eaten: 256 "; + delims = {"", "", ":", ",", " ", "\t", "", "" && tok_result.at(0) == "" && tok_result.at(1) == "age" && tok_result.at(2) == ":" && tok_result.at(3) == " " && tok_result.at(4) == "28"); + + tok_result = get_toks(tok_str, delims, false, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST); + TEST_CHECK(tok_result.size() == 6); + TEST_CHECK(tok_result.at(0) == "age" && tok_result.at(1) == "28" && tok_result.at(2) == "occupation" && tok_result.at(3) == "NULL" && + tok_result.at(4) == "crayons_eaten" && tok_result.at(5) == "256"); + pop_check_name(); +} + + +void StrTest::test_init_free(std::string str) +{ + guf_str s0; + guf_str_init(&s0, GUF_CSTR_TO_VIEW_CPP(str.c_str()), &guf_allocator_libc); + guf_str s1 = guf_str_new(GUF_CSTR_TO_VIEW_CPP(str.c_str()), &guf_allocator_libc); + guf_str s2; + guf_str_init_from_cstr(&s2, str.c_str(), &guf_allocator_libc); + + TEST_CHECK(guf_str_equal(&s0, &s1)); + TEST_CHECK(guf_str_equal(&s0, &s2)); + TEST_CHECK(guf_str_equal(&s1, &s2)); + + TEST_CHECK((ptrdiff_t)str.size() == guf_str_len(&s0)); + TEST_CHECK(str == guf_str_const_cstr(&s0)); + TEST_CHECK(str == guf_str_cstr(&s0)); + + TEST_CHECK((ptrdiff_t)str.size() == guf_str_len(&s1)); + TEST_CHECK(str == guf_str_const_cstr(&s1)); + TEST_CHECK(str == guf_str_cstr(&s1)); + + TEST_CHECK((ptrdiff_t)str.size() == guf_str_len(&s2)); + TEST_CHECK(str == guf_str_const_cstr(&s2)); + TEST_CHECK(str == guf_str_cstr(&s2)); + + guf_str_free(&s0, NULL); + guf_str_free(&s1, NULL); + guf_str_free(&s2, NULL); + TEST_CHECK(guf_str_is_uninit(&s0)); + TEST_CHECK(guf_str_is_uninit(&s1)); + TEST_CHECK(guf_str_is_uninit(&s2)); +} + +void StrTest::test_init_empty() +{ + std::string str = ""; + guf_str s = GUF_STR_UNINITIALISED_CPP; + guf_str_init_empty(&s, &guf_allocator_libc); + TEST_CHECK(guf_str_len(&s) == 0); + TEST_CHECK(str == guf_str_const_cstr(&s)); + + guf_str_append_char(&s, 'a', 1024); + str.append(1024, 'a'); + TEST_CHECK(guf_str_len(&s) == (ptrdiff_t)str.size()); + TEST_CHECK(guf_str_const_cstr(&s) == str); + + guf_str_append_char(&s, 'b', 24); + str.append(24, 'b'); + TEST_CHECK(guf_str_len(&s) == (ptrdiff_t)str.size()); + TEST_CHECK(guf_str_const_cstr(&s) == str); + + guf_str_append_char(&s, 'c', 255); + str.append(255, 'c'); + TEST_CHECK(guf_str_len(&s) == (ptrdiff_t)str.size()); + TEST_CHECK(guf_str_const_cstr(&s) == str); + + *guf_str_at(&s, 0) = '<'; + str.at(0) = '<'; + TEST_CHECK(guf_str_len(&s) == (ptrdiff_t)str.size()); + TEST_CHECK(guf_str_const_cstr(&s) == str); + + *guf_str_at(&s, guf_str_len(&s) - 1) = '>'; + str.at(str.size() - 1) = '>'; + TEST_CHECK(guf_str_len(&s) == (ptrdiff_t)str.size()); + TEST_CHECK(guf_str_const_cstr(&s) == str); + + guf_err err = GUF_ERR_NONE; + TEST_CHECK(NULL == guf_str_try_at(&s, guf_str_len(&s), &err)); + TEST_CHECK(err != GUF_ERR_NONE && err == GUF_ERR_IDX_RANGE); + err = GUF_ERR_NONE; + TEST_CHECK(NULL == guf_str_try_at(&s, -1, &err)); + TEST_CHECK(err != GUF_ERR_NONE && err == GUF_ERR_IDX_RANGE); + + guf_str_free(&s, NULL); + TEST_CHECK(guf_str_is_uninit(&s)); +} + +void StrTest::test_append_char(std::string str, bool include_null) +{ + guf_str s0 = guf_str_new(guf_str_view{.str = str.c_str(), .len = (ptrdiff_t)str.size()}, &guf_allocator_libc); + + TEST_CHECK((ptrdiff_t)str.size() == guf_str_len(&s0)); + TEST_CHECK((str == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)})); + + for (int i = include_null ? 0 : 1; i < 128; ++i) { + char ch = (char)i; + guf_str_append_one_char(&s0, ch); + str.append(1, ch); + TEST_CHECK(guf_str_len(&s0) == (ptrdiff_t)str.size()); + TEST_CHECK((str == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)})); + } + + for (int i = include_null ? 0 : 1; i < 128; ++i) { + char ch = (char)i; + guf_str_append_char(&s0, ch, i); + str.append(i, ch); + TEST_CHECK(guf_str_len(&s0) == (ptrdiff_t)str.size()); + TEST_CHECK((str == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)})); + guf_str_append_char(&s0, ch, i * 16); + str.append(i * 16, ch); + TEST_CHECK(guf_str_len(&s0) == (ptrdiff_t)str.size()); + TEST_CHECK((str == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)})); + } + + guf_str_free(&s0, NULL); + TEST_CHECK(guf_str_is_uninit(&s0)); +} + +void StrTest::append_str(const std::string& a, const std::string& b) +{ + std::string str0 = a; + guf_str s0 = guf_str_new(guf_str_view{.str = str0.c_str(), .len = (ptrdiff_t)str0.size()}, &guf_allocator_libc); + TEST_CHECK(guf_str_len(&s0) == (ptrdiff_t)str0.size()); + TEST_CHECK((str0 == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)})); + TEST_CHECK((str0 == std::string_view{guf_str_cstr(&s0), (size_t)guf_str_len(&s0)})); + + for (int i = 0; i <= 64; ++i) { + str0.append(b); + guf_str_append(&s0, guf_str_view{.str = b.c_str(), .len = (ptrdiff_t)b.size()}); + TEST_CHECK(guf_str_len(&s0) == (ptrdiff_t)str0.size()); + TEST_CHECK((str0 == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)})); + TEST_CHECK((str0 == std::string_view{guf_str_cstr(&s0), (size_t)guf_str_len(&s0)})); + } + + guf_str_free(&s0, NULL); + TEST_CHECK(guf_str_is_uninit(&s0)); +} + +std::vector StrTest::test_popsplit(std::string_view str, std::string_view delim) +{ + std::vector result = {}; + + if (delim.size() > 0) { // NOTE: str.find with an empty delimiter returns 0, not std::string::npos + std::string_view src_cpp = str; + for (size_t idx = src_cpp.find(delim, 0); src_cpp.size() > 0; idx = src_cpp.find(delim, 0)) { + result.push_back(src_cpp.substr(0, idx)); + if (idx == std::string::npos) { + break; + } + src_cpp = src_cpp.substr(idx + delim.size()); + } + } else { + result.push_back(str); + } + + + const guf_str_view delim_sv = guf_str_view{.len = (ptrdiff_t)delim.size(), .str = delim.data()}; + guf_str_view src = guf_str_view{.len = (ptrdiff_t)str.size(), .str = str.data()}; + size_t n = 0; + do { + const guf_str_view popped = guf_str_view_pop_split(&src, delim_sv); + TEST_CHECK(n < result.size()); + TEST_CHECK(std::string_view(popped.str, (size_t)popped.len) == result.at(n)); + const guf_str_view res = {.str = result.at(n).data(), .len = (ptrdiff_t)result.at(n).size()}; + TEST_CHECK(guf_str_view_equal(&popped, &res)); + TEST_CHECK(guf_str_view_equal_val_arg(popped, res)); + // std::cout << "guf: " << std::string_view{popped.str, (size_t)popped.len} << "\n"; + // std::cout << "cpp: " << std::string_view{res.str, (size_t)res.len} << "\n"; + ++n; + } while (src.len > 0); + TEST_CHECK(n == result.size()); + + return result; +} + +std::vector StrTest::get_toks(std::string_view sv_in, const std::vector& delims_in, bool preserve_delims, guf_str_tok_delim_opt opt) +{ + const guf_str_view sv = guf_str_view{.len = (ptrdiff_t)sv_in.size(), .str = sv_in.data()}; + std::vector delims; + for (const auto delim : delims_in) { + delims.push_back(guf_str_view{.len = (ptrdiff_t)delim.size(), .str = delim.data()}); + } + guf_str_tok_state tok_state = guf_str_tok_state_new(sv, delims.data(), std::ssize(delims), opt); + + std::vector toks_out; + while (guf_str_tok_next(&tok_state, preserve_delims)) { + if (tok_state.cur_tok.len > 0) { + toks_out.push_back( std::string_view{tok_state.cur_tok.str, (size_t)tok_state.cur_tok.len}); + } + if (preserve_delims && tok_state.cur_delim.len > 0) { + toks_out.push_back( std::string_view{tok_state.cur_delim.str, (size_t)tok_state.cur_delim.len}); + } + } + TEST_CHECK(tok_state.done); + const ptrdiff_t num_toks = preserve_delims ? tok_state.num_delims_read + tok_state.num_toks_read : tok_state.num_toks_read; + TEST_CHECK(num_toks == std::ssize(toks_out)); + return toks_out; +} diff --git a/src/test/test_str.hpp b/src/test/test_str.hpp index ae37079..46acabd 100644 --- a/src/test/test_str.hpp +++ b/src/test/test_str.hpp @@ -1,377 +1,23 @@ +#pragma once + #include #include - #include "test.hpp" - -extern "C" { - #include "guf_alloc_libc.h" +extern "C" +{ #include "guf_str.h" } struct StrTest : public Test { -public: StrTest(const std::string& name) : Test(name) {}; + void run() override; private: - void test_init_free(std::string str) - { - guf_str s0; - guf_str_init(&s0, GUF_CSTR_TO_VIEW_CPP(str.c_str()), &guf_allocator_libc); - guf_str s1 = guf_str_new(GUF_CSTR_TO_VIEW_CPP(str.c_str()), &guf_allocator_libc); - guf_str s2; - guf_str_init_from_cstr(&s2, str.c_str(), &guf_allocator_libc); - - TEST_CHECK(guf_str_equal(&s0, &s1)); - TEST_CHECK(guf_str_equal(&s0, &s2)); - TEST_CHECK(guf_str_equal(&s1, &s2)); - - TEST_CHECK((ptrdiff_t)str.size() == guf_str_len(&s0)); - TEST_CHECK(str == guf_str_const_cstr(&s0)); - TEST_CHECK(str == guf_str_cstr(&s0)); - - TEST_CHECK((ptrdiff_t)str.size() == guf_str_len(&s1)); - TEST_CHECK(str == guf_str_const_cstr(&s1)); - TEST_CHECK(str == guf_str_cstr(&s1)); - - TEST_CHECK((ptrdiff_t)str.size() == guf_str_len(&s2)); - TEST_CHECK(str == guf_str_const_cstr(&s2)); - TEST_CHECK(str == guf_str_cstr(&s2)); - - guf_str_free(&s0, NULL); - guf_str_free(&s1, NULL); - guf_str_free(&s2, NULL); - TEST_CHECK(guf_str_is_uninit(&s0)); - TEST_CHECK(guf_str_is_uninit(&s1)); - TEST_CHECK(guf_str_is_uninit(&s2)); - } - - void test_init_empty() - { - std::string str = ""; - guf_str s = GUF_STR_UNINITIALISED_CPP; - guf_str_init_empty(&s, &guf_allocator_libc); - TEST_CHECK(guf_str_len(&s) == 0); - TEST_CHECK(str == guf_str_const_cstr(&s)); - - guf_str_append_char(&s, 'a', 1024); - str.append(1024, 'a'); - TEST_CHECK(guf_str_len(&s) == (ptrdiff_t)str.size()); - TEST_CHECK(guf_str_const_cstr(&s) == str); - - guf_str_append_char(&s, 'b', 24); - str.append(24, 'b'); - TEST_CHECK(guf_str_len(&s) == (ptrdiff_t)str.size()); - TEST_CHECK(guf_str_const_cstr(&s) == str); - - guf_str_append_char(&s, 'c', 255); - str.append(255, 'c'); - TEST_CHECK(guf_str_len(&s) == (ptrdiff_t)str.size()); - TEST_CHECK(guf_str_const_cstr(&s) == str); - - *guf_str_at(&s, 0) = '<'; - str.at(0) = '<'; - TEST_CHECK(guf_str_len(&s) == (ptrdiff_t)str.size()); - TEST_CHECK(guf_str_const_cstr(&s) == str); - - *guf_str_at(&s, guf_str_len(&s) - 1) = '>'; - str.at(str.size() - 1) = '>'; - TEST_CHECK(guf_str_len(&s) == (ptrdiff_t)str.size()); - TEST_CHECK(guf_str_const_cstr(&s) == str); - - guf_err err = GUF_ERR_NONE; - TEST_CHECK(NULL == guf_str_try_at(&s, guf_str_len(&s), &err)); - TEST_CHECK(err != GUF_ERR_NONE && err == GUF_ERR_IDX_RANGE); - err = GUF_ERR_NONE; - TEST_CHECK(NULL == guf_str_try_at(&s, -1, &err)); - TEST_CHECK(err != GUF_ERR_NONE && err == GUF_ERR_IDX_RANGE); - - guf_str_free(&s, NULL); - TEST_CHECK(guf_str_is_uninit(&s)); - } - - void test_append_char(std::string str, bool include_null = false) - { - guf_str s0 = guf_str_new(guf_str_view{.str = str.c_str(), .len = (ptrdiff_t)str.size()}, &guf_allocator_libc); - - TEST_CHECK((ptrdiff_t)str.size() == guf_str_len(&s0)); - TEST_CHECK((str == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)})); - - for (int i = include_null ? 0 : 1; i < 128; ++i) { - char ch = (char)i; - guf_str_append_one_char(&s0, ch); - str.append(1, ch); - TEST_CHECK(guf_str_len(&s0) == (ptrdiff_t)str.size()); - TEST_CHECK((str == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)})); - } - - for (int i = include_null ? 0 : 1; i < 128; ++i) { - char ch = (char)i; - guf_str_append_char(&s0, ch, i); - str.append(i, ch); - TEST_CHECK(guf_str_len(&s0) == (ptrdiff_t)str.size()); - TEST_CHECK((str == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)})); - guf_str_append_char(&s0, ch, i * 16); - str.append(i * 16, ch); - TEST_CHECK(guf_str_len(&s0) == (ptrdiff_t)str.size()); - TEST_CHECK((str == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)})); - } - - guf_str_free(&s0, NULL); - TEST_CHECK(guf_str_is_uninit(&s0)); - } - - void append_str(const std::string& a, const std::string& b) - { - std::string str0 = a; - guf_str s0 = guf_str_new(guf_str_view{.str = str0.c_str(), .len = (ptrdiff_t)str0.size()}, &guf_allocator_libc); - TEST_CHECK(guf_str_len(&s0) == (ptrdiff_t)str0.size()); - TEST_CHECK((str0 == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)})); - TEST_CHECK((str0 == std::string_view{guf_str_cstr(&s0), (size_t)guf_str_len(&s0)})); - - for (int i = 0; i <= 64; ++i) { - str0.append(b); - guf_str_append(&s0, guf_str_view{.str = b.c_str(), .len = (ptrdiff_t)b.size()}); - TEST_CHECK(guf_str_len(&s0) == (ptrdiff_t)str0.size()); - TEST_CHECK((str0 == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)})); - TEST_CHECK((str0 == std::string_view{guf_str_cstr(&s0), (size_t)guf_str_len(&s0)})); - } - - guf_str_free(&s0, NULL); - TEST_CHECK(guf_str_is_uninit(&s0)); - } - - auto test_popsplit(std::string_view str, std::string_view delim) - { - std::vector result = {}; - - if (delim.size() > 0) { // NOTE: str.find with an empty delimiter returns 0, not std::string::npos - std::string_view src_cpp = str; - for (size_t idx = src_cpp.find(delim, 0); src_cpp.size() > 0; idx = src_cpp.find(delim, 0)) { - result.push_back(src_cpp.substr(0, idx)); - if (idx == std::string::npos) { - break; - } - src_cpp = src_cpp.substr(idx + delim.size()); - } - } else { - result.push_back(str); - } - - - const guf_str_view delim_sv = guf_str_view{.len = (ptrdiff_t)delim.size(), .str = delim.data()}; - guf_str_view src = guf_str_view{.len = (ptrdiff_t)str.size(), .str = str.data()}; - size_t n = 0; - do { - const guf_str_view popped = guf_str_view_pop_split(&src, delim_sv); - TEST_CHECK(n < result.size()); - TEST_CHECK(std::string_view(popped.str, (size_t)popped.len) == result.at(n)); - const guf_str_view res = {.str = result.at(n).data(), .len = (ptrdiff_t)result.at(n).size()}; - TEST_CHECK(guf_str_view_equal(&popped, &res)); - TEST_CHECK(guf_str_view_equal_val_arg(popped, res)); - // std::cout << "guf: " << std::string_view{popped.str, (size_t)popped.len} << "\n"; - // std::cout << "cpp: " << std::string_view{res.str, (size_t)res.len} << "\n"; - ++n; - } while (src.len > 0); - TEST_CHECK(n == result.size()); - - return result; - } - - std::vector get_toks(std::string_view sv_in, const std::vector& delims_in, bool preserve_delims = false, guf_str_tok_delim_opt opt = GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST) - { - const guf_str_view sv = guf_str_view{.len = (ptrdiff_t)sv_in.size(), .str = sv_in.data()}; - std::vector delims; - for (const auto delim : delims_in) { - delims.push_back(guf_str_view{.len = (ptrdiff_t)delim.size(), .str = delim.data()}); - } - guf_str_tok_state tok_state = guf_str_tok_state_new(sv, delims.data(), std::ssize(delims), opt); - - std::vector toks_out; - while (guf_str_tok_next(&tok_state, preserve_delims)) { - if (tok_state.cur_tok.len > 0) { - toks_out.push_back( std::string_view{tok_state.cur_tok.str, (size_t)tok_state.cur_tok.len}); - } - if (preserve_delims && tok_state.cur_delim.len > 0) { - toks_out.push_back( std::string_view{tok_state.cur_delim.str, (size_t)tok_state.cur_delim.len}); - } - } - TEST_CHECK(tok_state.done); - const ptrdiff_t num_toks = preserve_delims ? tok_state.num_delims_read + tok_state.num_toks_read : tok_state.num_toks_read; - TEST_CHECK(num_toks == std::ssize(toks_out)); - return toks_out; - } - -public: - bool run() - { - if (done) { - return passed; - } - - const std::vector words = { - "", - "\0", - "Hello", - "Othell\0o", - "f\0\0", - "\0", - "0", - "a", - "ab", - "🌈 waow a rainboge!", - "orange cat(1) :3", - "xes yag", - "Hello, world! This is a pretty darn long string I'd say...", - "I want to eat crayons. I crave crayons because they are tasty, and everybody telling me crayons are not edible must be either lying or dumb. I like trains. 42 is a number. 3.14159265... is not a rational number, and it is called pi. I ate some pie (it was a crayon pie).", - std::string(32, 'a'), - std::string(64, 'b'), - std::string(1024, 'a'), - std::string(2048, 'a'), - std::string(4096, 'a'), - std::string(5001, 'a'), - std::string(7121, 'a'), - std::string(2000, 'a'), - std::string(GUF_STR_SSO_BUF_CAP, 'a'), - std::string(GUF_STR_SSO_BUF_CAP - 1, 'a'), - std::string(GUF_STR_SSO_BUF_CAP + 1, 'a'), - std::string(GUF_STR_SSO_BUF_CAP - 2, 'a'), - std::string(GUF_STR_SSO_BUF_CAP + 2, 'a'), - std::string(GUF_STR_SSO_BUF_CAP - 3, 'a'), - std::string(GUF_STR_SSO_BUF_CAP + 3, 'a'), - std::string(GUF_STR_SSO_BUF_CAP * 2, 'a'), - std::string(GUF_STR_SSO_BUF_CAP * 3, 'a'), - std::string(GUF_STR_SSO_BUF_CAP * 4, 'a'), - std::string(GUF_STR_SSO_BUF_CAP * 5, 'a'), - std::string(GUF_STR_SSO_BUF_CAP * 6, 'a'), - std::string(GUF_STR_SSO_BUF_CAP * 7, 'a'), - }; - - test_init_empty(); - - for (const auto& word : words) { - test_init_free(word); - test_append_char(word); - test_append_char(word, true); - } - - for (size_t i = 0; i < words.size(); ++i) { - const auto& w1 = words.at(i); - append_str(w1, w1); - append_str(w1, w1); - for (size_t j = i + 1; j < words.size(); ++j) { - const auto& w2 = words.at(j); - append_str(w1, w2); - append_str(w2, w1); - } - } - - std::vector split = test_popsplit("1997-04-01", "-"); - if (TEST_CHECK(split.size() == 3)) { - TEST_CHECK(split.at(0) == "1997" && split.at(1) == "04" && split.at(2) == "01"); - } - split = test_popsplit("1997-04-01-", "-"); - if (TEST_CHECK(split.size() == 3)) { - TEST_CHECK(split.at(0) == "1997" && split.at(1) == "04" && split.at(2) == "01"); - } - - split = test_popsplit("2025/05/08", "/"); - if (TEST_CHECK(split.size() == 3)) { - TEST_CHECK(split.at(0) == "2025" && split.at(1) == "05" && split.at(2) == "08"); - } - split = test_popsplit("2025/05/08/", "/"); - if (TEST_CHECK(split.size() == 3)) { - TEST_CHECK(split.at(0) == "2025" && split.at(1) == "05" && split.at(2) == "08"); - } - split = test_popsplit("2025/05/08//", "/"); - if (TEST_CHECK(split.size() == 4)) { - TEST_CHECK(split.at(0) == "2025" && split.at(1) == "05" && split.at(2) == "08" && split.at(3) == ""); - } - - split = test_popsplit("/2025/05/08", "/"); - if (TEST_CHECK(split.size() == 4)) { - TEST_CHECK(split.at(0) == "" && split.at(1) == "2025" && split.at(2) == "05" && split.at(3) == "08"); - } - split = test_popsplit("//2025/05/08", "/"); - if (TEST_CHECK(split.size() == 5)) { - TEST_CHECK(split.at(0) == "" && split.at(1) == "" && split.at(2) == "2025" && split.at(3) == "05" && split.at(4) == "08"); - } - - split = test_popsplit("I eat formidable crayons, oof, for real", "foo"); - if (TEST_CHECK(split.size() == 1)) { - TEST_CHECK(split.at(0) == "I eat formidable crayons, oof, for real"); - } - - split = test_popsplit("Hej <<", "<<"); - if (TEST_CHECK(split.size() == 1)) { - TEST_CHECK(split.at(0) == "Hej "); - } - split = test_popsplit("Hej << verden", "<<"); - if (TEST_CHECK(split.size() == 2)) { - TEST_CHECK(split.at(0) == "Hej " && split.at(1) == " verden"); - } - split = test_popsplit("<< Hej << verden", "<<"); - if (TEST_CHECK(split.size() == 3)) { - TEST_CHECK(split.at(0) == "" && split.at(1) == " Hej " && split.at(2) == " verden"); - } - split = test_popsplit("<< Hej << verden <<< foo<>", "<<"); - if (TEST_CHECK(split.size() == 4)) { - TEST_CHECK(split.at(0) == "" && split.at(1) == " Hej " && split.at(2) == " verden " && split.at(3) == "< foo<>"); - } - - split = test_popsplit("I eat tofu", ""); - if (TEST_CHECK(split.size() == 1)) { - TEST_CHECK(split.at(0) == "I eat tofu"); - } - - split = test_popsplit("At 3 a.m. during FULL-moon FULL-STOP Next to the public-library's -STOP sign FULL-STOP", "FULL-STOP"); - if (TEST_CHECK(split.size() == 2)) { - TEST_CHECK(split.at(0) == "At 3 a.m. during FULL-moon " && split.at(1) == " Next to the public-library's -STOP sign "); - } - split = test_popsplit("At 3 a.m. during FULL-moon FULL-STOP Next to the public-library's -STOP sign FULL-STOPI like trains, FULL-STO", "FULL-STOP"); - if (TEST_CHECK(split.size() == 3)) { - TEST_CHECK(split.at(0) == "At 3 a.m. during FULL-moon " && split.at(1) == " Next to the public-library's -STOP sign " && split.at(2) == "I like trains, FULL-STO"); - } - split = test_popsplit("At 3 a.m. during FULL-moon FULL-STOP Next to the public-library's -STOP sign FULL-STOPI like trains, FULL-STO Poo", "FULL-STOP"); - if (TEST_CHECK(split.size() == 3)) { - TEST_CHECK(split.at(0) == "At 3 a.m. during FULL-moon " && split.at(1) == " Next to the public-library's -STOP sign " && split.at(2) == "I like trains, FULL-STO Poo"); - } - - std::vector tok_words = {"hello", "world", "cat", "vertex", "normal", "pizza", "running", "mouse", "playing", "adjacent"}; - std::vector delims = {",", " ", "\n", "\t", "\r"}; - - for (int is_trailing = 0; is_trailing < 2; ++is_trailing) { - for (ptrdiff_t num_words = 1; num_words < std::ssize(tok_words); ++num_words) { - std::string str = ""; - for (ptrdiff_t j = 0; j < num_words; ++j) { - str += tok_words.at(j); - if (j < num_words - 1 || is_trailing) { - str += ", "; - } - } - std::vector toks = get_toks(std::string_view{str}, delims, false, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST); - if (TEST_CHECK(std::ssize(toks) == num_words)) { - for (ptrdiff_t i = 0; i < num_words; ++i) { - TEST_CHECK(toks.at(i) == tok_words.at(i)); - } - } - } - } - - std::string_view tok_str = "age: 28, occupation: NULL, crayons_eaten: 256 "; - delims = {"", "", ":", ",", " ", "\t", "", "" && tok_result.at(0) == "" && tok_result.at(1) == "age" && tok_result.at(2) == ":" && tok_result.at(3) == " " && tok_result.at(4) == "28"); - - tok_result = get_toks(tok_str, delims, false, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST); - TEST_CHECK(tok_result.size() == 6); - TEST_CHECK(tok_result.at(0) == "age" && tok_result.at(1) == "28" && tok_result.at(2) == "occupation" && tok_result.at(3) == "NULL" && - tok_result.at(4) == "crayons_eaten" && tok_result.at(5) == "256"); - - - done = true; - passed = (num_failed_checks == 0); - return passed; - } -}; \ No newline at end of file + void test_init_free(std::string str); + void test_init_empty(); + void test_append_char(std::string str, bool include_null = false); + void append_str(const std::string& a, const std::string& b); + std::vector test_popsplit(std::string_view str, std::string_view delim); + std::vector get_toks(std::string_view sv_in, const std::vector& delims_in, bool preserve_delims = false, guf_str_tok_delim_opt opt = GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST); +}; diff --git a/src/test/test_utf8.cpp b/src/test/test_utf8.cpp new file mode 100644 index 0000000..1642da4 --- /dev/null +++ b/src/test/test_utf8.cpp @@ -0,0 +1,388 @@ +#include "test_utf8.hpp" +extern "C" +{ + #include "guf_alloc_libc.h" + #include "guf_str.h" + #include "impls/dict_impl.h" +} + +/* + UTF8Test: +*/ + +void UTF8Test::run() +{ + if (done) { + return; + } + + push_check_name("read_utf8_chars"); + ptrdiff_t valid = 0, invalid = 0; + read_utf8_chars(TEST_DATA_DIR "/" "utf8-test.txt", &valid, &invalid); + TEST_CHECK(valid == 2635 && invalid == 0); + read_utf8_chars(TEST_DATA_DIR "/" "bartleby.txt", &valid, &invalid); + TEST_CHECK(valid > 16000 && invalid == 0); + pop_check_name(); + + push_check_name("count_words"); + dbuf_str_view delims = dbuf_str_view_new(&guf_allocator_libc); + for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_WHITESPACE); ++i) { + guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_WHITESPACE[i]), .str = GUF_UTF8_WHITESPACE[i]}; + dbuf_str_view_push_val(&delims, d); + } + for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_COMMON_PUNCT); ++i) { + guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_COMMON_PUNCT[i]), .str = GUF_UTF8_COMMON_PUNCT[i]}; + dbuf_str_view_push_val(&delims, d); + } + int words = count_words(TEST_DATA_DIR "/" "utf8-test.txt", &delims); + TEST_CHECK(words == 422); + int words_with_delims = count_words_with_delims(TEST_DATA_DIR "/" "utf8-test.txt", &delims); + TEST_CHECK(words_with_delims == 950); + + int words2 = count_words(TEST_DATA_DIR "/" "bartleby.txt", &delims); + TEST_CHECK(words2 > 2048); + + dbuf_str_view_free(&delims, NULL); + pop_check_name(); + + push_check_name("encode_decode"); + encode_decode(); + encode_decode_file(TEST_DATA_DIR "/" "utf8-test.txt"); + encode_decode_file(TEST_DATA_DIR "/" "bartleby.txt"); + pop_check_name(); +} + + +bool UTF8Test::load_text(const char *fname) +{ + FILE *in_file {nullptr}; + if (!in_file) { + in_file = fopen(fname, "r"); + } + + if (!in_file) { + return false; + } + + dbuf_char_init(&text_buf, 128, &guf_allocator_libc); + + int c = EOF; + while ((c = fgetc(in_file)) != EOF) { + dbuf_char_push_val(&text_buf, (char)c); + text_vec.push_back((char)c); + } + fclose(in_file); + + return TEST_CHECK(std::ssize(text_vec) == text_buf.size); +} + +void UTF8Test::free_text() +{ + dbuf_char_free(&text_buf, NULL); + text_vec.clear(); +} + + +void UTF8Test::read_utf8_chars(const char *fname, ptrdiff_t *n_valid, ptrdiff_t *n_invalid) +{ + GUF_ASSERT_RELEASE(load_text(fname)); + + ptrdiff_t valid_chars = 0, invalid_chars = 0, bytes = 0; + guf_str_view input_str = {.str = text_buf.data, .len = text_buf.size}; + guf_utf8_char ch = {}; + for (guf_utf8_stat stat = guf_utf8_char_next(&ch, &input_str); stat != GUF_UTF8_READ_DONE; stat = guf_utf8_char_next(&ch, &input_str)) { + if (stat == GUF_UTF8_READ_VALID) { + ++valid_chars; + // printf("%s", ch.bytes); + } else { + ++invalid_chars; + // printf("::INVALID_UTF8_CHAR::"); + } + bytes += guf_utf8_char_num_bytes(&ch); + } + TEST_CHECK(input_str.len == 0 && input_str.str == NULL); + TEST_CHECK(bytes == text_buf.size); + + // printf("\nread %td bytes\n", bytes); + // printf("read %td valid and %td invalid utf-8 characters\n", valid_chars, invalid_chars); + + free_text(); + + if (n_valid) + *n_valid = valid_chars; + if (n_invalid) + *n_invalid = invalid_chars; +} + +int UTF8Test::count_words(const char *fname, const dbuf_str_view *delims) +{ + GUF_ASSERT_RELEASE(load_text(fname)); + + int num_words = 0; + + guf_str_tok_state tok_state = guf_str_tok_state_new(guf_str_view{.str = text_buf.data, .len = text_buf.size}, delims->data, delims->size, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST); + while (guf_str_tok_next(&tok_state, false)) { + TEST_CHECK(tok_state.cur_tok.len > 0); + ++num_words; + } + + free_text(); + return num_words; +} + +int UTF8Test::count_words_with_delims(const char *fname, const dbuf_str_view *delims) +{ + GUF_ASSERT_RELEASE(load_text(fname)); + + int num_words = 0, num_delims = 0; + guf_str_tok_state tok_state = guf_str_tok_state_new(guf_str_view{.str = text_buf.data, .len = text_buf.size}, delims->data, delims->size, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST); + while (guf_str_tok_next(&tok_state, true)) { + if (tok_state.cur_tok.len) { + ++num_words; + // printf("'%.*s'\n", (int)tok_state.cur_tok.len, tok_state.cur_tok.str); + } + if (tok_state.cur_delim.len) { + ++num_delims; + // if (tok_state.cur_delim.str[0] == '\n') + // printf("'\\n'\n"); + // else + // printf("'%.*s'\n", (int)tok_state.cur_delim.len, tok_state.cur_delim.str); + } + } + free_text(); + return num_words + num_delims; +} + +void UTF8Test::encode_decode_file(const char *fname) +{ + GUF_ASSERT_RELEASE(load_text(fname)); + + dbuf_i32 cp_buf = dbuf_i32_new(&guf_allocator_libc); + + ptrdiff_t valid_chars = 0, invalid_chars = 0; + guf_str_view input_str = {.str = text_buf.data, .len = text_buf.size}; + guf_utf8_char ch = {}; + for (guf_utf8_stat stat = guf_utf8_char_next(&ch, &input_str); stat != GUF_UTF8_READ_DONE; stat = guf_utf8_char_next(&ch, &input_str)) { + if (stat == GUF_UTF8_READ_VALID) { + ++valid_chars; + const int32_t codepoint = guf_utf8_decode(&ch); + TEST_CHECK(codepoint >= 0); + dbuf_i32_push_val(&cp_buf, codepoint); + } else { + ++invalid_chars; + const int32_t codepoint = guf_utf8_decode(&ch); + TEST_CHECK(codepoint < 0); + dbuf_i32_push_val(&cp_buf, -1); + } + } + TEST_CHECK(cp_buf.size == valid_chars + invalid_chars); + + guf_str_view in_str = {.str = text_buf.data, .len = text_buf.size}; + GUF_CNT_FOREACH(&cp_buf, dbuf_i32, it) { + GUF_ASSERT_RELEASE(it.ptr); + const int32_t codepoint = *it.ptr; + guf_utf8_char utf8_ch = {}; + const guf_utf8_stat stat = guf_utf8_char_next(&utf8_ch, &in_str); + if (codepoint >= 0) { + TEST_CHECK(stat == GUF_UTF8_READ_VALID); + guf_utf8_char encoded_ch = {}; + TEST_CHECK(guf_utf8_encode(&encoded_ch, codepoint)); + TEST_CHECK(guf_utf8_equal(&encoded_ch, &utf8_ch)); + } + } + guf_utf8_char utf8_ch = {}; + const guf_utf8_stat stat = guf_utf8_char_next(&utf8_ch, &in_str); + TEST_CHECK(stat == GUF_UTF8_READ_DONE); + + dbuf_i32_free(&cp_buf, NULL); + + free_text(); +} + +void UTF8Test::encode_decode() +{ + guf_utf8_char utf8 = {0}; + + // 1 byte characters. + for (uint8_t ascii = 0; ascii <= 0x7F; ++ascii) { + TEST_CHECK(guf_utf8_encode(&utf8, ascii)); + TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 1); + TEST_CHECK(utf8.bytes[0] == ascii); + TEST_CHECK(utf8.bytes[1] == '\0'); + TEST_CHECK(guf_utf8_decode(&utf8) == ascii); + } + + // 2 byte characters: + TEST_CHECK(guf_utf8_encode(&utf8, 0x00E6)); // "æ" (Latin Small Letter Ae) + TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2); + TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\xA6'); + TEST_CHECK(utf8.bytes[2] == '\0'); + TEST_CHECK(guf_utf8_decode(&utf8) == 0x00E6); + + TEST_CHECK(guf_utf8_encode(&utf8, 0x00E5)); // "å" (Latin Small Letter A with Ring Above) + TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2); + TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\xA5'); + TEST_CHECK(utf8.bytes[2] == '\0'); + TEST_CHECK(guf_utf8_decode(&utf8) == 0x00E5); + + TEST_CHECK(guf_utf8_encode(&utf8, 0x00F8)); // "ø" (Latin Small Letter O with Stroke) + TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2); + TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\xB8'); + TEST_CHECK(utf8.bytes[2] == '\0'); + TEST_CHECK(guf_utf8_decode(&utf8) == 0x00F8); + + TEST_CHECK(guf_utf8_encode(&utf8, 0x00E4)); // "ä" (Latin Small Letter A with Diaeresis) + TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2); + TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\xA4'); + TEST_CHECK(utf8.bytes[2] == '\0'); + TEST_CHECK(guf_utf8_decode(&utf8) == 0x00E4); + + TEST_CHECK(guf_utf8_encode(&utf8, 0x00F6)); // "ö" (Latin Small Letter O with Diaeresis) + TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2); + TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\xB6'); + TEST_CHECK(utf8.bytes[2] == '\0'); + TEST_CHECK(guf_utf8_decode(&utf8) == 0x00F6); + + TEST_CHECK(guf_utf8_encode(&utf8, 0x00D6)); // "Ö" (Latin Capital Letter O with Diaeresis) + TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2); + TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\x96'); + TEST_CHECK(utf8.bytes[2] == '\0'); + TEST_CHECK(guf_utf8_decode(&utf8) == 0x00D6); + + TEST_CHECK(guf_utf8_encode(&utf8, 0x00FC)); // "ü" (Latin Small Letter U with Diaeresis) + TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2); + TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\xBC'); + TEST_CHECK(utf8.bytes[2] == '\0'); + TEST_CHECK(guf_utf8_decode(&utf8) == 0x00FC); + + TEST_CHECK(guf_utf8_encode(&utf8, 0x00B5)); // "µ" (Micro Sign) + TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2); + TEST_CHECK(utf8.bytes[0] == '\xC2' && utf8.bytes[1] == '\xB5'); + TEST_CHECK(utf8.bytes[2] == '\0'); + TEST_CHECK(guf_utf8_decode(&utf8) == 0x00B5); + + TEST_CHECK(guf_utf8_encode(&utf8, 0x030A)); // "◌̊" (Combining Ring Above) + TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2); + TEST_CHECK(utf8.bytes[0] == '\xCC' && utf8.bytes[1] == '\x8A'); + TEST_CHECK(utf8.bytes[2] == '\0'); + TEST_CHECK(guf_utf8_decode(&utf8) == 0x030A); + + // 3 byte characters: + TEST_CHECK(guf_utf8_encode(&utf8, 0x7121)); // "無" (Nothingness; CJK Unified Ideograph-7121) + TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 3); + TEST_CHECK(!guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR)); + TEST_CHECK(utf8.bytes[0] == '\xE7' && utf8.bytes[1] == '\x84' && utf8.bytes[2] == '\xA1'); + TEST_CHECK(utf8.bytes[3] == '\0'); + TEST_CHECK(guf_utf8_decode(&utf8) == 0x7121); + + TEST_CHECK(guf_utf8_encode(&utf8, 0x201E)); // "„" (Double Low-9 Quotation Mark) + TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 3); + TEST_CHECK(!guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR)); + TEST_CHECK(utf8.bytes[0] == '\xE2' && utf8.bytes[1] == '\x80' && utf8.bytes[2] == '\x9E'); + TEST_CHECK(utf8.bytes[3] == '\0'); + TEST_CHECK(guf_utf8_decode(&utf8) == 0x201E); + + TEST_CHECK(guf_utf8_encode(&utf8, 0x20AC)); // "€" (Euro Sign) + TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 3); + TEST_CHECK(!guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR)); + TEST_CHECK(utf8.bytes[0] == '\xE2' && utf8.bytes[1] == '\x82' && utf8.bytes[2] == '\xAC'); + TEST_CHECK(utf8.bytes[3] == '\0'); + TEST_CHECK(guf_utf8_decode(&utf8) == 0x20AC); + + TEST_CHECK(guf_utf8_encode(&utf8, 0xFC51)); // "ﱑ" (Arabic Ligature Heh with Jeem Isolated Form) + TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 3); + TEST_CHECK(!guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR)); + TEST_CHECK(utf8.bytes[0] == '\xEF' && utf8.bytes[1] == '\xB1' && utf8.bytes[2] == '\x91'); + TEST_CHECK(utf8.bytes[3] == '\0'); + TEST_CHECK(guf_utf8_decode(&utf8) == 0xFC51); + + TEST_CHECK(guf_utf8_encode(&utf8, 0x1AA3)); // "᪣" (Tai Tham Sign Keow) + TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 3); + TEST_CHECK(!guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR)); + TEST_CHECK(utf8.bytes[0] == '\xE1' && utf8.bytes[1] == '\xAA' && utf8.bytes[2] == '\xA3'); + TEST_CHECK(utf8.bytes[3] == '\0'); + TEST_CHECK(guf_utf8_decode(&utf8) == 0x1AA3); + + TEST_CHECK(guf_utf8_encode(&utf8, GUF_UTF8_REPLACEMENT_CHAR_CODEPOINT)); // "�" (Replacement Character) + TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 3); + TEST_CHECK(utf8.bytes[0] == '\xEF' && utf8.bytes[1] == '\xBF' && utf8.bytes[2] == '\xBD'); + TEST_CHECK(utf8.bytes[3] == '\0'); + TEST_CHECK(guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR)); + TEST_CHECK(guf_utf8_decode(&utf8) == GUF_UTF8_REPLACEMENT_CHAR_CODEPOINT); + + // 4 byte characters: + TEST_CHECK(guf_utf8_encode(&utf8, 0x1F308)); // "🌈" (Rainbow) + TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 4); + TEST_CHECK(utf8.bytes[0] == '\xF0' && utf8.bytes[1] == '\x9F' && utf8.bytes[2] == '\x8C' && utf8.bytes[3] == '\x88'); + TEST_CHECK(utf8.bytes[4] == '\0'); + TEST_CHECK(guf_utf8_decode(&utf8) == 0x1F308); + + TEST_CHECK(guf_utf8_encode(&utf8, 0x130B8)); // "𓂸" (Egyptian Hieroglyph D052) + TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 4); + TEST_CHECK(utf8.bytes[0] == '\xF0' && utf8.bytes[1] == '\x93' && utf8.bytes[2] == '\x82' && utf8.bytes[3] == '\xB8'); + TEST_CHECK(utf8.bytes[4] == '\0'); + TEST_CHECK(guf_utf8_decode(&utf8) == 0x130B8); + + TEST_CHECK(guf_utf8_encode(&utf8, 0x1F97A)); // "🥺" (Face with Pleading Eyes) + TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 4); + TEST_CHECK(utf8.bytes[0] == '\xF0' && utf8.bytes[1] == '\x9F' && utf8.bytes[2] == '\xA5' && utf8.bytes[3] == '\xBA'); + TEST_CHECK(utf8.bytes[4] == '\0'); + TEST_CHECK(guf_utf8_decode(&utf8) == 0x1F97A); + + TEST_CHECK(guf_utf8_encode(&utf8, 0x1F980)); // "🦀" (Crab) + TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 4); + TEST_CHECK(utf8.bytes[0] == '\xF0' && utf8.bytes[1] == '\x9F' && utf8.bytes[2] == '\xA6' && utf8.bytes[3] == '\x80'); + TEST_CHECK(utf8.bytes[4] == '\0'); + TEST_CHECK(guf_utf8_decode(&utf8) == 0x1F980); + + // Invalid characters: + utf8 = {.bytes = {'\xC0', '\x80', 0, 0, 0}}; + TEST_CHECK(guf_utf8_decode(&utf8) < 0); + + utf8 = {.bytes = {'\xC0', 0, 0, 0, 0}}; + TEST_CHECK(guf_utf8_decode(&utf8) < 0); + + utf8 = {.bytes = {'\x80', 0, 0, 0, 0}}; + TEST_CHECK(guf_utf8_decode(&utf8) < 0); + + // "The definition of UTF-8 prohibits encoding character numbers between U+D800 and U+DFFF" (surrogate pairs). + TEST_CHECK(!guf_utf8_encode(&utf8, 0xD800)); + TEST_CHECK(guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR)); + TEST_CHECK(guf_utf8_decode(&utf8) == GUF_UTF8_REPLACEMENT_CHAR_CODEPOINT); + + TEST_CHECK(!guf_utf8_encode(&utf8, 0xDFFF)); + TEST_CHECK(guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR)); + TEST_CHECK(guf_utf8_decode(&utf8) == GUF_UTF8_REPLACEMENT_CHAR_CODEPOINT); + + TEST_CHECK(!guf_utf8_encode(&utf8, 0xDA00)); + TEST_CHECK(guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR)); + TEST_CHECK(guf_utf8_decode(&utf8) == GUF_UTF8_REPLACEMENT_CHAR_CODEPOINT); + + char buf[] = {'\x2F', '\xC0', '\xAE', '\x2E', '\x2F'}; + guf_str_view input_str = {.str = buf, .len = GUF_ARR_SIZE(buf)}; + guf_utf8_char ch = {}; + int valid_chars = 0, invalid_chars = 0; + for (guf_utf8_stat stat = guf_utf8_char_next(&ch, &input_str); stat != GUF_UTF8_READ_DONE; stat = guf_utf8_char_next(&ch, &input_str)) { + if (stat == GUF_UTF8_READ_VALID) { + ++valid_chars; + } else { + ++invalid_chars; + } + } + TEST_CHECK(invalid_chars == 2 && valid_chars == 3); + + char buf2[] = {'\xE0', '\x80', 'a', 'b', 'c'}; // 1 invalid 3-byte-character, 2 valid 1-byte-characters + input_str = {.str = buf2, .len = GUF_ARR_SIZE(buf2)}; + ch = {}; + valid_chars = invalid_chars = 0; + for (guf_utf8_stat stat = guf_utf8_char_next(&ch, &input_str); stat != GUF_UTF8_READ_DONE; stat = guf_utf8_char_next(&ch, &input_str)) { + if (stat == GUF_UTF8_READ_VALID) { + // printf("%s", ch.bytes); + ++valid_chars; + } else { + // printf("%s", GUF_UTF8_REPLACEMENT_CHAR.bytes); + ++invalid_chars; + } + } + TEST_CHECK(invalid_chars == 1 && valid_chars == 2); +} diff --git a/src/test/test_utf8.hpp b/src/test/test_utf8.hpp index a2dd213..e9fca71 100644 --- a/src/test/test_utf8.hpp +++ b/src/test/test_utf8.hpp @@ -1,397 +1,26 @@ #pragma once #include #include "test.hpp" - extern "C" { - #include "guf_alloc_libc.h" - #include "guf_str.h" - #include "impls/dict_impl.h" #include "impls/dbuf_impl.h" } struct UTF8Test : public Test { UTF8Test(const std::string& name) : Test(name) {}; + void run() override; private: dbuf_char text_buf {}; std::vector text_vec; - bool load_text(const char *fname) - { - FILE *in_file {nullptr}; - if (!in_file) { - in_file = fopen(fname, "r"); - } + bool load_text(const char *fname); + void free_text(); - if (!in_file) { - return false; - } - - dbuf_char_init(&text_buf, 128, &guf_allocator_libc); - - int c = EOF; - while ((c = fgetc(in_file)) != EOF) { - dbuf_char_push_val(&text_buf, (char)c); - text_vec.push_back((char)c); - } - fclose(in_file); - - return TEST_CHECK(std::ssize(text_vec) == text_buf.size); - } - - void free_text() - { - dbuf_char_free(&text_buf, NULL); - text_vec.clear(); - } - - - void read_utf8_chars(const char *fname, ptrdiff_t *n_valid, ptrdiff_t *n_invalid) - { - GUF_ASSERT_RELEASE(load_text(fname)); - - ptrdiff_t valid_chars = 0, invalid_chars = 0, bytes = 0; - guf_str_view input_str = {.str = text_buf.data, .len = text_buf.size}; - guf_utf8_char ch = {}; - for (guf_utf8_stat stat = guf_utf8_char_next(&ch, &input_str); stat != GUF_UTF8_READ_DONE; stat = guf_utf8_char_next(&ch, &input_str)) { - if (stat == GUF_UTF8_READ_VALID) { - ++valid_chars; - // printf("%s", ch.bytes); - } else { - ++invalid_chars; - // printf("::INVALID_UTF8_CHAR::"); - } - bytes += guf_utf8_char_num_bytes(&ch); - } - TEST_CHECK(input_str.len == 0 && input_str.str == NULL); - TEST_CHECK(bytes == text_buf.size); - - // printf("\nread %td bytes\n", bytes); - // printf("read %td valid and %td invalid utf-8 characters\n", valid_chars, invalid_chars); - - free_text(); - - if (n_valid) - *n_valid = valid_chars; - if (n_invalid) - *n_invalid = invalid_chars; - } - - int count_words(const char *fname, const dbuf_str_view *delims) - { - GUF_ASSERT_RELEASE(load_text(fname)); - - int num_words = 0; - - guf_str_tok_state tok_state = guf_str_tok_state_new(guf_str_view{.str = text_buf.data, .len = text_buf.size}, delims->data, delims->size, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST); - while (guf_str_tok_next(&tok_state, false)) { - TEST_CHECK(tok_state.cur_tok.len > 0); - ++num_words; - } - - free_text(); - return num_words; - } - - int count_words_with_delims(const char *fname, const dbuf_str_view *delims) - { - GUF_ASSERT_RELEASE(load_text(fname)); - - int num_words = 0, num_delims = 0; - guf_str_tok_state tok_state = guf_str_tok_state_new(guf_str_view{.str = text_buf.data, .len = text_buf.size}, delims->data, delims->size, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST); - while (guf_str_tok_next(&tok_state, true)) { - if (tok_state.cur_tok.len) { - ++num_words; - // printf("'%.*s'\n", (int)tok_state.cur_tok.len, tok_state.cur_tok.str); - } - if (tok_state.cur_delim.len) { - ++num_delims; - // if (tok_state.cur_delim.str[0] == '\n') - // printf("'\\n'\n"); - // else - // printf("'%.*s'\n", (int)tok_state.cur_delim.len, tok_state.cur_delim.str); - } - } - free_text(); - return num_words + num_delims; - } - - void encode_decode_file(const char *fname) - { - GUF_ASSERT_RELEASE(load_text(fname)); - - dbuf_i32 cp_buf = dbuf_i32_new(&guf_allocator_libc); - - ptrdiff_t valid_chars = 0, invalid_chars = 0; - guf_str_view input_str = {.str = text_buf.data, .len = text_buf.size}; - guf_utf8_char ch = {}; - for (guf_utf8_stat stat = guf_utf8_char_next(&ch, &input_str); stat != GUF_UTF8_READ_DONE; stat = guf_utf8_char_next(&ch, &input_str)) { - if (stat == GUF_UTF8_READ_VALID) { - ++valid_chars; - const int32_t codepoint = guf_utf8_decode(&ch); - TEST_CHECK(codepoint >= 0); - dbuf_i32_push_val(&cp_buf, codepoint); - } else { - ++invalid_chars; - const int32_t codepoint = guf_utf8_decode(&ch); - TEST_CHECK(codepoint < 0); - dbuf_i32_push_val(&cp_buf, -1); - } - } - TEST_CHECK(cp_buf.size == valid_chars + invalid_chars); - - guf_str_view in_str = {.str = text_buf.data, .len = text_buf.size}; - GUF_CNT_FOREACH(&cp_buf, dbuf_i32, it) { - GUF_ASSERT_RELEASE(it.ptr); - const int32_t codepoint = *it.ptr; - guf_utf8_char utf8_ch = {}; - const guf_utf8_stat stat = guf_utf8_char_next(&utf8_ch, &in_str); - if (codepoint >= 0) { - TEST_CHECK(stat == GUF_UTF8_READ_VALID); - guf_utf8_char encoded_ch = {}; - TEST_CHECK(guf_utf8_encode(&encoded_ch, codepoint)); - TEST_CHECK(guf_utf8_equal(&encoded_ch, &utf8_ch)); - } - } - guf_utf8_char utf8_ch = {}; - const guf_utf8_stat stat = guf_utf8_char_next(&utf8_ch, &in_str); - TEST_CHECK(stat == GUF_UTF8_READ_DONE); - - dbuf_i32_free(&cp_buf, NULL); - - free_text(); - } - - void encode_decode() - { - guf_utf8_char utf8 = {0}; - - // 1 byte characters. - for (uint8_t ascii = 0; ascii <= 0x7F; ++ascii) { - TEST_CHECK(guf_utf8_encode(&utf8, ascii)); - TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 1); - TEST_CHECK(utf8.bytes[0] == ascii); - TEST_CHECK(utf8.bytes[1] == '\0'); - TEST_CHECK(guf_utf8_decode(&utf8) == ascii); - } - - // 2 byte characters: - TEST_CHECK(guf_utf8_encode(&utf8, 0x00E6)); // "æ" (Latin Small Letter Ae) - TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2); - TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\xA6'); - TEST_CHECK(utf8.bytes[2] == '\0'); - TEST_CHECK(guf_utf8_decode(&utf8) == 0x00E6); - - TEST_CHECK(guf_utf8_encode(&utf8, 0x00E5)); // "å" (Latin Small Letter A with Ring Above) - TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2); - TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\xA5'); - TEST_CHECK(utf8.bytes[2] == '\0'); - TEST_CHECK(guf_utf8_decode(&utf8) == 0x00E5); - - TEST_CHECK(guf_utf8_encode(&utf8, 0x00F8)); // "ø" (Latin Small Letter O with Stroke) - TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2); - TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\xB8'); - TEST_CHECK(utf8.bytes[2] == '\0'); - TEST_CHECK(guf_utf8_decode(&utf8) == 0x00F8); - - TEST_CHECK(guf_utf8_encode(&utf8, 0x00E4)); // "ä" (Latin Small Letter A with Diaeresis) - TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2); - TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\xA4'); - TEST_CHECK(utf8.bytes[2] == '\0'); - TEST_CHECK(guf_utf8_decode(&utf8) == 0x00E4); - - TEST_CHECK(guf_utf8_encode(&utf8, 0x00F6)); // "ö" (Latin Small Letter O with Diaeresis) - TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2); - TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\xB6'); - TEST_CHECK(utf8.bytes[2] == '\0'); - TEST_CHECK(guf_utf8_decode(&utf8) == 0x00F6); - - TEST_CHECK(guf_utf8_encode(&utf8, 0x00D6)); // "Ö" (Latin Capital Letter O with Diaeresis) - TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2); - TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\x96'); - TEST_CHECK(utf8.bytes[2] == '\0'); - TEST_CHECK(guf_utf8_decode(&utf8) == 0x00D6); - - TEST_CHECK(guf_utf8_encode(&utf8, 0x00FC)); // "ü" (Latin Small Letter U with Diaeresis) - TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2); - TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\xBC'); - TEST_CHECK(utf8.bytes[2] == '\0'); - TEST_CHECK(guf_utf8_decode(&utf8) == 0x00FC); - - TEST_CHECK(guf_utf8_encode(&utf8, 0x00B5)); // "µ" (Micro Sign) - TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2); - TEST_CHECK(utf8.bytes[0] == '\xC2' && utf8.bytes[1] == '\xB5'); - TEST_CHECK(utf8.bytes[2] == '\0'); - TEST_CHECK(guf_utf8_decode(&utf8) == 0x00B5); - - TEST_CHECK(guf_utf8_encode(&utf8, 0x030A)); // "◌̊" (Combining Ring Above) - TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2); - TEST_CHECK(utf8.bytes[0] == '\xCC' && utf8.bytes[1] == '\x8A'); - TEST_CHECK(utf8.bytes[2] == '\0'); - TEST_CHECK(guf_utf8_decode(&utf8) == 0x030A); - - // 3 byte characters: - TEST_CHECK(guf_utf8_encode(&utf8, 0x7121)); // "無" (Nothingness; CJK Unified Ideograph-7121) - TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 3); - TEST_CHECK(!guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR)); - TEST_CHECK(utf8.bytes[0] == '\xE7' && utf8.bytes[1] == '\x84' && utf8.bytes[2] == '\xA1'); - TEST_CHECK(utf8.bytes[3] == '\0'); - TEST_CHECK(guf_utf8_decode(&utf8) == 0x7121); - - TEST_CHECK(guf_utf8_encode(&utf8, 0x201E)); // "„" (Double Low-9 Quotation Mark) - TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 3); - TEST_CHECK(!guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR)); - TEST_CHECK(utf8.bytes[0] == '\xE2' && utf8.bytes[1] == '\x80' && utf8.bytes[2] == '\x9E'); - TEST_CHECK(utf8.bytes[3] == '\0'); - TEST_CHECK(guf_utf8_decode(&utf8) == 0x201E); - - TEST_CHECK(guf_utf8_encode(&utf8, 0x20AC)); // "€" (Euro Sign) - TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 3); - TEST_CHECK(!guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR)); - TEST_CHECK(utf8.bytes[0] == '\xE2' && utf8.bytes[1] == '\x82' && utf8.bytes[2] == '\xAC'); - TEST_CHECK(utf8.bytes[3] == '\0'); - TEST_CHECK(guf_utf8_decode(&utf8) == 0x20AC); - - TEST_CHECK(guf_utf8_encode(&utf8, 0xFC51)); // "ﱑ" (Arabic Ligature Heh with Jeem Isolated Form) - TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 3); - TEST_CHECK(!guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR)); - TEST_CHECK(utf8.bytes[0] == '\xEF' && utf8.bytes[1] == '\xB1' && utf8.bytes[2] == '\x91'); - TEST_CHECK(utf8.bytes[3] == '\0'); - TEST_CHECK(guf_utf8_decode(&utf8) == 0xFC51); - - TEST_CHECK(guf_utf8_encode(&utf8, 0x1AA3)); // "᪣" (Tai Tham Sign Keow) - TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 3); - TEST_CHECK(!guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR)); - TEST_CHECK(utf8.bytes[0] == '\xE1' && utf8.bytes[1] == '\xAA' && utf8.bytes[2] == '\xA3'); - TEST_CHECK(utf8.bytes[3] == '\0'); - TEST_CHECK(guf_utf8_decode(&utf8) == 0x1AA3); - - TEST_CHECK(guf_utf8_encode(&utf8, GUF_UTF8_REPLACEMENT_CHAR_CODEPOINT)); // "�" (Replacement Character) - TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 3); - TEST_CHECK(utf8.bytes[0] == '\xEF' && utf8.bytes[1] == '\xBF' && utf8.bytes[2] == '\xBD'); - TEST_CHECK(utf8.bytes[3] == '\0'); - TEST_CHECK(guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR)); - TEST_CHECK(guf_utf8_decode(&utf8) == GUF_UTF8_REPLACEMENT_CHAR_CODEPOINT); - - // 4 byte characters: - TEST_CHECK(guf_utf8_encode(&utf8, 0x1F308)); // "🌈" (Rainbow) - TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 4); - TEST_CHECK(utf8.bytes[0] == '\xF0' && utf8.bytes[1] == '\x9F' && utf8.bytes[2] == '\x8C' && utf8.bytes[3] == '\x88'); - TEST_CHECK(utf8.bytes[4] == '\0'); - TEST_CHECK(guf_utf8_decode(&utf8) == 0x1F308); - - TEST_CHECK(guf_utf8_encode(&utf8, 0x130B8)); // "𓂸" (Egyptian Hieroglyph D052) - TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 4); - TEST_CHECK(utf8.bytes[0] == '\xF0' && utf8.bytes[1] == '\x93' && utf8.bytes[2] == '\x82' && utf8.bytes[3] == '\xB8'); - TEST_CHECK(utf8.bytes[4] == '\0'); - TEST_CHECK(guf_utf8_decode(&utf8) == 0x130B8); - - TEST_CHECK(guf_utf8_encode(&utf8, 0x1F97A)); // "🥺" (Face with Pleading Eyes) - TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 4); - TEST_CHECK(utf8.bytes[0] == '\xF0' && utf8.bytes[1] == '\x9F' && utf8.bytes[2] == '\xA5' && utf8.bytes[3] == '\xBA'); - TEST_CHECK(utf8.bytes[4] == '\0'); - TEST_CHECK(guf_utf8_decode(&utf8) == 0x1F97A); - - TEST_CHECK(guf_utf8_encode(&utf8, 0x1F980)); // "🦀" (Crab) - TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 4); - TEST_CHECK(utf8.bytes[0] == '\xF0' && utf8.bytes[1] == '\x9F' && utf8.bytes[2] == '\xA6' && utf8.bytes[3] == '\x80'); - TEST_CHECK(utf8.bytes[4] == '\0'); - TEST_CHECK(guf_utf8_decode(&utf8) == 0x1F980); - - // Invalid characters: - utf8 = {.bytes = {'\xC0', '\x80', 0, 0, 0}}; - TEST_CHECK(guf_utf8_decode(&utf8) < 0); - - utf8 = {.bytes = {'\xC0', 0, 0, 0, 0}}; - TEST_CHECK(guf_utf8_decode(&utf8) < 0); - - utf8 = {.bytes = {'\x80', 0, 0, 0, 0}}; - TEST_CHECK(guf_utf8_decode(&utf8) < 0); - - // "The definition of UTF-8 prohibits encoding character numbers between U+D800 and U+DFFF" (surrogate pairs). - TEST_CHECK(!guf_utf8_encode(&utf8, 0xD800)); - TEST_CHECK(guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR)); - TEST_CHECK(guf_utf8_decode(&utf8) == GUF_UTF8_REPLACEMENT_CHAR_CODEPOINT); - - TEST_CHECK(!guf_utf8_encode(&utf8, 0xDFFF)); - TEST_CHECK(guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR)); - TEST_CHECK(guf_utf8_decode(&utf8) == GUF_UTF8_REPLACEMENT_CHAR_CODEPOINT); - - TEST_CHECK(!guf_utf8_encode(&utf8, 0xDA00)); - TEST_CHECK(guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR)); - TEST_CHECK(guf_utf8_decode(&utf8) == GUF_UTF8_REPLACEMENT_CHAR_CODEPOINT); - - char buf[] = {'\x2F', '\xC0', '\xAE', '\x2E', '\x2F'}; - guf_str_view input_str = {.str = buf, .len = GUF_ARR_SIZE(buf)}; - guf_utf8_char ch = {}; - int valid_chars = 0, invalid_chars = 0; - for (guf_utf8_stat stat = guf_utf8_char_next(&ch, &input_str); stat != GUF_UTF8_READ_DONE; stat = guf_utf8_char_next(&ch, &input_str)) { - if (stat == GUF_UTF8_READ_VALID) { - ++valid_chars; - } else { - ++invalid_chars; - } - } - TEST_CHECK(invalid_chars == 2 && valid_chars == 3); - - char buf2[] = {'\xE0', '\x80', 'a', 'b', 'c'}; // 1 invalid 3-byte-character, 2 valid 1-byte-characters - input_str = {.str = buf2, .len = GUF_ARR_SIZE(buf2)}; - ch = {}; - valid_chars = invalid_chars = 0; - for (guf_utf8_stat stat = guf_utf8_char_next(&ch, &input_str); stat != GUF_UTF8_READ_DONE; stat = guf_utf8_char_next(&ch, &input_str)) { - if (stat == GUF_UTF8_READ_VALID) { - // printf("%s", ch.bytes); - ++valid_chars; - } else { - // printf("%s", GUF_UTF8_REPLACEMENT_CHAR.bytes); - ++invalid_chars; - } - } - TEST_CHECK(invalid_chars == 1 && valid_chars == 2); - } - -public: - - bool run() - { - if (done) { - return passed; - } - - ptrdiff_t valid = 0, invalid = 0; - read_utf8_chars(TEST_DATA_DIR "/" "utf8-test.txt", &valid, &invalid); - TEST_CHECK(valid == 2635 && invalid == 0); - - read_utf8_chars(TEST_DATA_DIR "/" "bartleby.txt", &valid, &invalid); - TEST_CHECK(valid > 16000 && invalid == 0); - - dbuf_str_view delims = dbuf_str_view_new(&guf_allocator_libc); - for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_WHITESPACE); ++i) { - guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_WHITESPACE[i]), .str = GUF_UTF8_WHITESPACE[i]}; - dbuf_str_view_push_val(&delims, d); - } - for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_COMMON_PUNCT); ++i) { - guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_COMMON_PUNCT[i]), .str = GUF_UTF8_COMMON_PUNCT[i]}; - dbuf_str_view_push_val(&delims, d); - } - int words = count_words(TEST_DATA_DIR "/" "utf8-test.txt", &delims); - TEST_CHECK(words == 422); - int words_with_delims = count_words_with_delims(TEST_DATA_DIR "/" "utf8-test.txt", &delims); - TEST_CHECK(words_with_delims == 950); - - int words2 = count_words(TEST_DATA_DIR "/" "bartleby.txt", &delims); - TEST_CHECK(words2 > 2048); - - dbuf_str_view_free(&delims, NULL); - - encode_decode(); - encode_decode_file(TEST_DATA_DIR "/" "utf8-test.txt"); - encode_decode_file(TEST_DATA_DIR "/" "bartleby.txt"); - - done = true; - passed = (num_failed_checks == 0); - return passed; - } + void read_utf8_chars(const char *fname, ptrdiff_t *n_valid, ptrdiff_t *n_invalid); + int count_words(const char *fname, const dbuf_str_view *delims); + int count_words_with_delims(const char *fname, const dbuf_str_view *delims); + void encode_decode_file(const char *fname); + void encode_decode(); };