Refactor tests

This commit is contained in:
jun 2025-05-13 17:36:08 +02:00
parent 6091eac820
commit c4f3616b23
11 changed files with 1712 additions and 1638 deletions

View File

@ -19,7 +19,7 @@ endif ()
if (NOT DEFINED MSVC)
set(WARNING_FLAGS_C -Wall -Wextra -Wpedantic -Wvla -Wshadow -Wundef -Wmisleading-indentation -Wnull-dereference -Wswitch-default -Wconversion -Wno-sign-conversion -Wdouble-promotion -Wno-unused-function)
set(WARNING_FLAGS_CXX -Wall -Wextra -Wpedantic -Wvla -Wshadow -Wundef -Wmisleading-indentation -Wnull-dereference -Wshadow -Wundef -Wstrict-overflow=5 -Wsign-promo -Wcast-align -Wcast-qual -Woverloaded-virtual -Wredundant-decls -Wctor-dtor-privacy -Wdisabled-optimization -Wformat=2 -Winit-self -Wswitch-default -Wno-unused-function)
set(WARNING_FLAGS_CXX -Wall -Wextra -Wpedantic -Wvla -Wshadow -Wundef -Wmisleading-indentation -Wnull-dereference -Wshadow -Wundef -Wstrict-overflow=5 -Wsign-promo -Wcast-align -Wcast-qual -Woverloaded-virtual -Wredundant-decls -Wctor-dtor-privacy -Wdisabled-optimization -Wformat=2 -Winit-self -Wswitch-default -Wno-unused-function)
set(DBG_FLAGS -fsanitize=undefined,address -g3 -glldb -Og)
else ()
set(WARNING_FLAGS_C /W4)
@ -30,7 +30,7 @@ endif ()
add_executable(libguf_example src/test/example.c src/test/impls/str_impl.c src/test/impls/dict_impl.c src/test/impls/linalg_impl.c)
target_include_directories(libguf_example PRIVATE src src/test)
add_executable(libguf_test src/test/test.cpp src/test/impls/init_impl.c src/test/impls/dbuf_impl.c src/test/impls/str_impl.c src/test/impls/dict_impl.c src/test/impls/rand_impl.c src/test/impls/sort_impl.c src/test/impls/linalg_impl.c)
add_executable(libguf_test src/test/test.cpp src/test/test_dbuf.cpp src/test/test_dict.cpp src/test/test_str.cpp src/test/test_utf8.cpp src/test/impls/init_impl.c src/test/impls/dbuf_impl.c src/test/impls/str_impl.c src/test/impls/dict_impl.c src/test/impls/rand_impl.c src/test/impls/sort_impl.c src/test/impls/linalg_impl.c)
target_include_directories(libguf_test PRIVATE src src/test)
set_target_properties(libguf_example libguf_test PROPERTIES DEBUG_POSTFIX ${CMAKE_DEBUG_POSTFIX})

View File

@ -3,19 +3,19 @@
#include <cstdio>
#include <iostream>
extern "C" {
#include "guf_assert.h"
#include "guf_math.h"
}
#include "test_dbuf.hpp"
#include "test_dict.hpp"
#include "test_utf8.hpp"
#include "test_str.hpp"
extern "C"
{
#include "guf_assert.h"
#include "guf_math.h"
}
std::unordered_set<std::unique_ptr<Test>> g_tests {};
static std::unordered_set<std::unique_ptr<Test>> g_tests {};
void init_tests()
static void init_tests()
{
std::unique_ptr<Test> test = std::make_unique<DbufIntTest>("DbufIntTest");
GUF_ASSERT_RELEASE(test.get());
@ -70,4 +70,4 @@ int main()
}
return passed_all ? EXIT_SUCCESS : EXIT_FAILURE;
}
}

View File

@ -7,19 +7,19 @@
#include <iostream>
#include <chrono>
#include <iomanip>
#include "guf_common.h"
extern "C" {
#include "guf_common.h"
#include "guf_assert.h"
}
#define TEST_CHECK(COND) (check((COND), GUF_STRINGIFY(COND), __LINE__, __FILE__))
struct Test
{
private:
std::chrono::steady_clock::time_point time_start, time_end;
protected:
std::stack<std::string> check_name_stack;
std::string full_check_name = "";
@ -50,7 +50,6 @@ protected:
}
public:
const std::string name {};
std::chrono::duration<float, std::milli> runtime_ms {0};
bool passed {false}, done {false};
@ -64,7 +63,7 @@ public:
return num_passed_checks + num_failed_checks;
}
virtual bool run() = 0;
virtual void run() = 0;
void before_run()
{
@ -73,6 +72,9 @@ public:
void after_run()
{
done = true;
passed = (num_failed_checks == 0);
time_end = std::chrono::high_resolution_clock::now();
runtime_ms = std::chrono::duration_cast<decltype(runtime_ms)>(time_end - time_start);
}

523
src/test/test_dbuf.cpp Normal file
View File

@ -0,0 +1,523 @@
#include "test_dbuf.hpp"
extern "C"
{
#include "guf_alloc_libc.h"
#include "impls/dbuf_impl.h"
}
/*
DbufIntTest
*/
// Entry point of the dbuf_int test. Runs the push scenarios on a buffer that is
// initialised, freed and initialised again, then the insert/erase scenarios for
// a range of element counts. Returns immediately if `done` is already set.
void DbufIntTest::run()
{
    if (done) {
        return;
    }

    dbuf_int dbuf {};
    dbuf_int_init(&dbuf, 0, &guf_allocator_libc);

    push_check_name("test_push");

    // First round: the buffer was initialised with a requested capacity of 0.
    const int first_round[] = {256, 128, 17};
    for (const int count : first_round) {
        test_push(&dbuf, count);
    }
    TEST_CHECK(dbuf.size == (256 + 128 + 17));
    dbuf_int_free(&dbuf, NULL);
    TEST_CHECK(dbuf.size == 0 && dbuf.capacity == 0 && dbuf.data == NULL);

    // Second round: re-initialise the same object with a capacity of 24.
    dbuf_int_init(&dbuf, 24, &guf_allocator_libc);
    TEST_CHECK(dbuf.size == 0 && dbuf.capacity == 24 && dbuf.data);
    const int second_round[] = {365, 4, 25, 64};
    for (const int count : second_round) {
        test_push(&dbuf, count);
    }
    TEST_CHECK(dbuf.size == (365 + 4 + 25 + 64));
    dbuf_int_free(&dbuf, NULL);
    TEST_CHECK(dbuf.size == 0 && dbuf.capacity == 0 && dbuf.data == NULL);

    pop_check_name();

    push_check_name("insert_remove");

    // Every count from 0 through 128, followed by a handful of larger ones.
    int n = 0;
    while (n <= 128) {
        test_insert_remove(n);
        ++n;
    }
    const int large_counts[] = {400, 401, 512, 513, 601, 2048, 2049};
    for (const int count : large_counts) {
        test_insert_remove(count);
    }

    pop_check_name();
}
// Returns a std::vector<int> holding a copy of every element that
// GUF_CNT_FOREACH visits in `dbuf`, in visiting order.
std::vector<int> DbufIntTest::dbuf_to_vec(dbuf_int *dbuf)
{
    std::vector<int> copy;
    GUF_CNT_FOREACH(dbuf, dbuf_int, elem) {
        copy.push_back(*elem.ptr);
    }
    return copy;
}
// Pushes the values 0 .. n-1 onto `dbuf` and onto a std::vector mirror of its
// prior contents, checking the back element after every push. Afterwards the
// two containers are compared element by element, forwards and in reverse.
void DbufIntTest::test_push(dbuf_int *dbuf, int n)
{
    // The mirror starts out as a copy of whatever the buffer already holds.
    std::vector<int> vec = dbuf_to_vec(dbuf);
    TEST_CHECK(std::ssize(vec) == dbuf->size);

    int value = 0;
    while (value < n) {
        dbuf_int_push_val(dbuf, value);
        vec.push_back(value);
        TEST_CHECK(*dbuf_int_back(dbuf) == vec.back());
        ++value;
    }

    // Forward pass: `i` ends up equal to the number of visited elements.
    ptrdiff_t i = 0;
    GUF_CNT_FOREACH(dbuf, dbuf_int, fwd) {
        TEST_CHECK(*fwd.ptr == vec.at(i++));
    }
    TEST_CHECK(i == dbuf->size);

    // Reverse pass: `i` counts down from the last index and must stop at -1.
    i = dbuf->size - 1;
    GUF_CNT_FOREACH_REVERSE(dbuf, dbuf_int, bwd) {
        TEST_CHECK(*bwd.ptr == vec.at(i--));
    }
    TEST_CHECK(i == -1);
}
// Exercises insertion, iteration and removal on a fresh dbuf_int, using a
// std::vector<int> that receives the same operations as the reference.
// `n` is the number of insert rounds; each round inserts three values.
void DbufIntTest::test_insert_remove(int n)
{
    dbuf_int dbuf {};
    dbuf_int_init(&dbuf, 0, &guf_allocator_libc);
    std::vector<int> vec = dbuf_to_vec(&dbuf);

    // On the still-empty buffer each try_* call below is expected to set
    // GUF_ERR_IDX_RANGE; `err` is reset before every call.
    guf_err err = GUF_ERR_NONE;
    dbuf_int_try_erase(&dbuf, 0, &err);
    TEST_CHECK(err == GUF_ERR_IDX_RANGE);

    err = GUF_ERR_NONE;
    dbuf_int_try_erase(&dbuf, 12, &err);
    TEST_CHECK(err == GUF_ERR_IDX_RANGE);

    err = GUF_ERR_NONE;
    dbuf_int_try_front(&dbuf, &err);
    TEST_CHECK(err == GUF_ERR_IDX_RANGE);

    err = GUF_ERR_NONE;
    dbuf_int_try_back(&dbuf, &err);
    TEST_CHECK(err == GUF_ERR_IDX_RANGE);

    err = GUF_ERR_NONE;
    dbuf_int_try_at(&dbuf, 0, &err);
    TEST_CHECK(err == GUF_ERR_IDX_RANGE);

    // Each dbuf insertion is followed by the matching std::vector insertion.
    int value = 0;
    while (value < n) {
        dbuf_int_insert_val(&dbuf, value, value);
        vec.insert(vec.begin() + value, value);

        dbuf_int_insert_val(&dbuf, value * 2, 0);
        vec.insert(vec.begin(), value * 2);

        dbuf_int_insert_val(&dbuf, value * 4, dbuf.size);
        vec.push_back(value * 4);

        ++value;
    }
    TEST_CHECK(std::ssize(vec) == dbuf.size);

    // Forward iteration, one element at a time.
    dbuf_int_iter it_dbuf = dbuf_int_begin(&dbuf);
    std::vector<int>::const_iterator it_vec = vec.begin();
    while (!dbuf_int_iter_is_end(&dbuf, it_dbuf) && it_vec != vec.end()) {
        TEST_CHECK(*it_dbuf.ptr == *it_vec);
        it_dbuf = dbuf_int_iter_next(&dbuf, it_dbuf, 1);
        ++it_vec;
    }
    TEST_CHECK(dbuf_int_iter_is_end(&dbuf, it_dbuf) && it_vec == vec.end());

    // Forward iteration in steps of 7. The vector iterator is only advanced
    // while the dbuf iterator is not at its end, so it never runs past end().
    it_dbuf = dbuf_int_begin(&dbuf);
    it_vec = vec.begin();
    while (!dbuf_int_iter_is_end(&dbuf, it_dbuf) && it_vec != vec.end()) {
        TEST_CHECK(*it_dbuf.ptr == *it_vec);
        it_dbuf = dbuf_int_iter_next(&dbuf, it_dbuf, 7);
        it_vec = dbuf_int_iter_is_end(&dbuf, it_dbuf) ? vec.cend() : it_vec + 7;
    }
    TEST_CHECK(dbuf_int_iter_is_end(&dbuf, it_dbuf) && it_vec == vec.end());

    // Reverse iteration, one element at a time.
    dbuf_int_iter rit_dbuf = dbuf_int_rbegin(&dbuf);
    std::vector<int>::const_reverse_iterator rit_vec = vec.crbegin();
    while (!dbuf_int_iter_is_end(&dbuf, rit_dbuf) && rit_vec != vec.crend()) {
        TEST_CHECK(*rit_dbuf.ptr == *rit_vec);
        rit_dbuf = dbuf_int_iter_next(&dbuf, rit_dbuf, 1);
        ++rit_vec;
    }
    TEST_CHECK(dbuf_int_iter_is_end(&dbuf, rit_dbuf) && rit_vec == vec.rend());

    // Reverse iteration in steps of 4, same guarding as the forward case.
    rit_dbuf = dbuf_int_rbegin(&dbuf);
    rit_vec = vec.crbegin();
    while (!dbuf_int_iter_is_end(&dbuf, rit_dbuf) && rit_vec != vec.crend()) {
        TEST_CHECK(*rit_dbuf.ptr == *rit_vec);
        rit_dbuf = dbuf_int_iter_next(&dbuf, rit_dbuf, 4);
        rit_vec = dbuf_int_iter_is_end(&dbuf, rit_dbuf) ? vec.crend() : rit_vec + 4;
    }
    TEST_CHECK(dbuf_int_iter_is_end(&dbuf, rit_dbuf) && rit_vec == vec.rend());

    TEST_CHECK(dbuf.size == std::ssize(vec));

    // Per round: erase at `idx`, erase the front, pop the back, on both
    // containers. `dbuf.size` shrinks while `idx` grows by 8.
    for (ptrdiff_t idx = 0; idx < dbuf.size; idx += 8) {
        dbuf_int_erase(&dbuf, idx);
        vec.erase(vec.begin() + idx);

        dbuf_int_erase(&dbuf, 0);
        vec.erase(vec.begin());

        dbuf_int_pop(&dbuf);
        vec.pop_back();
    }
    TEST_CHECK(dbuf.size == std::ssize(vec));

    // Compare every eighth remaining element.
    for (ptrdiff_t i = 0; i < dbuf.size; i += 8) {
        TEST_CHECK(*dbuf_int_at(&dbuf, i) == vec.at(i));
    }

    // Drain both containers from the back, comparing each popped value.
    ptrdiff_t remaining = dbuf.size;
    while (remaining > 0) {
        int a = dbuf_int_pop_move(&dbuf);
        int b = vec.back();
        TEST_CHECK(a == b);
        vec.pop_back();
        --remaining;
    }
    TEST_CHECK(dbuf.size == 0 && vec.size() == 0);

    dbuf_int_free(&dbuf, NULL);
    TEST_CHECK(dbuf.size == 0 && dbuf.capacity == 0 && !dbuf.data);
}
/*
DbufCstringTest
*/
void DbufCstringTest::run()
{
if (done) {
return;
}
push_check_name("push_insert_erase");
for (int i = 1; i <= 32; ++i) {
test_push_insert_erase(i);
test_push_insert_erase(i, i - 1);
test_push_insert_erase(i, i + 1);
test_push_insert_erase(i, i);
test_push_insert_erase(i, i / 2);
}
test_push_insert_erase(2048);
test_push_insert_erase(2048, 11);
dbuf_heap_cstr str_dbuf = {};
dbuf_heap_cstr_init(&str_dbuf, 0, &guf_allocator_libc);
std::vector<std::string> str_vec {};
for (int i = 0; i < 512; ++i) {
char buf[128];
memset(buf, '\0', GUF_ARR_SIZE(buf));
snprintf(buf, GUF_ARR_SIZE(buf), "This is a pretty guf string (number %d)", i);
guf_cstr_heap str = buf;
dbuf_heap_cstr_push(&str_dbuf, &str, GUF_CPY_DEEP);
str_vec.push_back(std::string{buf});
}
for (int i = 0; i < str_dbuf.size + 16; ++i) {
test_iter(str_vec, &str_dbuf, i);
}
dbuf_heap_cstr_free(&str_dbuf, NULL);
TEST_CHECK(str_dbuf.size == 0 && str_dbuf.capacity == 0 && !str_dbuf.data);
pop_check_name();
push_check_name("find");
test_find();
test_find(3);
test_find(42);
test_find(129);
pop_check_name();
}
void DbufCstringTest::test_iter(std::vector<std::string>& str_vec, dbuf_heap_cstr *str_dbuf, int step)
{
GUF_ASSERT_RELEASE(str_dbuf);
if (step <= 0) {
step = 1;
}
ptrdiff_t i = 0;
GUF_CNT_FOREACH(str_dbuf, dbuf_heap_cstr, it) {
char *str = *it.ptr;
TEST_CHECK(str_vec.at(i) == str);
++i;
}
TEST_CHECK(i == str_dbuf->size);
i = str_dbuf->size - 1;
GUF_CNT_FOREACH_REVERSE(str_dbuf, dbuf_heap_cstr, rit) {
char *str = *rit.ptr;
TEST_CHECK(str_vec.at(i) == str);
--i;
}
TEST_CHECK(i == -1);
dbuf_heap_cstr_iter it_dbuf = dbuf_heap_cstr_begin(str_dbuf);
std::vector<std::string>::iterator it_vec = str_vec.begin();
while (!dbuf_heap_cstr_iter_is_end(str_dbuf, it_dbuf)) {
TEST_CHECK(it_vec != str_vec.end());
TEST_CHECK(*it_vec == *it_dbuf.ptr);
it_dbuf = dbuf_heap_cstr_iter_next(str_dbuf, it_dbuf, step);
if (!dbuf_heap_cstr_iter_is_end(str_dbuf, it_dbuf)) {
std::advance(it_vec, step);
} else {
it_vec = str_vec.end();
}
}
TEST_CHECK(dbuf_heap_cstr_iter_is_end(str_dbuf, it_dbuf) && it_vec == str_vec.end());
dbuf_heap_cstr_iter rit_dbuf = dbuf_heap_cstr_rbegin(str_dbuf);
std::vector<std::string>::reverse_iterator rit_vec = str_vec.rbegin();
while (!dbuf_heap_cstr_iter_is_end(str_dbuf, rit_dbuf)) {
TEST_CHECK(rit_vec != str_vec.rend());
TEST_CHECK(*rit_vec == *rit_dbuf.ptr);
rit_dbuf = dbuf_heap_cstr_iter_next(str_dbuf, rit_dbuf, step);
if (!dbuf_heap_cstr_iter_is_end(str_dbuf, rit_dbuf)) {
std::advance(rit_vec, step);
} else {
rit_vec = str_vec.rend();
}
}
TEST_CHECK(dbuf_heap_cstr_iter_is_end(str_dbuf, rit_dbuf) && rit_vec == str_vec.rend());
for (i = 0; i < str_dbuf->size; ++i) {
char *str = *dbuf_heap_cstr_at(str_dbuf, i);
TEST_CHECK(str_vec.at(i) == str);
}
}
void DbufCstringTest::test_push_insert_erase(int n, ptrdiff_t start_cap)
{
std::vector<std::string> str_vec;
dbuf_heap_cstr str_dbuf {};
dbuf_heap_cstr_init(&str_dbuf, start_cap, &guf_allocator_libc);
for (int i = 0; i < n; ++i) {
constexpr int BUF_SZ = 128;
char buf[BUF_SZ];
memset(buf, '\0', BUF_SZ);
snprintf(buf, BUF_SZ, "This is string number %d", i);
guf_cstr_heap str = buf;
dbuf_heap_cstr_push(&str_dbuf, &str, GUF_CPY_DEEP);
dbuf_heap_cstr_push_val_cpy(&str_dbuf, str);
char *heap_buf = strdup("Move me plz");
dbuf_heap_cstr_push(&str_dbuf, &heap_buf, GUF_CPY_MOVE);
TEST_CHECK(heap_buf == NULL);
TEST_CHECK(strncmp(*dbuf_heap_cstr_back(&str_dbuf), "Move me plz", BUF_SZ) == 0);
TEST_CHECK(strncmp(*dbuf_heap_cstr_at(&str_dbuf, str_dbuf.size - 2), buf, BUF_SZ) == 0);
TEST_CHECK(strncmp(*dbuf_heap_cstr_at(&str_dbuf, str_dbuf.size - 3), buf, BUF_SZ) == 0);
str_vec.push_back(std::string{buf});
str_vec.push_back(std::string{buf});
str_vec.emplace_back("Move me plz");
}
TEST_CHECK(str_dbuf.size == std::ssize(str_vec));
TEST_CHECK(str_dbuf.size == 3 * n);
for (int i = 1; i <= 8; ++i) {
test_iter(str_vec, &str_dbuf, i);
}
test_iter(str_vec, &str_dbuf, str_dbuf.size);
test_iter(str_vec, &str_dbuf, str_dbuf.size - 1);
test_iter(str_vec, &str_dbuf, str_dbuf.size + 1);
for (ptrdiff_t i = 0; i < str_dbuf.size; ++i) {
TEST_CHECK(str_vec.at(i) == *dbuf_heap_cstr_at(&str_dbuf, i));
}
// Insert front.
for (ptrdiff_t i = 0; i < 16; ++i) {
char str[] = "front";
dbuf_heap_cstr_insert_val_cpy(&str_dbuf, str, 0);
str_vec.insert(str_vec.begin(), std::string{str});
}
TEST_CHECK(std::ssize(str_vec) == str_dbuf.size);
for (ptrdiff_t i = 0; i < str_dbuf.size; ++i) {
TEST_CHECK(str_vec.at(i) == *dbuf_heap_cstr_at(&str_dbuf, i));
}
// Insert back.
for (ptrdiff_t i = 0; i < 16; ++i) {
char str[] = "front";
dbuf_heap_cstr_insert_val_cpy(&str_dbuf, str, str_dbuf.size);
str_vec.insert(str_vec.end(), std::string{str});
}
TEST_CHECK(std::ssize(str_vec) == str_dbuf.size);
for (ptrdiff_t i = 0; i < str_dbuf.size; ++i) {
TEST_CHECK(str_vec.at(i) == *dbuf_heap_cstr_at(&str_dbuf, i));
}
// Insert at i.
char str[] = "guf";
dbuf_heap_cstr_insert_val_cpy(&str_dbuf, str, str_dbuf.size / 2);
str_vec.insert(str_vec.begin() + str_vec.size() / 2, str);
dbuf_heap_cstr_insert_val_cpy(&str_dbuf, str, str_dbuf.size / 4);
str_vec.insert(str_vec.begin() + str_vec.size() / 4, str);
dbuf_heap_cstr_insert_val_cpy(&str_dbuf, str, 1);
str_vec.insert(str_vec.begin() + 1, str);
dbuf_heap_cstr_insert_val_cpy(&str_dbuf, str, str_dbuf.size - 1);
str_vec.insert(str_vec.begin() + (str_vec.size() - 1), str);
for (ptrdiff_t i = 0; i < str_dbuf.size; ++i) {
TEST_CHECK(str_vec.at(i) == *dbuf_heap_cstr_at(&str_dbuf, i));
}
guf_err err = GUF_ERR_NONE;
dbuf_heap_cstr_try_insert_val_cpy(&str_dbuf, str, str_dbuf.size + 1, &err);
TEST_CHECK(err == GUF_ERR_IDX_RANGE);
err = GUF_ERR_NONE;
dbuf_heap_cstr_try_insert_val_cpy(&str_dbuf, str, -1, &err);
TEST_CHECK(err == GUF_ERR_IDX_RANGE);
err = GUF_ERR_NONE;
dbuf_heap_cstr_try_insert_val_cpy(&str_dbuf, str, str_dbuf.size + 2, &err);
TEST_CHECK(err == GUF_ERR_IDX_RANGE);
TEST_CHECK(std::ssize(str_vec) == str_dbuf.size);
for (ptrdiff_t i = 0; i < str_dbuf.size; ++i) {
TEST_CHECK(str_vec.at(i) == *dbuf_heap_cstr_at(&str_dbuf, i));
}
if (str_dbuf.size) {
dbuf_heap_cstr_erase(&str_dbuf, str_dbuf.size - 1);
str_vec.erase(str_vec.end() - 1);
}
ptrdiff_t to_rem = 8;
while (str_dbuf.size && to_rem--) {
dbuf_heap_cstr_erase(&str_dbuf, 0);
str_vec.erase(str_vec.begin());
TEST_CHECK(std::ssize(str_vec) == str_dbuf.size);
if (str_dbuf.size) {
dbuf_heap_cstr_pop(&str_dbuf);
str_vec.pop_back();
TEST_CHECK(std::ssize(str_vec) == str_dbuf.size);
}
if (str_dbuf.size) {
dbuf_heap_cstr_erase(&str_dbuf, str_dbuf.size / 2);
str_vec.erase(str_vec.begin() + (str_vec.size() / 2));
TEST_CHECK(std::ssize(str_vec) == str_dbuf.size);
}
}
dbuf_heap_cstr_free(&str_dbuf, NULL);
TEST_CHECK(str_dbuf.size == 0 && str_dbuf.capacity == 0 && !str_dbuf.data);
}
void DbufCstringTest::test_find(int n)
{
if (n < 2) {
n = 2;
}
std::vector<std::string> str_vec {};
dbuf_heap_cstr str_dbuf = {};
dbuf_heap_cstr_init(&str_dbuf, 0, &guf_allocator_libc);
for (int i = 0; i < n; ++i) {
constexpr int BUF_SZ = 128;
char buf[BUF_SZ];
memset(buf, '\0', BUF_SZ);
snprintf(buf, BUF_SZ, "String number %d", i);
dbuf_heap_cstr_push_val_cpy(&str_dbuf, buf);
str_vec.push_back(buf);
}
char *move_me = strdup("Moved string");
dbuf_heap_cstr_push(&str_dbuf, &move_me, GUF_CPY_MOVE);
GUF_ASSERT_RELEASE(move_me == NULL);
str_vec.emplace_back("Moved string");
TEST_CHECK(std::ssize(str_vec) == str_dbuf.size);
for (ptrdiff_t i = 0; i < str_dbuf.size; ++i) {
char *needle = *dbuf_heap_cstr_at(&str_dbuf, i);
TEST_CHECK(str_vec.at(i) == needle);
TEST_CHECK(dbuf_heap_cstr_contains_val(&str_dbuf, needle));
dbuf_heap_cstr_iter fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), dbuf_heap_cstr_end(&str_dbuf), needle);
TEST_CHECK(!dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it));
TEST_CHECK(std::find(str_vec.cbegin(), str_vec.cend(), needle) != str_vec.end());
dbuf_heap_cstr_iter begin = dbuf_heap_cstr_iter_next(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), i);
dbuf_heap_cstr_iter end = dbuf_heap_cstr_end(&str_dbuf);
fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, begin, end, needle);
TEST_CHECK(!dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it));
TEST_CHECK(std::find(str_vec.cbegin() + i, str_vec.cend(), needle) != str_vec.end());
begin = dbuf_heap_cstr_iter_next(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), i + 1);
end = dbuf_heap_cstr_end(&str_dbuf);
fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, begin, end, needle);
TEST_CHECK(dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it));
TEST_CHECK(std::find(str_vec.cbegin() + i + 1, str_vec.cend(), needle) == str_vec.end());
// Reverse.
fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_rbegin(&str_dbuf), dbuf_heap_cstr_rend(&str_dbuf), needle);
TEST_CHECK(!dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it));
TEST_CHECK(std::find(str_vec.crbegin(), str_vec.crend(), needle) != str_vec.rend());
}
char needle[] = "Definitely not inside";
dbuf_heap_cstr_iter fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), dbuf_heap_cstr_end(&str_dbuf), needle);
TEST_CHECK(dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it));
TEST_CHECK(std::find(str_vec.cbegin(), str_vec.cend(), needle) == str_vec.end());
fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_rbegin(&str_dbuf), dbuf_heap_cstr_rend(&str_dbuf), needle);
TEST_CHECK(dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it));
TEST_CHECK(std::find(str_vec.crbegin(), str_vec.crend(), needle) == str_vec.rend());
char *needle2 = *dbuf_heap_cstr_at(&str_dbuf, 0);
fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_iter_next(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), 1), dbuf_heap_cstr_end(&str_dbuf), needle2);
TEST_CHECK(dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it));
TEST_CHECK(std::find(str_vec.cbegin() + 1, str_vec.cend(), needle2) == str_vec.end());
needle2 = *dbuf_heap_cstr_back(&str_dbuf);
fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_iter_next(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), 1), dbuf_heap_cstr_iter_next(&str_dbuf, dbuf_heap_cstr_end(&str_dbuf), -1), needle2);
TEST_CHECK(dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it));
TEST_CHECK(std::find(str_vec.begin(), str_vec.end() - 1, needle2) == (str_vec.end() - 1));
needle2 = *dbuf_heap_cstr_at(&str_dbuf, 0);
fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), dbuf_heap_cstr_begin(&str_dbuf), needle2);
TEST_CHECK(dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it));
TEST_CHECK(std::find(str_vec.cbegin(), str_vec.cbegin(), needle2) == str_vec.cbegin());
dbuf_heap_cstr_free(&str_dbuf, NULL);
}

View File

@ -11,529 +11,22 @@ extern "C"
struct DbufIntTest : public Test
{
DbufIntTest(const std::string& name) : Test(name) {};
void run() override;
private:
std::vector<int> dbuf_to_vec(dbuf_int *dbuf)
{
std::vector<int> vec;
GUF_CNT_FOREACH(dbuf, dbuf_int, it) {
vec.push_back(*it.ptr);
}
return vec;
}
void test_push(dbuf_int *dbuf, int n)
{
std::vector<int> vec = dbuf_to_vec(dbuf);
TEST_CHECK(std::ssize(vec) == dbuf->size);
for (int i = 0; i < n; ++i) {
dbuf_int_push_val(dbuf, i);
vec.push_back(i);
TEST_CHECK(*dbuf_int_back(dbuf) == vec.back());
}
ptrdiff_t i = 0;
GUF_CNT_FOREACH(dbuf, dbuf_int, it) {
TEST_CHECK(*it.ptr == vec.at(i++));
}
TEST_CHECK(i == dbuf->size);
i = dbuf->size - 1;
GUF_CNT_FOREACH_REVERSE(dbuf, dbuf_int, rit) {
TEST_CHECK(*rit.ptr == vec.at(i--));
}
TEST_CHECK(i == -1);
}
void test_insert_remove(int n)
{
dbuf_int dbuf = {};
dbuf_int_init(&dbuf, 0, &guf_allocator_libc);
std::vector<int> vec = dbuf_to_vec(&dbuf);
guf_err err = GUF_ERR_NONE;
dbuf_int_try_erase(&dbuf, 0, &err);
TEST_CHECK(err == GUF_ERR_IDX_RANGE);
err = GUF_ERR_NONE;
dbuf_int_try_erase(&dbuf, 12, &err);
TEST_CHECK(err == GUF_ERR_IDX_RANGE);
err = GUF_ERR_NONE;
dbuf_int_try_front(&dbuf, &err);
TEST_CHECK(err == GUF_ERR_IDX_RANGE);
err = GUF_ERR_NONE;
dbuf_int_try_back(&dbuf, &err);
TEST_CHECK(err == GUF_ERR_IDX_RANGE);
err = GUF_ERR_NONE;
dbuf_int_try_at(&dbuf, 0, &err);
TEST_CHECK(err == GUF_ERR_IDX_RANGE);
for (int i = 0; i < n; ++i) {
dbuf_int_insert_val(&dbuf, i, i);
dbuf_int_insert_val(&dbuf, i * 2, 0);
dbuf_int_insert_val(&dbuf, i * 4, dbuf.size);
vec.insert(vec.begin() + i, i);
vec.insert(vec.begin(), i * 2);
vec.insert(vec.end(), i * 4);
}
TEST_CHECK(std::ssize(vec) == dbuf.size);
// Iterate
dbuf_int_iter it_dbuf = dbuf_int_begin(&dbuf);
std::vector<int>::const_iterator it_vec = vec.begin();
while (!dbuf_int_iter_is_end(&dbuf, it_dbuf) && it_vec != vec.end()) {
TEST_CHECK(*it_dbuf.ptr == *it_vec);
it_dbuf = dbuf_int_iter_next(&dbuf, it_dbuf, 1);
std::advance(it_vec, 1);
}
TEST_CHECK(dbuf_int_iter_is_end(&dbuf, it_dbuf) && it_vec == vec.end());
// Step iterate.
it_dbuf = dbuf_int_begin(&dbuf);
it_vec = vec.begin();
while (!dbuf_int_iter_is_end(&dbuf, it_dbuf) && it_vec != vec.end()) {
TEST_CHECK(*it_dbuf.ptr == *it_vec);
it_dbuf = dbuf_int_iter_next(&dbuf, it_dbuf, 7);
if (dbuf_int_iter_is_end(&dbuf, it_dbuf)) {
it_vec = vec.end();
} else {
std::advance(it_vec, 7);
}
}
TEST_CHECK(dbuf_int_iter_is_end(&dbuf, it_dbuf) && it_vec == vec.end());
// Reverse iterate.
dbuf_int_iter rit_dbuf = dbuf_int_rbegin(&dbuf);
std::vector<int>::const_reverse_iterator rit_vec = vec.crbegin();
while (!dbuf_int_iter_is_end(&dbuf, rit_dbuf) && rit_vec != vec.crend()) {
TEST_CHECK(*rit_dbuf.ptr == *rit_vec);
rit_dbuf = dbuf_int_iter_next(&dbuf, rit_dbuf, 1);
std::advance(rit_vec, 1);
}
TEST_CHECK(dbuf_int_iter_is_end(&dbuf, rit_dbuf) && rit_vec == vec.rend());
// Reverse iterate step.
rit_dbuf = dbuf_int_rbegin(&dbuf);
rit_vec = vec.crbegin();
while (!dbuf_int_iter_is_end(&dbuf, rit_dbuf) && rit_vec != vec.crend()) {
TEST_CHECK(*rit_dbuf.ptr == *rit_vec);
rit_dbuf = dbuf_int_iter_next(&dbuf, rit_dbuf, 4);
if (dbuf_int_iter_is_end(&dbuf, rit_dbuf)) {
rit_vec = vec.rend();
} else {
std::advance(rit_vec, 4);
}
}
TEST_CHECK(dbuf_int_iter_is_end(&dbuf, rit_dbuf) && rit_vec == vec.rend());
TEST_CHECK(dbuf.size == std::ssize(vec));
for (ptrdiff_t i = 0; i < dbuf.size; i += 8) {
dbuf_int_erase(&dbuf, i);
dbuf_int_erase(&dbuf, 0);
dbuf_int_pop(&dbuf);
vec.erase(vec.begin() + i);
vec.erase(vec.begin() + 0);
vec.pop_back();
}
TEST_CHECK(dbuf.size == std::ssize(vec));
for (ptrdiff_t i = 0; i < dbuf.size; i += 8) {
TEST_CHECK(*dbuf_int_at(&dbuf, i) == vec.at(i));
}
const ptrdiff_t size = dbuf.size;
for (ptrdiff_t i = 0; i < size; ++i) {
int a = dbuf_int_pop_move(&dbuf);
int b = vec.back();
TEST_CHECK(a == b);
vec.pop_back();
}
TEST_CHECK(dbuf.size == 0 && vec.size() == 0);
dbuf_int_free(&dbuf, NULL);
TEST_CHECK(dbuf.size == 0 && dbuf.capacity == 0 && !dbuf.data);
}
public:
bool run() override
{
if (done) {
return passed;
}
dbuf_int dbuf {};
dbuf_int_init(&dbuf, 0, &guf_allocator_libc);
push_check_name("test_push");
test_push(&dbuf, 256);
test_push(&dbuf, 128);
test_push(&dbuf, 17);
TEST_CHECK(dbuf.size == (256 + 128 + 17));
dbuf_int_free(&dbuf, NULL);
TEST_CHECK(dbuf.size == 0 && dbuf.capacity == 0 && dbuf.data == NULL);
dbuf_int_init(&dbuf, 24, &guf_allocator_libc);
TEST_CHECK(dbuf.size == 0 && dbuf.capacity == 24 && dbuf.data);
test_push(&dbuf, 365);
test_push(&dbuf, 4);
test_push(&dbuf, 25);
test_push(&dbuf, 64);
TEST_CHECK(dbuf.size == (365 + 4 + 25 + 64));
dbuf_int_free(&dbuf, NULL);
TEST_CHECK(dbuf.size == 0 && dbuf.capacity == 0 && dbuf.data == NULL);
pop_check_name();
push_check_name("insert_remove");
for (int n = 0; n <= 128; ++n) {
test_insert_remove(n);
}
test_insert_remove(400);
test_insert_remove(401);
test_insert_remove(512);
test_insert_remove(513);
test_insert_remove(601);
test_insert_remove(2048);
test_insert_remove(2049);
pop_check_name();
done = true;
passed = (num_failed_checks == 0);
return passed;
}
private:
std::vector<int> dbuf_to_vec(dbuf_int *dbuf);
void test_push(dbuf_int *dbuf, int n);
void test_insert_remove(int n);
};
struct DbufCstringTest : public Test
{
DbufCstringTest(std::string name) : Test(name) {};
void run() override;
private:
void test_iter(std::vector<std::string>& str_vec, dbuf_heap_cstr *str_dbuf, int step = 1)
{
GUF_ASSERT_RELEASE(str_dbuf);
if (step <= 0) {
step = 1;
}
ptrdiff_t i = 0;
GUF_CNT_FOREACH(str_dbuf, dbuf_heap_cstr, it) {
char *str = *it.ptr;
TEST_CHECK(str_vec.at(i) == str);
++i;
}
TEST_CHECK(i == str_dbuf->size);
i = str_dbuf->size - 1;
GUF_CNT_FOREACH_REVERSE(str_dbuf, dbuf_heap_cstr, rit) {
char *str = *rit.ptr;
TEST_CHECK(str_vec.at(i) == str);
--i;
}
TEST_CHECK(i == -1);
dbuf_heap_cstr_iter it_dbuf = dbuf_heap_cstr_begin(str_dbuf);
std::vector<std::string>::iterator it_vec = str_vec.begin();
while (!dbuf_heap_cstr_iter_is_end(str_dbuf, it_dbuf)) {
TEST_CHECK(it_vec != str_vec.end());
TEST_CHECK(*it_vec == *it_dbuf.ptr);
it_dbuf = dbuf_heap_cstr_iter_next(str_dbuf, it_dbuf, step);
if (!dbuf_heap_cstr_iter_is_end(str_dbuf, it_dbuf)) {
std::advance(it_vec, step);
} else {
it_vec = str_vec.end();
}
}
TEST_CHECK(dbuf_heap_cstr_iter_is_end(str_dbuf, it_dbuf) && it_vec == str_vec.end());
dbuf_heap_cstr_iter rit_dbuf = dbuf_heap_cstr_rbegin(str_dbuf);
std::vector<std::string>::reverse_iterator rit_vec = str_vec.rbegin();
while (!dbuf_heap_cstr_iter_is_end(str_dbuf, rit_dbuf)) {
TEST_CHECK(rit_vec != str_vec.rend());
TEST_CHECK(*rit_vec == *rit_dbuf.ptr);
rit_dbuf = dbuf_heap_cstr_iter_next(str_dbuf, rit_dbuf, step);
if (!dbuf_heap_cstr_iter_is_end(str_dbuf, rit_dbuf)) {
std::advance(rit_vec, step);
} else {
rit_vec = str_vec.rend();
}
}
TEST_CHECK(dbuf_heap_cstr_iter_is_end(str_dbuf, rit_dbuf) && rit_vec == str_vec.rend());
for (i = 0; i < str_dbuf->size; ++i) {
char *str = *dbuf_heap_cstr_at(str_dbuf, i);
TEST_CHECK(str_vec.at(i) == str);
}
}
void test_push_insert_erase(int n, ptrdiff_t start_cap = 0)
{
std::vector<std::string> str_vec;
dbuf_heap_cstr str_dbuf {};
dbuf_heap_cstr_init(&str_dbuf, start_cap, &guf_allocator_libc);
for (int i = 0; i < n; ++i) {
constexpr int BUF_SZ = 128;
char buf[BUF_SZ];
memset(buf, '\0', BUF_SZ);
snprintf(buf, BUF_SZ, "This is string number %d", i);
guf_cstr_heap str = buf;
dbuf_heap_cstr_push(&str_dbuf, &str, GUF_CPY_DEEP);
dbuf_heap_cstr_push_val_cpy(&str_dbuf, str);
char *heap_buf = strdup("Move me plz");
dbuf_heap_cstr_push(&str_dbuf, &heap_buf, GUF_CPY_MOVE);
TEST_CHECK(heap_buf == NULL);
TEST_CHECK(strncmp(*dbuf_heap_cstr_back(&str_dbuf), "Move me plz", BUF_SZ) == 0);
TEST_CHECK(strncmp(*dbuf_heap_cstr_at(&str_dbuf, str_dbuf.size - 2), buf, BUF_SZ) == 0);
TEST_CHECK(strncmp(*dbuf_heap_cstr_at(&str_dbuf, str_dbuf.size - 3), buf, BUF_SZ) == 0);
str_vec.push_back(std::string{buf});
str_vec.push_back(std::string{buf});
str_vec.emplace_back("Move me plz");
}
TEST_CHECK(str_dbuf.size == std::ssize(str_vec));
TEST_CHECK(str_dbuf.size == 3 * n);
for (int i = 1; i <= 8; ++i) {
test_iter(str_vec, &str_dbuf, i);
}
test_iter(str_vec, &str_dbuf, str_dbuf.size);
test_iter(str_vec, &str_dbuf, str_dbuf.size - 1);
test_iter(str_vec, &str_dbuf, str_dbuf.size + 1);
for (ptrdiff_t i = 0; i < str_dbuf.size; ++i) {
TEST_CHECK(str_vec.at(i) == *dbuf_heap_cstr_at(&str_dbuf, i));
}
// Insert front.
for (ptrdiff_t i = 0; i < 16; ++i) {
char str[] = "front";
dbuf_heap_cstr_insert_val_cpy(&str_dbuf, str, 0);
str_vec.insert(str_vec.begin(), std::string{str});
}
TEST_CHECK(std::ssize(str_vec) == str_dbuf.size);
for (ptrdiff_t i = 0; i < str_dbuf.size; ++i) {
TEST_CHECK(str_vec.at(i) == *dbuf_heap_cstr_at(&str_dbuf, i));
}
// Insert back.
for (ptrdiff_t i = 0; i < 16; ++i) {
char str[] = "front";
dbuf_heap_cstr_insert_val_cpy(&str_dbuf, str, str_dbuf.size);
str_vec.insert(str_vec.end(), std::string{str});
}
TEST_CHECK(std::ssize(str_vec) == str_dbuf.size);
for (ptrdiff_t i = 0; i < str_dbuf.size; ++i) {
TEST_CHECK(str_vec.at(i) == *dbuf_heap_cstr_at(&str_dbuf, i));
}
// Insert at i.
char str[] = "guf";
dbuf_heap_cstr_insert_val_cpy(&str_dbuf, str, str_dbuf.size / 2);
str_vec.insert(str_vec.begin() + str_vec.size() / 2, str);
dbuf_heap_cstr_insert_val_cpy(&str_dbuf, str, str_dbuf.size / 4);
str_vec.insert(str_vec.begin() + str_vec.size() / 4, str);
dbuf_heap_cstr_insert_val_cpy(&str_dbuf, str, 1);
str_vec.insert(str_vec.begin() + 1, str);
dbuf_heap_cstr_insert_val_cpy(&str_dbuf, str, str_dbuf.size - 1);
str_vec.insert(str_vec.begin() + (str_vec.size() - 1), str);
for (ptrdiff_t i = 0; i < str_dbuf.size; ++i) {
TEST_CHECK(str_vec.at(i) == *dbuf_heap_cstr_at(&str_dbuf, i));
}
guf_err err = GUF_ERR_NONE;
dbuf_heap_cstr_try_insert_val_cpy(&str_dbuf, str, str_dbuf.size + 1, &err);
TEST_CHECK(err == GUF_ERR_IDX_RANGE);
err = GUF_ERR_NONE;
dbuf_heap_cstr_try_insert_val_cpy(&str_dbuf, str, -1, &err);
TEST_CHECK(err == GUF_ERR_IDX_RANGE);
err = GUF_ERR_NONE;
dbuf_heap_cstr_try_insert_val_cpy(&str_dbuf, str, str_dbuf.size + 2, &err);
TEST_CHECK(err == GUF_ERR_IDX_RANGE);
TEST_CHECK(std::ssize(str_vec) == str_dbuf.size);
for (ptrdiff_t i = 0; i < str_dbuf.size; ++i) {
TEST_CHECK(str_vec.at(i) == *dbuf_heap_cstr_at(&str_dbuf, i));
}
if (str_dbuf.size) {
dbuf_heap_cstr_erase(&str_dbuf, str_dbuf.size - 1);
str_vec.erase(str_vec.end() - 1);
}
ptrdiff_t to_rem = 8;
while (str_dbuf.size && to_rem--) {
dbuf_heap_cstr_erase(&str_dbuf, 0);
str_vec.erase(str_vec.begin());
TEST_CHECK(std::ssize(str_vec) == str_dbuf.size);
if (str_dbuf.size) {
dbuf_heap_cstr_pop(&str_dbuf);
str_vec.pop_back();
TEST_CHECK(std::ssize(str_vec) == str_dbuf.size);
}
if (str_dbuf.size) {
dbuf_heap_cstr_erase(&str_dbuf, str_dbuf.size / 2);
str_vec.erase(str_vec.begin() + (str_vec.size() / 2));
TEST_CHECK(std::ssize(str_vec) == str_dbuf.size);
}
}
dbuf_heap_cstr_free(&str_dbuf, NULL);
TEST_CHECK(str_dbuf.size == 0 && str_dbuf.capacity == 0 && !str_dbuf.data);
}
/*
    Cross-checks dbuf_heap_cstr_find_val / dbuf_heap_cstr_contains_val against std::find on a
    std::vector<std::string> that holds the same strings.

    n: number of generated "String number <i>" entries; values below 2 are raised to 2.
       One additional "Moved string" entry is appended afterwards, so both containers
       end up with n + 1 elements.
*/
void test_find(int n = 32)
{
    if (n < 2) {
        n = 2;
    }

    std::vector<std::string> str_vec {};
    dbuf_heap_cstr str_dbuf = {};
    dbuf_heap_cstr_init(&str_dbuf, 0, &guf_allocator_libc);

    // Fill both containers with the same generated strings.
    for (int i = 0; i < n; ++i) {
        constexpr int BUF_SZ = 128;
        char buf[BUF_SZ];
        memset(buf, '\0', BUF_SZ);
        snprintf(buf, BUF_SZ, "String number %d", i);
        dbuf_heap_cstr_push_val_cpy(&str_dbuf, buf);
        str_vec.push_back(buf);
    }

    // Push one heap string with GUF_CPY_MOVE; the source pointer is expected to be NULL afterwards.
    char *move_me = strdup("Moved string");
    dbuf_heap_cstr_push(&str_dbuf, &move_me, GUF_CPY_MOVE);
    GUF_ASSERT_RELEASE(move_me == NULL);
    str_vec.emplace_back("Moved string");

    TEST_CHECK(std::ssize(str_vec) == str_dbuf.size);

    for (ptrdiff_t i = 0; i < str_dbuf.size; ++i) {
        char *needle = *dbuf_heap_cstr_at(&str_dbuf, i);
        TEST_CHECK(str_vec.at(i) == needle);

        // Searching the whole range must find the element.
        TEST_CHECK(dbuf_heap_cstr_contains_val(&str_dbuf, needle));
        dbuf_heap_cstr_iter fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), dbuf_heap_cstr_end(&str_dbuf), needle);
        TEST_CHECK(!dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it));
        TEST_CHECK(std::find(str_vec.cbegin(), str_vec.cend(), needle) != str_vec.end());

        // Searching [i, end) must still find it...
        dbuf_heap_cstr_iter begin = dbuf_heap_cstr_iter_next(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), i);
        dbuf_heap_cstr_iter end = dbuf_heap_cstr_end(&str_dbuf);
        fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, begin, end, needle);
        TEST_CHECK(!dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it));
        TEST_CHECK(std::find(str_vec.cbegin() + i, str_vec.cend(), needle) != str_vec.end());

        // ...while searching [i + 1, end) must not (all strings are distinct).
        begin = dbuf_heap_cstr_iter_next(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), i + 1);
        end = dbuf_heap_cstr_end(&str_dbuf);
        fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, begin, end, needle);
        TEST_CHECK(dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it));
        TEST_CHECK(std::find(str_vec.cbegin() + i + 1, str_vec.cend(), needle) == str_vec.end());

        // Reverse.
        fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_rbegin(&str_dbuf), dbuf_heap_cstr_rend(&str_dbuf), needle);
        TEST_CHECK(!dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it));
        TEST_CHECK(std::find(str_vec.crbegin(), str_vec.crend(), needle) != str_vec.rend());
    }

    // A string that was never inserted must not be found, forwards or backwards.
    char needle[] = "Definitely not inside";
    dbuf_heap_cstr_iter fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), dbuf_heap_cstr_end(&str_dbuf), needle);
    TEST_CHECK(dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it));
    TEST_CHECK(std::find(str_vec.cbegin(), str_vec.cend(), needle) == str_vec.end());
    fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_rbegin(&str_dbuf), dbuf_heap_cstr_rend(&str_dbuf), needle);
    TEST_CHECK(dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it));
    TEST_CHECK(std::find(str_vec.crbegin(), str_vec.crend(), needle) == str_vec.rend());

    // The first element must not be found in [1, end).
    char *needle2 = *dbuf_heap_cstr_at(&str_dbuf, 0);
    fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_iter_next(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), 1), dbuf_heap_cstr_end(&str_dbuf), needle2);
    TEST_CHECK(dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it));
    TEST_CHECK(std::find(str_vec.cbegin() + 1, str_vec.cend(), needle2) == str_vec.end());

    // The last element must not be found in [1, end - 1).
    // The std::find reference check uses the same sub-range as the dbuf search
    // (n >= 2 guarantees at least three elements, so begin() + 1 <= end() - 1).
    needle2 = *dbuf_heap_cstr_back(&str_dbuf);
    fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_iter_next(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), 1), dbuf_heap_cstr_iter_next(&str_dbuf, dbuf_heap_cstr_end(&str_dbuf), -1), needle2);
    TEST_CHECK(dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it));
    TEST_CHECK(std::find(str_vec.begin() + 1, str_vec.end() - 1, needle2) == (str_vec.end() - 1));

    // Searching an empty range [begin, begin) must not find anything.
    needle2 = *dbuf_heap_cstr_at(&str_dbuf, 0);
    fnd_it = dbuf_heap_cstr_find_val(&str_dbuf, dbuf_heap_cstr_begin(&str_dbuf), dbuf_heap_cstr_begin(&str_dbuf), needle2);
    TEST_CHECK(dbuf_heap_cstr_iter_is_end(&str_dbuf, fnd_it));
    TEST_CHECK(std::find(str_vec.cbegin(), str_vec.cbegin(), needle2) == str_vec.cbegin());

    dbuf_heap_cstr_free(&str_dbuf, NULL);
}
public:
// Runs the push/insert/erase, iterator and find checks once, then records the outcome
// in `done` and `passed`. Returns true if no check failed (num_failed_checks == 0).
bool run()
{
    push_check_name("push_insert_erase");
    for (int n = 1; n <= 32; ++n) {
        // Default start capacity first, then one below, one above, equal to and half of n.
        test_push_insert_erase(n);
        const int start_caps[] = {n - 1, n + 1, n, n / 2};
        for (const int start_cap : start_caps) {
            test_push_insert_erase(n, start_cap);
        }
    }
    test_push_insert_erase(2048);
    test_push_insert_erase(2048, 11);

    // Fill a dbuf and a std::vector with the same 512 strings for the iterator checks.
    std::vector<std::string> str_vec {};
    dbuf_heap_cstr str_dbuf = {};
    dbuf_heap_cstr_init(&str_dbuf, 0, &guf_allocator_libc);
    for (int num = 0; num < 512; ++num) {
        char buf[128] = {};
        snprintf(buf, GUF_ARR_SIZE(buf), "This is a pretty guf string (number %d)", num);
        guf_cstr_heap str = buf;
        dbuf_heap_cstr_push(&str_dbuf, &str, GUF_CPY_DEEP);
        str_vec.emplace_back(buf);
    }

    // Step sizes run from 0 up to (but excluding) the element count plus 16.
    for (int step = 0; step < str_dbuf.size + 16; ++step) {
        test_iter(str_vec, &str_dbuf, step);
    }

    dbuf_heap_cstr_free(&str_dbuf, NULL);
    TEST_CHECK(str_dbuf.size == 0 && str_dbuf.capacity == 0 && !str_dbuf.data);
    pop_check_name();

    push_check_name("find");
    test_find();
    const int find_sizes[] = {3, 42, 129};
    for (const int find_n : find_sizes) {
        test_find(find_n);
    }
    pop_check_name();

    done = true;
    passed = (num_failed_checks == 0);
    return passed;
}
private:
void test_iter(std::vector<std::string>& str_vec, dbuf_heap_cstr *str_dbuf, int step = 1);
void test_push_insert_erase(int n, ptrdiff_t start_cap = 0);
void test_find(int n = 32);
};

368
src/test/test_dict.cpp Normal file
View File

@ -0,0 +1,368 @@
#include "test_dict.hpp"
#include <unordered_map>
#include <cstring>
extern "C"
{
#include "guf_alloc_libc.h"
#include "guf_str.h"
#include "impls/dict_impl.h"
#include "impls/dbuf_impl.h"
}
/*
DictSvToIntTest:
*/
// Runs insert_lookup() against both test texts. Does nothing if the test is already marked done.
// free_file() is called after each text regardless of whether loading succeeded.
void DictSvToIntTest::run()
{
    if (done) {
        return;
    }

    push_check_name("insert_lookup(\"utf8-test.txt\")");
    if (TEST_CHECK(load_file(TEST_DATA_DIR "/utf8-test.txt"))) {
        // Default-initialised dict first, then every initial capacity from 0 to 64, then a few larger ones.
        insert_lookup();
        ptrdiff_t cap = 0;
        while (cap <= 64) {
            insert_lookup(cap);
            ++cap;
        }
        const ptrdiff_t large_caps[] = {512, 1997, 1999};
        for (const ptrdiff_t large_cap : large_caps) {
            insert_lookup(large_cap);
        }
    }
    free_file();
    pop_check_name();

    push_check_name("insert_lookup(\"bartleby.txt\")");
    if (TEST_CHECK(load_file(TEST_DATA_DIR "/bartleby.txt"))) {
        insert_lookup();
        insert_lookup(201);
    }
    free_file();
    pop_check_name();
}
// Counts the tokens of the currently loaded text (text_buf) twice -- once in a dict_sv_i32 and once
// in a std::unordered_map used as reference -- and checks that both agree after insertion, iteration
// and several erase patterns. The emptied dict is then re-used with five fixed keys to check
// tombstone counting, load factor and index capacity while one key at a time is repeatedly
// erased and re-inserted. The dict is freed at the end.
//
// inital_dict_cap: if it holds a value, the dict is created with dict_sv_i32_init_with_capacity
//                  using that value; otherwise dict_sv_i32_init is used.
void DictSvToIntTest::insert_lookup(std::optional<ptrdiff_t> inital_dict_cap)
{
std::unordered_map<std::string_view, int32_t> word_cnt_map {};
dict_sv_i32 word_cnt_dict {};
if (inital_dict_cap) {
dict_sv_i32_init_with_capacity(&word_cnt_dict, &guf_allocator_libc, inital_dict_cap.value());
} else {
dict_sv_i32_init(&word_cnt_dict, &guf_allocator_libc);
}
// Delimiters for the tokenizer: every entry of GUF_UTF8_WHITESPACE followed by every entry of GUF_UTF8_COMMON_PUNCT.
dbuf_str_view delims = dbuf_str_view_new(&guf_allocator_libc);
for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_WHITESPACE); ++i) {
// NOTE(review): designators are written .len/.str here but .str/.len further down in this function;
// standard C++20 requires declaration order, so one of the two spellings relies on compiler
// leniency -- confirm against the guf_str_view definition.
guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_WHITESPACE[i]), .str = GUF_UTF8_WHITESPACE[i]};
dbuf_str_view_push_val(&delims, d);
}
for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_COMMON_PUNCT); ++i) {
guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_COMMON_PUNCT[i]), .str = GUF_UTF8_COMMON_PUNCT[i]};
dbuf_str_view_push_val(&delims, d);
}
// Tokenise the whole text buffer.
// NOTE(review): the meaning of the `true` argument of guf_str_tok_next is not visible here
// (presumably it skips empty tokens) -- confirm in guf_str.h.
guf_str_tok_state tok_state = guf_str_tok_state_new(guf_str_view{.str = text_buf.data, .len = text_buf.size}, delims.data, delims.size, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST);
while (guf_str_tok_next(&tok_state, true)) {
guf_str_view tok = tok_state.cur_tok;
// if (tok.len <= 0) {
// continue;
// }
// Non-owning view of the same bytes as tok; used as key for the reference map.
std::string_view sv(tok.str , tok.len);
//std::cout << sv << std::string_view(tok_state.cur_delim.str, tok_state.cur_delim.len);
// Both containers must agree on whether this token has been seen before.
TEST_CHECK(dict_sv_i32_contains(&word_cnt_dict, &tok) == word_cnt_map.contains(sv));
if (!dict_sv_i32_contains(&word_cnt_dict, &tok)) {
// First occurrence: start counting at 1 in both containers.
dict_sv_i32_insert_val_arg(&word_cnt_dict, tok, 1, GUF_CPY_VALUE, GUF_CPY_VALUE);
word_cnt_map.insert({sv, 1});
} else {
// Repeated occurrence: increment the stored count in both containers.
int32_t *cnt = dict_sv_i32_at_val_arg(&word_cnt_dict, tok);
if (TEST_CHECK(cnt)) {
*cnt += 1;
}
word_cnt_map.at(sv) += 1;
}
// printf("tok_len: %td ", tok.len);
// printf("'%.*s'\n", (int)tok.len, tok.str);
TEST_CHECK(dict_sv_i32_debug_valid_size(&word_cnt_dict));
}
dbuf_str_view_free(&delims, NULL);
TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == std::ssize(word_cnt_map));
TEST_CHECK(dict_sv_i32_debug_valid_size(&word_cnt_dict));
// Every word of the reference map must be present in the dict with the same count...
for (const auto & [word, cnt] : word_cnt_map ) {
guf_str_view sv = {.str = word.data(), .len = (ptrdiff_t)word.size()};
int32_t *res = dict_sv_i32_at(&word_cnt_dict, &sv);
TEST_CHECK(res && *res == cnt);
}
// ...and every entry of the dict must be present in the reference map with the same count.
// i counts the loop iterations (it is incremented even if kv is null).
ptrdiff_t i = 0;
GUF_CNT_FOREACH(&word_cnt_dict, dict_sv_i32, kv_it) {
const dict_sv_i32_kv *kv = kv_it.ptr;
if (TEST_CHECK(kv)) {
const int32_t cnt = kv->val;
// printf("%.*s: %d\n", (int)kv->key.len, kv->key.str, cnt);
const std::string_view sv(kv->key.str, kv->key.len);
if (TEST_CHECK(word_cnt_map.contains(sv))) {
TEST_CHECK(word_cnt_map.at(sv) == cnt);
}
}
++i;
}
TEST_CHECK(i == dict_sv_i32_size(&word_cnt_dict));
TEST_CHECK(i == std::ssize(word_cnt_map));
TEST_CHECK(dict_sv_i32_debug_valid_size(&word_cnt_dict));
// std::cout << "load fac: " << dict_sv_i32_load_factor(&word_cnt_dict) << ", cap: " << word_cnt_dict.kv_indices_cap << " elem cap: " << word_cnt_dict.kv_elems.capacity << "\n";
// std::cout << "size: " << dict_sv_i32_size(&word_cnt_dict) << ", max probelen: " << word_cnt_dict.max_probelen << "\n";
// std::cout << "mem usage: " << dict_sv_i32_memory_usage(&word_cnt_dict) << "\n";
// Erase tests:
// Erase entries until at most half of them remain, alternating between the last entry
// (even num_del) and the first entry (odd num_del). The reference map is kept in sync.
const double load_fac_before_erase = dict_sv_i32_load_factor(&word_cnt_dict);
const ptrdiff_t size_before_erase = dict_sv_i32_size(&word_cnt_dict);
ptrdiff_t num_del = 0;
while (dict_sv_i32_size(&word_cnt_dict) > size_before_erase / 2) {
dict_sv_i32_kv *kv = NULL;
if (num_del % 2) {
dict_sv_i32_iter it = dict_sv_i32_begin(&word_cnt_dict);
GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, it));
kv = it.ptr;
} else {
dict_sv_i32_iter rit = dict_sv_i32_rbegin(&word_cnt_dict);
GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, rit));
kv = rit.ptr;
}
GUF_ASSERT_RELEASE(kv);
// Copy the key before erasing; kv points into the dict.
const guf_str_view key = kv->key;
const bool del_success = dict_sv_i32_erase(&word_cnt_dict, &key);
TEST_CHECK(del_success);
TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key));
std::string_view sv(key.str, (size_t)key.len);
if (TEST_CHECK(word_cnt_map.contains(sv))) {
word_cnt_map.erase(sv);
}
TEST_CHECK(!word_cnt_map.contains(sv));
if (del_success) {
++num_del;
}
}
TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) >= 0);
TEST_CHECK(size_before_erase - num_del == dict_sv_i32_size(&word_cnt_dict));
TEST_CHECK(std::ssize(word_cnt_map) == dict_sv_i32_size(&word_cnt_dict));
// The load factor is expected to be unchanged by the erasures unless the dict became empty
// (presumably because erased slots stay behind as tombstones -- confirm in the dict implementation).
if (dict_sv_i32_size(&word_cnt_dict) != 0) {
TEST_CHECK(load_fac_before_erase == dict_sv_i32_load_factor(&word_cnt_dict));
} else {
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0);
}
// With at least four entries left, also erase the second entry from the front and the second from the back.
if (dict_sv_i32_size(&word_cnt_dict) >= 4) {
dict_sv_i32_kv_dbuf_iter it = dict_sv_i32_begin(&word_cnt_dict);
it = dict_sv_i32_iter_next(&word_cnt_dict, it, 1);
GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, it));
guf_str_view key = it.ptr->key;
bool del_success = dict_sv_i32_erase(&word_cnt_dict, &key);
TEST_CHECK(del_success);
TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key));
std::string_view sv(key.str, (size_t)key.len);
if (TEST_CHECK(word_cnt_map.contains(sv))) {
word_cnt_map.erase(sv);
}
it = dict_sv_i32_rbegin(&word_cnt_dict);
it = dict_sv_i32_iter_next(&word_cnt_dict, it, 1);
GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, it));
key = it.ptr->key;
del_success = dict_sv_i32_erase(&word_cnt_dict, &key);
TEST_CHECK(del_success);
TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key));
sv = std::string_view(key.str, (size_t)key.len);
if (TEST_CHECK(word_cnt_map.contains(sv))) {
word_cnt_map.erase(sv);
}
}
TEST_CHECK(std::ssize(word_cnt_map) == dict_sv_i32_size(&word_cnt_dict));
// Re-verify the remaining entries against the reference map; here i only counts non-null entries.
i = 0;
GUF_CNT_FOREACH(&word_cnt_dict, dict_sv_i32, kv_it) {
const dict_sv_i32_kv *kv = kv_it.ptr;
if (TEST_CHECK(kv)) {
const int32_t cnt = kv->val;
const std::string_view sv(kv->key.str, (size_t)kv->key.len);
if (TEST_CHECK(word_cnt_map.contains(sv))) {
TEST_CHECK(word_cnt_map.at(sv) == cnt);
}
++i;
}
}
TEST_CHECK(i == word_cnt_dict.kv_elems.size);
TEST_CHECK(i == std::ssize(word_cnt_map));
// Erase everything that is left, always taking the first entry, and mirror each erase in the reference map.
while (dict_sv_i32_size(&word_cnt_dict) > 0) {
const dict_sv_i32_iter beg = dict_sv_i32_begin(&word_cnt_dict);
if (TEST_CHECK(!dict_sv_i32_iter_is_end(&word_cnt_dict, beg))) {
const guf_str_view key = beg.ptr->key;
if (TEST_CHECK(dict_sv_i32_contains(&word_cnt_dict, &key))) {
const bool del_success = dict_sv_i32_erase(&word_cnt_dict, &key);
TEST_CHECK(del_success);
TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key));
}
const std::string_view sv(key.str, (size_t)key.len);
if (TEST_CHECK(word_cnt_map.contains(sv))) {
word_cnt_map.erase(sv);
}
}
}
// Once empty, the dict is expected to report no tombstones and a load factor of 0.
TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == 0 && word_cnt_map.size() == 0);
TEST_CHECK(word_cnt_dict.num_tombstones == 0);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0);
// Re-use the emptied dict with five fixed keys.
// NOTE(review): the values are cast to size_t although they are read back through int32_t* below;
// the casts look unnecessary -- confirm the value parameter type of dict_sv_i32_insert_val_arg.
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej"), (size_t)64, GUF_CPY_VALUE, GUF_CPY_VALUE);
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!"), (size_t)128, GUF_CPY_VALUE, GUF_CPY_VALUE);
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum"), (size_t)256, GUF_CPY_VALUE, GUF_CPY_VALUE);
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med"), (size_t)512, GUF_CPY_VALUE, GUF_CPY_VALUE);
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi."), (size_t)1024, GUF_CPY_VALUE, GUF_CPY_VALUE);
TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == 5);
// Look the keys up in an order different from the insertion order.
int32_t *val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej"));
TEST_CHECK(val && *val == 64);
val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum"));
TEST_CHECK(val && *val == 256);
val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi."));
TEST_CHECK(val && *val == 1024);
val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!"));
TEST_CHECK(val && *val == 128);
val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med"));
TEST_CHECK(val && *val == 512);
// The entries are expected to sit in kv_elems in insertion order.
// strcmp assumes key.str is NUL-terminated (presumably it points at the string literal -- TODO confirm).
TEST_CHECK(word_cnt_dict.kv_elems.size == 5);
TEST_CHECK(word_cnt_dict.kv_elems.data[0].val == 64 && std::strcmp(word_cnt_dict.kv_elems.data[0].key.str, "Hej") == 0);
TEST_CHECK(word_cnt_dict.kv_elems.data[1].val == 128 && std::strcmp(word_cnt_dict.kv_elems.data[1].key.str, "verden!") == 0);
TEST_CHECK(word_cnt_dict.kv_elems.data[2].val == 256 && std::strcmp(word_cnt_dict.kv_elems.data[2].key.str, "Flødeskum") == 0);
TEST_CHECK(word_cnt_dict.kv_elems.data[3].val == 512 && std::strcmp(word_cnt_dict.kv_elems.data[3].key.str, "med") == 0);
TEST_CHECK(word_cnt_dict.kv_elems.data[4].val == 1024 && std::strcmp(word_cnt_dict.kv_elems.data[4].key.str, "Faxe Kondi.") == 0);
// Tombstone checks: `del` mirrors the expected value of num_tombstones. Each erase is expected to add
// one tombstone, each re-insert of the same key is expected to remove one again, and neither the load
// factor nor kv_indices_cap is expected to change. The ++del / --del inside TEST_CHECK are intentional
// side effects, so the order of the checks matters.
const double load_fac_beg = dict_sv_i32_load_factor(&word_cnt_dict);
const ptrdiff_t cap_begin = word_cnt_dict.kv_indices_cap;
ptrdiff_t del = 0;
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej")));
TEST_CHECK(word_cnt_dict.num_tombstones == ++del);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
// Insert/erase the same key more often than there are index slots.
for (ptrdiff_t n = 0; n < cap_begin + 128; ++n) {
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej"), 64, GUF_CPY_VALUE, GUF_CPY_VALUE);
TEST_CHECK(word_cnt_dict.num_tombstones == --del);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej")));
TEST_CHECK(word_cnt_dict.num_tombstones == ++del);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
}
TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin);
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi.")));
TEST_CHECK(word_cnt_dict.num_tombstones == ++del);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
for (ptrdiff_t n = 0; n < 256; ++n) {
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi."), 128, GUF_CPY_VALUE, GUF_CPY_VALUE);
TEST_CHECK(word_cnt_dict.num_tombstones == --del);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi.")));
TEST_CHECK(word_cnt_dict.num_tombstones == ++del);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
}
TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin);
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med")));
TEST_CHECK(word_cnt_dict.num_tombstones == ++del);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
for (ptrdiff_t n = 0; n < 512 + cap_begin; ++n) {
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med"), 256, GUF_CPY_VALUE, GUF_CPY_VALUE);
TEST_CHECK(word_cnt_dict.num_tombstones == --del);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med")));
TEST_CHECK(word_cnt_dict.num_tombstones == ++del);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
}
TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin);
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum")));
TEST_CHECK(word_cnt_dict.num_tombstones == ++del);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
for (ptrdiff_t n = 0; n < 71; ++n) {
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum"), 512, GUF_CPY_VALUE, GUF_CPY_VALUE);
TEST_CHECK(word_cnt_dict.num_tombstones == --del);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum")));
TEST_CHECK(word_cnt_dict.num_tombstones == ++del);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
}
TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin);
// Erasing the last remaining key empties the dict: tombstone count and load factor are expected to drop to 0.
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!")));
TEST_CHECK(word_cnt_dict.num_tombstones == 0);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0);
// With a single key going in and out of an otherwise empty dict, no tombstones are expected to remain.
for (ptrdiff_t n = 0; n < 201; ++n) {
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!"), 128, GUF_CPY_VALUE, GUF_CPY_VALUE);
TEST_CHECK(word_cnt_dict.num_tombstones == 0);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) > 0);
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!")));
TEST_CHECK(word_cnt_dict.num_tombstones == 0);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0);
}
TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin);
TEST_CHECK(word_cnt_dict.kv_elems.size == 0);
TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == 0);
// After freeing, every field of the dict (including the embedded kv_elems buffer) is expected to be zero/null.
dict_sv_i32_free(&word_cnt_dict, NULL);
bool dbuf_null = !word_cnt_dict.kv_elems.data && !word_cnt_dict.kv_elems.allocator && !word_cnt_dict.kv_elems.capacity && !word_cnt_dict.kv_elems.size;
TEST_CHECK(dbuf_null && !word_cnt_dict.kv_indices && !word_cnt_dict.kv_indices_cap && !word_cnt_dict.max_probelen && !word_cnt_dict.num_tombstones);
}
// Reads the file `fname` byte by byte into both text_buf and text_vec.
// Asserts (GUF_ASSERT_RELEASE) that the file could be opened.
// Returns the result of checking that both buffers ended up with the same size.
bool DictSvToIntTest::load_file(const char *fname)
{
    FILE *const in_file = fopen(fname, "r");
    GUF_ASSERT_RELEASE(in_file);

    dbuf_char_init(&text_buf, 128, &guf_allocator_libc);

    for (int ch = fgetc(in_file); ch != EOF; ch = fgetc(in_file)) {
        const char byte = (char)ch;
        dbuf_char_push_val(&text_buf, byte);
        text_vec.push_back(byte);
    }

    fclose(in_file);

    // dbuf_char_insert_val(&text_buf, '\xC0', 1);
    // text_vec.insert(text_vec.cbegin() + 1, '\xC0');

    return TEST_CHECK(std::ssize(text_vec) == text_buf.size);
}
// Drops the text loaded by load_file(): clears text_vec, frees text_buf and resets it to an empty value.
void DictSvToIntTest::free_file()
{
    text_vec.clear();

    dbuf_char_free(&text_buf, NULL);
    text_buf = {};
}

View File

@ -1,376 +1,25 @@
#pragma once
#include <unordered_map>
#include <cstring>
#include <vector>
#include <optional>
#include <string>
#include "test.hpp"
extern "C"
{
#include "guf_alloc_libc.h"
#include "guf_str.h"
#include "impls/dict_impl.h"
#include "impls/dbuf_impl.h"
}
struct DictSvToIntTest : public Test
{
// Forwards the test name to the Test base class.
DictSvToIntTest(const std::string& name)
    : Test(name)
{
}
void run() override;
private:
dbuf_char text_buf {};
std::vector<char> text_vec {};
bool load_file(const char *fname);
void free_file();
// Counts the tokens of the currently loaded text (text_buf) twice -- once in a dict_sv_i32 and once
// in a std::unordered_map used as reference -- and checks that both agree after insertion, iteration
// and several erase patterns. The emptied dict is then re-used with five fixed keys to check
// tombstone counting, load factor and index capacity while one key at a time is repeatedly
// erased and re-inserted. The dict is freed at the end.
//
// inital_dict_cap: if it holds a value, the dict is created with dict_sv_i32_init_with_capacity
//                  using that value; otherwise (the default) dict_sv_i32_init is used.
void insert_lookup(std::optional<ptrdiff_t> inital_dict_cap = {})
{
std::unordered_map<std::string_view, int32_t> word_cnt_map {};
dict_sv_i32 word_cnt_dict {};
if (inital_dict_cap) {
dict_sv_i32_init_with_capacity(&word_cnt_dict, &guf_allocator_libc, inital_dict_cap.value());
} else {
dict_sv_i32_init(&word_cnt_dict, &guf_allocator_libc);
}
// Delimiters for the tokenizer: every entry of GUF_UTF8_WHITESPACE followed by every entry of GUF_UTF8_COMMON_PUNCT.
dbuf_str_view delims = dbuf_str_view_new(&guf_allocator_libc);
for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_WHITESPACE); ++i) {
// NOTE(review): designators are written .len/.str here but .str/.len further down in this function;
// standard C++20 requires declaration order, so one of the two spellings relies on compiler
// leniency -- confirm against the guf_str_view definition.
guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_WHITESPACE[i]), .str = GUF_UTF8_WHITESPACE[i]};
dbuf_str_view_push_val(&delims, d);
}
for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_COMMON_PUNCT); ++i) {
guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_COMMON_PUNCT[i]), .str = GUF_UTF8_COMMON_PUNCT[i]};
dbuf_str_view_push_val(&delims, d);
}
// Tokenise the whole text buffer.
// NOTE(review): the meaning of the `true` argument of guf_str_tok_next is not visible here
// (presumably it skips empty tokens) -- confirm in guf_str.h.
guf_str_tok_state tok_state = guf_str_tok_state_new(guf_str_view{.str = text_buf.data, .len = text_buf.size}, delims.data, delims.size, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST);
while (guf_str_tok_next(&tok_state, true)) {
guf_str_view tok = tok_state.cur_tok;
// if (tok.len <= 0) {
// continue;
// }
// Non-owning view of the same bytes as tok; used as key for the reference map.
std::string_view sv(tok.str , tok.len);
//std::cout << sv << std::string_view(tok_state.cur_delim.str, tok_state.cur_delim.len);
// Both containers must agree on whether this token has been seen before.
TEST_CHECK(dict_sv_i32_contains(&word_cnt_dict, &tok) == word_cnt_map.contains(sv));
if (!dict_sv_i32_contains(&word_cnt_dict, &tok)) {
// First occurrence: start counting at 1 in both containers.
dict_sv_i32_insert_val_arg(&word_cnt_dict, tok, 1, GUF_CPY_VALUE, GUF_CPY_VALUE);
word_cnt_map.insert({sv, 1});
} else {
// Repeated occurrence: increment the stored count in both containers.
int32_t *cnt = dict_sv_i32_at_val_arg(&word_cnt_dict, tok);
if (TEST_CHECK(cnt)) {
*cnt += 1;
}
word_cnt_map.at(sv) += 1;
}
// printf("tok_len: %td ", tok.len);
// printf("'%.*s'\n", (int)tok.len, tok.str);
TEST_CHECK(dict_sv_i32_debug_valid_size(&word_cnt_dict));
}
dbuf_str_view_free(&delims, NULL);
TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == std::ssize(word_cnt_map));
TEST_CHECK(dict_sv_i32_debug_valid_size(&word_cnt_dict));
// Every word of the reference map must be present in the dict with the same count...
for (const auto & [word, cnt] : word_cnt_map ) {
guf_str_view sv = {.str = word.data(), .len = (ptrdiff_t)word.size()};
int32_t *res = dict_sv_i32_at(&word_cnt_dict, &sv);
TEST_CHECK(res && *res == cnt);
}
// ...and every entry of the dict must be present in the reference map with the same count.
// i counts the loop iterations (it is incremented even if kv is null).
ptrdiff_t i = 0;
GUF_CNT_FOREACH(&word_cnt_dict, dict_sv_i32, kv_it) {
const dict_sv_i32_kv *kv = kv_it.ptr;
if (TEST_CHECK(kv)) {
const int32_t cnt = kv->val;
// printf("%.*s: %d\n", (int)kv->key.len, kv->key.str, cnt);
const std::string_view sv(kv->key.str, kv->key.len);
if (TEST_CHECK(word_cnt_map.contains(sv))) {
TEST_CHECK(word_cnt_map.at(sv) == cnt);
}
}
++i;
}
TEST_CHECK(i == dict_sv_i32_size(&word_cnt_dict));
TEST_CHECK(i == std::ssize(word_cnt_map));
TEST_CHECK(dict_sv_i32_debug_valid_size(&word_cnt_dict));
// std::cout << "load fac: " << dict_sv_i32_load_factor(&word_cnt_dict) << ", cap: " << word_cnt_dict.kv_indices_cap << " elem cap: " << word_cnt_dict.kv_elems.capacity << "\n";
// std::cout << "size: " << dict_sv_i32_size(&word_cnt_dict) << ", max probelen: " << word_cnt_dict.max_probelen << "\n";
// std::cout << "mem usage: " << dict_sv_i32_memory_usage(&word_cnt_dict) << "\n";
// Erase tests:
// Erase entries until at most half of them remain, alternating between the last entry
// (even num_del) and the first entry (odd num_del). The reference map is kept in sync.
const double load_fac_before_erase = dict_sv_i32_load_factor(&word_cnt_dict);
const ptrdiff_t size_before_erase = dict_sv_i32_size(&word_cnt_dict);
ptrdiff_t num_del = 0;
while (dict_sv_i32_size(&word_cnt_dict) > size_before_erase / 2) {
dict_sv_i32_kv *kv = NULL;
if (num_del % 2) {
dict_sv_i32_iter it = dict_sv_i32_begin(&word_cnt_dict);
GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, it));
kv = it.ptr;
} else {
dict_sv_i32_iter rit = dict_sv_i32_rbegin(&word_cnt_dict);
GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, rit));
kv = rit.ptr;
}
GUF_ASSERT_RELEASE(kv);
// Copy the key before erasing; kv points into the dict.
const guf_str_view key = kv->key;
const bool del_success = dict_sv_i32_erase(&word_cnt_dict, &key);
TEST_CHECK(del_success);
TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key));
std::string_view sv(key.str, (size_t)key.len);
if (TEST_CHECK(word_cnt_map.contains(sv))) {
word_cnt_map.erase(sv);
}
TEST_CHECK(!word_cnt_map.contains(sv));
if (del_success) {
++num_del;
}
}
TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) >= 0);
TEST_CHECK(size_before_erase - num_del == dict_sv_i32_size(&word_cnt_dict));
TEST_CHECK(std::ssize(word_cnt_map) == dict_sv_i32_size(&word_cnt_dict));
// The load factor is expected to be unchanged by the erasures unless the dict became empty
// (presumably because erased slots stay behind as tombstones -- confirm in the dict implementation).
if (dict_sv_i32_size(&word_cnt_dict) != 0) {
TEST_CHECK(load_fac_before_erase == dict_sv_i32_load_factor(&word_cnt_dict));
} else {
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0);
}
// With at least four entries left, also erase the second entry from the front and the second from the back.
if (dict_sv_i32_size(&word_cnt_dict) >= 4) {
dict_sv_i32_kv_dbuf_iter it = dict_sv_i32_begin(&word_cnt_dict);
it = dict_sv_i32_iter_next(&word_cnt_dict, it, 1);
GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, it));
guf_str_view key = it.ptr->key;
bool del_success = dict_sv_i32_erase(&word_cnt_dict, &key);
TEST_CHECK(del_success);
TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key));
std::string_view sv(key.str, (size_t)key.len);
if (TEST_CHECK(word_cnt_map.contains(sv))) {
word_cnt_map.erase(sv);
}
it = dict_sv_i32_rbegin(&word_cnt_dict);
it = dict_sv_i32_iter_next(&word_cnt_dict, it, 1);
GUF_ASSERT_RELEASE(!dict_sv_i32_iter_is_end(&word_cnt_dict, it));
key = it.ptr->key;
del_success = dict_sv_i32_erase(&word_cnt_dict, &key);
TEST_CHECK(del_success);
TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key));
sv = std::string_view(key.str, (size_t)key.len);
if (TEST_CHECK(word_cnt_map.contains(sv))) {
word_cnt_map.erase(sv);
}
}
TEST_CHECK(std::ssize(word_cnt_map) == dict_sv_i32_size(&word_cnt_dict));
// Re-verify the remaining entries against the reference map; here i only counts non-null entries.
i = 0;
GUF_CNT_FOREACH(&word_cnt_dict, dict_sv_i32, kv_it) {
const dict_sv_i32_kv *kv = kv_it.ptr;
if (TEST_CHECK(kv)) {
const int32_t cnt = kv->val;
const std::string_view sv(kv->key.str, (size_t)kv->key.len);
if (TEST_CHECK(word_cnt_map.contains(sv))) {
TEST_CHECK(word_cnt_map.at(sv) == cnt);
}
++i;
}
}
TEST_CHECK(i == word_cnt_dict.kv_elems.size);
TEST_CHECK(i == std::ssize(word_cnt_map));
// Erase everything that is left, always taking the first entry, and mirror each erase in the reference map.
while (dict_sv_i32_size(&word_cnt_dict) > 0) {
const dict_sv_i32_iter beg = dict_sv_i32_begin(&word_cnt_dict);
if (TEST_CHECK(!dict_sv_i32_iter_is_end(&word_cnt_dict, beg))) {
const guf_str_view key = beg.ptr->key;
if (TEST_CHECK(dict_sv_i32_contains(&word_cnt_dict, &key))) {
const bool del_success = dict_sv_i32_erase(&word_cnt_dict, &key);
TEST_CHECK(del_success);
TEST_CHECK(!dict_sv_i32_contains(&word_cnt_dict, &key));
}
const std::string_view sv(key.str, (size_t)key.len);
if (TEST_CHECK(word_cnt_map.contains(sv))) {
word_cnt_map.erase(sv);
}
}
}
// Once empty, the dict is expected to report no tombstones and a load factor of 0.
TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == 0 && word_cnt_map.size() == 0);
TEST_CHECK(word_cnt_dict.num_tombstones == 0);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0);
// Re-use the emptied dict with five fixed keys.
// NOTE(review): the values are cast to size_t although they are read back through int32_t* below;
// the casts look unnecessary -- confirm the value parameter type of dict_sv_i32_insert_val_arg.
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej"), (size_t)64, GUF_CPY_VALUE, GUF_CPY_VALUE);
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!"), (size_t)128, GUF_CPY_VALUE, GUF_CPY_VALUE);
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum"), (size_t)256, GUF_CPY_VALUE, GUF_CPY_VALUE);
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med"), (size_t)512, GUF_CPY_VALUE, GUF_CPY_VALUE);
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi."), (size_t)1024, GUF_CPY_VALUE, GUF_CPY_VALUE);
TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == 5);
// Look the keys up in an order different from the insertion order.
int32_t *val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej"));
TEST_CHECK(val && *val == 64);
val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum"));
TEST_CHECK(val && *val == 256);
val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi."));
TEST_CHECK(val && *val == 1024);
val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!"));
TEST_CHECK(val && *val == 128);
val = dict_sv_i32_at_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med"));
TEST_CHECK(val && *val == 512);
// The entries are expected to sit in kv_elems in insertion order.
// strcmp assumes key.str is NUL-terminated (presumably it points at the string literal -- TODO confirm).
TEST_CHECK(word_cnt_dict.kv_elems.size == 5);
TEST_CHECK(word_cnt_dict.kv_elems.data[0].val == 64 && std::strcmp(word_cnt_dict.kv_elems.data[0].key.str, "Hej") == 0);
TEST_CHECK(word_cnt_dict.kv_elems.data[1].val == 128 && std::strcmp(word_cnt_dict.kv_elems.data[1].key.str, "verden!") == 0);
TEST_CHECK(word_cnt_dict.kv_elems.data[2].val == 256 && std::strcmp(word_cnt_dict.kv_elems.data[2].key.str, "Flødeskum") == 0);
TEST_CHECK(word_cnt_dict.kv_elems.data[3].val == 512 && std::strcmp(word_cnt_dict.kv_elems.data[3].key.str, "med") == 0);
TEST_CHECK(word_cnt_dict.kv_elems.data[4].val == 1024 && std::strcmp(word_cnt_dict.kv_elems.data[4].key.str, "Faxe Kondi.") == 0);
// Tombstone checks: `del` mirrors the expected value of num_tombstones. Each erase is expected to add
// one tombstone, each re-insert of the same key is expected to remove one again, and neither the load
// factor nor kv_indices_cap is expected to change. The ++del / --del inside TEST_CHECK are intentional
// side effects, so the order of the checks matters.
const double load_fac_beg = dict_sv_i32_load_factor(&word_cnt_dict);
const ptrdiff_t cap_begin = word_cnt_dict.kv_indices_cap;
ptrdiff_t del = 0;
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej")));
TEST_CHECK(word_cnt_dict.num_tombstones == ++del);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
// Insert/erase the same key more often than there are index slots.
for (ptrdiff_t n = 0; n < cap_begin + 128; ++n) {
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej"), 64, GUF_CPY_VALUE, GUF_CPY_VALUE);
TEST_CHECK(word_cnt_dict.num_tombstones == --del);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Hej")));
TEST_CHECK(word_cnt_dict.num_tombstones == ++del);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
}
TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin);
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi.")));
TEST_CHECK(word_cnt_dict.num_tombstones == ++del);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
for (ptrdiff_t n = 0; n < 256; ++n) {
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi."), 128, GUF_CPY_VALUE, GUF_CPY_VALUE);
TEST_CHECK(word_cnt_dict.num_tombstones == --del);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Faxe Kondi.")));
TEST_CHECK(word_cnt_dict.num_tombstones == ++del);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
}
TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin);
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med")));
TEST_CHECK(word_cnt_dict.num_tombstones == ++del);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
for (ptrdiff_t n = 0; n < 512 + cap_begin; ++n) {
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med"), 256, GUF_CPY_VALUE, GUF_CPY_VALUE);
TEST_CHECK(word_cnt_dict.num_tombstones == --del);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("med")));
TEST_CHECK(word_cnt_dict.num_tombstones == ++del);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
}
TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin);
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum")));
TEST_CHECK(word_cnt_dict.num_tombstones == ++del);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
for (ptrdiff_t n = 0; n < 71; ++n) {
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum"), 512, GUF_CPY_VALUE, GUF_CPY_VALUE);
TEST_CHECK(word_cnt_dict.num_tombstones == --del);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("Flødeskum")));
TEST_CHECK(word_cnt_dict.num_tombstones == ++del);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == load_fac_beg);
}
TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin);
// Erasing the last remaining key empties the dict: tombstone count and load factor are expected to drop to 0.
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!")));
TEST_CHECK(word_cnt_dict.num_tombstones == 0);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0);
// With a single key going in and out of an otherwise empty dict, no tombstones are expected to remain.
for (ptrdiff_t n = 0; n < 201; ++n) {
dict_sv_i32_insert_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!"), 128, GUF_CPY_VALUE, GUF_CPY_VALUE);
TEST_CHECK(word_cnt_dict.num_tombstones == 0);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) > 0);
TEST_CHECK(dict_sv_i32_erase_val_arg(&word_cnt_dict, GUF_CSTR_TO_VIEW_CPP("verden!")));
TEST_CHECK(word_cnt_dict.num_tombstones == 0);
TEST_CHECK(dict_sv_i32_load_factor(&word_cnt_dict) == 0);
}
TEST_CHECK(word_cnt_dict.kv_indices_cap == cap_begin);
TEST_CHECK(word_cnt_dict.kv_elems.size == 0);
TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == 0);
// After freeing, every field of the dict (including the embedded kv_elems buffer) is expected to be zero/null.
dict_sv_i32_free(&word_cnt_dict, NULL);
bool dbuf_null = !word_cnt_dict.kv_elems.data && !word_cnt_dict.kv_elems.allocator && !word_cnt_dict.kv_elems.capacity && !word_cnt_dict.kv_elems.size;
TEST_CHECK(dbuf_null && !word_cnt_dict.kv_indices && !word_cnt_dict.kv_indices_cap && !word_cnt_dict.max_probelen && !word_cnt_dict.num_tombstones);
}
// Reads the file `fname` byte by byte into both text_buf and text_vec.
// Asserts (GUF_ASSERT_RELEASE) that the file could be opened.
// Returns the result of checking that both buffers ended up with the same size.
bool load_file(const char *fname)
{
    FILE *const in_file = fopen(fname, "r");
    GUF_ASSERT_RELEASE(in_file);

    dbuf_char_init(&text_buf, 128, &guf_allocator_libc);

    for (int ch = fgetc(in_file); ch != EOF; ch = fgetc(in_file)) {
        const char byte = (char)ch;
        dbuf_char_push_val(&text_buf, byte);
        text_vec.push_back(byte);
    }

    fclose(in_file);

    // dbuf_char_insert_val(&text_buf, '\xC0', 1);
    // text_vec.insert(text_vec.cbegin() + 1, '\xC0');

    return TEST_CHECK(std::ssize(text_vec) == text_buf.size);
}
// Drops the text loaded by load_file(): clears text_vec, frees text_buf and resets it to an empty value.
void free_file()
{
    text_vec.clear();

    dbuf_char_free(&text_buf, NULL);
    text_buf = {};
}
public:
// Runs insert_lookup() against both test texts and records the outcome in `passed` / `done`.
// If the test already ran, the stored result is returned without running again.
// free_file() is called after each text regardless of whether loading succeeded.
bool run() override
{
    if (done) {
        return passed;
    }

    if (TEST_CHECK(load_file(TEST_DATA_DIR "/utf8-test.txt"))) {
        // Default-initialised dict first, then every initial capacity from 0 to 64, then a few larger ones.
        insert_lookup();
        ptrdiff_t cap = 0;
        while (cap <= 64) {
            insert_lookup(cap);
            ++cap;
        }
        const ptrdiff_t large_caps[] = {512, 1997, 1999};
        for (const ptrdiff_t large_cap : large_caps) {
            insert_lookup(large_cap);
        }
    }
    free_file();

    if (TEST_CHECK(load_file(TEST_DATA_DIR "/bartleby.txt"))) {
        insert_lookup();
        insert_lookup(201);
    }
    free_file();

    passed = (num_failed_checks == 0);
    done = true;
    return passed;
}
void insert_lookup(std::optional<ptrdiff_t> inital_dict_cap = {});
};

376
src/test/test_str.cpp Normal file
View File

@ -0,0 +1,376 @@
#include "test_str.hpp"
extern "C"
{
#include "guf_alloc_libc.h"
}
/*
StrTest:
*/
/*
    Runs every string check, grouped under the check names "init_empty", "append_char", "append_str",
    "test_popsplit" and "get_toks". Does nothing if the test is already marked as done.
*/
void StrTest::run()
{
    if (done) {
        return;
    }

    // NOTE: the literals are converted through std::string's const char* constructor, which stops at the
    // first '\0'. "\0" therefore is an empty string, "Othell\0o" is "Othell" and "f\0\0" is "f".
    const std::vector<std::string> words = {
        "",
        "\0",
        "Hello",
        "Othell\0o",
        "f\0\0",
        "\0",
        "0",
        "a",
        "ab",
        "🌈 waow a rainboge!",
        "orange cat(1) :3",
        "xes yag",
        "Hello, world! This is a pretty darn long string I'd say...",
        "I want to eat crayons. I crave crayons because they are tasty, and everybody telling me crayons are not edible must be either lying or dumb. I like trains. 42 is a number. 3.14159265... is not a rational number, and it is called pi. I ate some pie (it was a crayon pie).",
        std::string(32, 'a'),
        std::string(64, 'b'),
        std::string(1024, 'a'),
        std::string(2048, 'a'),
        std::string(4096, 'a'),
        std::string(5001, 'a'),
        std::string(7121, 'a'),
        std::string(2000, 'a'),
        // Lengths around (multiples of) GUF_STR_SSO_BUF_CAP.
        std::string(GUF_STR_SSO_BUF_CAP, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP - 1, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP + 1, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP - 2, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP + 2, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP - 3, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP + 3, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP * 2, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP * 3, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP * 4, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP * 5, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP * 6, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP * 7, 'a'),
    };

    push_check_name("init_empty");
    test_init_empty();
    pop_check_name();

    push_check_name("append_char");
    for (const auto& word : words) {
        test_init_free(word);
        test_append_char(word);
        test_append_char(word, true);
    }
    pop_check_name();

    // Every word is appended to itself (twice) and combined with every later word in both orders.
    push_check_name("append_str");
    for (size_t i = 0; i < words.size(); ++i) {
        const auto& w1 = words.at(i);
        append_str(w1, w1);
        append_str(w1, w1);
        for (size_t j = i + 1; j < words.size(); ++j) {
            const auto& w2 = words.at(j);
            append_str(w1, w2);
            append_str(w2, w1);
        }
    }
    pop_check_name();

    push_check_name("test_popsplit");
    std::vector<std::string_view> split = test_popsplit("1997-04-01", "-");
    if (TEST_CHECK(split.size() == 3)) {
        TEST_CHECK(split.at(0) == "1997" && split.at(1) == "04" && split.at(2) == "01");
    }
    split = test_popsplit("1997-04-01-", "-");
    if (TEST_CHECK(split.size() == 3)) {
        TEST_CHECK(split.at(0) == "1997" && split.at(1) == "04" && split.at(2) == "01");
    }
    split = test_popsplit("2025/05/08", "/");
    if (TEST_CHECK(split.size() == 3)) {
        TEST_CHECK(split.at(0) == "2025" && split.at(1) == "05" && split.at(2) == "08");
    }
    split = test_popsplit("2025/05/08/", "/");
    if (TEST_CHECK(split.size() == 3)) {
        TEST_CHECK(split.at(0) == "2025" && split.at(1) == "05" && split.at(2) == "08");
    }
    split = test_popsplit("2025/05/08//", "/");
    if (TEST_CHECK(split.size() == 4)) {
        TEST_CHECK(split.at(0) == "2025" && split.at(1) == "05" && split.at(2) == "08" && split.at(3) == "");
    }
    split = test_popsplit("/2025/05/08", "/");
    if (TEST_CHECK(split.size() == 4)) {
        TEST_CHECK(split.at(0) == "" && split.at(1) == "2025" && split.at(2) == "05" && split.at(3) == "08");
    }
    split = test_popsplit("//2025/05/08", "/");
    if (TEST_CHECK(split.size() == 5)) {
        TEST_CHECK(split.at(0) == "" && split.at(1) == "" && split.at(2) == "2025" && split.at(3) == "05" && split.at(4) == "08");
    }
    split = test_popsplit("I eat formidable crayons, oof, for real", "foo");
    if (TEST_CHECK(split.size() == 1)) {
        TEST_CHECK(split.at(0) == "I eat formidable crayons, oof, for real");
    }
    split = test_popsplit("Hej <<", "<<");
    if (TEST_CHECK(split.size() == 1)) {
        TEST_CHECK(split.at(0) == "Hej ");
    }
    split = test_popsplit("Hej << verden", "<<");
    if (TEST_CHECK(split.size() == 2)) {
        TEST_CHECK(split.at(0) == "Hej " && split.at(1) == " verden");
    }
    split = test_popsplit("<< Hej << verden", "<<");
    if (TEST_CHECK(split.size() == 3)) {
        TEST_CHECK(split.at(0) == "" && split.at(1) == " Hej " && split.at(2) == " verden");
    }
    split = test_popsplit("<< Hej << verden <<< foo<>", "<<");
    if (TEST_CHECK(split.size() == 4)) {
        TEST_CHECK(split.at(0) == "" && split.at(1) == " Hej " && split.at(2) == " verden " && split.at(3) == "< foo<>");
    }
    split = test_popsplit("I eat tofu", "");
    if (TEST_CHECK(split.size() == 1)) {
        TEST_CHECK(split.at(0) == "I eat tofu");
    }
    split = test_popsplit("At 3 a.m. during FULL-moon FULL-STOP Next to the public-library's -STOP sign FULL-STOP", "FULL-STOP");
    if (TEST_CHECK(split.size() == 2)) {
        TEST_CHECK(split.at(0) == "At 3 a.m. during FULL-moon " && split.at(1) == " Next to the public-library's -STOP sign ");
    }
    split = test_popsplit("At 3 a.m. during FULL-moon FULL-STOP Next to the public-library's -STOP sign FULL-STOPI like trains, FULL-STO", "FULL-STOP");
    if (TEST_CHECK(split.size() == 3)) {
        TEST_CHECK(split.at(0) == "At 3 a.m. during FULL-moon " && split.at(1) == " Next to the public-library's -STOP sign " && split.at(2) == "I like trains, FULL-STO");
    }
    split = test_popsplit("At 3 a.m. during FULL-moon FULL-STOP Next to the public-library's -STOP sign FULL-STOPI like trains, FULL-STO Poo", "FULL-STOP");
    if (TEST_CHECK(split.size() == 3)) {
        TEST_CHECK(split.at(0) == "At 3 a.m. during FULL-moon " && split.at(1) == " Next to the public-library's -STOP sign " && split.at(2) == "I like trains, FULL-STO Poo");
    }
    pop_check_name();

    push_check_name("get_toks");
    std::vector<std::string_view> tok_words = {"hello", "world", "cat", "vertex", "normal", "pizza", "running", "mouse", "playing", "adjacent"};
    std::vector<std::string_view> delims = {",", " ", "\n", "\t", "\r"};
    for (int is_trailing = 0; is_trailing < 2; ++is_trailing) {
        // Inclusive upper bound, so that the last entry of tok_words is exercised as well.
        for (ptrdiff_t num_words = 1; num_words <= std::ssize(tok_words); ++num_words) {
            // Join the first num_words words with ", " (plus a trailing ", " if is_trailing is set).
            std::string str = "";
            for (ptrdiff_t j = 0; j < num_words; ++j) {
                str += tok_words.at(j);
                if (j < num_words - 1 || is_trailing) {
                    str += ", ";
                }
            }
            std::vector<std::string_view> toks = get_toks(std::string_view{str}, delims, false, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST);
            if (TEST_CHECK(std::ssize(toks) == num_words)) {
                for (ptrdiff_t i = 0; i < num_words; ++i) {
                    TEST_CHECK(toks.at(i) == tok_words.at(i));
                }
            }
        }
    }

    std::string_view tok_str = "<stats>age: 28, occupation: NULL, crayons_eaten: 256 </stats>";
    delims = {"<stats>", "</stats>", ":", ",", " ", "\t", "<stats", "<", ">", "</"};
    auto tok_result = get_toks(tok_str, delims, true, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST);
    TEST_CHECK(tok_result.size() == 19);
    TEST_CHECK(tok_result.at(18) == "</stats>" && tok_result.at(0) == "<stats>" && tok_result.at(1) == "age" && tok_result.at(2) == ":" && tok_result.at(3) == " " && tok_result.at(4) == "28");
    tok_result = get_toks(tok_str, delims, false, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST);
    TEST_CHECK(tok_result.size() == 6);
    TEST_CHECK(tok_result.at(0) == "age" && tok_result.at(1) == "28" && tok_result.at(2) == "occupation" && tok_result.at(3) == "NULL" &&
        tok_result.at(4) == "crayons_eaten" && tok_result.at(5) == "256");
    pop_check_name();
}
void StrTest::test_init_free(std::string str)
{
guf_str s0;
guf_str_init(&s0, GUF_CSTR_TO_VIEW_CPP(str.c_str()), &guf_allocator_libc);
guf_str s1 = guf_str_new(GUF_CSTR_TO_VIEW_CPP(str.c_str()), &guf_allocator_libc);
guf_str s2;
guf_str_init_from_cstr(&s2, str.c_str(), &guf_allocator_libc);
TEST_CHECK(guf_str_equal(&s0, &s1));
TEST_CHECK(guf_str_equal(&s0, &s2));
TEST_CHECK(guf_str_equal(&s1, &s2));
TEST_CHECK((ptrdiff_t)str.size() == guf_str_len(&s0));
TEST_CHECK(str == guf_str_const_cstr(&s0));
TEST_CHECK(str == guf_str_cstr(&s0));
TEST_CHECK((ptrdiff_t)str.size() == guf_str_len(&s1));
TEST_CHECK(str == guf_str_const_cstr(&s1));
TEST_CHECK(str == guf_str_cstr(&s1));
TEST_CHECK((ptrdiff_t)str.size() == guf_str_len(&s2));
TEST_CHECK(str == guf_str_const_cstr(&s2));
TEST_CHECK(str == guf_str_cstr(&s2));
guf_str_free(&s0, NULL);
guf_str_free(&s1, NULL);
guf_str_free(&s2, NULL);
TEST_CHECK(guf_str_is_uninit(&s0));
TEST_CHECK(guf_str_is_uninit(&s1));
TEST_CHECK(guf_str_is_uninit(&s2));
}
void StrTest::test_init_empty()
{
std::string str = "";
guf_str s = GUF_STR_UNINITIALISED_CPP;
guf_str_init_empty(&s, &guf_allocator_libc);
TEST_CHECK(guf_str_len(&s) == 0);
TEST_CHECK(str == guf_str_const_cstr(&s));
guf_str_append_char(&s, 'a', 1024);
str.append(1024, 'a');
TEST_CHECK(guf_str_len(&s) == (ptrdiff_t)str.size());
TEST_CHECK(guf_str_const_cstr(&s) == str);
guf_str_append_char(&s, 'b', 24);
str.append(24, 'b');
TEST_CHECK(guf_str_len(&s) == (ptrdiff_t)str.size());
TEST_CHECK(guf_str_const_cstr(&s) == str);
guf_str_append_char(&s, 'c', 255);
str.append(255, 'c');
TEST_CHECK(guf_str_len(&s) == (ptrdiff_t)str.size());
TEST_CHECK(guf_str_const_cstr(&s) == str);
*guf_str_at(&s, 0) = '<';
str.at(0) = '<';
TEST_CHECK(guf_str_len(&s) == (ptrdiff_t)str.size());
TEST_CHECK(guf_str_const_cstr(&s) == str);
*guf_str_at(&s, guf_str_len(&s) - 1) = '>';
str.at(str.size() - 1) = '>';
TEST_CHECK(guf_str_len(&s) == (ptrdiff_t)str.size());
TEST_CHECK(guf_str_const_cstr(&s) == str);
guf_err err = GUF_ERR_NONE;
TEST_CHECK(NULL == guf_str_try_at(&s, guf_str_len(&s), &err));
TEST_CHECK(err != GUF_ERR_NONE && err == GUF_ERR_IDX_RANGE);
err = GUF_ERR_NONE;
TEST_CHECK(NULL == guf_str_try_at(&s, -1, &err));
TEST_CHECK(err != GUF_ERR_NONE && err == GUF_ERR_IDX_RANGE);
guf_str_free(&s, NULL);
TEST_CHECK(guf_str_is_uninit(&s));
}
void StrTest::test_append_char(std::string str, bool include_null)
{
guf_str s0 = guf_str_new(guf_str_view{.str = str.c_str(), .len = (ptrdiff_t)str.size()}, &guf_allocator_libc);
TEST_CHECK((ptrdiff_t)str.size() == guf_str_len(&s0));
TEST_CHECK((str == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)}));
for (int i = include_null ? 0 : 1; i < 128; ++i) {
char ch = (char)i;
guf_str_append_one_char(&s0, ch);
str.append(1, ch);
TEST_CHECK(guf_str_len(&s0) == (ptrdiff_t)str.size());
TEST_CHECK((str == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)}));
}
for (int i = include_null ? 0 : 1; i < 128; ++i) {
char ch = (char)i;
guf_str_append_char(&s0, ch, i);
str.append(i, ch);
TEST_CHECK(guf_str_len(&s0) == (ptrdiff_t)str.size());
TEST_CHECK((str == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)}));
guf_str_append_char(&s0, ch, i * 16);
str.append(i * 16, ch);
TEST_CHECK(guf_str_len(&s0) == (ptrdiff_t)str.size());
TEST_CHECK((str == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)}));
}
guf_str_free(&s0, NULL);
TEST_CHECK(guf_str_is_uninit(&s0));
}
void StrTest::append_str(const std::string& a, const std::string& b)
{
std::string str0 = a;
guf_str s0 = guf_str_new(guf_str_view{.str = str0.c_str(), .len = (ptrdiff_t)str0.size()}, &guf_allocator_libc);
TEST_CHECK(guf_str_len(&s0) == (ptrdiff_t)str0.size());
TEST_CHECK((str0 == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)}));
TEST_CHECK((str0 == std::string_view{guf_str_cstr(&s0), (size_t)guf_str_len(&s0)}));
for (int i = 0; i <= 64; ++i) {
str0.append(b);
guf_str_append(&s0, guf_str_view{.str = b.c_str(), .len = (ptrdiff_t)b.size()});
TEST_CHECK(guf_str_len(&s0) == (ptrdiff_t)str0.size());
TEST_CHECK((str0 == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)}));
TEST_CHECK((str0 == std::string_view{guf_str_cstr(&s0), (size_t)guf_str_len(&s0)}));
}
guf_str_free(&s0, NULL);
TEST_CHECK(guf_str_is_uninit(&s0));
}
std::vector<std::string_view> StrTest::test_popsplit(std::string_view str, std::string_view delim)
{
std::vector<std::string_view> result = {};
if (delim.size() > 0) { // NOTE: str.find with an empty delimiter returns 0, not std::string::npos
std::string_view src_cpp = str;
for (size_t idx = src_cpp.find(delim, 0); src_cpp.size() > 0; idx = src_cpp.find(delim, 0)) {
result.push_back(src_cpp.substr(0, idx));
if (idx == std::string::npos) {
break;
}
src_cpp = src_cpp.substr(idx + delim.size());
}
} else {
result.push_back(str);
}
const guf_str_view delim_sv = guf_str_view{.len = (ptrdiff_t)delim.size(), .str = delim.data()};
guf_str_view src = guf_str_view{.len = (ptrdiff_t)str.size(), .str = str.data()};
size_t n = 0;
do {
const guf_str_view popped = guf_str_view_pop_split(&src, delim_sv);
TEST_CHECK(n < result.size());
TEST_CHECK(std::string_view(popped.str, (size_t)popped.len) == result.at(n));
const guf_str_view res = {.str = result.at(n).data(), .len = (ptrdiff_t)result.at(n).size()};
TEST_CHECK(guf_str_view_equal(&popped, &res));
TEST_CHECK(guf_str_view_equal_val_arg(popped, res));
// std::cout << "guf: " << std::string_view{popped.str, (size_t)popped.len} << "\n";
// std::cout << "cpp: " << std::string_view{res.str, (size_t)res.len} << "\n";
++n;
} while (src.len > 0);
TEST_CHECK(n == result.size());
return result;
}
std::vector<std::string_view> StrTest::get_toks(std::string_view sv_in, const std::vector<std::string_view>& delims_in, bool preserve_delims, guf_str_tok_delim_opt opt)
{
const guf_str_view sv = guf_str_view{.len = (ptrdiff_t)sv_in.size(), .str = sv_in.data()};
std::vector<guf_str_view> delims;
for (const auto delim : delims_in) {
delims.push_back(guf_str_view{.len = (ptrdiff_t)delim.size(), .str = delim.data()});
}
guf_str_tok_state tok_state = guf_str_tok_state_new(sv, delims.data(), std::ssize(delims), opt);
std::vector<std::string_view> toks_out;
while (guf_str_tok_next(&tok_state, preserve_delims)) {
if (tok_state.cur_tok.len > 0) {
toks_out.push_back( std::string_view{tok_state.cur_tok.str, (size_t)tok_state.cur_tok.len});
}
if (preserve_delims && tok_state.cur_delim.len > 0) {
toks_out.push_back( std::string_view{tok_state.cur_delim.str, (size_t)tok_state.cur_delim.len});
}
}
TEST_CHECK(tok_state.done);
const ptrdiff_t num_toks = preserve_delims ? tok_state.num_delims_read + tok_state.num_toks_read : tok_state.num_toks_read;
TEST_CHECK(num_toks == std::ssize(toks_out));
return toks_out;
}

View File

@ -1,377 +1,23 @@
#pragma once
#include <vector>
#include <string>
#include "test.hpp"
extern "C" {
#include "guf_alloc_libc.h"
extern "C"
{
#include "guf_str.h"
}
struct StrTest : public Test
{
public:
// Passes the test name on to the Test base class.
StrTest(const std::string& name) : Test(name) {};
// Runs all string checks; overrides a virtual member function of Test.
void run() override;
private:
void test_init_free(std::string str)
{
guf_str s0;
guf_str_init(&s0, GUF_CSTR_TO_VIEW_CPP(str.c_str()), &guf_allocator_libc);
guf_str s1 = guf_str_new(GUF_CSTR_TO_VIEW_CPP(str.c_str()), &guf_allocator_libc);
guf_str s2;
guf_str_init_from_cstr(&s2, str.c_str(), &guf_allocator_libc);
TEST_CHECK(guf_str_equal(&s0, &s1));
TEST_CHECK(guf_str_equal(&s0, &s2));
TEST_CHECK(guf_str_equal(&s1, &s2));
TEST_CHECK((ptrdiff_t)str.size() == guf_str_len(&s0));
TEST_CHECK(str == guf_str_const_cstr(&s0));
TEST_CHECK(str == guf_str_cstr(&s0));
TEST_CHECK((ptrdiff_t)str.size() == guf_str_len(&s1));
TEST_CHECK(str == guf_str_const_cstr(&s1));
TEST_CHECK(str == guf_str_cstr(&s1));
TEST_CHECK((ptrdiff_t)str.size() == guf_str_len(&s2));
TEST_CHECK(str == guf_str_const_cstr(&s2));
TEST_CHECK(str == guf_str_cstr(&s2));
guf_str_free(&s0, NULL);
guf_str_free(&s1, NULL);
guf_str_free(&s2, NULL);
TEST_CHECK(guf_str_is_uninit(&s0));
TEST_CHECK(guf_str_is_uninit(&s1));
TEST_CHECK(guf_str_is_uninit(&s2));
}
void test_init_empty()
{
std::string str = "";
guf_str s = GUF_STR_UNINITIALISED_CPP;
guf_str_init_empty(&s, &guf_allocator_libc);
TEST_CHECK(guf_str_len(&s) == 0);
TEST_CHECK(str == guf_str_const_cstr(&s));
guf_str_append_char(&s, 'a', 1024);
str.append(1024, 'a');
TEST_CHECK(guf_str_len(&s) == (ptrdiff_t)str.size());
TEST_CHECK(guf_str_const_cstr(&s) == str);
guf_str_append_char(&s, 'b', 24);
str.append(24, 'b');
TEST_CHECK(guf_str_len(&s) == (ptrdiff_t)str.size());
TEST_CHECK(guf_str_const_cstr(&s) == str);
guf_str_append_char(&s, 'c', 255);
str.append(255, 'c');
TEST_CHECK(guf_str_len(&s) == (ptrdiff_t)str.size());
TEST_CHECK(guf_str_const_cstr(&s) == str);
*guf_str_at(&s, 0) = '<';
str.at(0) = '<';
TEST_CHECK(guf_str_len(&s) == (ptrdiff_t)str.size());
TEST_CHECK(guf_str_const_cstr(&s) == str);
*guf_str_at(&s, guf_str_len(&s) - 1) = '>';
str.at(str.size() - 1) = '>';
TEST_CHECK(guf_str_len(&s) == (ptrdiff_t)str.size());
TEST_CHECK(guf_str_const_cstr(&s) == str);
guf_err err = GUF_ERR_NONE;
TEST_CHECK(NULL == guf_str_try_at(&s, guf_str_len(&s), &err));
TEST_CHECK(err != GUF_ERR_NONE && err == GUF_ERR_IDX_RANGE);
err = GUF_ERR_NONE;
TEST_CHECK(NULL == guf_str_try_at(&s, -1, &err));
TEST_CHECK(err != GUF_ERR_NONE && err == GUF_ERR_IDX_RANGE);
guf_str_free(&s, NULL);
TEST_CHECK(guf_str_is_uninit(&s));
}
void test_append_char(std::string str, bool include_null = false)
{
guf_str s0 = guf_str_new(guf_str_view{.str = str.c_str(), .len = (ptrdiff_t)str.size()}, &guf_allocator_libc);
TEST_CHECK((ptrdiff_t)str.size() == guf_str_len(&s0));
TEST_CHECK((str == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)}));
for (int i = include_null ? 0 : 1; i < 128; ++i) {
char ch = (char)i;
guf_str_append_one_char(&s0, ch);
str.append(1, ch);
TEST_CHECK(guf_str_len(&s0) == (ptrdiff_t)str.size());
TEST_CHECK((str == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)}));
}
for (int i = include_null ? 0 : 1; i < 128; ++i) {
char ch = (char)i;
guf_str_append_char(&s0, ch, i);
str.append(i, ch);
TEST_CHECK(guf_str_len(&s0) == (ptrdiff_t)str.size());
TEST_CHECK((str == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)}));
guf_str_append_char(&s0, ch, i * 16);
str.append(i * 16, ch);
TEST_CHECK(guf_str_len(&s0) == (ptrdiff_t)str.size());
TEST_CHECK((str == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)}));
}
guf_str_free(&s0, NULL);
TEST_CHECK(guf_str_is_uninit(&s0));
}
void append_str(const std::string& a, const std::string& b)
{
std::string str0 = a;
guf_str s0 = guf_str_new(guf_str_view{.str = str0.c_str(), .len = (ptrdiff_t)str0.size()}, &guf_allocator_libc);
TEST_CHECK(guf_str_len(&s0) == (ptrdiff_t)str0.size());
TEST_CHECK((str0 == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)}));
TEST_CHECK((str0 == std::string_view{guf_str_cstr(&s0), (size_t)guf_str_len(&s0)}));
for (int i = 0; i <= 64; ++i) {
str0.append(b);
guf_str_append(&s0, guf_str_view{.str = b.c_str(), .len = (ptrdiff_t)b.size()});
TEST_CHECK(guf_str_len(&s0) == (ptrdiff_t)str0.size());
TEST_CHECK((str0 == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)}));
TEST_CHECK((str0 == std::string_view{guf_str_cstr(&s0), (size_t)guf_str_len(&s0)}));
}
guf_str_free(&s0, NULL);
TEST_CHECK(guf_str_is_uninit(&s0));
}
auto test_popsplit(std::string_view str, std::string_view delim)
{
std::vector<std::string_view> result = {};
if (delim.size() > 0) { // NOTE: str.find with an empty delimiter returns 0, not std::string::npos
std::string_view src_cpp = str;
for (size_t idx = src_cpp.find(delim, 0); src_cpp.size() > 0; idx = src_cpp.find(delim, 0)) {
result.push_back(src_cpp.substr(0, idx));
if (idx == std::string::npos) {
break;
}
src_cpp = src_cpp.substr(idx + delim.size());
}
} else {
result.push_back(str);
}
const guf_str_view delim_sv = guf_str_view{.len = (ptrdiff_t)delim.size(), .str = delim.data()};
guf_str_view src = guf_str_view{.len = (ptrdiff_t)str.size(), .str = str.data()};
size_t n = 0;
do {
const guf_str_view popped = guf_str_view_pop_split(&src, delim_sv);
TEST_CHECK(n < result.size());
TEST_CHECK(std::string_view(popped.str, (size_t)popped.len) == result.at(n));
const guf_str_view res = {.str = result.at(n).data(), .len = (ptrdiff_t)result.at(n).size()};
TEST_CHECK(guf_str_view_equal(&popped, &res));
TEST_CHECK(guf_str_view_equal_val_arg(popped, res));
// std::cout << "guf: " << std::string_view{popped.str, (size_t)popped.len} << "\n";
// std::cout << "cpp: " << std::string_view{res.str, (size_t)res.len} << "\n";
++n;
} while (src.len > 0);
TEST_CHECK(n == result.size());
return result;
}
std::vector<std::string_view> get_toks(std::string_view sv_in, const std::vector<std::string_view>& delims_in, bool preserve_delims = false, guf_str_tok_delim_opt opt = GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST)
{
const guf_str_view sv = guf_str_view{.len = (ptrdiff_t)sv_in.size(), .str = sv_in.data()};
std::vector<guf_str_view> delims;
for (const auto delim : delims_in) {
delims.push_back(guf_str_view{.len = (ptrdiff_t)delim.size(), .str = delim.data()});
}
guf_str_tok_state tok_state = guf_str_tok_state_new(sv, delims.data(), std::ssize(delims), opt);
std::vector<std::string_view> toks_out;
while (guf_str_tok_next(&tok_state, preserve_delims)) {
if (tok_state.cur_tok.len > 0) {
toks_out.push_back( std::string_view{tok_state.cur_tok.str, (size_t)tok_state.cur_tok.len});
}
if (preserve_delims && tok_state.cur_delim.len > 0) {
toks_out.push_back( std::string_view{tok_state.cur_delim.str, (size_t)tok_state.cur_delim.len});
}
}
TEST_CHECK(tok_state.done);
const ptrdiff_t num_toks = preserve_delims ? tok_state.num_delims_read + tok_state.num_toks_read : tok_state.num_toks_read;
TEST_CHECK(num_toks == std::ssize(toks_out));
return toks_out;
}
public:
/*
    Runs every string check (init/free, character appends, string appends, pop-split and
    tokenizing) and returns whether no check failed. The result is cached: later calls return
    `passed` without running anything again.
*/
bool run()
{
    if (done) {
        return passed;
    }

    // NOTE: the literals are converted through std::string's const char* constructor, which stops at the
    // first '\0'. "\0" therefore is an empty string, "Othell\0o" is "Othell" and "f\0\0" is "f".
    const std::vector<std::string> words = {
        "",
        "\0",
        "Hello",
        "Othell\0o",
        "f\0\0",
        "\0",
        "0",
        "a",
        "ab",
        "🌈 waow a rainboge!",
        "orange cat(1) :3",
        "xes yag",
        "Hello, world! This is a pretty darn long string I'd say...",
        "I want to eat crayons. I crave crayons because they are tasty, and everybody telling me crayons are not edible must be either lying or dumb. I like trains. 42 is a number. 3.14159265... is not a rational number, and it is called pi. I ate some pie (it was a crayon pie).",
        std::string(32, 'a'),
        std::string(64, 'b'),
        std::string(1024, 'a'),
        std::string(2048, 'a'),
        std::string(4096, 'a'),
        std::string(5001, 'a'),
        std::string(7121, 'a'),
        std::string(2000, 'a'),
        // Lengths around (multiples of) GUF_STR_SSO_BUF_CAP.
        std::string(GUF_STR_SSO_BUF_CAP, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP - 1, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP + 1, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP - 2, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP + 2, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP - 3, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP + 3, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP * 2, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP * 3, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP * 4, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP * 5, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP * 6, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP * 7, 'a'),
    };

    test_init_empty();

    for (const auto& word : words) {
        test_init_free(word);
        test_append_char(word);
        test_append_char(word, true);
    }

    // Every word is appended to itself (twice) and combined with every later word in both orders.
    for (size_t i = 0; i < words.size(); ++i) {
        const auto& w1 = words.at(i);
        append_str(w1, w1);
        append_str(w1, w1);
        for (size_t j = i + 1; j < words.size(); ++j) {
            const auto& w2 = words.at(j);
            append_str(w1, w2);
            append_str(w2, w1);
        }
    }

    std::vector<std::string_view> split = test_popsplit("1997-04-01", "-");
    if (TEST_CHECK(split.size() == 3)) {
        TEST_CHECK(split.at(0) == "1997" && split.at(1) == "04" && split.at(2) == "01");
    }
    split = test_popsplit("1997-04-01-", "-");
    if (TEST_CHECK(split.size() == 3)) {
        TEST_CHECK(split.at(0) == "1997" && split.at(1) == "04" && split.at(2) == "01");
    }
    split = test_popsplit("2025/05/08", "/");
    if (TEST_CHECK(split.size() == 3)) {
        TEST_CHECK(split.at(0) == "2025" && split.at(1) == "05" && split.at(2) == "08");
    }
    split = test_popsplit("2025/05/08/", "/");
    if (TEST_CHECK(split.size() == 3)) {
        TEST_CHECK(split.at(0) == "2025" && split.at(1) == "05" && split.at(2) == "08");
    }
    split = test_popsplit("2025/05/08//", "/");
    if (TEST_CHECK(split.size() == 4)) {
        TEST_CHECK(split.at(0) == "2025" && split.at(1) == "05" && split.at(2) == "08" && split.at(3) == "");
    }
    split = test_popsplit("/2025/05/08", "/");
    if (TEST_CHECK(split.size() == 4)) {
        TEST_CHECK(split.at(0) == "" && split.at(1) == "2025" && split.at(2) == "05" && split.at(3) == "08");
    }
    split = test_popsplit("//2025/05/08", "/");
    if (TEST_CHECK(split.size() == 5)) {
        TEST_CHECK(split.at(0) == "" && split.at(1) == "" && split.at(2) == "2025" && split.at(3) == "05" && split.at(4) == "08");
    }
    split = test_popsplit("I eat formidable crayons, oof, for real", "foo");
    if (TEST_CHECK(split.size() == 1)) {
        TEST_CHECK(split.at(0) == "I eat formidable crayons, oof, for real");
    }
    split = test_popsplit("Hej <<", "<<");
    if (TEST_CHECK(split.size() == 1)) {
        TEST_CHECK(split.at(0) == "Hej ");
    }
    split = test_popsplit("Hej << verden", "<<");
    if (TEST_CHECK(split.size() == 2)) {
        TEST_CHECK(split.at(0) == "Hej " && split.at(1) == " verden");
    }
    split = test_popsplit("<< Hej << verden", "<<");
    if (TEST_CHECK(split.size() == 3)) {
        TEST_CHECK(split.at(0) == "" && split.at(1) == " Hej " && split.at(2) == " verden");
    }
    split = test_popsplit("<< Hej << verden <<< foo<>", "<<");
    if (TEST_CHECK(split.size() == 4)) {
        TEST_CHECK(split.at(0) == "" && split.at(1) == " Hej " && split.at(2) == " verden " && split.at(3) == "< foo<>");
    }
    split = test_popsplit("I eat tofu", "");
    if (TEST_CHECK(split.size() == 1)) {
        TEST_CHECK(split.at(0) == "I eat tofu");
    }
    split = test_popsplit("At 3 a.m. during FULL-moon FULL-STOP Next to the public-library's -STOP sign FULL-STOP", "FULL-STOP");
    if (TEST_CHECK(split.size() == 2)) {
        TEST_CHECK(split.at(0) == "At 3 a.m. during FULL-moon " && split.at(1) == " Next to the public-library's -STOP sign ");
    }
    split = test_popsplit("At 3 a.m. during FULL-moon FULL-STOP Next to the public-library's -STOP sign FULL-STOPI like trains, FULL-STO", "FULL-STOP");
    if (TEST_CHECK(split.size() == 3)) {
        TEST_CHECK(split.at(0) == "At 3 a.m. during FULL-moon " && split.at(1) == " Next to the public-library's -STOP sign " && split.at(2) == "I like trains, FULL-STO");
    }
    split = test_popsplit("At 3 a.m. during FULL-moon FULL-STOP Next to the public-library's -STOP sign FULL-STOPI like trains, FULL-STO Poo", "FULL-STOP");
    if (TEST_CHECK(split.size() == 3)) {
        TEST_CHECK(split.at(0) == "At 3 a.m. during FULL-moon " && split.at(1) == " Next to the public-library's -STOP sign " && split.at(2) == "I like trains, FULL-STO Poo");
    }

    std::vector<std::string_view> tok_words = {"hello", "world", "cat", "vertex", "normal", "pizza", "running", "mouse", "playing", "adjacent"};
    std::vector<std::string_view> delims = {",", " ", "\n", "\t", "\r"};
    for (int is_trailing = 0; is_trailing < 2; ++is_trailing) {
        // Inclusive upper bound, so that the last entry of tok_words is exercised as well.
        for (ptrdiff_t num_words = 1; num_words <= std::ssize(tok_words); ++num_words) {
            // Join the first num_words words with ", " (plus a trailing ", " if is_trailing is set).
            std::string str = "";
            for (ptrdiff_t j = 0; j < num_words; ++j) {
                str += tok_words.at(j);
                if (j < num_words - 1 || is_trailing) {
                    str += ", ";
                }
            }
            std::vector<std::string_view> toks = get_toks(std::string_view{str}, delims, false, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST);
            if (TEST_CHECK(std::ssize(toks) == num_words)) {
                for (ptrdiff_t i = 0; i < num_words; ++i) {
                    TEST_CHECK(toks.at(i) == tok_words.at(i));
                }
            }
        }
    }

    std::string_view tok_str = "<stats>age: 28, occupation: NULL, crayons_eaten: 256 </stats>";
    delims = {"<stats>", "</stats>", ":", ",", " ", "\t", "<stats", "<", ">", "</"};
    auto tok_result = get_toks(tok_str, delims, true, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST);
    TEST_CHECK(tok_result.size() == 19);
    TEST_CHECK(tok_result.at(18) == "</stats>" && tok_result.at(0) == "<stats>" && tok_result.at(1) == "age" && tok_result.at(2) == ":" && tok_result.at(3) == " " && tok_result.at(4) == "28");
    tok_result = get_toks(tok_str, delims, false, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST);
    TEST_CHECK(tok_result.size() == 6);
    TEST_CHECK(tok_result.at(0) == "age" && tok_result.at(1) == "28" && tok_result.at(2) == "occupation" && tok_result.at(3) == "NULL" &&
        tok_result.at(4) == "crayons_eaten" && tok_result.at(5) == "256");

    done = true;
    passed = (num_failed_checks == 0);
    return passed;
}
};
// Creates `str` via guf_str_init, guf_str_new and guf_str_init_from_cstr and checks that all three match it.
void test_init_free(std::string str);
// Starts from an empty guf_str and mirrors appends, element writes and out-of-range accesses against std::string.
void test_init_empty();
// Appends the characters 1..127 (0..127 if include_null is true), singly and in runs, comparing against std::string.
void test_append_char(std::string str, bool include_null = false);
// Appends `b` 65 times to a guf_str created from `a`, comparing against std::string after each append.
void append_str(const std::string& a, const std::string& b);
// Splits `str` at `delim` with guf_str_view_pop_split, checks the pieces against a std::string_view split and returns the latter.
std::vector<std::string_view> test_popsplit(std::string_view str, std::string_view delim);
// Tokenises `sv_in` with guf_str_tok_next; returns the non-empty tokens (and non-empty delimiters if preserve_delims is true).
std::vector<std::string_view> get_toks(std::string_view sv_in, const std::vector<std::string_view>& delims_in, bool preserve_delims = false, guf_str_tok_delim_opt opt = GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST);
};

388
src/test/test_utf8.cpp Normal file
View File

@ -0,0 +1,388 @@
#include "test_utf8.hpp"
extern "C"
{
#include "guf_alloc_libc.h"
#include "guf_str.h"
#include "impls/dict_impl.h"
}
/*
UTF8Test:
*/
void UTF8Test::run()
{
if (done) {
return;
}
push_check_name("read_utf8_chars");
ptrdiff_t valid = 0, invalid = 0;
read_utf8_chars(TEST_DATA_DIR "/" "utf8-test.txt", &valid, &invalid);
TEST_CHECK(valid == 2635 && invalid == 0);
read_utf8_chars(TEST_DATA_DIR "/" "bartleby.txt", &valid, &invalid);
TEST_CHECK(valid > 16000 && invalid == 0);
pop_check_name();
push_check_name("count_words");
dbuf_str_view delims = dbuf_str_view_new(&guf_allocator_libc);
for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_WHITESPACE); ++i) {
guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_WHITESPACE[i]), .str = GUF_UTF8_WHITESPACE[i]};
dbuf_str_view_push_val(&delims, d);
}
for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_COMMON_PUNCT); ++i) {
guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_COMMON_PUNCT[i]), .str = GUF_UTF8_COMMON_PUNCT[i]};
dbuf_str_view_push_val(&delims, d);
}
int words = count_words(TEST_DATA_DIR "/" "utf8-test.txt", &delims);
TEST_CHECK(words == 422);
int words_with_delims = count_words_with_delims(TEST_DATA_DIR "/" "utf8-test.txt", &delims);
TEST_CHECK(words_with_delims == 950);
int words2 = count_words(TEST_DATA_DIR "/" "bartleby.txt", &delims);
TEST_CHECK(words2 > 2048);
dbuf_str_view_free(&delims, NULL);
pop_check_name();
push_check_name("encode_decode");
encode_decode();
encode_decode_file(TEST_DATA_DIR "/" "utf8-test.txt");
encode_decode_file(TEST_DATA_DIR "/" "bartleby.txt");
pop_check_name();
}
/*
    Reads the file `fname` byte by byte and stores every byte in both text_buf and text_vec.
    Returns false if the file cannot be opened; otherwise returns the result of the check
    that both containers hold the same number of bytes.
*/
bool UTF8Test::load_text(const char *fname)
{
    FILE *const fp = fopen(fname, "r");
    if (fp == nullptr) {
        return false;
    }

    dbuf_char_init(&text_buf, 128, &guf_allocator_libc);
    for (int byte = fgetc(fp); byte != EOF; byte = fgetc(fp)) {
        const char ch = (char)byte;
        dbuf_char_push_val(&text_buf, ch);
        text_vec.push_back(ch);
    }
    fclose(fp);

    const bool same_size = TEST_CHECK(std::ssize(text_vec) == text_buf.size);
    return same_size;
}
/* Discards the text loaded by load_text from both containers. */
void UTF8Test::free_text()
{
    text_vec.clear();
    dbuf_char_free(&text_buf, NULL);
}
/*
    Loads `fname` and reads it character by character with guf_utf8_char_next, counting the
    characters reported as GUF_UTF8_READ_VALID and all others separately. Checks that the
    input view is exhausted afterwards and that the bytes of all characters add up to the
    size of the loaded text.
    The counts are written to *n_valid and *n_invalid; either pointer may be null.
*/
void UTF8Test::read_utf8_chars(const char *fname, ptrdiff_t *n_valid, ptrdiff_t *n_invalid)
{
    GUF_ASSERT_RELEASE(load_text(fname));

    ptrdiff_t valid_chars = 0, invalid_chars = 0, bytes = 0;

    // Members are assigned by name rather than with designated initializers: C++20 only accepts
    // designators in declaration order, and assignment works regardless of how guf_str_view orders its members.
    guf_str_view input_str{};
    input_str.str = text_buf.data;
    input_str.len = text_buf.size;

    guf_utf8_char ch = {};
    for (guf_utf8_stat stat = guf_utf8_char_next(&ch, &input_str); stat != GUF_UTF8_READ_DONE; stat = guf_utf8_char_next(&ch, &input_str)) {
        if (stat == GUF_UTF8_READ_VALID) {
            ++valid_chars;
        } else {
            ++invalid_chars;
        }
        bytes += guf_utf8_char_num_bytes(&ch);
    }
    TEST_CHECK(input_str.len == 0 && input_str.str == NULL);
    TEST_CHECK(bytes == text_buf.size);

    free_text();

    if (n_valid) {
        *n_valid = valid_chars;
    }
    if (n_invalid) {
        *n_invalid = invalid_chars;
    }
}
int UTF8Test::count_words(const char *fname, const dbuf_str_view *delims)
{
GUF_ASSERT_RELEASE(load_text(fname));
int num_words = 0;
guf_str_tok_state tok_state = guf_str_tok_state_new(guf_str_view{.str = text_buf.data, .len = text_buf.size}, delims->data, delims->size, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST);
while (guf_str_tok_next(&tok_state, false)) {
TEST_CHECK(tok_state.cur_tok.len > 0);
++num_words;
}
free_text();
return num_words;
}
int UTF8Test::count_words_with_delims(const char *fname, const dbuf_str_view *delims)
{
GUF_ASSERT_RELEASE(load_text(fname));
int num_words = 0, num_delims = 0;
guf_str_tok_state tok_state = guf_str_tok_state_new(guf_str_view{.str = text_buf.data, .len = text_buf.size}, delims->data, delims->size, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST);
while (guf_str_tok_next(&tok_state, true)) {
if (tok_state.cur_tok.len) {
++num_words;
// printf("'%.*s'\n", (int)tok_state.cur_tok.len, tok_state.cur_tok.str);
}
if (tok_state.cur_delim.len) {
++num_delims;
// if (tok_state.cur_delim.str[0] == '\n')
// printf("'\\n'\n");
// else
// printf("'%.*s'\n", (int)tok_state.cur_delim.len, tok_state.cur_delim.str);
}
}
free_text();
return num_words + num_delims;
}
void UTF8Test::encode_decode_file(const char *fname)
{
GUF_ASSERT_RELEASE(load_text(fname));
dbuf_i32 cp_buf = dbuf_i32_new(&guf_allocator_libc);
ptrdiff_t valid_chars = 0, invalid_chars = 0;
guf_str_view input_str = {.str = text_buf.data, .len = text_buf.size};
guf_utf8_char ch = {};
for (guf_utf8_stat stat = guf_utf8_char_next(&ch, &input_str); stat != GUF_UTF8_READ_DONE; stat = guf_utf8_char_next(&ch, &input_str)) {
if (stat == GUF_UTF8_READ_VALID) {
++valid_chars;
const int32_t codepoint = guf_utf8_decode(&ch);
TEST_CHECK(codepoint >= 0);
dbuf_i32_push_val(&cp_buf, codepoint);
} else {
++invalid_chars;
const int32_t codepoint = guf_utf8_decode(&ch);
TEST_CHECK(codepoint < 0);
dbuf_i32_push_val(&cp_buf, -1);
}
}
TEST_CHECK(cp_buf.size == valid_chars + invalid_chars);
guf_str_view in_str = {.str = text_buf.data, .len = text_buf.size};
GUF_CNT_FOREACH(&cp_buf, dbuf_i32, it) {
GUF_ASSERT_RELEASE(it.ptr);
const int32_t codepoint = *it.ptr;
guf_utf8_char utf8_ch = {};
const guf_utf8_stat stat = guf_utf8_char_next(&utf8_ch, &in_str);
if (codepoint >= 0) {
TEST_CHECK(stat == GUF_UTF8_READ_VALID);
guf_utf8_char encoded_ch = {};
TEST_CHECK(guf_utf8_encode(&encoded_ch, codepoint));
TEST_CHECK(guf_utf8_equal(&encoded_ch, &utf8_ch));
}
}
guf_utf8_char utf8_ch = {};
const guf_utf8_stat stat = guf_utf8_char_next(&utf8_ch, &in_str);
TEST_CHECK(stat == GUF_UTF8_READ_DONE);
dbuf_i32_free(&cp_buf, NULL);
free_text();
}
void UTF8Test::encode_decode()
{
guf_utf8_char utf8 = {0};
// 1 byte characters.
for (uint8_t ascii = 0; ascii <= 0x7F; ++ascii) {
TEST_CHECK(guf_utf8_encode(&utf8, ascii));
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 1);
TEST_CHECK(utf8.bytes[0] == ascii);
TEST_CHECK(utf8.bytes[1] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == ascii);
}
// 2 byte characters:
TEST_CHECK(guf_utf8_encode(&utf8, 0x00E6)); // "æ" (Latin Small Letter Ae)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2);
TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\xA6');
TEST_CHECK(utf8.bytes[2] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x00E6);
TEST_CHECK(guf_utf8_encode(&utf8, 0x00E5)); // "å" (Latin Small Letter A with Ring Above)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2);
TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\xA5');
TEST_CHECK(utf8.bytes[2] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x00E5);
TEST_CHECK(guf_utf8_encode(&utf8, 0x00F8)); // "ø" (Latin Small Letter O with Stroke)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2);
TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\xB8');
TEST_CHECK(utf8.bytes[2] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x00F8);
TEST_CHECK(guf_utf8_encode(&utf8, 0x00E4)); // "ä" (Latin Small Letter A with Diaeresis)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2);
TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\xA4');
TEST_CHECK(utf8.bytes[2] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x00E4);
TEST_CHECK(guf_utf8_encode(&utf8, 0x00F6)); // "ö" (Latin Small Letter O with Diaeresis)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2);
TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\xB6');
TEST_CHECK(utf8.bytes[2] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x00F6);
TEST_CHECK(guf_utf8_encode(&utf8, 0x00D6)); // "Ö" (Latin Capital Letter O with Diaeresis)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2);
TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\x96');
TEST_CHECK(utf8.bytes[2] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x00D6);
TEST_CHECK(guf_utf8_encode(&utf8, 0x00FC)); // "ü" (Latin Small Letter U with Diaeresis)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2);
TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\xBC');
TEST_CHECK(utf8.bytes[2] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x00FC);
TEST_CHECK(guf_utf8_encode(&utf8, 0x00B5)); // "µ" (Micro Sign)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2);
TEST_CHECK(utf8.bytes[0] == '\xC2' && utf8.bytes[1] == '\xB5');
TEST_CHECK(utf8.bytes[2] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x00B5);
TEST_CHECK(guf_utf8_encode(&utf8, 0x030A)); // "◌̊" (Combining Ring Above)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2);
TEST_CHECK(utf8.bytes[0] == '\xCC' && utf8.bytes[1] == '\x8A');
TEST_CHECK(utf8.bytes[2] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x030A);
// 3 byte characters:
TEST_CHECK(guf_utf8_encode(&utf8, 0x7121)); // "無" (Nothingness; CJK Unified Ideograph-7121)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 3);
TEST_CHECK(!guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR));
TEST_CHECK(utf8.bytes[0] == '\xE7' && utf8.bytes[1] == '\x84' && utf8.bytes[2] == '\xA1');
TEST_CHECK(utf8.bytes[3] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x7121);
TEST_CHECK(guf_utf8_encode(&utf8, 0x201E)); // "„" (Double Low-9 Quotation Mark)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 3);
TEST_CHECK(!guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR));
TEST_CHECK(utf8.bytes[0] == '\xE2' && utf8.bytes[1] == '\x80' && utf8.bytes[2] == '\x9E');
TEST_CHECK(utf8.bytes[3] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x201E);
TEST_CHECK(guf_utf8_encode(&utf8, 0x20AC)); // "€" (Euro Sign)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 3);
TEST_CHECK(!guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR));
TEST_CHECK(utf8.bytes[0] == '\xE2' && utf8.bytes[1] == '\x82' && utf8.bytes[2] == '\xAC');
TEST_CHECK(utf8.bytes[3] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x20AC);
TEST_CHECK(guf_utf8_encode(&utf8, 0xFC51)); // "ﱑ" (Arabic Ligature Heh with Jeem Isolated Form)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 3);
TEST_CHECK(!guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR));
TEST_CHECK(utf8.bytes[0] == '\xEF' && utf8.bytes[1] == '\xB1' && utf8.bytes[2] == '\x91');
TEST_CHECK(utf8.bytes[3] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0xFC51);
TEST_CHECK(guf_utf8_encode(&utf8, 0x1AA3)); // "᪣" (Tai Tham Sign Keow)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 3);
TEST_CHECK(!guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR));
TEST_CHECK(utf8.bytes[0] == '\xE1' && utf8.bytes[1] == '\xAA' && utf8.bytes[2] == '\xA3');
TEST_CHECK(utf8.bytes[3] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x1AA3);
TEST_CHECK(guf_utf8_encode(&utf8, GUF_UTF8_REPLACEMENT_CHAR_CODEPOINT)); // "<22>" (Replacement Character)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 3);
TEST_CHECK(utf8.bytes[0] == '\xEF' && utf8.bytes[1] == '\xBF' && utf8.bytes[2] == '\xBD');
TEST_CHECK(utf8.bytes[3] == '\0');
TEST_CHECK(guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR));
TEST_CHECK(guf_utf8_decode(&utf8) == GUF_UTF8_REPLACEMENT_CHAR_CODEPOINT);
// 4 byte characters:
TEST_CHECK(guf_utf8_encode(&utf8, 0x1F308)); // "🌈" (Rainbow)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 4);
TEST_CHECK(utf8.bytes[0] == '\xF0' && utf8.bytes[1] == '\x9F' && utf8.bytes[2] == '\x8C' && utf8.bytes[3] == '\x88');
TEST_CHECK(utf8.bytes[4] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x1F308);
TEST_CHECK(guf_utf8_encode(&utf8, 0x130B8)); // "𓂸" (Egyptian Hieroglyph D052)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 4);
TEST_CHECK(utf8.bytes[0] == '\xF0' && utf8.bytes[1] == '\x93' && utf8.bytes[2] == '\x82' && utf8.bytes[3] == '\xB8');
TEST_CHECK(utf8.bytes[4] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x130B8);
TEST_CHECK(guf_utf8_encode(&utf8, 0x1F97A)); // "🥺" (Face with Pleading Eyes)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 4);
TEST_CHECK(utf8.bytes[0] == '\xF0' && utf8.bytes[1] == '\x9F' && utf8.bytes[2] == '\xA5' && utf8.bytes[3] == '\xBA');
TEST_CHECK(utf8.bytes[4] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x1F97A);
TEST_CHECK(guf_utf8_encode(&utf8, 0x1F980)); // "🦀" (Crab)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 4);
TEST_CHECK(utf8.bytes[0] == '\xF0' && utf8.bytes[1] == '\x9F' && utf8.bytes[2] == '\xA6' && utf8.bytes[3] == '\x80');
TEST_CHECK(utf8.bytes[4] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x1F980);
// Invalid characters:
utf8 = {.bytes = {'\xC0', '\x80', 0, 0, 0}};
TEST_CHECK(guf_utf8_decode(&utf8) < 0);
utf8 = {.bytes = {'\xC0', 0, 0, 0, 0}};
TEST_CHECK(guf_utf8_decode(&utf8) < 0);
utf8 = {.bytes = {'\x80', 0, 0, 0, 0}};
TEST_CHECK(guf_utf8_decode(&utf8) < 0);
// "The definition of UTF-8 prohibits encoding character numbers between U+D800 and U+DFFF" (surrogate pairs).
TEST_CHECK(!guf_utf8_encode(&utf8, 0xD800));
TEST_CHECK(guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR));
TEST_CHECK(guf_utf8_decode(&utf8) == GUF_UTF8_REPLACEMENT_CHAR_CODEPOINT);
TEST_CHECK(!guf_utf8_encode(&utf8, 0xDFFF));
TEST_CHECK(guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR));
TEST_CHECK(guf_utf8_decode(&utf8) == GUF_UTF8_REPLACEMENT_CHAR_CODEPOINT);
TEST_CHECK(!guf_utf8_encode(&utf8, 0xDA00));
TEST_CHECK(guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR));
TEST_CHECK(guf_utf8_decode(&utf8) == GUF_UTF8_REPLACEMENT_CHAR_CODEPOINT);
char buf[] = {'\x2F', '\xC0', '\xAE', '\x2E', '\x2F'};
guf_str_view input_str = {.str = buf, .len = GUF_ARR_SIZE(buf)};
guf_utf8_char ch = {};
int valid_chars = 0, invalid_chars = 0;
for (guf_utf8_stat stat = guf_utf8_char_next(&ch, &input_str); stat != GUF_UTF8_READ_DONE; stat = guf_utf8_char_next(&ch, &input_str)) {
if (stat == GUF_UTF8_READ_VALID) {
++valid_chars;
} else {
++invalid_chars;
}
}
TEST_CHECK(invalid_chars == 2 && valid_chars == 3);
char buf2[] = {'\xE0', '\x80', 'a', 'b', 'c'}; // 1 invalid 3-byte-character, 2 valid 1-byte-characters
input_str = {.str = buf2, .len = GUF_ARR_SIZE(buf2)};
ch = {};
valid_chars = invalid_chars = 0;
for (guf_utf8_stat stat = guf_utf8_char_next(&ch, &input_str); stat != GUF_UTF8_READ_DONE; stat = guf_utf8_char_next(&ch, &input_str)) {
if (stat == GUF_UTF8_READ_VALID) {
// printf("%s", ch.bytes);
++valid_chars;
} else {
// printf("%s", GUF_UTF8_REPLACEMENT_CHAR.bytes);
++invalid_chars;
}
}
TEST_CHECK(invalid_chars == 1 && valid_chars == 2);
}

View File

@ -1,397 +1,26 @@
#pragma once
#include <vector>
#include "test.hpp"
extern "C"
{
#include "guf_alloc_libc.h"
#include "guf_str.h"
#include "impls/dict_impl.h"
#include "impls/dbuf_impl.h"
}
struct UTF8Test : public Test
{
UTF8Test(const std::string& name) : Test(name) {};
void run() override;
private:
dbuf_char text_buf {};
std::vector<char> text_vec;
// Reads the file `fname` byte by byte into both `text_buf` and `text_vec`.
// Returns false if the file cannot be opened; otherwise returns the result of checking
// that both containers ended up with the same number of elements.
bool load_text(const char *fname)
{
FILE *in_file {nullptr};
// in_file was just initialised to nullptr, so this condition always holds.
if (!in_file) {
in_file = fopen(fname, "r");
}
// NOTE(review): the next two declarations look like lines from the other side of the
// diff that ended up interleaved here; inside a function body they are merely local
// function declarations with no effect -- confirm against the real header.
bool load_text(const char *fname);
void free_text();
// fopen failed: report the failure to the caller instead of reading.
if (!in_file) {
return false;
}
// NOTE(review): 128 is presumably the initial capacity of the buffer -- confirm in the dbuf API.
dbuf_char_init(&text_buf, 128, &guf_allocator_libc);
int c = EOF;
// Copy every character up to end-of-file into both containers.
while ((c = fgetc(in_file)) != EOF) {
dbuf_char_push_val(&text_buf, (char)c);
text_vec.push_back((char)c);
}
fclose(in_file);
// Both containers received the same pushes, so their sizes must agree.
return TEST_CHECK(std::ssize(text_vec) == text_buf.size);
}
// Drops the text held by the fixture: empties `text_vec` and frees `text_buf`.
void free_text()
{
    text_vec.clear();
    dbuf_char_free(&text_buf, NULL);
}
void read_utf8_chars(const char *fname, ptrdiff_t *n_valid, ptrdiff_t *n_invalid)
{
GUF_ASSERT_RELEASE(load_text(fname));
ptrdiff_t valid_chars = 0, invalid_chars = 0, bytes = 0;
guf_str_view input_str = {.str = text_buf.data, .len = text_buf.size};
guf_utf8_char ch = {};
for (guf_utf8_stat stat = guf_utf8_char_next(&ch, &input_str); stat != GUF_UTF8_READ_DONE; stat = guf_utf8_char_next(&ch, &input_str)) {
if (stat == GUF_UTF8_READ_VALID) {
++valid_chars;
// printf("%s", ch.bytes);
} else {
++invalid_chars;
// printf("::INVALID_UTF8_CHAR::");
}
bytes += guf_utf8_char_num_bytes(&ch);
}
TEST_CHECK(input_str.len == 0 && input_str.str == NULL);
TEST_CHECK(bytes == text_buf.size);
// printf("\nread %td bytes\n", bytes);
// printf("read %td valid and %td invalid utf-8 characters\n", valid_chars, invalid_chars);
free_text();
if (n_valid)
*n_valid = valid_chars;
if (n_invalid)
*n_invalid = invalid_chars;
}
int count_words(const char *fname, const dbuf_str_view *delims)
{
GUF_ASSERT_RELEASE(load_text(fname));
int num_words = 0;
guf_str_tok_state tok_state = guf_str_tok_state_new(guf_str_view{.str = text_buf.data, .len = text_buf.size}, delims->data, delims->size, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST);
while (guf_str_tok_next(&tok_state, false)) {
TEST_CHECK(tok_state.cur_tok.len > 0);
++num_words;
}
free_text();
return num_words;
}
int count_words_with_delims(const char *fname, const dbuf_str_view *delims)
{
GUF_ASSERT_RELEASE(load_text(fname));
int num_words = 0, num_delims = 0;
guf_str_tok_state tok_state = guf_str_tok_state_new(guf_str_view{.str = text_buf.data, .len = text_buf.size}, delims->data, delims->size, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST);
while (guf_str_tok_next(&tok_state, true)) {
if (tok_state.cur_tok.len) {
++num_words;
// printf("'%.*s'\n", (int)tok_state.cur_tok.len, tok_state.cur_tok.str);
}
if (tok_state.cur_delim.len) {
++num_delims;
// if (tok_state.cur_delim.str[0] == '\n')
// printf("'\\n'\n");
// else
// printf("'%.*s'\n", (int)tok_state.cur_delim.len, tok_state.cur_delim.str);
}
}
free_text();
return num_words + num_delims;
}
void encode_decode_file(const char *fname)
{
GUF_ASSERT_RELEASE(load_text(fname));
dbuf_i32 cp_buf = dbuf_i32_new(&guf_allocator_libc);
ptrdiff_t valid_chars = 0, invalid_chars = 0;
guf_str_view input_str = {.str = text_buf.data, .len = text_buf.size};
guf_utf8_char ch = {};
for (guf_utf8_stat stat = guf_utf8_char_next(&ch, &input_str); stat != GUF_UTF8_READ_DONE; stat = guf_utf8_char_next(&ch, &input_str)) {
if (stat == GUF_UTF8_READ_VALID) {
++valid_chars;
const int32_t codepoint = guf_utf8_decode(&ch);
TEST_CHECK(codepoint >= 0);
dbuf_i32_push_val(&cp_buf, codepoint);
} else {
++invalid_chars;
const int32_t codepoint = guf_utf8_decode(&ch);
TEST_CHECK(codepoint < 0);
dbuf_i32_push_val(&cp_buf, -1);
}
}
TEST_CHECK(cp_buf.size == valid_chars + invalid_chars);
guf_str_view in_str = {.str = text_buf.data, .len = text_buf.size};
GUF_CNT_FOREACH(&cp_buf, dbuf_i32, it) {
GUF_ASSERT_RELEASE(it.ptr);
const int32_t codepoint = *it.ptr;
guf_utf8_char utf8_ch = {};
const guf_utf8_stat stat = guf_utf8_char_next(&utf8_ch, &in_str);
if (codepoint >= 0) {
TEST_CHECK(stat == GUF_UTF8_READ_VALID);
guf_utf8_char encoded_ch = {};
TEST_CHECK(guf_utf8_encode(&encoded_ch, codepoint));
TEST_CHECK(guf_utf8_equal(&encoded_ch, &utf8_ch));
}
}
guf_utf8_char utf8_ch = {};
const guf_utf8_stat stat = guf_utf8_char_next(&utf8_ch, &in_str);
TEST_CHECK(stat == GUF_UTF8_READ_DONE);
dbuf_i32_free(&cp_buf, NULL);
free_text();
}
void encode_decode()
{
guf_utf8_char utf8 = {0};
// 1 byte characters.
for (uint8_t ascii = 0; ascii <= 0x7F; ++ascii) {
TEST_CHECK(guf_utf8_encode(&utf8, ascii));
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 1);
TEST_CHECK(utf8.bytes[0] == ascii);
TEST_CHECK(utf8.bytes[1] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == ascii);
}
// 2 byte characters:
TEST_CHECK(guf_utf8_encode(&utf8, 0x00E6)); // "æ" (Latin Small Letter Ae)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2);
TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\xA6');
TEST_CHECK(utf8.bytes[2] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x00E6);
TEST_CHECK(guf_utf8_encode(&utf8, 0x00E5)); // "å" (Latin Small Letter A with Ring Above)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2);
TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\xA5');
TEST_CHECK(utf8.bytes[2] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x00E5);
TEST_CHECK(guf_utf8_encode(&utf8, 0x00F8)); // "ø" (Latin Small Letter O with Stroke)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2);
TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\xB8');
TEST_CHECK(utf8.bytes[2] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x00F8);
TEST_CHECK(guf_utf8_encode(&utf8, 0x00E4)); // "ä" (Latin Small Letter A with Diaeresis)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2);
TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\xA4');
TEST_CHECK(utf8.bytes[2] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x00E4);
TEST_CHECK(guf_utf8_encode(&utf8, 0x00F6)); // "ö" (Latin Small Letter O with Diaeresis)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2);
TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\xB6');
TEST_CHECK(utf8.bytes[2] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x00F6);
TEST_CHECK(guf_utf8_encode(&utf8, 0x00D6)); // "Ö" (Latin Capital Letter O with Diaeresis)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2);
TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\x96');
TEST_CHECK(utf8.bytes[2] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x00D6);
TEST_CHECK(guf_utf8_encode(&utf8, 0x00FC)); // "ü" (Latin Small Letter U with Diaeresis)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2);
TEST_CHECK(utf8.bytes[0] == '\xC3' && utf8.bytes[1] == '\xBC');
TEST_CHECK(utf8.bytes[2] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x00FC);
TEST_CHECK(guf_utf8_encode(&utf8, 0x00B5)); // "µ" (Micro Sign)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2);
TEST_CHECK(utf8.bytes[0] == '\xC2' && utf8.bytes[1] == '\xB5');
TEST_CHECK(utf8.bytes[2] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x00B5);
TEST_CHECK(guf_utf8_encode(&utf8, 0x030A)); // "◌̊" (Combining Ring Above)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 2);
TEST_CHECK(utf8.bytes[0] == '\xCC' && utf8.bytes[1] == '\x8A');
TEST_CHECK(utf8.bytes[2] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x030A);
// 3 byte characters:
TEST_CHECK(guf_utf8_encode(&utf8, 0x7121)); // "無" (Nothingness; CJK Unified Ideograph-7121)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 3);
TEST_CHECK(!guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR));
TEST_CHECK(utf8.bytes[0] == '\xE7' && utf8.bytes[1] == '\x84' && utf8.bytes[2] == '\xA1');
TEST_CHECK(utf8.bytes[3] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x7121);
TEST_CHECK(guf_utf8_encode(&utf8, 0x201E)); // "„" (Double Low-9 Quotation Mark)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 3);
TEST_CHECK(!guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR));
TEST_CHECK(utf8.bytes[0] == '\xE2' && utf8.bytes[1] == '\x80' && utf8.bytes[2] == '\x9E');
TEST_CHECK(utf8.bytes[3] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x201E);
TEST_CHECK(guf_utf8_encode(&utf8, 0x20AC)); // "€" (Euro Sign)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 3);
TEST_CHECK(!guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR));
TEST_CHECK(utf8.bytes[0] == '\xE2' && utf8.bytes[1] == '\x82' && utf8.bytes[2] == '\xAC');
TEST_CHECK(utf8.bytes[3] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x20AC);
TEST_CHECK(guf_utf8_encode(&utf8, 0xFC51)); // "ﱑ" (Arabic Ligature Heh with Jeem Isolated Form)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 3);
TEST_CHECK(!guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR));
TEST_CHECK(utf8.bytes[0] == '\xEF' && utf8.bytes[1] == '\xB1' && utf8.bytes[2] == '\x91');
TEST_CHECK(utf8.bytes[3] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0xFC51);
TEST_CHECK(guf_utf8_encode(&utf8, 0x1AA3)); // "᪣" (Tai Tham Sign Keow)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 3);
TEST_CHECK(!guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR));
TEST_CHECK(utf8.bytes[0] == '\xE1' && utf8.bytes[1] == '\xAA' && utf8.bytes[2] == '\xA3');
TEST_CHECK(utf8.bytes[3] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x1AA3);
TEST_CHECK(guf_utf8_encode(&utf8, GUF_UTF8_REPLACEMENT_CHAR_CODEPOINT)); // "<22>" (Replacement Character)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 3);
TEST_CHECK(utf8.bytes[0] == '\xEF' && utf8.bytes[1] == '\xBF' && utf8.bytes[2] == '\xBD');
TEST_CHECK(utf8.bytes[3] == '\0');
TEST_CHECK(guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR));
TEST_CHECK(guf_utf8_decode(&utf8) == GUF_UTF8_REPLACEMENT_CHAR_CODEPOINT);
// 4 byte characters:
TEST_CHECK(guf_utf8_encode(&utf8, 0x1F308)); // "🌈" (Rainbow)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 4);
TEST_CHECK(utf8.bytes[0] == '\xF0' && utf8.bytes[1] == '\x9F' && utf8.bytes[2] == '\x8C' && utf8.bytes[3] == '\x88');
TEST_CHECK(utf8.bytes[4] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x1F308);
TEST_CHECK(guf_utf8_encode(&utf8, 0x130B8)); // "𓂸" (Egyptian Hieroglyph D052)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 4);
TEST_CHECK(utf8.bytes[0] == '\xF0' && utf8.bytes[1] == '\x93' && utf8.bytes[2] == '\x82' && utf8.bytes[3] == '\xB8');
TEST_CHECK(utf8.bytes[4] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x130B8);
TEST_CHECK(guf_utf8_encode(&utf8, 0x1F97A)); // "🥺" (Face with Pleading Eyes)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 4);
TEST_CHECK(utf8.bytes[0] == '\xF0' && utf8.bytes[1] == '\x9F' && utf8.bytes[2] == '\xA5' && utf8.bytes[3] == '\xBA');
TEST_CHECK(utf8.bytes[4] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x1F97A);
TEST_CHECK(guf_utf8_encode(&utf8, 0x1F980)); // "🦀" (Crab)
TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 4);
TEST_CHECK(utf8.bytes[0] == '\xF0' && utf8.bytes[1] == '\x9F' && utf8.bytes[2] == '\xA6' && utf8.bytes[3] == '\x80');
TEST_CHECK(utf8.bytes[4] == '\0');
TEST_CHECK(guf_utf8_decode(&utf8) == 0x1F980);
// Invalid characters:
utf8 = {.bytes = {'\xC0', '\x80', 0, 0, 0}};
TEST_CHECK(guf_utf8_decode(&utf8) < 0);
utf8 = {.bytes = {'\xC0', 0, 0, 0, 0}};
TEST_CHECK(guf_utf8_decode(&utf8) < 0);
utf8 = {.bytes = {'\x80', 0, 0, 0, 0}};
TEST_CHECK(guf_utf8_decode(&utf8) < 0);
// "The definition of UTF-8 prohibits encoding character numbers between U+D800 and U+DFFF" (surrogate pairs).
TEST_CHECK(!guf_utf8_encode(&utf8, 0xD800));
TEST_CHECK(guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR));
TEST_CHECK(guf_utf8_decode(&utf8) == GUF_UTF8_REPLACEMENT_CHAR_CODEPOINT);
TEST_CHECK(!guf_utf8_encode(&utf8, 0xDFFF));
TEST_CHECK(guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR));
TEST_CHECK(guf_utf8_decode(&utf8) == GUF_UTF8_REPLACEMENT_CHAR_CODEPOINT);
TEST_CHECK(!guf_utf8_encode(&utf8, 0xDA00));
TEST_CHECK(guf_utf8_equal(&utf8, &GUF_UTF8_REPLACEMENT_CHAR));
TEST_CHECK(guf_utf8_decode(&utf8) == GUF_UTF8_REPLACEMENT_CHAR_CODEPOINT);
char buf[] = {'\x2F', '\xC0', '\xAE', '\x2E', '\x2F'};
guf_str_view input_str = {.str = buf, .len = GUF_ARR_SIZE(buf)};
guf_utf8_char ch = {};
int valid_chars = 0, invalid_chars = 0;
for (guf_utf8_stat stat = guf_utf8_char_next(&ch, &input_str); stat != GUF_UTF8_READ_DONE; stat = guf_utf8_char_next(&ch, &input_str)) {
if (stat == GUF_UTF8_READ_VALID) {
++valid_chars;
} else {
++invalid_chars;
}
}
TEST_CHECK(invalid_chars == 2 && valid_chars == 3);
char buf2[] = {'\xE0', '\x80', 'a', 'b', 'c'}; // 1 invalid 3-byte-character, 2 valid 1-byte-characters
input_str = {.str = buf2, .len = GUF_ARR_SIZE(buf2)};
ch = {};
valid_chars = invalid_chars = 0;
for (guf_utf8_stat stat = guf_utf8_char_next(&ch, &input_str); stat != GUF_UTF8_READ_DONE; stat = guf_utf8_char_next(&ch, &input_str)) {
if (stat == GUF_UTF8_READ_VALID) {
// printf("%s", ch.bytes);
++valid_chars;
} else {
// printf("%s", GUF_UTF8_REPLACEMENT_CHAR.bytes);
++invalid_chars;
}
}
TEST_CHECK(invalid_chars == 1 && valid_chars == 2);
}
public:
// Runs the complete UTF-8 suite once and returns whether it passed.
// If `done` is already set, the stored `passed` value is returned without re-running.
// Otherwise the suite reads and tokenises the two text fixtures, runs the encode/decode
// checks, then sets `done` and derives `passed` from `num_failed_checks`.
bool run()
{
    if (done) {
        return passed;
    }

    // Character counts of the fixture files.
    ptrdiff_t valid = 0, invalid = 0;
    read_utf8_chars(TEST_DATA_DIR "/" "utf8-test.txt", &valid, &invalid);
    TEST_CHECK(valid == 2635 && invalid == 0);
    read_utf8_chars(TEST_DATA_DIR "/" "bartleby.txt", &valid, &invalid);
    TEST_CHECK(valid > 16000 && invalid == 0);

    // Delimiter set: all whitespace strings followed by the common punctuation strings.
    // Designators are given as .str, .len like every other guf_str_view in this file
    // (C++20 requires designators in declaration order).
    dbuf_str_view delims = dbuf_str_view_new(&guf_allocator_libc);
    for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_WHITESPACE); ++i) {
        guf_str_view d = {.str = GUF_UTF8_WHITESPACE[i], .len = static_cast<ptrdiff_t>(strlen(GUF_UTF8_WHITESPACE[i]))};
        dbuf_str_view_push_val(&delims, d);
    }
    for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_COMMON_PUNCT); ++i) {
        guf_str_view d = {.str = GUF_UTF8_COMMON_PUNCT[i], .len = static_cast<ptrdiff_t>(strlen(GUF_UTF8_COMMON_PUNCT[i]))};
        dbuf_str_view_push_val(&delims, d);
    }

    // Tokenisation counts.
    int words = count_words(TEST_DATA_DIR "/" "utf8-test.txt", &delims);
    TEST_CHECK(words == 422);
    int words_with_delims = count_words_with_delims(TEST_DATA_DIR "/" "utf8-test.txt", &delims);
    TEST_CHECK(words_with_delims == 950);
    int words2 = count_words(TEST_DATA_DIR "/" "bartleby.txt", &delims);
    TEST_CHECK(words2 > 2048);
    dbuf_str_view_free(&delims, NULL);

    // Encoder/decoder checks: fixed code points first, then whole-file round trips.
    encode_decode();
    encode_decode_file(TEST_DATA_DIR "/" "utf8-test.txt");
    encode_decode_file(TEST_DATA_DIR "/" "bartleby.txt");

    done = true;
    passed = (num_failed_checks == 0);
    return passed;
}
void read_utf8_chars(const char *fname, ptrdiff_t *n_valid, ptrdiff_t *n_invalid);
int count_words(const char *fname, const dbuf_str_view *delims);
int count_words_with_delims(const char *fname, const dbuf_str_view *delims);
void encode_decode_file(const char *fname);
void encode_decode();
};