libguf/src/test/test_str.cpp
2025-05-13 17:36:08 +02:00

377 lines
15 KiB
C++

#include "test_str.hpp"
extern "C"
{
#include "guf_alloc_libc.h"
}
/*
StrTest:
*/
/*
    Runs all guf_str / guf_str_view checks of this suite: initialisation and freeing,
    appending single chars and whole strings, guf_str_view_pop_split and the tokeniser.
    Returns immediately if `done` is already set.
    NOTE(review): `done` is only read in this function, never written; confirm it is set by the caller/base class.
*/
void StrTest::run()
{
    if (done) {
        return;
    }

    // NOTE: Every literal below goes through std::string's const char* constructor, which stops
    // at the first NUL: "\0" yields "", "Othell\0o" yields "Othell" and "f\0\0" yields "f".
    // Embedded NULs are only exercised by test_append_char(word, true).
    const std::vector<std::string> words = {
        "",
        "\0",
        "Hello",
        "Othell\0o",
        "f\0\0",
        "\0",
        "0",
        "a",
        "ab",
        "🌈 waow a rainboge!",
        "orange cat(1) :3",
        "xes yag",
        "Hello, world! This is a pretty darn long string I'd say...",
        "I want to eat crayons. I crave crayons because they are tasty, and everybody telling me crayons are not edible must be either lying or dumb. I like trains. 42 is a number. 3.14159265... is not a rational number, and it is called pi. I ate some pie (it was a crayon pie).",
        std::string(32, 'a'),
        std::string(64, 'b'),
        std::string(1024, 'a'),
        std::string(2048, 'a'),
        std::string(4096, 'a'),
        std::string(5001, 'a'),
        std::string(7121, 'a'),
        std::string(2000, 'a'),
        // Lengths around (and multiples of) the small-string buffer capacity.
        std::string(GUF_STR_SSO_BUF_CAP, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP - 1, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP + 1, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP - 2, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP + 2, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP - 3, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP + 3, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP * 2, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP * 3, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP * 4, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP * 5, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP * 6, 'a'),
        std::string(GUF_STR_SSO_BUF_CAP * 7, 'a'),
    };

    push_check_name("init_empty");
    test_init_empty();
    pop_check_name();

    // Own check name, so that init/free failures are not reported as "append_char" failures.
    push_check_name("init_free");
    for (const auto& word : words) {
        test_init_free(word);
    }
    pop_check_name();

    push_check_name("append_char");
    for (const auto& word : words) {
        test_append_char(word);
        test_append_char(word, true);
    }
    pop_check_name();

    // Every word appended to itself, plus every unordered pair of words in both orders.
    push_check_name("append_str");
    for (size_t i = 0; i < words.size(); ++i) {
        const auto& w1 = words.at(i);
        append_str(w1, w1);
        for (size_t j = i + 1; j < words.size(); ++j) {
            const auto& w2 = words.at(j);
            append_str(w1, w2);
            append_str(w2, w1);
        }
    }
    pop_check_name();

    push_check_name("test_popsplit");
    std::vector<std::string_view> split = test_popsplit("1997-04-01", "-");
    if (TEST_CHECK(split.size() == 3)) {
        TEST_CHECK(split.at(0) == "1997" && split.at(1) == "04" && split.at(2) == "01");
    }
    // A single trailing delimiter does not produce a trailing empty part.
    split = test_popsplit("1997-04-01-", "-");
    if (TEST_CHECK(split.size() == 3)) {
        TEST_CHECK(split.at(0) == "1997" && split.at(1) == "04" && split.at(2) == "01");
    }
    split = test_popsplit("2025/05/08", "/");
    if (TEST_CHECK(split.size() == 3)) {
        TEST_CHECK(split.at(0) == "2025" && split.at(1) == "05" && split.at(2) == "08");
    }
    split = test_popsplit("2025/05/08/", "/");
    if (TEST_CHECK(split.size() == 3)) {
        TEST_CHECK(split.at(0) == "2025" && split.at(1) == "05" && split.at(2) == "08");
    }
    split = test_popsplit("2025/05/08//", "/");
    if (TEST_CHECK(split.size() == 4)) {
        TEST_CHECK(split.at(0) == "2025" && split.at(1) == "05" && split.at(2) == "08" && split.at(3) == "");
    }
    // Leading delimiters produce leading empty parts.
    split = test_popsplit("/2025/05/08", "/");
    if (TEST_CHECK(split.size() == 4)) {
        TEST_CHECK(split.at(0) == "" && split.at(1) == "2025" && split.at(2) == "05" && split.at(3) == "08");
    }
    split = test_popsplit("//2025/05/08", "/");
    if (TEST_CHECK(split.size() == 5)) {
        TEST_CHECK(split.at(0) == "" && split.at(1) == "" && split.at(2) == "2025" && split.at(3) == "05" && split.at(4) == "08");
    }
    // Delimiter does not occur at all: the whole input is the only part.
    split = test_popsplit("I eat formidable crayons, oof, for real", "foo");
    if (TEST_CHECK(split.size() == 1)) {
        TEST_CHECK(split.at(0) == "I eat formidable crayons, oof, for real");
    }
    split = test_popsplit("Hej <<", "<<");
    if (TEST_CHECK(split.size() == 1)) {
        TEST_CHECK(split.at(0) == "Hej ");
    }
    split = test_popsplit("Hej << verden", "<<");
    if (TEST_CHECK(split.size() == 2)) {
        TEST_CHECK(split.at(0) == "Hej " && split.at(1) == " verden");
    }
    split = test_popsplit("<< Hej << verden", "<<");
    if (TEST_CHECK(split.size() == 3)) {
        TEST_CHECK(split.at(0) == "" && split.at(1) == " Hej " && split.at(2) == " verden");
    }
    split = test_popsplit("<< Hej << verden <<< foo<>", "<<");
    if (TEST_CHECK(split.size() == 4)) {
        TEST_CHECK(split.at(0) == "" && split.at(1) == " Hej " && split.at(2) == " verden " && split.at(3) == "< foo<>");
    }
    // Empty delimiter: the whole input is the only part.
    split = test_popsplit("I eat tofu", "");
    if (TEST_CHECK(split.size() == 1)) {
        TEST_CHECK(split.at(0) == "I eat tofu");
    }
    // Partial delimiter matches ("FULL-moon", "-STOP", "FULL-STO") must not split.
    split = test_popsplit("At 3 a.m. during FULL-moon FULL-STOP Next to the public-library's -STOP sign FULL-STOP", "FULL-STOP");
    if (TEST_CHECK(split.size() == 2)) {
        TEST_CHECK(split.at(0) == "At 3 a.m. during FULL-moon " && split.at(1) == " Next to the public-library's -STOP sign ");
    }
    split = test_popsplit("At 3 a.m. during FULL-moon FULL-STOP Next to the public-library's -STOP sign FULL-STOPI like trains, FULL-STO", "FULL-STOP");
    if (TEST_CHECK(split.size() == 3)) {
        TEST_CHECK(split.at(0) == "At 3 a.m. during FULL-moon " && split.at(1) == " Next to the public-library's -STOP sign " && split.at(2) == "I like trains, FULL-STO");
    }
    split = test_popsplit("At 3 a.m. during FULL-moon FULL-STOP Next to the public-library's -STOP sign FULL-STOPI like trains, FULL-STO Poo", "FULL-STOP");
    if (TEST_CHECK(split.size() == 3)) {
        TEST_CHECK(split.at(0) == "At 3 a.m. during FULL-moon " && split.at(1) == " Next to the public-library's -STOP sign " && split.at(2) == "I like trains, FULL-STO Poo");
    }
    pop_check_name();

    push_check_name("get_toks");
    std::vector<std::string_view> tok_words = {"hello", "world", "cat", "vertex", "normal", "pizza", "running", "mouse", "playing", "adjacent"};
    std::vector<std::string_view> delims = {",", " ", "\n", "\t", "\r"};
    // Join the first num_words words with ", " (optionally with a trailing ", ") and expect the
    // tokeniser to give back exactly those words. The bound is inclusive so that the complete
    // word list (including its last word) is covered as well.
    for (int is_trailing = 0; is_trailing < 2; ++is_trailing) {
        for (ptrdiff_t num_words = 1; num_words <= std::ssize(tok_words); ++num_words) {
            std::string str = "";
            for (ptrdiff_t j = 0; j < num_words; ++j) {
                str += tok_words.at(j);
                if (j < num_words - 1 || is_trailing) {
                    str += ", ";
                }
            }
            std::vector<std::string_view> toks = get_toks(std::string_view{str}, delims, false, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST);
            if (TEST_CHECK(std::ssize(toks) == num_words)) {
                for (ptrdiff_t i = 0; i < num_words; ++i) {
                    TEST_CHECK(toks.at(i) == tok_words.at(i));
                }
            }
        }
    }

    // Overlapping delimiters ("<", "<stats", "<stats>", "</", "</stats>") with longest-match.
    std::string_view tok_str = "<stats>age: 28, occupation: NULL, crayons_eaten: 256 </stats>";
    delims = {"<stats>", "</stats>", ":", ",", " ", "\t", "<stats", "<", ">", "</"};
    auto tok_result = get_toks(tok_str, delims, true, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST);
    TEST_CHECK(tok_result.size() == 19);
    TEST_CHECK(tok_result.at(18) == "</stats>" && tok_result.at(0) == "<stats>" && tok_result.at(1) == "age" && tok_result.at(2) == ":" && tok_result.at(3) == " " && tok_result.at(4) == "28");
    tok_result = get_toks(tok_str, delims, false, GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST);
    TEST_CHECK(tok_result.size() == 6);
    TEST_CHECK(tok_result.at(0) == "age" && tok_result.at(1) == "28" && tok_result.at(2) == "occupation" && tok_result.at(3) == "NULL" &&
               tok_result.at(4) == "crayons_eaten" && tok_result.at(5) == "256");
    pop_check_name();
}
void StrTest::test_init_free(std::string str)
{
guf_str s0;
guf_str_init(&s0, GUF_CSTR_TO_VIEW_CPP(str.c_str()), &guf_allocator_libc);
guf_str s1 = guf_str_new(GUF_CSTR_TO_VIEW_CPP(str.c_str()), &guf_allocator_libc);
guf_str s2;
guf_str_init_from_cstr(&s2, str.c_str(), &guf_allocator_libc);
TEST_CHECK(guf_str_equal(&s0, &s1));
TEST_CHECK(guf_str_equal(&s0, &s2));
TEST_CHECK(guf_str_equal(&s1, &s2));
TEST_CHECK((ptrdiff_t)str.size() == guf_str_len(&s0));
TEST_CHECK(str == guf_str_const_cstr(&s0));
TEST_CHECK(str == guf_str_cstr(&s0));
TEST_CHECK((ptrdiff_t)str.size() == guf_str_len(&s1));
TEST_CHECK(str == guf_str_const_cstr(&s1));
TEST_CHECK(str == guf_str_cstr(&s1));
TEST_CHECK((ptrdiff_t)str.size() == guf_str_len(&s2));
TEST_CHECK(str == guf_str_const_cstr(&s2));
TEST_CHECK(str == guf_str_cstr(&s2));
guf_str_free(&s0, NULL);
guf_str_free(&s1, NULL);
guf_str_free(&s2, NULL);
TEST_CHECK(guf_str_is_uninit(&s0));
TEST_CHECK(guf_str_is_uninit(&s1));
TEST_CHECK(guf_str_is_uninit(&s2));
}
void StrTest::test_init_empty()
{
std::string str = "";
guf_str s = GUF_STR_UNINITIALISED_CPP;
guf_str_init_empty(&s, &guf_allocator_libc);
TEST_CHECK(guf_str_len(&s) == 0);
TEST_CHECK(str == guf_str_const_cstr(&s));
guf_str_append_char(&s, 'a', 1024);
str.append(1024, 'a');
TEST_CHECK(guf_str_len(&s) == (ptrdiff_t)str.size());
TEST_CHECK(guf_str_const_cstr(&s) == str);
guf_str_append_char(&s, 'b', 24);
str.append(24, 'b');
TEST_CHECK(guf_str_len(&s) == (ptrdiff_t)str.size());
TEST_CHECK(guf_str_const_cstr(&s) == str);
guf_str_append_char(&s, 'c', 255);
str.append(255, 'c');
TEST_CHECK(guf_str_len(&s) == (ptrdiff_t)str.size());
TEST_CHECK(guf_str_const_cstr(&s) == str);
*guf_str_at(&s, 0) = '<';
str.at(0) = '<';
TEST_CHECK(guf_str_len(&s) == (ptrdiff_t)str.size());
TEST_CHECK(guf_str_const_cstr(&s) == str);
*guf_str_at(&s, guf_str_len(&s) - 1) = '>';
str.at(str.size() - 1) = '>';
TEST_CHECK(guf_str_len(&s) == (ptrdiff_t)str.size());
TEST_CHECK(guf_str_const_cstr(&s) == str);
guf_err err = GUF_ERR_NONE;
TEST_CHECK(NULL == guf_str_try_at(&s, guf_str_len(&s), &err));
TEST_CHECK(err != GUF_ERR_NONE && err == GUF_ERR_IDX_RANGE);
err = GUF_ERR_NONE;
TEST_CHECK(NULL == guf_str_try_at(&s, -1, &err));
TEST_CHECK(err != GUF_ERR_NONE && err == GUF_ERR_IDX_RANGE);
guf_str_free(&s, NULL);
TEST_CHECK(guf_str_is_uninit(&s));
}
void StrTest::test_append_char(std::string str, bool include_null)
{
guf_str s0 = guf_str_new(guf_str_view{.str = str.c_str(), .len = (ptrdiff_t)str.size()}, &guf_allocator_libc);
TEST_CHECK((ptrdiff_t)str.size() == guf_str_len(&s0));
TEST_CHECK((str == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)}));
for (int i = include_null ? 0 : 1; i < 128; ++i) {
char ch = (char)i;
guf_str_append_one_char(&s0, ch);
str.append(1, ch);
TEST_CHECK(guf_str_len(&s0) == (ptrdiff_t)str.size());
TEST_CHECK((str == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)}));
}
for (int i = include_null ? 0 : 1; i < 128; ++i) {
char ch = (char)i;
guf_str_append_char(&s0, ch, i);
str.append(i, ch);
TEST_CHECK(guf_str_len(&s0) == (ptrdiff_t)str.size());
TEST_CHECK((str == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)}));
guf_str_append_char(&s0, ch, i * 16);
str.append(i * 16, ch);
TEST_CHECK(guf_str_len(&s0) == (ptrdiff_t)str.size());
TEST_CHECK((str == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)}));
}
guf_str_free(&s0, NULL);
TEST_CHECK(guf_str_is_uninit(&s0));
}
void StrTest::append_str(const std::string& a, const std::string& b)
{
std::string str0 = a;
guf_str s0 = guf_str_new(guf_str_view{.str = str0.c_str(), .len = (ptrdiff_t)str0.size()}, &guf_allocator_libc);
TEST_CHECK(guf_str_len(&s0) == (ptrdiff_t)str0.size());
TEST_CHECK((str0 == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)}));
TEST_CHECK((str0 == std::string_view{guf_str_cstr(&s0), (size_t)guf_str_len(&s0)}));
for (int i = 0; i <= 64; ++i) {
str0.append(b);
guf_str_append(&s0, guf_str_view{.str = b.c_str(), .len = (ptrdiff_t)b.size()});
TEST_CHECK(guf_str_len(&s0) == (ptrdiff_t)str0.size());
TEST_CHECK((str0 == std::string_view{guf_str_const_cstr(&s0), (size_t)guf_str_len(&s0)}));
TEST_CHECK((str0 == std::string_view{guf_str_cstr(&s0), (size_t)guf_str_len(&s0)}));
}
guf_str_free(&s0, NULL);
TEST_CHECK(guf_str_is_uninit(&s0));
}
std::vector<std::string_view> StrTest::test_popsplit(std::string_view str, std::string_view delim)
{
std::vector<std::string_view> result = {};
if (delim.size() > 0) { // NOTE: str.find with an empty delimiter returns 0, not std::string::npos
std::string_view src_cpp = str;
for (size_t idx = src_cpp.find(delim, 0); src_cpp.size() > 0; idx = src_cpp.find(delim, 0)) {
result.push_back(src_cpp.substr(0, idx));
if (idx == std::string::npos) {
break;
}
src_cpp = src_cpp.substr(idx + delim.size());
}
} else {
result.push_back(str);
}
const guf_str_view delim_sv = guf_str_view{.len = (ptrdiff_t)delim.size(), .str = delim.data()};
guf_str_view src = guf_str_view{.len = (ptrdiff_t)str.size(), .str = str.data()};
size_t n = 0;
do {
const guf_str_view popped = guf_str_view_pop_split(&src, delim_sv);
TEST_CHECK(n < result.size());
TEST_CHECK(std::string_view(popped.str, (size_t)popped.len) == result.at(n));
const guf_str_view res = {.str = result.at(n).data(), .len = (ptrdiff_t)result.at(n).size()};
TEST_CHECK(guf_str_view_equal(&popped, &res));
TEST_CHECK(guf_str_view_equal_val_arg(popped, res));
// std::cout << "guf: " << std::string_view{popped.str, (size_t)popped.len} << "\n";
// std::cout << "cpp: " << std::string_view{res.str, (size_t)res.len} << "\n";
++n;
} while (src.len > 0);
TEST_CHECK(n == result.size());
return result;
}
std::vector<std::string_view> StrTest::get_toks(std::string_view sv_in, const std::vector<std::string_view>& delims_in, bool preserve_delims, guf_str_tok_delim_opt opt)
{
const guf_str_view sv = guf_str_view{.len = (ptrdiff_t)sv_in.size(), .str = sv_in.data()};
std::vector<guf_str_view> delims;
for (const auto delim : delims_in) {
delims.push_back(guf_str_view{.len = (ptrdiff_t)delim.size(), .str = delim.data()});
}
guf_str_tok_state tok_state = guf_str_tok_state_new(sv, delims.data(), std::ssize(delims), opt);
std::vector<std::string_view> toks_out;
while (guf_str_tok_next(&tok_state, preserve_delims)) {
if (tok_state.cur_tok.len > 0) {
toks_out.push_back( std::string_view{tok_state.cur_tok.str, (size_t)tok_state.cur_tok.len});
}
if (preserve_delims && tok_state.cur_delim.len > 0) {
toks_out.push_back( std::string_view{tok_state.cur_delim.str, (size_t)tok_state.cur_delim.len});
}
}
TEST_CHECK(tok_state.done);
const ptrdiff_t num_toks = preserve_delims ? tok_state.num_delims_read + tok_state.num_toks_read : tok_state.num_toks_read;
TEST_CHECK(num_toks == std::ssize(toks_out));
return toks_out;
}