diff --git a/src/guf_dict.h b/src/guf_dict.h
index 303ad36..96e58d4 100755
--- a/src/guf_dict.h
+++ b/src/guf_dict.h
@@ -659,13 +659,13 @@ GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, GUF_D
     GUF_DICT_KEY_T *key_cpy_res = NULL;
     if (key_opt == GUF_CPY_DEEP) {
     #ifdef GUF_DICT_KEY_T_COPY
-        key_cpy_res = GUF_DICT_KEY_T_COPY(&key_cpy, key);
+        key_cpy_res = GUF_DICT_KEY_T_COPY(&key_cpy, key, NULL);
     #else
         GUF_ASSERT_RELEASE(false);
     #endif
     } else if (key_opt == GUF_CPY_MOVE) {
     #ifdef GUF_DICT_KEY_T_MOVE
-        key_cpy_res = GUF_DICT_KEY_T_MOVE(&key_cpy, key);
+        key_cpy_res = GUF_DICT_KEY_T_MOVE(&key_cpy, key, NULL);
     #else
         GUF_ASSERT_RELEASE(false);
     #endif
diff --git a/src/guf_str.h b/src/guf_str.h
index 1f8c118..8e1d9a5 100644
--- a/src/guf_str.h
+++ b/src/guf_str.h
@@ -68,16 +68,16 @@ typedef struct guf_str_tok_state {
 #define GUF_CSTR_TO_VIEW(CSTR) ((guf_str_view){.str = (CSTR), .len = (ptrdiff_t)strlen((CSTR))})
 #define GUF_CSTR_LIT_TO_VIEW(CSTR) ((guf_str_view){.str = (CSTR), .len = (ptrdiff_t)sizeof((CSTR)) - 1})
 #define GUF_STR_TO_VIEW(GUF_STR_PTR) ((guf_str_view){.str = guf_str_const_cstr((GUF_STR_PTR)), .len = (ptrdiff_t)guf_str_len((GUF_STR_PTR))})
-#define GUF_CSTR_TO_READONLY_STR(CSTR) ((guf_str){.allocator = NULL, .data.lng.c_str = (CSTR), .data.lng.size = strlen(CSTR) + 1, .data.lng.capacity = 0})
+// TODO: #define GUF_CSTR_TO_READONLY_STR(CSTR) ((guf_str){.allocator = NULL, .data.lng.c_str = (CSTR), .data.lng.size = strlen(CSTR) + 1, .data.lng.capacity = 0})
 
-#define GUF_STR_UNINITIALISED (guf_str){.allocator = NULL, .data.shrt.size = 0, .data.shrt.c_str[0] = '\0'}
+#define GUF_STR_UNINITIALISED (guf_str){.allocator = NULL, .data.lng.size = 0, .data.lng.capacity = 0, .data.lng.c_str = NULL}
 
 
 #ifdef __cplusplus
     // Standard C++ does not have compound literals like C99...
     #define GUF_CSTR_TO_VIEW_CPP(CSTR) guf_str_view {.str = (CSTR), .len = (ptrdiff_t)strlen(CSTR)}
     #define GUF_CSTR_LIT_TO_VIEW_CPP(CSTR) guf_str_view {.str = (CSTR), .len = (ptrdiff_t)sizeof(CSTR) - 1}
-    #define GUF_STR_UNINITIALISED_CPP guf_str{.allocator = NULL, .data.shrt.size = 0, .data.shrt.c_str[0] = '\0'}
+    #define GUF_STR_UNINITIALISED_CPP guf_str{.allocator = NULL, .data.lng.size = 0, .data.lng.capacity = 0, .data.lng.c_str = NULL}
 #endif
 
 // 1.) guf_str_view:
@@ -137,6 +137,8 @@ GUF_STR_KWRDS guf_str *guf_str_init(guf_str *str, guf_str_view str_view, guf_all
 GUF_STR_KWRDS guf_str *guf_str_init_empty(guf_str *str, guf_allocator *alloc);
 GUF_STR_KWRDS guf_str *guf_str_try_init_from_cstr(guf_str *str, const char* c_str, guf_allocator *alloc, guf_err *err);
 GUF_STR_KWRDS guf_str *guf_str_init_from_cstr(guf_str *str, const char* c_str, guf_allocator *alloc);
+GUF_STR_KWRDS guf_str *guf_str_init_readonly(guf_str *str, guf_str_view sv);
+GUF_STR_KWRDS guf_str guf_str_new_readonly(guf_str_view sv);
 
 // Return an initialised guf_str (or GUF_STR_UNINITIALISED on error)
 GUF_STR_KWRDS guf_str guf_str_try_new(guf_str_view str_view, guf_allocator *alloc, guf_err *err);
@@ -597,7 +599,7 @@ GUF_STR_KWRDS guf_str guf_str_new_uninitialised(void)
 GUF_STR_KWRDS bool guf_str_is_uninit(const guf_str *str)
 {
     GUF_ASSERT(str);
-    return !str->allocator && !str->data.shrt.size && str->data.shrt.c_str[0] == '\0';
+    return !str->allocator && !str->data.lng.size && !str->data.lng.capacity && str->data.lng.c_str == NULL;
 }
 
 
@@ -719,6 +721,29 @@ GUF_STR_KWRDS guf_str *guf_str_init_from_cstr(guf_str *str, const char* c_str, g
     return guf_str_try_init_from_cstr(str, c_str, alloc, NULL);
 }
 
+// Initialise *str as a read-only guf_str that aliases sv (no allocator, capacity 0, nothing is copied).
+// The bytes behind sv.str must therefore outlive str. Returns str.
+GUF_STR_KWRDS guf_str *guf_str_init_readonly(guf_str *str, guf_str_view sv)
+{
+    GUF_ASSERT(str);
+    GUF_ASSERT(guf_str_view_is_valid(sv));
+    str->allocator = NULL;
+    str->data.lng.c_str = (char*)sv.str; // TODO: is this actually legal as long as we don't modify the pointer's contents?
+    str->data.lng.size = sv.len + 1; // NOTE(review): presumably counts a terminating '\0' that a sub-string view may not have -- confirm (cf. "fix readonly str" in todo.txt)
+    str->data.lng.capacity = 0;
+    GUF_ASSERT(guf_str_is_readonly(str));
+    GUF_ASSERT(guf_str_is_valid(str));
+    return str;
+}
+
+// Return (by value) a read-only guf_str aliasing sv; see guf_str_init_readonly.
+GUF_STR_KWRDS guf_str guf_str_new_readonly(guf_str_view sv)
+{
+    guf_str s;
+    guf_str_init_readonly(&s, sv);
+    return s;
+}
+
 GUF_STR_KWRDS guf_str_view guf_str_to_view(const guf_str *str)
 {
     GUF_ASSERT(guf_str_is_valid(str));
diff --git a/src/test/impls/dict_impl.c b/src/test/impls/dict_impl.c
index d45360b..3e773df 100644
--- a/src/test/impls/dict_impl.c
+++ b/src/test/impls/dict_impl.c
@@ -21,6 +21,22 @@
 // #define GUF_DICT_32_BIT_HASH
 #include "guf_dict.h"
 
+#define GUF_DICT_KEY_T guf_str
+#define GUF_DICT_KEY_HASH guf_str_hash
+#define GUF_DICT_KEY_T_EQ guf_str_equal
+#define GUF_DICT_KEY_T_CMP guf_str_cmp
+#define GUF_DICT_KEY_T_COPY guf_str_copy
+#define GUF_DICT_KEY_T_MOVE guf_str_move
+#define GUF_DICT_KEY_T_FREE guf_str_free
+#define GUF_DICT_VAL_T int32_t
+#define GUF_DICT_VAL_T_IS_INTEGRAL_TYPE
+#define GUF_DICT_NAME dict_str_i32
+#define GUF_DICT_IMPL
+// #define GUF_DICT_64_BIT_IDX
+// #define GUF_DICT_PROBE_LINEAR
+// #define GUF_DICT_32_BIT_HASH
+#include "guf_dict.h"
+
 #define GUF_DICT_KEY_T int32_t
 #define GUF_DICT_KEY_HASH int32_hash
 #define GUF_DICT_KEY_T_EQ int32_eq
diff --git a/src/test/impls/dict_impl.h b/src/test/impls/dict_impl.h
index d16fd37..ba4cbad 100644
--- a/src/test/impls/dict_impl.h
+++ b/src/test/impls/dict_impl.h
@@ -26,6 +26,21 @@
 // #define GUF_DICT_32_BIT_HASH
 #include "guf_dict.h"
 
+#define GUF_DICT_KEY_T guf_str
+#define GUF_DICT_KEY_HASH guf_str_hash
+#define GUF_DICT_KEY_T_EQ guf_str_equal
+#define GUF_DICT_KEY_T_CMP guf_str_cmp
+#define GUF_DICT_KEY_T_COPY guf_str_copy
+#define GUF_DICT_KEY_T_MOVE guf_str_move
+#define GUF_DICT_KEY_T_FREE guf_str_free
+#define GUF_DICT_VAL_T int32_t
+#define GUF_DICT_VAL_T_IS_INTEGRAL_TYPE
+#define GUF_DICT_NAME dict_str_i32
+// #define GUF_DICT_64_BIT_IDX
+// #define GUF_DICT_PROBE_LINEAR
+// #define GUF_DICT_32_BIT_HASH
+#include "guf_dict.h"
+
 static inline guf_hash_size_t int32_hash(const int32_t *a)
 {
     return guf_hash(a, sizeof(int32_t), GUF_HASH_INIT); // TODO: byte order...
diff --git a/src/test/test_dbuf.cpp b/src/test/test_dbuf.cpp
index 2dbc896..d3b8646 100644
--- a/src/test/test_dbuf.cpp
+++ b/src/test/test_dbuf.cpp
@@ -550,7 +550,20 @@ void DbufCstringTest::test_find(int n)
 
 void DbufStrTest::run()
 {
-    test_push_insert_erase(16);
+    test_push_insert_erase(16);
+    test_push_insert_erase(16, 1);
+    test_push_insert_erase(16, 15);
+    test_push_insert_erase(16, 16);
+    test_push_insert_erase(16, 97);
+    test_push_insert_erase(16, 256);
+
+    test_push_insert_erase(500);
+    test_push_insert_erase(500, 1);
+    test_push_insert_erase(500, 499);
+    test_push_insert_erase(500, 500);
+    test_push_insert_erase(500, 97);
+    test_push_insert_erase(500, 256);
+
 }
 
 
diff --git a/src/test/test_dict.cpp b/src/test/test_dict.cpp
index 28898e0..2ee7127 100644
--- a/src/test/test_dict.cpp
+++ b/src/test/test_dict.cpp
@@ -49,10 +49,14 @@ void DictSvToIntTest::insert_lookup(std::optional inital_dict_cap)
 {
     std::unordered_map word_cnt_map {};
     dict_sv_i32 word_cnt_dict {};
+    dict_str_i32 word_cnt_dict_str {};
+
     if (inital_dict_cap) {
         dict_sv_i32_init_with_capacity(&word_cnt_dict, &allocator, inital_dict_cap.value());
+        dict_str_i32_init_with_capacity(&word_cnt_dict_str, &allocator, inital_dict_cap.value());
     } else {
         dict_sv_i32_init(&word_cnt_dict, &allocator);
+        dict_str_i32_init(&word_cnt_dict_str, &allocator);
     }
 
     dbuf_str_view delims = dbuf_str_view_new(&allocator);
@@ -77,26 +81,43 @@ void DictSvToIntTest::insert_lookup(std::optional inital_dict_cap)
         if (!dict_sv_i32_contains(&word_cnt_dict, &tok)) {
             dict_sv_i32_insert_val_arg(&word_cnt_dict, tok, 1, GUF_CPY_VALUE, GUF_CPY_VALUE);
             word_cnt_map.insert({sv, 1});
+            if (TEST_CHECK(!dict_str_i32_contains_val_arg(&word_cnt_dict_str, guf_str_new_readonly(tok)))) {
+                dict_str_i32_insert_val_arg(&word_cnt_dict_str, guf_str_new(tok, &allocator), 1, GUF_CPY_MOVE, GUF_CPY_VALUE);
+            }
         } else {
             int32_t *cnt = dict_sv_i32_at_val_arg(&word_cnt_dict, tok);
             if (TEST_CHECK(cnt)) {
                 *cnt += 1;
             }
+            int32_t *cnt_2 = dict_str_i32_at_val_arg(&word_cnt_dict_str, guf_str_new_readonly(tok));
+            if (TEST_CHECK(cnt_2)) {
+                *cnt_2 += 1;
+            }
+            // else {
+            //     std::cout << "tok: " << std::string_view{tok.str, (size_t)tok.len} << "\n";
+            // }
             word_cnt_map.at(sv) += 1;
         }
         // printf("tok_len: %td ", tok.len);
         // printf("'%.*s'\n", (int)tok.len, tok.str);
         TEST_CHECK(dict_sv_i32_debug_valid_size(&word_cnt_dict));
+        TEST_CHECK(dict_str_i32_debug_valid_size(&word_cnt_dict_str));
+
     }
     dbuf_str_view_free(&delims, NULL);
 
     TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == std::ssize(word_cnt_map));
     TEST_CHECK(dict_sv_i32_debug_valid_size(&word_cnt_dict));
 
+    TEST_CHECK(dict_str_i32_size(&word_cnt_dict_str) == std::ssize(word_cnt_map));
+    TEST_CHECK(dict_str_i32_debug_valid_size(&word_cnt_dict_str));
+
     for (const auto & [word, cnt] : word_cnt_map ) {
         guf_str_view sv = {.str = word.data(), .len = (ptrdiff_t)word.size()};
         int32_t *res = dict_sv_i32_at(&word_cnt_dict, &sv);
+        int32_t *res2 = dict_str_i32_at_val_arg(&word_cnt_dict_str, guf_str_new_readonly(sv));
         TEST_CHECK(res && *res == cnt);
+        TEST_CHECK(res2 && *res2 == cnt);
     }
 
     ptrdiff_t i = 0;
@@ -104,7 +125,6 @@ void DictSvToIntTest::insert_lookup(std::optional inital_dict_cap)
         const dict_sv_i32_kv *kv = kv_it.ptr;
         if (TEST_CHECK(kv)) {
             const int32_t cnt = kv->val;
-            // printf("%.*s: %d\n", (int)kv->key.len, kv->key.str, cnt);
             const std::string_view sv(kv->key.str, kv->key.len);
             if (TEST_CHECK(word_cnt_map.contains(sv))) {
                 TEST_CHECK(word_cnt_map.at(sv) == cnt);
@@ -116,6 +136,24 @@ void DictSvToIntTest::insert_lookup(std::optional inital_dict_cap)
     TEST_CHECK(i == std::ssize(word_cnt_map));
     TEST_CHECK(dict_sv_i32_debug_valid_size(&word_cnt_dict));
 
+    i = 0;
+    GUF_CNT_FOREACH(&word_cnt_dict_str, dict_str_i32, kv_it) {
+        const dict_str_i32_kv *kv = kv_it.ptr;
+        if (TEST_CHECK(kv)) {
+            const int32_t cnt = kv->val;
+            const std::string_view sv(guf_str_const_cstr(&kv->key), guf_str_len(&kv->key));
+            // std::cout << sv << "\n";
+
+            if (TEST_CHECK(word_cnt_map.contains(sv))) {
+                TEST_CHECK(word_cnt_map.at(sv) == cnt);
+            }
+        }
+        ++i;
+    }
+    TEST_CHECK(i == dict_str_i32_size(&word_cnt_dict_str));
+    TEST_CHECK(i == std::ssize(word_cnt_map));
+    TEST_CHECK(dict_str_i32_debug_valid_size(&word_cnt_dict_str));
+
     // std::cout << "load fac: " << dict_sv_i32_load_factor(&word_cnt_dict) << ", cap: " << word_cnt_dict.kv_indices_cap << " elem cap: " << word_cnt_dict.kv_elems.capacity << "\n";
     // std::cout << "size: " << dict_sv_i32_size(&word_cnt_dict) << ", max probelen: " << word_cnt_dict.max_probelen << "\n";
     // std::cout << "mem usage: " << dict_sv_i32_memory_usage(&word_cnt_dict) << "\n";
@@ -334,7 +372,29 @@ void DictSvToIntTest::insert_lookup(std::optional inital_dict_cap)
     TEST_CHECK(word_cnt_dict.kv_elems.size == 0);
     TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == 0);
 
+
+    std::string str;
+    for (size_t c = 0; c < GUF_STR_SSO_BUF_CAP * 4; ++c) {
+        str += c % 2 ? "AAA" : "aaa";
+    }
+
+    guf_str str_cpy = guf_str_new(guf_str_view{.str = str.data(), .len = (ptrdiff_t)str.size()}, &allocator); // designators in the same .str/.len order used elsewhere in this file (C++20 requires declaration order)
+    dict_str_i32_insert_val_arg(&word_cnt_dict_str, str_cpy, 42, GUF_CPY_DEEP, GUF_CPY_VALUE); // GUF_CPY_DEEP: the dict stores its own copy of the key
+    int32_t *foo = dict_str_i32_at_val_arg(&word_cnt_dict_str, guf_str_new_readonly(guf_str_view_from_str(&str_cpy)));
+    if (TEST_CHECK(foo)) {
+        TEST_CHECK(*foo == 42);
+    }
+    guf_str_append(&str_cpy, GUF_CSTR_LIT_TO_VIEW_CPP("Foobar")); // mutate the caller's string; the lookup below still uses the original contents
+    int32_t *foo2 = dict_str_i32_at_val_arg(&word_cnt_dict_str, guf_str_new_readonly(guf_str_view{.str = str.data(), .len = (ptrdiff_t)str.size()}));
+    if (TEST_CHECK(foo2)) {
+        TEST_CHECK(*foo2 == 42);
+    }
+
+    guf_str_free(&str_cpy, NULL);
+
     dict_sv_i32_free(&word_cnt_dict, NULL);
+    dict_str_i32_free(&word_cnt_dict_str, NULL);
+
     bool dbuf_null = !word_cnt_dict.kv_elems.data && !word_cnt_dict.kv_elems.allocator && !word_cnt_dict.kv_elems.capacity && !word_cnt_dict.kv_elems.size;
     TEST_CHECK(dbuf_null && !word_cnt_dict.kv_indices && !word_cnt_dict.kv_indices_cap && !word_cnt_dict.max_probelen && !word_cnt_dict.num_tombstones);
 }
diff --git a/todo.txt b/todo.txt
index 3f9a0e1..f05acef 100644
--- a/todo.txt
+++ b/todo.txt
@@ -1,3 +1,4 @@
+- fix readonly str
 - make guf_utf8_char 4 bytes (non-null terminated)