From 364dd603cf6819e7ae77594361fde85dd2dbede5 Mon Sep 17 00:00:00 2001 From: jun <83899451+zeichensystem@users.noreply.github.com> Date: Thu, 6 Mar 2025 13:51:32 +0100 Subject: [PATCH] Fix guf_dict size types --- src/guf_common.h | 7 ++++- src/guf_dict.h | 62 ++++++++++++++++++++++++++-------------- src/guf_hash.h | 9 +++--- src/guf_init.h | 15 ++-------- src/test/guf_dict_impl.h | 1 + src/test/test_dbuf.hpp | 4 +-- src/test/test_dict.hpp | 4 +-- src/test/test_utf8.hpp | 8 +++--- 8 files changed, 61 insertions(+), 49 deletions(-) diff --git a/src/guf_common.h b/src/guf_common.h index 93435b3..b8ddfae 100644 --- a/src/guf_common.h +++ b/src/guf_common.h @@ -15,7 +15,12 @@ #elif SIZE_MAX == UINT8_MAX #define GUF_PLATFORM_BITS 8 #else - #error "Could not detect GUF_PLATFORM_BITS" + #define GUF_PLATFORM_BITS 64 + #error "libguf: Could not detect GUF_PLATFORM_BITS" +#endif + +#if GUF_PLATFORM_BITS <= 32 + #define GUF_HASH_32_BIT #endif /* diff --git a/src/guf_dict.h b/src/guf_dict.h index 1fd6a93..ea2e3cd 100755 --- a/src/guf_dict.h +++ b/src/guf_dict.h @@ -43,22 +43,32 @@ #endif #if defined(GUF_DICT_32_BIT) - #define GUF_DICT_SIZE_T uint32_t + #define GUF_DICT_KV_IDX_T uint32_t #define GUF_DICT_KV_META_T guf_dict_kv_meta_32 #define GUF_DICT_KV_IDX_NULL UINT32_MAX #elif defined(GUF_DICT_64_BIT) - #define GUF_DICT_SIZE_T uint64_t + #define GUF_DICT_KV_IDX_T uint64_t #define GUF_DICT_KV_META_T guf_dict_kv_meta_64 #define GUF_DICT_KV_IDX_NULL UINT64_MAX #else - #define GUF_DICT_SIZE_T guf_hash_size_t + #define GUF_DICT_KV_IDX_T guf_hash_size_t #define GUF_DICT_KV_META_T guf_dict_kv_meta #define GUF_DICT_KV_IDX_NULL GUF_HASH_MAX #endif #define GUF_DICT_KV_IDX_TOMBSTONE (GUF_DICT_KV_IDX_NULL - 1) -// TODO -#define GUF_DICT_MAX_SIZE GUF_MIN(GUF_DICT_KV_IDX_TOMBSTONE - 1, PTRDIFF_MAX) + +#if PTRDIFF_MAX <= SIZE_T_MAX + #define GUF_DICT_MAX_PTR PTRDIFF_MAX +#else + #define GUF_DICT_MAX_PTR SIZE_T_MAX +#endif + +#if (GUF_DICT_KV_IDX_TOMBSTONE - 1) <= GUF_DICT_MAX_PTR + #define GUF_DICT_KV_IDX_T_MAX (GUF_DICT_KV_IDX_TOMBSTONE - 1) +#else + #define GUF_DICT_KV_IDX_T_MAX GUF_DICT_MAX_PTR +#endif // #ifndef GUF_DICT_KEY_LOOKUP_T // #define GUF_DICT_KEY_LOOKUP_T GUF_DICT_KEY_T @@ -280,20 +290,20 @@ static size_t GUF_CAT(GUF_DICT_NAME, _find_idx)(GUF_DICT_NAME *ht, const GUF_DIC { if (ht->kv_indices_cap <= 0) { *key_exists = false; - return SIZE_MAX; + return SIZE_T_MAX; } - const GUF_DICT_SIZE_T hash = GUF_DICT_KEY_HASH(key); + const GUF_DICT_KV_IDX_T hash = GUF_DICT_KEY_HASH(key); #define GUF_MOD_CAP(a) ((size_t)(a) & (size_t)(ht->kv_indices_cap - 1)) // a % ht->kv_indices_cap (kv_indices_cap must be a power of two > 0) size_t idx = GUF_MOD_CAP(hash); const size_t start_idx = idx; - size_t first_tombstone_idx = SIZE_MAX; + size_t first_tombstone_idx = SIZE_T_MAX; size_t probe_len = 0; // size_t seen_occupied = 0; // This allows us to bail out early once we visited every non-null/non-tombstone kv_idx. do { if (ht->kv_indices[idx].kv_idx == GUF_DICT_KV_IDX_NULL) { // 1.) Empty. - if (first_tombstone_idx != SIZE_MAX) { + if (first_tombstone_idx != SIZE_T_MAX) { idx = first_tombstone_idx; } ht->max_probelen = GUF_MAX((ptrdiff_t)probe_len, ht->max_probelen); @@ -301,7 +311,7 @@ static size_t GUF_CAT(GUF_DICT_NAME, _find_idx)(GUF_DICT_NAME *ht, const GUF_DIC *key_exists = false; return idx; } else if (ht->kv_indices[idx].kv_idx == GUF_DICT_KV_IDX_TOMBSTONE) { // 2.) Tombstone. - if (first_tombstone_idx == SIZE_MAX) { + if (first_tombstone_idx == SIZE_T_MAX) { first_tombstone_idx = idx; } goto probe; @@ -321,12 +331,12 @@ static size_t GUF_CAT(GUF_DICT_NAME, _find_idx)(GUF_DICT_NAME *ht, const GUF_DIC } while (idx != start_idx && probe_len < (size_t)ht->kv_indices_cap); *key_exists = false; - if (first_tombstone_idx != SIZE_MAX) { // Edge case: No empty slots, but found tombstone. + if (first_tombstone_idx != SIZE_T_MAX) { // Edge case: No empty slots, but found tombstone. ht->max_probelen = GUF_MAX((ptrdiff_t)probe_len, ht->max_probelen); GUF_ASSERT(ht->kv_indices[first_tombstone_idx].kv_idx == GUF_DICT_KV_IDX_NULL); return first_tombstone_idx; } else { // Failed to find an idx. - return SIZE_MAX; + return SIZE_T_MAX; } #undef GUF_MOD_CAP } @@ -365,6 +375,7 @@ static void GUF_CAT(GUF_DICT_NAME, _try_grow_if_necessary)(GUF_DICT_NAME *ht, gu guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, GUF_ERR_MSG("in function dict_try_insert: New capacity would overflow)")); return; } + // TODO: Not sure if alloc and free is better here than realloc (since we do not copy ht->kv_indices anyway.) GUF_DICT_KV_META_T *new_kv_indices = allocator->alloc(new_size, allocator->ctx); if (new_kv_indices == NULL) { @@ -382,14 +393,15 @@ static void GUF_CAT(GUF_DICT_NAME, _try_grow_if_necessary)(GUF_DICT_NAME *ht, gu ht->kv_indices[i].key_hash = 0; } + GUF_ASSERT(ht->kv_elems.size <= GUF_DICT_KV_IDX_T_MAX); for (ptrdiff_t kv_idx = 0; kv_idx < ht->kv_elems.size; ++kv_idx) { // Re-insert keys. const GUF_DICT_KV_NAME *kv = GUF_CAT(GUF_DICT_KV_DBUF, _at)(&ht->kv_elems, kv_idx); GUF_ASSERT(kv); bool key_exists = false; const size_t new_idx = GUF_CAT(GUF_DICT_NAME, _find_idx)(ht, &kv->key, &key_exists); GUF_ASSERT(!key_exists); - GUF_ASSERT(new_idx < SIZE_MAX); - ht->kv_indices[new_idx].kv_idx = kv_idx; + GUF_ASSERT(new_idx < SIZE_T_MAX && new_idx <= PTRDIFF_MAX); + ht->kv_indices[new_idx].kv_idx = (GUF_DICT_KV_IDX_T)kv_idx; ht->kv_indices[new_idx].key_hash = GUF_DICT_KEY_HASH(&kv->key); } } @@ -407,11 +419,12 @@ GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, GUF_D guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, GUF_ERR_MSG("in function dict_try_insert: key or val argument is NULL")); return; } - if ((size_t)ht->kv_elems.size == GUF_DICT_MAX_SIZE) { + + if (ht->kv_elems.size >= GUF_DICT_KV_IDX_T_MAX) { guf_err_set_or_panic(err, GUF_ERR_INT_OVERFLOW, GUF_ERR_MSG("in function dict_try_insert: dict has reached its max size (UINT64_MAX - 2 or UINT32_MAX - 2)")); return; } - + // 1.) Grow kv-index-buffer if neccessary (or make the initial allocation.) GUF_CAT(GUF_DICT_NAME, _try_grow_if_necessary)(ht, err); if (err != NULL && *err != GUF_ERR_NONE) { @@ -434,8 +447,9 @@ GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, GUF_D GUF_ASSERT_RELEASE(ht->num_tombstones >= 0); } + GUF_ASSERT(ht->kv_elems.size <= GUF_DICT_KV_IDX_T_MAX); ht->kv_indices[idx].key_hash = GUF_DICT_KEY_HASH(key); - ht->kv_indices[idx].kv_idx = ht->kv_elems.size; + ht->kv_indices[idx].kv_idx = (GUF_DICT_KV_IDX_T)ht->kv_elems.size; GUF_DICT_KEY_T key_cpy; GUF_DICT_KEY_T *key_cpy_res = NULL; @@ -531,7 +545,7 @@ GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _insert_val_arg)(GUF_DICT_NAME *ht, G if (!key_exists) { return NULL; } else { - GUF_ASSERT(idx != SIZE_MAX); + GUF_ASSERT(idx != SIZE_T_MAX); GUF_ASSERT((ptrdiff_t)idx < ht->kv_indices_cap); const size_t kv_idx = ht->kv_indices[idx].kv_idx; GUF_ASSERT((ptrdiff_t)kv_idx < ht->kv_elems.size); @@ -555,7 +569,7 @@ GUF_DICT_KWRDS bool GUF_CAT(GUF_DICT_NAME, _contains)(GUF_DICT_NAME *ht, const G bool key_exists = false; const size_t idx = GUF_CAT(GUF_DICT_NAME, _find_idx)(ht, key, &key_exists); if (key_exists) { - GUF_ASSERT(idx != SIZE_MAX); + GUF_ASSERT(idx != SIZE_T_MAX); GUF_ASSERT(ht->kv_indices[idx].kv_idx != GUF_DICT_KV_IDX_TOMBSTONE); GUF_ASSERT(ht->kv_indices[idx].kv_idx != GUF_DICT_KV_IDX_NULL); } @@ -613,7 +627,8 @@ GUF_DICT_KWRDS bool GUF_CAT(GUF_DICT_NAME, _erase)(GUF_DICT_NAME *ht, const GUF_ GUF_ASSERT(last_key_exists && (ptrdiff_t)last_idx < ht->kv_indices_cap); GUF_ASSERT((ptrdiff_t)ht->kv_indices[last_idx].kv_idx == ht->kv_elems.size - 1); GUF_ASSERT(ht->kv_indices[last_idx].kv_idx != GUF_DICT_KV_IDX_TOMBSTONE && ht->kv_indices[last_idx].kv_idx != GUF_DICT_KV_IDX_NULL); - ht->kv_indices[last_idx].kv_idx = kv_idx; + GUF_ASSERT(kv_idx <= GUF_DICT_KV_IDX_T_MAX); + ht->kv_indices[last_idx].kv_idx = (GUF_DICT_KV_IDX_T)kv_idx; } ht->kv_elems.size -= 1; @@ -775,9 +790,12 @@ GUF_DICT_KWRDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _find_val_if #undef GUF_DICT_KV_IDX_NULL #undef GUF_DICT_KV_IDX_TOMBSTONE #undef GUF_DICT_32_BIT -#undef GUF_DICT_SIZE_T -#undef GUF_DICT_MAX_SIZE +#undef GUF_DICT_64_BIT + +#undef GUF_DICT_KV_IDX_T +#undef GUF_DICT_KV_IDX_T_MAX #undef GUF_DICT_KV_META_T +#undef GUF_DICT_MAX_PTR #undef GUF_DICT_NAME #undef GUF_DICT_IS_SET diff --git a/src/guf_hash.h b/src/guf_hash.h index bac568c..97eced0 100644 --- a/src/guf_hash.h +++ b/src/guf_hash.h @@ -14,7 +14,6 @@ cf. http://www.isthe.com/chongo/tech/comp/fnv/ (last retrieved: 2023-11-30) */ - #define GUF_HASH32_INIT UINT32_C(2166136261) #define GUF_HASH64_INIT UINT64_C(14695981039346656037) @@ -25,14 +24,14 @@ GUF_HASH_KWRDS uint64_t guf_hash64(const void *data, ptrdiff_t num_bytes, uint64 typedef uint32_t guf_hash_size_t; #define GUF_HASH_INIT GUF_HASH32_INIT #define GUF_HASH_MAX UINT32_MAX - static inline guf_hash_size_t guf_hash(const void *data, ptrdiff_t num_bytes, uint32_t hash) { + static inline guf_hash_size_t guf_hash(const void *data, ptrdiff_t num_bytes, guf_hash_size_t hash) { return guf_hash32(data, num_bytes, hash); } #else typedef uint64_t guf_hash_size_t; #define GUF_HASH_INIT GUF_HASH64_INIT #define GUF_HASH_MAX UINT64_MAX - static inline guf_hash_size_t guf_hash(const void *data, ptrdiff_t num_bytes, uint64_t hash) { + static inline guf_hash_size_t guf_hash(const void *data, ptrdiff_t num_bytes, guf_hash_size_t hash) { return guf_hash64(data, num_bytes, hash); } #endif @@ -53,7 +52,7 @@ GUF_HASH_KWRDS uint32_t guf_hash32(const void *data, ptrdiff_t num_bytes, uint32 GUF_ASSERT_RELEASE(data); GUF_ASSERT_RELEASE(num_bytes >= 0); const unsigned char *data_bytes = (const unsigned char*)data; // This does not break strict-aliasing rules I think... - const uint32_t FNV_32_PRIME = 16777619ul; + const uint32_t FNV_32_PRIME = UINT32_C(16777619); for (ptrdiff_t i = 0; i < num_bytes; ++i) { hash ^= data_bytes[i]; hash *= FNV_32_PRIME; @@ -66,7 +65,7 @@ GUF_HASH_KWRDS uint64_t guf_hash64(const void *data, ptrdiff_t num_bytes, uint64 GUF_ASSERT_RELEASE(data); GUF_ASSERT_RELEASE(num_bytes >= 0); const unsigned char *data_bytes = (const unsigned char*)data; // This does not break strict-aliasing rules I think... - const uint64_t FNV_64_PRIME = 1099511628211ull; + const uint64_t FNV_64_PRIME = UINT64_C(1099511628211); for (ptrdiff_t i = 0; i < num_bytes; ++i) { hash ^= data_bytes[i]; hash *= FNV_64_PRIME; diff --git a/src/guf_init.h b/src/guf_init.h index f674bac..cee0e1e 100644 --- a/src/guf_init.h +++ b/src/guf_init.h @@ -1,6 +1,8 @@ #ifndef GUF_INIT_H #define GUF_INIT_H +#include "guf_common.h" + // Set up the global panic handler. #define GUF_INIT #include "guf_assert.h" @@ -8,17 +10,4 @@ #define GUF_HASH_IMPL #include "guf_hash.h" -// static inline bool guf_init(void) -// { -// static bool guf_is_init = false; - -// if (guf_is_init) { -// printf("libguf already initialised\n"); -// return true; -// } - -// guf_is_init = true; -// return guf_is_init; -// } - #endif diff --git a/src/test/guf_dict_impl.h b/src/test/guf_dict_impl.h index ab37b63..cf4407a 100644 --- a/src/test/guf_dict_impl.h +++ b/src/test/guf_dict_impl.h @@ -29,6 +29,7 @@ static inline bool int32_eq(const int32_t *a, const int32_t *b) { return *a == *b; } + #define GUF_DICT_KEY_T int32_t #define GUF_DICT_KEY_HASH int32_hash #define GUF_DICT_KEY_T_EQ int32_eq diff --git a/src/test/test_dbuf.hpp b/src/test/test_dbuf.hpp index ff49c9d..d35d99a 100644 --- a/src/test/test_dbuf.hpp +++ b/src/test/test_dbuf.hpp @@ -510,8 +510,8 @@ struct DbufCstringTest : public Test for (int i = 0; i < 512; ++i) { char buf[128]; - memset(buf, '\0', GUF_STATIC_BUF_SIZE(buf)); - snprintf(buf, GUF_STATIC_BUF_SIZE(buf), "This is a pretty guf string (number %d)", i); + memset(buf, '\0', GUF_ARR_SIZE(buf)); + snprintf(buf, GUF_ARR_SIZE(buf), "This is a pretty guf string (number %d)", i); guf_cstr_heap str = buf; dbuf_heap_cstr_push(&str_dbuf, &str, GUF_CPY_DEEP); str_vec.push_back(std::string{buf}); diff --git a/src/test/test_dict.hpp b/src/test/test_dict.hpp index adcf5f6..7d78430 100644 --- a/src/test/test_dict.hpp +++ b/src/test/test_dict.hpp @@ -26,11 +26,11 @@ struct DictSvToIntTest : public Test dict_sv_i32_init(&word_cnt_dict, &guf_allocator_libc); dbuf_str_view delims = dbuf_str_view_new(&guf_allocator_libc); - for (size_t i = 0; i < GUF_STATIC_BUF_SIZE(GUF_UTF8_WHITESPACE); ++i) { + for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_WHITESPACE); ++i) { guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_WHITESPACE[i]), .str = GUF_UTF8_WHITESPACE[i]}; dbuf_str_view_push_val(&delims, d); } - for (size_t i = 0; i < GUF_STATIC_BUF_SIZE(GUF_UTF8_COMMON_PUNCT); ++i) { + for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_COMMON_PUNCT); ++i) { guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_COMMON_PUNCT[i]), .str = GUF_UTF8_COMMON_PUNCT[i]}; dbuf_str_view_push_val(&delims, d); } diff --git a/src/test/test_utf8.hpp b/src/test/test_utf8.hpp index 9e69630..0ffbf45 100644 --- a/src/test/test_utf8.hpp +++ b/src/test/test_utf8.hpp @@ -323,7 +323,7 @@ struct UTF8Test : public Test TEST_CHECK(guf_utf8_decode(&utf8) == GUF_UTF8_REPLACEMENT_CHAR_CODEPOINT); char buf[] = {'\x2F', '\xC0', '\xAE', '\x2E', '\x2F'}; - guf_str_view input_str = {.str = buf, .len = GUF_STATIC_BUF_SIZE(buf)}; + guf_str_view input_str = {.str = buf, .len = GUF_ARR_SIZE(buf)}; guf_utf8_char ch = {}; int valid_chars = 0, invalid_chars = 0; for (guf_utf8_stat stat = guf_utf8_char_next(&ch, &input_str); stat != GUF_UTF8_READ_DONE; stat = guf_utf8_char_next(&ch, &input_str)) { @@ -336,7 +336,7 @@ struct UTF8Test : public Test TEST_CHECK(invalid_chars == 2 && valid_chars == 3); char buf2[] = {'\xE0', '\x80', 'a', 'b', 'c'}; // 1 invalid 3-byte-character, 2 valid 1-byte-characters - input_str = {.str = buf2, .len = GUF_STATIC_BUF_SIZE(buf2)}; + input_str = {.str = buf2, .len = GUF_ARR_SIZE(buf2)}; ch = {}; valid_chars = invalid_chars = 0; for (guf_utf8_stat stat = guf_utf8_char_next(&ch, &input_str); stat != GUF_UTF8_READ_DONE; stat = guf_utf8_char_next(&ch, &input_str)) { @@ -367,11 +367,11 @@ struct UTF8Test : public Test TEST_CHECK(valid > 16000 && invalid == 0); dbuf_str_view delims = dbuf_str_view_new(&guf_allocator_libc); - for (size_t i = 0; i < GUF_STATIC_BUF_SIZE(GUF_UTF8_WHITESPACE); ++i) { + for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_WHITESPACE); ++i) { guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_WHITESPACE[i]), .str = GUF_UTF8_WHITESPACE[i]}; dbuf_str_view_push_val(&delims, d); } - for (size_t i = 0; i < GUF_STATIC_BUF_SIZE(GUF_UTF8_COMMON_PUNCT); ++i) { + for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_COMMON_PUNCT); ++i) { guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_COMMON_PUNCT[i]), .str = GUF_UTF8_COMMON_PUNCT[i]}; dbuf_str_view_push_val(&delims, d); }