Fix guf_dict size types

This commit is contained in:
jun 2025-03-06 13:51:32 +01:00
parent d057d76334
commit 364dd603cf
8 changed files with 61 additions and 49 deletions

View File

@ -15,7 +15,12 @@
#elif SIZE_MAX == UINT8_MAX
#define GUF_PLATFORM_BITS 8
#else
#error "Could not detect GUF_PLATFORM_BITS"
#define GUF_PLATFORM_BITS 64
#error "libguf: Could not detect GUF_PLATFORM_BITS"
#endif
// Define GUF_HASH_32_BIT whenever the detected platform width is 32 bits or less.
// NOTE(review): presumably guf_hash.h uses this to pick the 32-bit guf_hash_size_t -- confirm.
#if GUF_PLATFORM_BITS <= 32
#define GUF_HASH_32_BIT
#endif
/*

View File

@ -43,22 +43,32 @@
#endif
/*
    Choose the integer type stored as kv_idx in the index table, the matching meta struct,
    and the sentinel values for that type.
    - GUF_DICT_32_BIT / GUF_DICT_64_BIT force a fixed 32-bit / 64-bit index type.
    - Otherwise guf_hash_size_t is used.
    GUF_DICT_KV_IDX_NULL (the largest value of the chosen type) marks an empty slot;
    GUF_DICT_KV_IDX_TOMBSTONE (one below it) marks a tombstone.
*/
#if defined(GUF_DICT_32_BIT)
#define GUF_DICT_SIZE_T uint32_t
#define GUF_DICT_KV_IDX_T uint32_t
#define GUF_DICT_KV_META_T guf_dict_kv_meta_32
#define GUF_DICT_KV_IDX_NULL UINT32_MAX
#elif defined(GUF_DICT_64_BIT)
#define GUF_DICT_SIZE_T uint64_t
#define GUF_DICT_KV_IDX_T uint64_t
#define GUF_DICT_KV_META_T guf_dict_kv_meta_64
#define GUF_DICT_KV_IDX_NULL UINT64_MAX
#else
#define GUF_DICT_SIZE_T guf_hash_size_t
#define GUF_DICT_KV_IDX_T guf_hash_size_t
#define GUF_DICT_KV_META_T guf_dict_kv_meta
#define GUF_DICT_KV_IDX_NULL GUF_HASH_MAX
#endif
#define GUF_DICT_KV_IDX_TOMBSTONE (GUF_DICT_KV_IDX_NULL - 1)
// TODO
#define GUF_DICT_MAX_SIZE GUF_MIN(GUF_DICT_KV_IDX_TOMBSTONE - 1, PTRDIFF_MAX)
// GUF_DICT_MAX_PTR: the smaller of PTRDIFF_MAX and SIZE_MAX, i.e. the largest value that is
// representable both as a ptrdiff_t and as a size_t.
// Uses the standard SIZE_MAX from <stdint.h>: SIZE_T_MAX is a non-standard extension, and an
// undefined identifier inside #if is evaluated as 0, which would select the wrong branch and
// then expand to an undeclared name.
#if PTRDIFF_MAX <= SIZE_MAX
#define GUF_DICT_MAX_PTR PTRDIFF_MAX
#else
#define GUF_DICT_MAX_PTR SIZE_MAX
#endif
// GUF_DICT_KV_IDX_T_MAX: upper bound that kv_elems.size is checked against on insert.
// It is the last value below the tombstone sentinel, capped at GUF_DICT_MAX_PTR.
// The comparison is done by the preprocessor, so both operands must be integer constant macros.
#if (GUF_DICT_KV_IDX_TOMBSTONE - 1) <= GUF_DICT_MAX_PTR
#define GUF_DICT_KV_IDX_T_MAX (GUF_DICT_KV_IDX_TOMBSTONE - 1)
#else
#define GUF_DICT_KV_IDX_T_MAX GUF_DICT_MAX_PTR
#endif
// #ifndef GUF_DICT_KEY_LOOKUP_T
// #define GUF_DICT_KEY_LOOKUP_T GUF_DICT_KEY_T
@ -280,20 +290,20 @@ static size_t GUF_CAT(GUF_DICT_NAME, _find_idx)(GUF_DICT_NAME *ht, const GUF_DIC
{
if (ht->kv_indices_cap <= 0) {
*key_exists = false;
return SIZE_MAX;
return SIZE_T_MAX;
}
const GUF_DICT_SIZE_T hash = GUF_DICT_KEY_HASH(key);
const GUF_DICT_KV_IDX_T hash = GUF_DICT_KEY_HASH(key);
#define GUF_MOD_CAP(a) ((size_t)(a) & (size_t)(ht->kv_indices_cap - 1)) // a % ht->kv_indices_cap (kv_indices_cap must be a power of two > 0)
size_t idx = GUF_MOD_CAP(hash);
const size_t start_idx = idx;
size_t first_tombstone_idx = SIZE_MAX;
size_t first_tombstone_idx = SIZE_T_MAX;
size_t probe_len = 0;
// size_t seen_occupied = 0; // This allows us to bail out early once we visited every non-null/non-tombstone kv_idx.
do {
if (ht->kv_indices[idx].kv_idx == GUF_DICT_KV_IDX_NULL) { // 1.) Empty.
if (first_tombstone_idx != SIZE_MAX) {
if (first_tombstone_idx != SIZE_T_MAX) {
idx = first_tombstone_idx;
}
ht->max_probelen = GUF_MAX((ptrdiff_t)probe_len, ht->max_probelen);
@ -301,7 +311,7 @@ static size_t GUF_CAT(GUF_DICT_NAME, _find_idx)(GUF_DICT_NAME *ht, const GUF_DIC
*key_exists = false;
return idx;
} else if (ht->kv_indices[idx].kv_idx == GUF_DICT_KV_IDX_TOMBSTONE) { // 2.) Tombstone.
if (first_tombstone_idx == SIZE_MAX) {
if (first_tombstone_idx == SIZE_T_MAX) {
first_tombstone_idx = idx;
}
goto probe;
@ -321,12 +331,12 @@ static size_t GUF_CAT(GUF_DICT_NAME, _find_idx)(GUF_DICT_NAME *ht, const GUF_DIC
} while (idx != start_idx && probe_len < (size_t)ht->kv_indices_cap);
*key_exists = false;
if (first_tombstone_idx != SIZE_MAX) { // Edge case: No empty slots, but found tombstone.
if (first_tombstone_idx != SIZE_T_MAX) { // Edge case: No empty slots, but found tombstone.
ht->max_probelen = GUF_MAX((ptrdiff_t)probe_len, ht->max_probelen);
GUF_ASSERT(ht->kv_indices[first_tombstone_idx].kv_idx == GUF_DICT_KV_IDX_NULL);
return first_tombstone_idx;
} else { // Failed to find an idx.
return SIZE_MAX;
return SIZE_T_MAX;
}
#undef GUF_MOD_CAP
}
@ -365,6 +375,7 @@ static void GUF_CAT(GUF_DICT_NAME, _try_grow_if_necessary)(GUF_DICT_NAME *ht, gu
guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, GUF_ERR_MSG("in function dict_try_insert: New capacity would overflow)"));
return;
}
// TODO: Decide whether alloc + free is preferable to realloc here (the old ht->kv_indices contents are not copied anyway).
GUF_DICT_KV_META_T *new_kv_indices = allocator->alloc(new_size, allocator->ctx);
if (new_kv_indices == NULL) {
@ -382,14 +393,15 @@ static void GUF_CAT(GUF_DICT_NAME, _try_grow_if_necessary)(GUF_DICT_NAME *ht, gu
ht->kv_indices[i].key_hash = 0;
}
GUF_ASSERT(ht->kv_elems.size <= GUF_DICT_KV_IDX_T_MAX);
for (ptrdiff_t kv_idx = 0; kv_idx < ht->kv_elems.size; ++kv_idx) { // Re-insert keys.
const GUF_DICT_KV_NAME *kv = GUF_CAT(GUF_DICT_KV_DBUF, _at)(&ht->kv_elems, kv_idx);
GUF_ASSERT(kv);
bool key_exists = false;
const size_t new_idx = GUF_CAT(GUF_DICT_NAME, _find_idx)(ht, &kv->key, &key_exists);
GUF_ASSERT(!key_exists);
GUF_ASSERT(new_idx < SIZE_MAX);
ht->kv_indices[new_idx].kv_idx = kv_idx;
GUF_ASSERT(new_idx < SIZE_T_MAX && new_idx <= PTRDIFF_MAX);
ht->kv_indices[new_idx].kv_idx = (GUF_DICT_KV_IDX_T)kv_idx;
ht->kv_indices[new_idx].key_hash = GUF_DICT_KEY_HASH(&kv->key);
}
}
@ -407,7 +419,8 @@ GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, GUF_D
guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, GUF_ERR_MSG("in function dict_try_insert: key or val argument is NULL"));
return;
}
if ((size_t)ht->kv_elems.size == GUF_DICT_MAX_SIZE) {
if (ht->kv_elems.size >= GUF_DICT_KV_IDX_T_MAX) {
guf_err_set_or_panic(err, GUF_ERR_INT_OVERFLOW, GUF_ERR_MSG("in function dict_try_insert: dict has reached its max size (UINT64_MAX - 2 or UINT32_MAX - 2)"));
return;
}
@ -434,8 +447,9 @@ GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, GUF_D
GUF_ASSERT_RELEASE(ht->num_tombstones >= 0);
}
GUF_ASSERT(ht->kv_elems.size <= GUF_DICT_KV_IDX_T_MAX);
ht->kv_indices[idx].key_hash = GUF_DICT_KEY_HASH(key);
ht->kv_indices[idx].kv_idx = ht->kv_elems.size;
ht->kv_indices[idx].kv_idx = (GUF_DICT_KV_IDX_T)ht->kv_elems.size;
GUF_DICT_KEY_T key_cpy;
GUF_DICT_KEY_T *key_cpy_res = NULL;
@ -531,7 +545,7 @@ GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _insert_val_arg)(GUF_DICT_NAME *ht, G
if (!key_exists) {
return NULL;
} else {
GUF_ASSERT(idx != SIZE_MAX);
GUF_ASSERT(idx != SIZE_T_MAX);
GUF_ASSERT((ptrdiff_t)idx < ht->kv_indices_cap);
const size_t kv_idx = ht->kv_indices[idx].kv_idx;
GUF_ASSERT((ptrdiff_t)kv_idx < ht->kv_elems.size);
@ -555,7 +569,7 @@ GUF_DICT_KWRDS bool GUF_CAT(GUF_DICT_NAME, _contains)(GUF_DICT_NAME *ht, const G
bool key_exists = false;
const size_t idx = GUF_CAT(GUF_DICT_NAME, _find_idx)(ht, key, &key_exists);
if (key_exists) {
GUF_ASSERT(idx != SIZE_MAX);
GUF_ASSERT(idx != SIZE_T_MAX);
GUF_ASSERT(ht->kv_indices[idx].kv_idx != GUF_DICT_KV_IDX_TOMBSTONE);
GUF_ASSERT(ht->kv_indices[idx].kv_idx != GUF_DICT_KV_IDX_NULL);
}
@ -613,7 +627,8 @@ GUF_DICT_KWRDS bool GUF_CAT(GUF_DICT_NAME, _erase)(GUF_DICT_NAME *ht, const GUF_
GUF_ASSERT(last_key_exists && (ptrdiff_t)last_idx < ht->kv_indices_cap);
GUF_ASSERT((ptrdiff_t)ht->kv_indices[last_idx].kv_idx == ht->kv_elems.size - 1);
GUF_ASSERT(ht->kv_indices[last_idx].kv_idx != GUF_DICT_KV_IDX_TOMBSTONE && ht->kv_indices[last_idx].kv_idx != GUF_DICT_KV_IDX_NULL);
ht->kv_indices[last_idx].kv_idx = kv_idx;
GUF_ASSERT(kv_idx <= GUF_DICT_KV_IDX_T_MAX);
ht->kv_indices[last_idx].kv_idx = (GUF_DICT_KV_IDX_T)kv_idx;
}
ht->kv_elems.size -= 1;
@ -775,9 +790,12 @@ GUF_DICT_KWRDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _find_val_if
#undef GUF_DICT_KV_IDX_NULL
#undef GUF_DICT_KV_IDX_TOMBSTONE
#undef GUF_DICT_32_BIT
#undef GUF_DICT_SIZE_T
#undef GUF_DICT_MAX_SIZE
#undef GUF_DICT_64_BIT
#undef GUF_DICT_KV_IDX_T
#undef GUF_DICT_KV_IDX_T_MAX
#undef GUF_DICT_KV_META_T
#undef GUF_DICT_MAX_PTR
#undef GUF_DICT_NAME
#undef GUF_DICT_IS_SET

View File

@ -14,7 +14,6 @@
cf. http://www.isthe.com/chongo/tech/comp/fnv/ (last retrieved: 2023-11-30)
*/
#define GUF_HASH32_INIT UINT32_C(2166136261)
#define GUF_HASH64_INIT UINT64_C(14695981039346656037)
@ -25,14 +24,14 @@ GUF_HASH_KWRDS uint64_t guf_hash64(const void *data, ptrdiff_t num_bytes, uint64
typedef uint32_t guf_hash_size_t;
#define GUF_HASH_INIT GUF_HASH32_INIT
#define GUF_HASH_MAX UINT32_MAX
static inline guf_hash_size_t guf_hash(const void *data, ptrdiff_t num_bytes, uint32_t hash) {
static inline guf_hash_size_t guf_hash(const void *data, ptrdiff_t num_bytes, guf_hash_size_t hash) {
return guf_hash32(data, num_bytes, hash);
}
#else
typedef uint64_t guf_hash_size_t;
#define GUF_HASH_INIT GUF_HASH64_INIT
#define GUF_HASH_MAX UINT64_MAX
static inline guf_hash_size_t guf_hash(const void *data, ptrdiff_t num_bytes, uint64_t hash) {
static inline guf_hash_size_t guf_hash(const void *data, ptrdiff_t num_bytes, guf_hash_size_t hash) {
return guf_hash64(data, num_bytes, hash);
}
#endif
@ -53,7 +52,7 @@ GUF_HASH_KWRDS uint32_t guf_hash32(const void *data, ptrdiff_t num_bytes, uint32
GUF_ASSERT_RELEASE(data);
GUF_ASSERT_RELEASE(num_bytes >= 0);
const unsigned char *data_bytes = (const unsigned char*)data; // This does not break strict-aliasing rules I think...
const uint32_t FNV_32_PRIME = 16777619ul;
const uint32_t FNV_32_PRIME = UINT32_C(16777619);
for (ptrdiff_t i = 0; i < num_bytes; ++i) {
hash ^= data_bytes[i];
hash *= FNV_32_PRIME;
@ -66,7 +65,7 @@ GUF_HASH_KWRDS uint64_t guf_hash64(const void *data, ptrdiff_t num_bytes, uint64
GUF_ASSERT_RELEASE(data);
GUF_ASSERT_RELEASE(num_bytes >= 0);
const unsigned char *data_bytes = (const unsigned char*)data; // This does not break strict-aliasing rules I think...
const uint64_t FNV_64_PRIME = 1099511628211ull;
const uint64_t FNV_64_PRIME = UINT64_C(1099511628211);
for (ptrdiff_t i = 0; i < num_bytes; ++i) {
hash ^= data_bytes[i];
hash *= FNV_64_PRIME;

View File

@ -1,6 +1,8 @@
#ifndef GUF_INIT_H
#define GUF_INIT_H
#include "guf_common.h"
// Set up the global panic handler.
#define GUF_INIT
#include "guf_assert.h"
@ -8,17 +10,4 @@
#define GUF_HASH_IMPL
#include "guf_hash.h"
// static inline bool guf_init(void)
// {
// static bool guf_is_init = false;
// if (guf_is_init) {
// printf("libguf already initialised\n");
// return true;
// }
// guf_is_init = true;
// return guf_is_init;
// }
#endif

View File

@ -29,6 +29,7 @@ static inline bool int32_eq(const int32_t *a, const int32_t *b)
{
return *a == *b;
}
#define GUF_DICT_KEY_T int32_t
#define GUF_DICT_KEY_HASH int32_hash
#define GUF_DICT_KEY_T_EQ int32_eq

View File

@ -510,8 +510,8 @@ struct DbufCstringTest : public Test
for (int i = 0; i < 512; ++i) {
char buf[128];
memset(buf, '\0', GUF_STATIC_BUF_SIZE(buf));
snprintf(buf, GUF_STATIC_BUF_SIZE(buf), "This is a pretty guf string (number %d)", i);
memset(buf, '\0', GUF_ARR_SIZE(buf));
snprintf(buf, GUF_ARR_SIZE(buf), "This is a pretty guf string (number %d)", i);
guf_cstr_heap str = buf;
dbuf_heap_cstr_push(&str_dbuf, &str, GUF_CPY_DEEP);
str_vec.push_back(std::string{buf});

View File

@ -26,11 +26,11 @@ struct DictSvToIntTest : public Test
dict_sv_i32_init(&word_cnt_dict, &guf_allocator_libc);
dbuf_str_view delims = dbuf_str_view_new(&guf_allocator_libc);
for (size_t i = 0; i < GUF_STATIC_BUF_SIZE(GUF_UTF8_WHITESPACE); ++i) {
for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_WHITESPACE); ++i) {
guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_WHITESPACE[i]), .str = GUF_UTF8_WHITESPACE[i]};
dbuf_str_view_push_val(&delims, d);
}
for (size_t i = 0; i < GUF_STATIC_BUF_SIZE(GUF_UTF8_COMMON_PUNCT); ++i) {
for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_COMMON_PUNCT); ++i) {
guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_COMMON_PUNCT[i]), .str = GUF_UTF8_COMMON_PUNCT[i]};
dbuf_str_view_push_val(&delims, d);
}

View File

@ -323,7 +323,7 @@ struct UTF8Test : public Test
TEST_CHECK(guf_utf8_decode(&utf8) == GUF_UTF8_REPLACEMENT_CHAR_CODEPOINT);
char buf[] = {'\x2F', '\xC0', '\xAE', '\x2E', '\x2F'};
guf_str_view input_str = {.str = buf, .len = GUF_STATIC_BUF_SIZE(buf)};
guf_str_view input_str = {.str = buf, .len = GUF_ARR_SIZE(buf)};
guf_utf8_char ch = {};
int valid_chars = 0, invalid_chars = 0;
for (guf_utf8_stat stat = guf_utf8_char_next(&ch, &input_str); stat != GUF_UTF8_READ_DONE; stat = guf_utf8_char_next(&ch, &input_str)) {
@ -336,7 +336,7 @@ struct UTF8Test : public Test
TEST_CHECK(invalid_chars == 2 && valid_chars == 3);
char buf2[] = {'\xE0', '\x80', 'a', 'b', 'c'}; // 1 invalid 3-byte-character, 2 valid 1-byte-characters
input_str = {.str = buf2, .len = GUF_STATIC_BUF_SIZE(buf2)};
input_str = {.str = buf2, .len = GUF_ARR_SIZE(buf2)};
ch = {};
valid_chars = invalid_chars = 0;
for (guf_utf8_stat stat = guf_utf8_char_next(&ch, &input_str); stat != GUF_UTF8_READ_DONE; stat = guf_utf8_char_next(&ch, &input_str)) {
@ -367,11 +367,11 @@ struct UTF8Test : public Test
TEST_CHECK(valid > 16000 && invalid == 0);
dbuf_str_view delims = dbuf_str_view_new(&guf_allocator_libc);
for (size_t i = 0; i < GUF_STATIC_BUF_SIZE(GUF_UTF8_WHITESPACE); ++i) {
for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_WHITESPACE); ++i) {
guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_WHITESPACE[i]), .str = GUF_UTF8_WHITESPACE[i]};
dbuf_str_view_push_val(&delims, d);
}
for (size_t i = 0; i < GUF_STATIC_BUF_SIZE(GUF_UTF8_COMMON_PUNCT); ++i) {
for (size_t i = 0; i < GUF_ARR_SIZE(GUF_UTF8_COMMON_PUNCT); ++i) {
guf_str_view d = {.len = (ptrdiff_t)strlen(GUF_UTF8_COMMON_PUNCT[i]), .str = GUF_UTF8_COMMON_PUNCT[i]};
dbuf_str_view_push_val(&delims, d);
}