From 13abedd2adcfc0b03c21977357782da15fa272ab Mon Sep 17 00:00:00 2001 From: jun <83899451+zeichensystem@users.noreply.github.com> Date: Sun, 23 Feb 2025 09:18:50 +0100 Subject: [PATCH] Implement guf_dict --- src/guf_dict.h | 378 ++++++++++++++++++++++++++++----------- src/guf_hash.h | 9 +- src/guf_test.c | 36 ++-- src/guf_test_dict_impl.c | 8 + src/guf_test_dict_impl.h | 17 ++ 5 files changed, 333 insertions(+), 115 deletions(-) diff --git a/src/guf_dict.h b/src/guf_dict.h index 850252e..71f0e19 100755 --- a/src/guf_dict.h +++ b/src/guf_dict.h @@ -25,18 +25,10 @@ #include "guf_alloc.h" #include "guf_hash.h" - #define GUF_DICT_KV_IDX_NULL GUF_HASH_MAX - #define GUF_DICT_KV_IDX_TOMBSTONE (GUF_HASH_MAX - 1) - typedef struct guf_dict_kv_meta { - guf_hash_size_t kv_idx; // index into the key-value buffer. TODO: uint32_t? + guf_hash_size_t kv_idx; // index into the key-value buffer. TODO: uint64 consisting of hash_fragment + idx? guf_hash_size_t key_hash; } guf_dict_kv_meta; - - // #define GUF_T guf_dict_kv_meta - // #define GUF_CNT_NAME guf_dict_kv_meta_dbuf - // #define GUF_ONLY_TYPES - // #include "guf_dbuf.h" #endif #ifndef GUF_DICT_KEY_T @@ -55,14 +47,14 @@ #define GUF_DICT_IS_SET #endif -#ifndef GUF_DICT_KEY_LOOKUP_T - #define GUF_DICT_KEY_LOOKUP_T GUF_DICT_KEY_T -#else - // GUF_DICT_KEY_LOOKUP_T convert(const GUF_DICT_KEY_T *key) - #ifndef GUF_DICT_KEY_TO_LOOKUP_KEY_CONVERT - #error "GUF_DICT_KEY_TO_LOOKUP_KEY_CONVis must be defined since GUF_DICT_KEY_LOOKUP_T is defined" - #endif -#endif +// #ifndef GUF_DICT_KEY_LOOKUP_T +// #define GUF_DICT_KEY_LOOKUP_T GUF_DICT_KEY_T +// #else +// // GUF_DICT_KEY_LOOKUP_T convert(const GUF_DICT_KEY_T *key) +// #ifndef GUF_DICT_KEY_TO_LOOKUP_KEY_CONVERT +// #error "GUF_DICT_KEY_TO_LOOKUP_KEY_CONVis must be defined since GUF_DICT_KEY_LOOKUP_T is defined" +// #endif +// #endif #ifndef GUF_DICT_NAME #define GUF_DICT_NAME GUF_CAT(dict_, GUF_CAT(GUF_DICT_KEY_T, GUF_CAT(_to_, GUF_DICT_VAL_T))) @@ -78,7 +70,6 @@ // - GUF_T_FREE: free function with signature void free(GUF_T *a, void *ctx) (default: undefined) // - GUF_T_EQ: equality function with signature bool eq(const GUF_T *a, const GUF_T *a) (default: undefined, or equality by value if GUF_T_IS_INTEGRAL_TYPE is defined) - #ifndef GUF_DICT_IMPL typedef struct GUF_DICT_KV_NAME { @@ -94,12 +85,14 @@ typedef struct GUF_DICT_KV_NAME { #include "guf_dbuf.h" typedef struct GUF_DICT_NAME { - guf_dict_kv_meta *kv_meta_buf; - GUF_DICT_KV_DBUF kv_dbuf; - ptrdiff_t kv_meta_buf_cap, num_tombstones; - ptrdiff_t max_probelen; // Debug + GUF_DICT_KV_DBUF kv_elems; // The actual key-value elements (contiguous in memory) + guf_dict_kv_meta *kv_indices; // Indices into the kv_elems dbuf. + ptrdiff_t kv_indices_cap, num_tombstones; + ptrdiff_t max_probelen; // Stores the worst-case probelen. } GUF_DICT_NAME; +typedef GUF_CAT(GUF_DICT_KV_DBUF, _iter) GUF_CAT(GUF_DICT_NAME, _iter); + #endif GUF_DICT_FN_KEYWORDS GUF_DICT_NAME *GUF_CAT(GUF_DICT_NAME, _try_init)(GUF_DICT_NAME *ht, guf_allocator *alloc, guf_err *err); @@ -109,18 +102,32 @@ GUF_DICT_FN_KEYWORDS void GUF_CAT(GUF_DICT_NAME, _free)(GUF_DICT_NAME *ht, void GUF_DICT_FN_KEYWORDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, GUF_DICT_KEY_T *key, GUF_DICT_VAL_T *val, guf_cpy_opt key_opt, guf_cpy_opt val_opt, guf_err *err); GUF_DICT_FN_KEYWORDS void GUF_CAT(GUF_DICT_NAME, _insert)(GUF_DICT_NAME *ht, GUF_DICT_KEY_T *key, GUF_DICT_VAL_T *val, guf_cpy_opt key_opt, guf_cpy_opt val_opt); +GUF_DICT_FN_KEYWORDS void GUF_CAT(GUF_DICT_NAME, _try_insert_val_arg)(GUF_DICT_NAME *ht, GUF_DICT_KEY_T key, GUF_DICT_VAL_T val, guf_cpy_opt key_opt, guf_cpy_opt val_opt, guf_err *err); +GUF_DICT_FN_KEYWORDS void GUF_CAT(GUF_DICT_NAME, _insert_val_arg)(GUF_DICT_NAME *ht, GUF_DICT_KEY_T key, GUF_DICT_VAL_T val, guf_cpy_opt key_opt, guf_cpy_opt val_opt); -GUF_DICT_FN_KEYWORDS void GUF_CAT(GUF_DICT_NAME, _try_erase)(GUF_DICT_NAME *ht, const GUF_DICT_KEY_T *key, guf_err *err); -GUF_DICT_FN_KEYWORDS void GUF_CAT(GUF_DICT_NAME, _erase)(GUF_DICT_NAME *ht, const GUF_DICT_KEY_T *key); +GUF_DICT_FN_KEYWORDS bool GUF_CAT(GUF_DICT_NAME, _erase)(GUF_DICT_NAME *ht, const GUF_DICT_KEY_T *key); +GUF_DICT_FN_KEYWORDS bool GUF_CAT(GUF_DICT_NAME, _erase_val_arg)(GUF_DICT_NAME *ht, GUF_DICT_KEY_T key); #ifdef GUF_DICT_VAL_T - GUF_DICT_FN_KEYWORDS GUF_DICT_VAL_T *GUF_CAT(GUF_DICT_NAME, _at)(GUF_DICT_NAME *ht, const GUF_DICT_KEY_LOOKUP_T *key); + GUF_DICT_FN_KEYWORDS GUF_DICT_VAL_T *GUF_CAT(GUF_DICT_NAME, _at)(GUF_DICT_NAME *ht, const GUF_DICT_KEY_T *key); + GUF_DICT_FN_KEYWORDS GUF_DICT_VAL_T *GUF_CAT(GUF_DICT_NAME, _at_val_arg)(GUF_DICT_NAME *ht, GUF_DICT_KEY_T key); #endif -GUF_DICT_FN_KEYWORDS bool GUF_CAT(GUF_DICT_NAME, _contains)(const GUF_DICT_NAME *ht, const GUF_DICT_KEY_LOOKUP_T *key); +GUF_DICT_FN_KEYWORDS bool GUF_CAT(GUF_DICT_NAME, _contains)(GUF_DICT_NAME *ht, const GUF_DICT_KEY_T *key); +GUF_DICT_FN_KEYWORDS bool GUF_CAT(GUF_DICT_NAME, _contains_val_arg)(GUF_DICT_NAME *ht, GUF_DICT_KEY_T key); GUF_DICT_FN_KEYWORDS ptrdiff_t GUF_CAT(GUF_DICT_NAME, _size)(const GUF_DICT_NAME *ht); +/* Iterator functions */ +GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _begin)(const GUF_DICT_NAME* ht); +GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _end)(const GUF_DICT_NAME* ht); +GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _rbegin)(const GUF_DICT_NAME* ht); +GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _rend)(const GUF_DICT_NAME* ht); + +GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _iter_next)(const GUF_DICT_NAME *ht, GUF_CAT(GUF_DICT_NAME, _iter) it, ptrdiff_t step); +GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _iter_at_idx)(const GUF_DICT_NAME *ht, ptrdiff_t idx); +GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _reverse_iter_at_idx)(const GUF_DICT_NAME *ht, ptrdiff_t idx); +GUF_DICT_FN_KEYWORDS ptrdiff_t GUF_CAT(GUF_DICT_NAME, _iter_to_idx)(const GUF_DICT_NAME *ht, GUF_CAT(GUF_DICT_NAME, _iter) it); // #define GUF_DICT_IMPL /* DEBUGGGGGGGGG */ @@ -128,11 +135,8 @@ GUF_DICT_FN_KEYWORDS ptrdiff_t GUF_CAT(GUF_DICT_NAME, _size)(const GUF_DICT_NAME #include "guf_assert.h" -// #define GUF_T guf_dict_kv_meta -// #define GUF_CNT_NAME guf_dict_kv_meta_dbuf -// #define GUF_STATIC -// #define GUF_IMPL -// #include "guf_dbuf.h" +#define GUF_DICT_KV_IDX_NULL GUF_HASH_MAX +#define GUF_DICT_KV_IDX_TOMBSTONE (GUF_HASH_MAX - 1) static inline void GUF_CAT(GUF_DICT_KV_NAME, _free)(GUF_DICT_KV_NAME *kv, void *ctx) { @@ -150,22 +154,19 @@ static inline void GUF_CAT(GUF_DICT_KV_NAME, _free)(GUF_DICT_KV_NAME *kv, void * #define GUF_T GUF_DICT_KV_NAME #define GUF_T_FREE GUF_CAT(GUF_DICT_KV_NAME, _free) #define GUF_CNT_NAME GUF_DICT_KV_DBUF -#ifdef GUF_DICT_STATIC - #define GUF_STATIC -#endif +#define GUF_STATIC #define GUF_IMPL #include "guf_dbuf.h" static inline double GUF_CAT(GUF_DICT_NAME, _load_factor)(const GUF_DICT_NAME *ht) { - if (ht->kv_meta_buf_cap == 0) { + if (ht->kv_indices_cap == 0) { return 1; } - ptrdiff_t occupied_count = ht->kv_dbuf.size + ht->num_tombstones; - return (double)occupied_count / (double)ht->kv_meta_buf_cap; + ptrdiff_t occupied_count = ht->kv_elems.size + ht->num_tombstones; + return (double)occupied_count / (double)ht->kv_indices_cap; } - GUF_DICT_FN_KEYWORDS GUF_DICT_NAME *GUF_CAT(GUF_DICT_NAME, _try_init)(GUF_DICT_NAME *ht, guf_allocator *alloc, guf_err *err) { if (!ht || !alloc) { @@ -173,14 +174,14 @@ GUF_DICT_FN_KEYWORDS GUF_DICT_NAME *GUF_CAT(GUF_DICT_NAME, _try_init)(GUF_DICT_N return NULL; } - ht->kv_dbuf = (GUF_DICT_KV_DBUF){0}; - GUF_CAT(GUF_DICT_KV_DBUF, _try_init)(&ht->kv_dbuf, 0, alloc, err); + ht->kv_elems = (GUF_DICT_KV_DBUF){0}; + GUF_CAT(GUF_DICT_KV_DBUF, _try_init)(&ht->kv_elems, 0, alloc, err); if (err != GUF_ERR_NONE) { return NULL; } - ht->kv_meta_buf = NULL; - ht->kv_meta_buf_cap = 0; + ht->kv_indices = NULL; + ht->kv_indices_cap = 0; ht->num_tombstones = 0; ht->max_probelen = 0; @@ -197,29 +198,35 @@ GUF_DICT_FN_KEYWORDS bool GUF_CAT(GUF_DICT_NAME, _valid)(const GUF_DICT_NAME *ht if (!ht) { return false; } - bool kv_dbuf_valid = GUF_CAT(GUF_DICT_KV_DBUF, _valid)(&ht->kv_dbuf); - bool kv_meta_buf_valid = (!ht->kv_meta_buf && !ht->kv_meta_buf_cap) || (ht->kv_meta_buf && ht->kv_meta_buf_cap); - return kv_dbuf_valid && kv_meta_buf_valid; + bool kv_dbuf_valid = GUF_CAT(GUF_DICT_KV_DBUF, _valid)(&ht->kv_elems); + bool kv_meta_buf_valid = (!ht->kv_indices && !ht->kv_indices_cap) || (ht->kv_indices && ht->kv_indices_cap); + bool count_valid = ht->num_tombstones >= 0 && ht->kv_elems.size >= 0 && ((ht->kv_elems.size + ht->num_tombstones) <= ht->kv_indices_cap); + return kv_dbuf_valid && kv_meta_buf_valid && count_valid; } GUF_DICT_FN_KEYWORDS void GUF_CAT(GUF_DICT_NAME, _free)(GUF_DICT_NAME *ht, void *ctx) { (void)ctx; GUF_ASSERT_RELEASE(GUF_CAT(GUF_DICT_NAME, _valid)(ht)); - guf_allocator *allocator = ht->kv_dbuf.allocator; + guf_allocator *allocator = ht->kv_elems.allocator; - if (ht->kv_meta_buf) { - allocator->free(ht->kv_meta_buf, ht->kv_meta_buf_cap * sizeof(guf_dict_kv_meta), allocator->ctx); - ht->kv_meta_buf = NULL; - ht->kv_meta_buf_cap = 0; + if (ht->kv_indices) { + allocator->free(ht->kv_indices, ht->kv_indices_cap * sizeof(guf_dict_kv_meta), allocator->ctx); + ht->kv_indices = NULL; + ht->kv_indices_cap = 0; } - GUF_CAT(GUF_DICT_KV_DBUF, _free)(&ht->kv_dbuf, NULL); + GUF_CAT(GUF_DICT_KV_DBUF, _free)(&ht->kv_elems, NULL); ht->num_tombstones = 0; ht->max_probelen = 0; } +GUF_DICT_FN_KEYWORDS ptrdiff_t GUF_CAT(GUF_DICT_NAME, _size)(const GUF_DICT_NAME *ht) +{ + GUF_ASSERT_RELEASE(GUF_CAT(GUF_DICT_NAME, _valid)(ht)); + return ht->kv_elems.size; +} static inline size_t GUF_CAT(GUF_DICT_NAME, _probe_offset)(size_t probe_len) { @@ -235,51 +242,46 @@ static inline size_t GUF_CAT(GUF_DICT_NAME, _probe_offset)(size_t probe_len) #endif } -static size_t GUF_CAT(GUF_DICT_NAME, _find_idx)(GUF_DICT_NAME *ht, const GUF_DICT_KEY_T *key, bool *key_exists, bool find_first_free) +static size_t GUF_CAT(GUF_DICT_NAME, _find_idx)(GUF_DICT_NAME *ht, const GUF_DICT_KEY_T *key, bool *key_exists) { - if (ht->kv_meta_buf_cap == 0) { + if (ht->kv_indices_cap == 0) { return SIZE_MAX; } const guf_hash_size_t hash = GUF_DICT_KEY_HASH(key); - size_t idx = hash % ht->kv_meta_buf_cap; + size_t idx = (size_t)guf_mod_pow2_hash(hash, ht->kv_indices_cap); const size_t start_idx = idx; size_t probe_len = 1; size_t first_tombstone_idx = SIZE_MAX; do { // printf("idx : %zu %td\n", idx, ht->kv_meta_buf_cap); - if (ht->kv_meta_buf[idx].kv_idx == GUF_DICT_KV_IDX_NULL) { // 1.) Empty. + if (ht->kv_indices[idx].kv_idx == GUF_DICT_KV_IDX_NULL) { // 1.) Empty. if (first_tombstone_idx != SIZE_MAX) { idx = first_tombstone_idx; } ht->max_probelen = GUF_MAX((ptrdiff_t)probe_len, ht->max_probelen); - GUF_ASSERT((ht->kv_meta_buf[idx].kv_idx == GUF_DICT_KV_IDX_NULL) || (ht->kv_meta_buf[idx].kv_idx == GUF_DICT_KV_IDX_TOMBSTONE)); + GUF_ASSERT((ht->kv_indices[idx].kv_idx == GUF_DICT_KV_IDX_NULL) || (ht->kv_indices[idx].kv_idx == GUF_DICT_KV_IDX_TOMBSTONE)); *key_exists = false; return idx; - } else if (ht->kv_meta_buf[idx].kv_idx == GUF_DICT_KV_IDX_TOMBSTONE) { // 2.) Tombstone. + } else if (ht->kv_indices[idx].kv_idx == GUF_DICT_KV_IDX_TOMBSTONE) { // 2.) Tombstone. if (first_tombstone_idx == SIZE_MAX) { first_tombstone_idx = idx; } - if (find_first_free) { - goto end; - } else { - goto probe; - } - } else if (hash == ht->kv_meta_buf[idx].key_hash && GUF_DICT_KEY_T_EQ(key, &GUF_CAT(GUF_DICT_KV_DBUF, _at)(&ht->kv_dbuf, ht->kv_meta_buf[idx].kv_idx)->key)) { // 3.) Key already exists. + goto probe; + } else if (hash == ht->kv_indices[idx].key_hash && GUF_DICT_KEY_T_EQ(key, &GUF_CAT(GUF_DICT_KV_DBUF, _at)(&ht->kv_elems, ht->kv_indices[idx].kv_idx)->key)) { // 3.) Key already exists. ht->max_probelen = GUF_MAX((ptrdiff_t)probe_len, ht->max_probelen); *key_exists = true; return idx; } else { // 4.) Have to probe due to hash-collision (idx is already occupied, but not by the key). probe: - idx = (idx + GUF_CAT(GUF_DICT_NAME, _probe_offset)(probe_len)) % ht->kv_meta_buf_cap; + idx = (idx + GUF_CAT(GUF_DICT_NAME, _probe_offset)(probe_len)) % ht->kv_indices_cap; ++probe_len; GUF_ASSERT_RELEASE(probe_len < UINT32_MAX); } } while (idx != start_idx); - end: if (first_tombstone_idx != SIZE_MAX) { // Edge case: No empty slots, but found tombstone. ht->max_probelen = GUF_MAX((ptrdiff_t)probe_len, ht->max_probelen); - GUF_ASSERT(ht->kv_meta_buf[first_tombstone_idx].kv_idx == GUF_DICT_KV_IDX_NULL ); + GUF_ASSERT(ht->kv_indices[first_tombstone_idx].kv_idx == GUF_DICT_KV_IDX_NULL ); *key_exists = false; return first_tombstone_idx; } @@ -291,85 +293,92 @@ GUF_DICT_FN_KEYWORDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, { GUF_ASSERT_RELEASE(GUF_CAT(GUF_DICT_NAME, _valid)(ht)); - const ptrdiff_t KV_META_START_CAP = 64; + if (!key || !val) { + guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, GUF_ERR_MSG("in function dict_try_insert: key or val argument is NULL")); + return; + } + + const ptrdiff_t KV_META_START_CAP = 64; // Must be a power of two. const ptrdiff_t KV_META_GROWTH_FAC = 2; - guf_allocator *allocator = ht->kv_dbuf.allocator; + guf_allocator *allocator = ht->kv_elems.allocator; - if (ht->kv_meta_buf_cap == 0) { - guf_dict_kv_meta *kv_meta_buf = allocator->alloc(KV_META_START_CAP * sizeof(guf_dict_kv_meta), allocator->ctx); - if (kv_meta_buf == NULL) { + if (ht->kv_indices_cap == 0) { // 1.a) Allocate initial kv-index-buffer. + guf_dict_kv_meta *new_kv_indices = allocator->alloc(KV_META_START_CAP * sizeof(guf_dict_kv_meta), allocator->ctx); + if (new_kv_indices == NULL) { guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, GUF_ERR_MSG("in function dict_try_insert: Initial allocation failed")); return; } - ht->kv_meta_buf = kv_meta_buf; - ht->kv_meta_buf_cap = KV_META_START_CAP; - for (ptrdiff_t i = 0; i < ht->kv_meta_buf_cap; ++i) { - kv_meta_buf[i].key_hash = 0; - kv_meta_buf[i].kv_idx = GUF_DICT_KV_IDX_NULL; + ht->kv_indices = new_kv_indices; + ht->kv_indices_cap = KV_META_START_CAP; + for (ptrdiff_t i = 0; i < ht->kv_indices_cap; ++i) { + new_kv_indices[i].kv_idx = GUF_DICT_KV_IDX_NULL; + new_kv_indices[i].key_hash = 0; } - } else if (GUF_CAT(GUF_DICT_NAME, _load_factor)(ht) > 0.6) { - - const ptrdiff_t old_size = ht->kv_meta_buf_cap * sizeof(guf_dict_kv_meta); + } else if (GUF_CAT(GUF_DICT_NAME, _load_factor)(ht) > 0.6) { // 1.b) Grow kv-index-buffer. + GUF_ASSERT(ht->kv_indices); + const ptrdiff_t old_size = ht->kv_indices_cap * sizeof(guf_dict_kv_meta); ptrdiff_t new_size = 0; if (!guf_size_calc_safe(old_size, KV_META_GROWTH_FAC, &new_size)) { guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, GUF_ERR_MSG("in function dict_try_insert: New capacity would overflow)")); return; } - guf_dict_kv_meta *kv_meta_buf = allocator->alloc(new_size, allocator->ctx); - if (kv_meta_buf == NULL) { + guf_dict_kv_meta *new_kv_indices = allocator->alloc(new_size, allocator->ctx); + if (new_kv_indices == NULL) { guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, GUF_ERR_MSG("in function dict_try_insert: allocation failed")); return; } - const ptrdiff_t new_kv_meta_cap = ht->kv_meta_buf_cap * KV_META_GROWTH_FAC; + const ptrdiff_t new_kv_meta_cap = ht->kv_indices_cap * KV_META_GROWTH_FAC; for (ptrdiff_t i = 0; i < new_kv_meta_cap; ++i) { - kv_meta_buf[i].key_hash = 0; - kv_meta_buf[i].kv_idx = GUF_DICT_KV_IDX_NULL; + new_kv_indices[i].kv_idx = GUF_DICT_KV_IDX_NULL; + new_kv_indices[i].key_hash = 0; } - guf_dict_kv_meta *old_kv_meta = ht->kv_meta_buf; - ptrdiff_t old_kv_meta_cap = ht->kv_meta_buf_cap; + guf_dict_kv_meta *old_kv_indices = ht->kv_indices; + ptrdiff_t old_kv_indices_cap = ht->kv_indices_cap; - ht->kv_meta_buf = kv_meta_buf; - ht->kv_meta_buf_cap = new_kv_meta_cap; + ht->kv_indices = new_kv_indices; + ht->kv_indices_cap = new_kv_meta_cap; ht->num_tombstones = 0; ptrdiff_t cnt = 0; - for (ptrdiff_t i = 0; i < old_kv_meta_cap; ++i) { // Insert into new buffer. - if (old_kv_meta[i].kv_idx != GUF_DICT_KV_IDX_NULL && old_kv_meta[i].kv_idx != GUF_DICT_KV_IDX_TOMBSTONE) { + for (ptrdiff_t i = 0; i < old_kv_indices_cap; ++i) { // Copy old kv-indices into new kv-index-buffer. + if (old_kv_indices[i].kv_idx != GUF_DICT_KV_IDX_NULL && old_kv_indices[i].kv_idx != GUF_DICT_KV_IDX_TOMBSTONE) { bool key_exists = false; - size_t new_idx = GUF_CAT(GUF_DICT_NAME, _find_idx)(ht, key, &key_exists, false); + size_t new_idx = GUF_CAT(GUF_DICT_NAME, _find_idx)(ht, key, &key_exists); GUF_ASSERT(!key_exists); GUF_ASSERT(new_idx < SIZE_MAX); - ht->kv_meta_buf[new_idx] = old_kv_meta[i]; + ht->kv_indices[new_idx] = old_kv_indices[i]; ++cnt; } } - GUF_ASSERT(cnt == ht->kv_dbuf.size); + GUF_ASSERT(cnt == ht->kv_elems.size); + (void)cnt; - allocator->free(old_kv_meta, old_size, allocator->ctx); + allocator->free(old_kv_indices, old_size, allocator->ctx); GUF_ASSERT(GUF_CAT(GUF_DICT_NAME, _load_factor)(ht) < 0.6); } - GUF_ASSERT(ht->kv_meta_buf_cap > ht->kv_dbuf.size); + // 2.) Insert new key-value pair. + GUF_ASSERT_RELEASE(ht->kv_indices_cap > ht->kv_elems.size); bool key_exists = false; - size_t idx = GUF_CAT(GUF_DICT_NAME, _find_idx)(ht, key, &key_exists, false); + size_t idx = GUF_CAT(GUF_DICT_NAME, _find_idx)(ht, key, &key_exists); if (key_exists) { guf_err_set_or_panic(err, GUF_ERR_ALREADY_EXISTS, GUF_ERR_MSG("in function dict_try_insert: Key already exists")); return; } - GUF_ASSERT(idx < (size_t)ht->kv_meta_buf_cap); + GUF_ASSERT_RELEASE(idx < (size_t)ht->kv_indices_cap); - if (ht->kv_meta_buf[idx].kv_idx == GUF_DICT_KV_IDX_TOMBSTONE) { + if (ht->kv_indices[idx].kv_idx == GUF_DICT_KV_IDX_TOMBSTONE) { ht->num_tombstones -= 1; GUF_ASSERT_RELEASE(ht->num_tombstones >= 0); } - ht->kv_meta_buf[idx].key_hash = GUF_DICT_KEY_HASH(key); - ht->kv_meta_buf[idx].kv_idx = ht->kv_dbuf.size; + ht->kv_indices[idx].key_hash = GUF_DICT_KEY_HASH(key); + ht->kv_indices[idx].kv_idx = ht->kv_elems.size; GUF_DICT_KEY_T key_cpy; GUF_DICT_KEY_T *key_cpy_res = NULL; @@ -423,14 +432,181 @@ GUF_DICT_FN_KEYWORDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, } GUF_DICT_KV_NAME kv = {.key = key_cpy, .val = val_cpy}; - GUF_CAT(GUF_DICT_KV_DBUF, _try_push_val)(&ht->kv_dbuf, kv, err); + GUF_CAT(GUF_DICT_KV_DBUF, _try_push_val)(&ht->kv_elems, kv, err); } +GUF_DICT_FN_KEYWORDS void GUF_CAT(GUF_DICT_NAME, _insert)(GUF_DICT_NAME *ht, GUF_DICT_KEY_T *key, GUF_DICT_VAL_T *val, guf_cpy_opt key_opt, guf_cpy_opt val_opt) +{ + GUF_CAT(GUF_DICT_NAME, _try_insert)(ht, key, val, key_opt, val_opt, NULL); +} + +GUF_DICT_FN_KEYWORDS void GUF_CAT(GUF_DICT_NAME, _try_insert_val_arg)(GUF_DICT_NAME *ht, GUF_DICT_KEY_T key, GUF_DICT_VAL_T val, guf_cpy_opt key_opt, guf_cpy_opt val_opt, guf_err *err) +{ + GUF_CAT(GUF_DICT_NAME, _try_insert)(ht, &key, &val, key_opt, val_opt, err); +} + +GUF_DICT_FN_KEYWORDS void GUF_CAT(GUF_DICT_NAME, _insert_val_arg)(GUF_DICT_NAME *ht, GUF_DICT_KEY_T key, GUF_DICT_VAL_T val, guf_cpy_opt key_opt, guf_cpy_opt val_opt) +{ + GUF_CAT(GUF_DICT_NAME, _insert)(ht, &key, &val, key_opt, val_opt); +} + + +#ifdef GUF_DICT_VAL_T + GUF_DICT_FN_KEYWORDS GUF_DICT_VAL_T *GUF_CAT(GUF_DICT_NAME, _at)(GUF_DICT_NAME *ht, const GUF_DICT_KEY_T *key) + { + GUF_ASSERT_RELEASE(GUF_CAT(GUF_DICT_NAME, _valid)(ht)); + if (!key) { + return NULL; + } + + bool key_exists = false; + const size_t idx = GUF_CAT(GUF_DICT_NAME, _find_idx)(ht, key, &key_exists); + if (!key_exists) { + return NULL; + } else { + GUF_ASSERT(idx != SIZE_MAX); + GUF_ASSERT((ptrdiff_t)idx < ht->kv_indices_cap); + const size_t kv_idx = ht->kv_indices[idx].kv_idx; + GUF_ASSERT((ptrdiff_t)kv_idx < ht->kv_elems.size); + return &GUF_CAT(GUF_DICT_KV_DBUF, _at)(&ht->kv_elems, kv_idx)->val; + } + } + + GUF_DICT_FN_KEYWORDS GUF_DICT_VAL_T *GUF_CAT(GUF_DICT_NAME, _at_val_arg)(GUF_DICT_NAME *ht, GUF_DICT_KEY_T key) + { + return GUF_CAT(GUF_DICT_NAME, _at)(ht, &key); + } + +#endif + +GUF_DICT_FN_KEYWORDS bool GUF_CAT(GUF_DICT_NAME, _contains)(GUF_DICT_NAME *ht, const GUF_DICT_KEY_T *key) +{ + GUF_ASSERT_RELEASE(GUF_CAT(GUF_DICT_NAME, _valid)(ht)); + if (!key) { + return false; + } + bool key_exists = false; + const size_t idx = GUF_CAT(GUF_DICT_NAME, _find_idx)(ht, key, &key_exists); + (void)idx; + return key_exists; +} + +GUF_DICT_FN_KEYWORDS bool GUF_CAT(GUF_DICT_NAME, _contains_val_arg)(GUF_DICT_NAME *ht, GUF_DICT_KEY_T key) +{ + return GUF_CAT(GUF_DICT_NAME, _contains)(ht, &key); +} + + +GUF_DICT_FN_KEYWORDS bool GUF_CAT(GUF_DICT_NAME, _erase)(GUF_DICT_NAME *ht, const GUF_DICT_KEY_T *key) +{ + GUF_ASSERT_RELEASE(GUF_CAT(GUF_DICT_NAME, _valid)(ht)); + + if (!key || ht->kv_elems.size == 0) { + return false; + } + + bool key_exists = false; + const size_t idx = GUF_CAT(GUF_DICT_NAME, _find_idx)(ht, key, &key_exists); + if (!key_exists) { + return false; + } + + GUF_ASSERT((ptrdiff_t)idx < ht->kv_indices_cap); + const size_t kv_idx = ht->kv_indices[idx].kv_idx; + + ht->kv_indices[idx].kv_idx = GUF_DICT_KV_IDX_TOMBSTONE; + ht->kv_indices[idx].key_hash = 0; + + GUF_DICT_KV_NAME *kv = GUF_CAT(GUF_DICT_KV_DBUF, _at)(&ht->kv_elems, kv_idx); + GUF_ASSERT(kv); + + GUF_CAT(GUF_DICT_KV_NAME, _free)(kv, NULL); + + if (ht->kv_elems.size > 1 && (ptrdiff_t)kv_idx != ht->kv_elems.size - 1) { // Switch last kv-elem into the erased position and update its kv-index accordingly. + // 1.) Switch kv_elem. + GUF_DICT_KV_NAME *last_kv = GUF_CAT(GUF_DICT_KV_DBUF, _back)(&ht->kv_elems); + GUF_ASSERT(last_kv); + *kv = *last_kv; + // 2.) Update kv_index. + bool last_key_exists = false; + const size_t last_idx = GUF_CAT(GUF_DICT_NAME, _find_idx)(ht, &kv->key, &last_key_exists); + GUF_ASSERT(last_key_exists && (ptrdiff_t)last_idx < ht->kv_indices_cap); + ht->kv_indices[last_idx].kv_idx = kv_idx; + } + + ht->kv_elems.size--; + ht->num_tombstones++; + return true; +} + +GUF_DICT_FN_KEYWORDS bool GUF_CAT(GUF_DICT_NAME, _erase_val_arg)(GUF_DICT_NAME *ht, GUF_DICT_KEY_T key) +{ + return GUF_CAT(GUF_DICT_NAME, _erase)(ht, &key); +} + + +/* Iterator functions */ + +GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _begin)(const GUF_DICT_NAME* ht) +{ + GUF_ASSERT_RELEASE(GUF_CAT(GUF_DICT_NAME, _valid)(ht)); + GUF_CAT(GUF_DICT_KV_DBUF, _iter) kv_it = GUF_CAT(GUF_DICT_KV_DBUF, _begin)(&ht->kv_elems); + return (GUF_CAT(GUF_DICT_NAME, _iter)){.ptr = kv_it.ptr, .base = kv_it.base}; +} + +GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _end)(const GUF_DICT_NAME* ht) +{ + GUF_ASSERT_RELEASE(GUF_CAT(GUF_DICT_NAME, _valid)(ht)); + GUF_CAT(GUF_DICT_KV_DBUF, _iter) kv_it = GUF_CAT(GUF_DICT_KV_DBUF, _end)(&ht->kv_elems); + return (GUF_CAT(GUF_DICT_NAME, _iter)){.ptr = kv_it.ptr, .base = kv_it.base}; +} + +GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _rbegin)(const GUF_DICT_NAME* ht) +{ + GUF_ASSERT_RELEASE(GUF_CAT(GUF_DICT_NAME, _valid)(ht)); + GUF_CAT(GUF_DICT_KV_DBUF, _iter) kv_it = GUF_CAT(GUF_DICT_KV_DBUF, _rbegin)(&ht->kv_elems); + return (GUF_CAT(GUF_DICT_NAME, _iter)){.ptr = kv_it.ptr, .base = kv_it.base}; +} + +GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _rend)(const GUF_DICT_NAME* ht) +{ + GUF_ASSERT_RELEASE(GUF_CAT(GUF_DICT_NAME, _valid)(ht)); + GUF_CAT(GUF_DICT_KV_DBUF, _iter) kv_it = GUF_CAT(GUF_DICT_KV_DBUF, _rend)(&ht->kv_elems); + return (GUF_CAT(GUF_DICT_NAME, _iter)){.ptr = kv_it.ptr, .base = kv_it.base}; +} + +GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _iter_next)(const GUF_DICT_NAME *ht, GUF_CAT(GUF_DICT_NAME, _iter) it, ptrdiff_t step) +{ + GUF_CAT(GUF_DICT_KV_DBUF, _iter) kv_it = GUF_CAT(GUF_DICT_KV_DBUF, _iter_next)(&ht->kv_elems, it, step); + return (GUF_CAT(GUF_DICT_NAME, _iter)){.ptr = kv_it.ptr, .base = kv_it.base}; +} +GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _iter_at_idx)(const GUF_DICT_NAME *ht, ptrdiff_t idx) +{ + GUF_CAT(GUF_DICT_KV_DBUF, _iter) kv_it = GUF_CAT(GUF_DICT_KV_DBUF, _iter_at_idx)(&ht->kv_elems, idx); + return (GUF_CAT(GUF_DICT_NAME, _iter)){.ptr = kv_it.ptr, .base = kv_it.base}; +} + +GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _reverse_iter_at_idx)(const GUF_DICT_NAME *ht, ptrdiff_t idx) +{ + GUF_CAT(GUF_DICT_KV_DBUF, _iter) kv_it = GUF_CAT(GUF_DICT_KV_DBUF, _reverse_iter_at_idx)(&ht->kv_elems, idx); + return (GUF_CAT(GUF_DICT_NAME, _iter)){.ptr = kv_it.ptr, .base = kv_it.base}; +} + +GUF_DICT_FN_KEYWORDS ptrdiff_t GUF_CAT(GUF_DICT_NAME, _iter_to_idx)(const GUF_DICT_NAME *ht, GUF_CAT(GUF_DICT_NAME, _iter) it) +{ + return GUF_CAT(GUF_DICT_KV_DBUF, _iter_to_idx)(&ht->kv_elems, it); +} + + #endif /* end GUF_IMPL/GUF_IMPL_STATIC */ +#undef GUF_DICT_KV_IDX_NULL +#undef GUF_DICT_KV_IDX_TOMBSTONE + #undef GUF_DICT_NAME #undef GUF_DICT_IS_SET #undef GUF_DICT_PROBE_LINEAR +#undef GUF_DICT_PROBE_QUADRATIC #undef GUF_DICT_KEY_T #undef GUF_DICT_KEY_T_IS_INTEGRAL_TYPE diff --git a/src/guf_hash.h b/src/guf_hash.h index 8b0dd3a..3310817 100644 --- a/src/guf_hash.h +++ b/src/guf_hash.h @@ -36,6 +36,11 @@ GUF_FN_KEYWORDS uint64_t guf_hash64(const void *data, ptrdiff_t num_bytes, uint6 } #endif +// a mod pow2 (with pow2 being a power of two.) +static inline uint32_t guf_mod_pow2_u32(uint32_t a, uint32_t pow2) {return a & (pow2 - 1);} +static inline uint64_t guf_mod_pow2_u64(uint64_t a, uint64_t pow2) {return a & (pow2 - 1);} +static inline guf_hash_size_t guf_mod_pow2_hash(guf_hash_size_t a, guf_hash_size_t pow2) {return a & (pow2 - 1);} + #endif #if defined(GUF_IMPL) || defined(GUF_IMPL_STATIC) @@ -45,7 +50,7 @@ GUF_FN_KEYWORDS uint64_t guf_hash64(const void *data, ptrdiff_t num_bytes, uint6 GUF_FN_KEYWORDS uint32_t guf_hash32(const void *data, ptrdiff_t num_bytes, uint32_t hash) { GUF_ASSERT_RELEASE(data); - GUF_ASSERT_RELEASE(num_bytes > 0); + GUF_ASSERT_RELEASE(num_bytes >= 0); const unsigned char *data_bytes = (const unsigned char*)data; // This does not break strict-aliasing rules I think... const uint32_t FNV_32_PRIME = 16777619ul; for (ptrdiff_t i = 0; i < num_bytes; ++i) { @@ -58,7 +63,7 @@ GUF_FN_KEYWORDS uint32_t guf_hash32(const void *data, ptrdiff_t num_bytes, uint3 GUF_FN_KEYWORDS uint64_t guf_hash64(const void *data, ptrdiff_t num_bytes, uint64_t hash) { GUF_ASSERT_RELEASE(data); - GUF_ASSERT_RELEASE(num_bytes > 0); + GUF_ASSERT_RELEASE(num_bytes >= 0); const unsigned char *data_bytes = (const unsigned char*)data; // This does not break strict-aliasing rules I think... const uint64_t FNV_64_PRIME = 1099511628211ull; for (ptrdiff_t i = 0; i < num_bytes; ++i) { diff --git a/src/guf_test.c b/src/guf_test.c index ed30068..0db5463 100644 --- a/src/guf_test.c +++ b/src/guf_test.c @@ -50,23 +50,35 @@ int main(void) { - dict_cstr_int ht; - dict_cstr_int_try_init(&ht, &guf_allocator_libc, NULL); - - dict_cstr_int_kv kv = {.key = "Hello", .val = 42}; - dict_cstr_int_try_insert(&ht, &kv.key, &kv.val, GUF_CPY_VALUE, GUF_CPY_VALUE, NULL); - - kv = (dict_cstr_int_kv){.key = "World", .val = 64}; - dict_cstr_int_try_insert(&ht, &kv.key, &kv.val, GUF_CPY_VALUE, GUF_CPY_VALUE, NULL); - - dict_cstr_int_free(&ht, NULL); - - printf("libguf test: compiled with C %ld\n", __STDC_VERSION__); guf_allocator test_allocator = guf_allocator_libc; guf_libc_alloc_ctx test_allocator_ctx = {.alloc_type_id = 0, .thread_id = 0, .zero_init = true}; test_allocator.ctx = &test_allocator_ctx; + dict_cstr_int ht; + dict_cstr_int_init(&ht, &test_allocator); + + dict_cstr_int_insert_val_arg(&ht, "Hello", 42, GUF_CPY_VALUE, GUF_CPY_VALUE); + dict_cstr_int_insert_val_arg(&ht, "World", 64, GUF_CPY_VALUE, GUF_CPY_VALUE); + + guf_cstr_const key = "World"; + int *res = dict_cstr_int_at_val_arg(&ht, "World"); + if (res) { + printf("%s: %d\n", key, *res); + } else { + printf("key '%s' not found\n", key); + } + + GUF_ASSERT(dict_cstr_int_at_val_arg(&ht, "World")); + GUF_ASSERT(dict_cstr_int_at_val_arg(&ht, "Hello")); + GUF_ASSERT(dict_cstr_int_at_val_arg(&ht, "hello") == NULL); + GUF_ASSERT(dict_cstr_int_at_val_arg(&ht, "") == NULL); + + GUF_ASSERT(dict_cstr_int_contains_val_arg(&ht, "World")); + GUF_ASSERT(dict_cstr_int_contains_val_arg(&ht, "Hello")); + + dict_cstr_int_free(&ht, NULL); + GUF_CNT_LIFETIME_BLOCK(dbuf_float, floats, { floats = dbuf_float_new(&guf_allocator_libc); diff --git a/src/guf_test_dict_impl.c b/src/guf_test_dict_impl.c index 0c6260c..ac33dca 100644 --- a/src/guf_test_dict_impl.c +++ b/src/guf_test_dict_impl.c @@ -7,3 +7,11 @@ #define GUF_DICT_NAME dict_cstr_int #define GUF_IMPL #include "guf_dict.h" + +#define GUF_DICT_KEY_T int32_t +#define GUF_DICT_KEY_HASH int32_hash +#define GUF_DICT_KEY_T_EQ int32_eq +#define GUF_DICT_VAL_T bool +#define GUF_DICT_NAME dict_i32_bool +#define GUF_IMPL +#include "guf_dict.h" diff --git a/src/guf_test_dict_impl.h b/src/guf_test_dict_impl.h index 47ed821..c6df759 100644 --- a/src/guf_test_dict_impl.h +++ b/src/guf_test_dict_impl.h @@ -11,4 +11,21 @@ #define GUF_DICT_NAME dict_cstr_int #include "guf_dict.h" +static inline guf_hash_size_t int32_hash(const int32_t *a) +{ + return guf_hash(a, sizeof(int32_t), GUF_HASH_INIT); +} + +static inline bool int32_eq(const int32_t *a, const int32_t *b) +{ + return *a == *b; +} + +#define GUF_DICT_KEY_T int32_t +#define GUF_DICT_KEY_HASH int32_hash +#define GUF_DICT_KEY_T_EQ int32_eq +#define GUF_DICT_VAL_T bool +#define GUF_DICT_NAME dict_i32_bool +#include "guf_dict.h" + #endif