From 4c35d180e8c1c06249295e1fe4b9dc67e912752d Mon Sep 17 00:00:00 2001 From: jun <83899451+zeichensystem@users.noreply.github.com> Date: Sun, 23 Feb 2025 13:23:23 +0100 Subject: [PATCH] Improve iterator functions --- src/guf_common.h | 7 +- src/guf_dbuf.h | 36 ++++++--- src/guf_dict.h | 167 ++++++++++++++++++++++++++++++++------- src/guf_hash.h | 2 +- src/guf_test.c | 24 ++++-- src/guf_test_dict_impl.c | 2 + src/guf_test_dict_impl.h | 2 + 7 files changed, 187 insertions(+), 53 deletions(-) diff --git a/src/guf_common.h b/src/guf_common.h index e8cd455..0dfea9f 100644 --- a/src/guf_common.h +++ b/src/guf_common.h @@ -36,15 +36,18 @@ typedef enum guf_cpy_opt { // The GUF_CAT/GUF_TOK_CAT indirection is necessary because the ## operation alone does not evaluate the macro arguments. #define GUF_TOK_CAT(a, b) a##b #define GUF_CAT(a, b) GUF_TOK_CAT(a, b) + // See comment above. #define GUF_TOK_STRINGIFY(x) #x #define GUF_STRINGIFY(x) GUF_TOK_STRINGIFY(x) +#define GUF_CNT_NPOS PTRDIFF_MIN + #define GUF_CNT_FOREACH(CNT_PTR, CNT_TYPE, IT_NAME) for (GUF_CAT(CNT_TYPE, _iter) IT_NAME = GUF_CAT(CNT_TYPE, _begin)(CNT_PTR); IT_NAME.ptr != GUF_CAT(CNT_TYPE, _end)(CNT_PTR).ptr; IT_NAME = GUF_CAT(CNT_TYPE, _iter_next)(CNT_PTR, IT_NAME, 1)) #define GUF_CNT_FOREACH_STEP(CNT_PTR, CNT_TYPE, IT_NAME, STEP) for (GUF_CAT(CNT_TYPE, _iter) IT_NAME = GUF_CAT(CNT_TYPE, _begin)(CNT_PTR); IT_NAME.ptr != GUF_CAT(CNT_TYPE, _end)(CNT_PTR).ptr; IT_NAME = GUF_CAT(CNT_TYPE, _iter_next)(CNT_PTR, IT_NAME, STEP)) -#define GUF_CNT_FOREACH_REVERSE(CNT_PTR, CNT_TYPE, IT_NAME) for (GUF_CAT(CNT_TYPE, _iter) IT_NAME = GUF_CAT(CNT_TYPE, _rbegin)(CNT_PTR); IT_NAME.ptr != GUF_CAT(CNT_TYPE, _rend)(CNT_PTR).ptr; it = GUF_CAT(CNT_TYPE, _iter_next)(CNT_PTR, IT_NAME, 1)) -#define GUF_CNT_FOREACH_REVERSE_STEP(CNT_PTR, CNT_TYPE, IT_NAME, STEP) for (GUF_CAT(CNT_TYPE, _iter) IT_NAME = GUF_CAT(CNT_TYPE, _begin)(CNT_PTR); IT_NAME.ptr != GUF_CAT(CNT_TYPE, _end)(CNT_PTR).ptr; it = GUF_CAT(CNT_TYPE, _iter_next)(CNT_PTR, IT_NAME, STEP)) +#define GUF_CNT_FOREACH_REVERSE(CNT_PTR, CNT_TYPE, IT_NAME) for (GUF_CAT(CNT_TYPE, _iter) IT_NAME = GUF_CAT(CNT_TYPE, _rbegin)(CNT_PTR); IT_NAME.ptr != GUF_CAT(CNT_TYPE, _rend)(CNT_PTR).ptr; IT_NAME = GUF_CAT(CNT_TYPE, _iter_next)(CNT_PTR, IT_NAME, 1)) +#define GUF_CNT_FOREACH_REVERSE_STEP(CNT_PTR, CNT_TYPE, IT_NAME, STEP) for (GUF_CAT(CNT_TYPE, _iter) IT_NAME = GUF_CAT(CNT_TYPE, _rbegin)(CNT_PTR); IT_NAME.ptr != GUF_CAT(CNT_TYPE, _rend)(CNT_PTR).ptr; IT_NAME = GUF_CAT(CNT_TYPE, _iter_next)(CNT_PTR, IT_NAME, STEP)) #define GUF_CNT_LIFETIME_BLOCK(GUF_CNT_TYPE, CNT_VARNAME, CODE) do { \ GUF_CNT_TYPE CNT_VARNAME; \ diff --git a/src/guf_dbuf.h b/src/guf_dbuf.h index 587627b..2e0321b 100644 --- a/src/guf_dbuf.h +++ b/src/guf_dbuf.h @@ -152,6 +152,8 @@ GUF_FN_KEYWORDS GUF_CAT(GUF_CNT_NAME, _iter) GUF_CAT(GUF_CNT_NAME, _begin)(const GUF_FN_KEYWORDS GUF_CAT(GUF_CNT_NAME, _iter) GUF_CAT(GUF_CNT_NAME, _end)(const GUF_CNT_NAME* dbuf); GUF_FN_KEYWORDS GUF_CAT(GUF_CNT_NAME, _iter) GUF_CAT(GUF_CNT_NAME, _rbegin)(const GUF_CNT_NAME* dbuf); GUF_FN_KEYWORDS GUF_CAT(GUF_CNT_NAME, _iter) GUF_CAT(GUF_CNT_NAME, _rend)(const GUF_CNT_NAME* dbuf); +GUF_FN_KEYWORDS bool GUF_CAT(GUF_CNT_NAME, _iter_is_end)(const GUF_CNT_NAME* dbuf, GUF_CAT(GUF_CNT_NAME, _iter) it); + GUF_FN_KEYWORDS GUF_CAT(GUF_CNT_NAME, _iter) GUF_CAT(GUF_CNT_NAME, _iter_next)(const GUF_CNT_NAME *dbuf, GUF_CAT(GUF_CNT_NAME, _iter) it, ptrdiff_t step); GUF_FN_KEYWORDS GUF_CAT(GUF_CNT_NAME, _iter) GUF_CAT(GUF_CNT_NAME, _iter_at_idx)(const GUF_CNT_NAME* dbuf, ptrdiff_t idx); GUF_FN_KEYWORDS GUF_CAT(GUF_CNT_NAME, _iter) GUF_CAT(GUF_CNT_NAME, _reverse_iter_at_idx)(const GUF_CNT_NAME* dbuf, ptrdiff_t idx); @@ -160,10 +162,11 @@ GUF_FN_KEYWORDS ptrdiff_t GUF_CAT(GUF_CNT_NAME, _iter_to_idx)(const GUF_CNT_NAME #if defined(GUF_T_IS_INTEGRAL_TYPE) || defined(GUF_T_EQ) GUF_FN_KEYWORDS GUF_CAT(GUF_CNT_NAME, _iter) GUF_CAT(GUF_CNT_NAME, _find)(GUF_CNT_NAME *dbuf, GUF_CAT(GUF_CNT_NAME, _iter) begin, GUF_CAT(GUF_CNT_NAME, _iter) end, const GUF_T *needle); GUF_FN_KEYWORDS GUF_CAT(GUF_CNT_NAME, _iter) GUF_CAT(GUF_CNT_NAME, _find_val)(GUF_CNT_NAME *dbuf, GUF_CAT(GUF_CNT_NAME, _iter) begin, GUF_CAT(GUF_CNT_NAME, _iter) end, GUF_T needle_val); -GUF_FN_KEYWORDS GUF_CAT(GUF_CNT_NAME, _iter) GUF_CAT(GUF_CNT_NAME, _find_if)(GUF_CNT_NAME *dbuf, GUF_CAT(GUF_CNT_NAME, _iter) begin, GUF_CAT(GUF_CNT_NAME, _iter) end, bool (*predicate)(const GUF_T *)); GUF_FN_KEYWORDS bool GUF_CAT(GUF_CNT_NAME, _contains)(GUF_CNT_NAME *dbuf, const GUF_T *needle); GUF_FN_KEYWORDS bool GUF_CAT(GUF_CNT_NAME, _contains_val)(GUF_CNT_NAME *dbuf, GUF_T needle); #endif +GUF_FN_KEYWORDS GUF_CAT(GUF_CNT_NAME, _iter) GUF_CAT(GUF_CNT_NAME, _find_if)(GUF_CNT_NAME *dbuf, GUF_CAT(GUF_CNT_NAME, _iter) begin, GUF_CAT(GUF_CNT_NAME, _iter) end, bool (*predicate)(const GUF_T *)); + #if defined(GUF_IMPL) || defined(GUF_IMPL_STATIC) @@ -828,6 +831,13 @@ GUF_FN_KEYWORDS GUF_CAT(GUF_CNT_NAME, _iter) GUF_CAT(GUF_CNT_NAME, _rend)(const }; } +GUF_FN_KEYWORDS bool GUF_CAT(GUF_CNT_NAME, _iter_is_end)(const GUF_CNT_NAME* dbuf, GUF_CAT(GUF_CNT_NAME, _iter) it) +{ + const bool is_reverse_it = it.base != NULL; + const GUF_CAT(GUF_CNT_NAME, _iter) dbuf_end_it = is_reverse_it ? GUF_CAT(GUF_CNT_NAME, _rend)(dbuf) : GUF_CAT(GUF_CNT_NAME, _end)(dbuf); + return it.ptr == dbuf_end_it.ptr; +} + GUF_FN_KEYWORDS GUF_CAT(GUF_CNT_NAME, _iter) GUF_CAT(GUF_CNT_NAME, _iter_at_idx)(const GUF_CNT_NAME* dbuf, ptrdiff_t idx) { GUF_ASSERT(GUF_CAT(GUF_CNT_NAME, _valid)(dbuf)); @@ -874,21 +884,23 @@ GUF_FN_KEYWORDS GUF_CAT(GUF_CNT_NAME, _iter) GUF_CAT(GUF_CNT_NAME, _reverse_ite return it; } -static const ptrdiff_t GUF_CAT(GUF_CNT_NAME, _npos) = PTRDIFF_MIN; - -GUF_FN_KEYWORDS ptrdiff_t -GUF_CAT(GUF_CNT_NAME, _iter_to_idx)(const GUF_CNT_NAME* dbuf, GUF_CAT(GUF_CNT_NAME, _iter) it) +GUF_FN_KEYWORDS ptrdiff_t GUF_CAT(GUF_CNT_NAME, _iter_to_idx)(const GUF_CNT_NAME* dbuf, GUF_CAT(GUF_CNT_NAME, _iter) it) { GUF_ASSERT(GUF_CAT(GUF_CNT_NAME, _valid)(dbuf)); if ((!it.ptr && !it.base) || !dbuf->data || !dbuf->size) { - return GUF_CAT(GUF_CNT_NAME, _npos); + return GUF_CNT_NPOS; } - bool is_reverse_it = it.base != NULL; + const bool is_reverse_it = it.base != NULL; + const GUF_CAT(GUF_CNT_NAME, _iter) end_it = is_reverse_it ? GUF_CAT(GUF_CNT_NAME, _rend)(dbuf) : GUF_CAT(GUF_CNT_NAME, _end)(dbuf); + + if (it.ptr == end_it.ptr) { + return is_reverse_it ? -1 : dbuf->size; + } if (is_reverse_it) { - return (ptrdiff_t)(it.base - dbuf->data) - 1; + return (ptrdiff_t)(it.base - dbuf->data) - 1; } else { return (ptrdiff_t)(it.ptr - dbuf->data); } @@ -945,7 +957,7 @@ GUF_FN_KEYWORDS GUF_CAT(GUF_CNT_NAME, _iter) GUF_CAT(GUF_CNT_NAME, _find)(GUF_CN return dbuf_end_it; } - for (GUF_CAT(GUF_CNT_NAME, _iter) it = begin; it.ptr != end.ptr; it = GUF_CAT(GUF_CNT_NAME, _iter_next)(dbuf, it, 1)) { + for (GUF_CAT(GUF_CNT_NAME, _iter) it = begin; it.ptr != end.ptr && it.ptr != NULL; it = GUF_CAT(GUF_CNT_NAME, _iter_next)(dbuf, it, 1)) { #ifdef GUF_T_EQ if (GUF_T_EQ(it.ptr, needle)) { return it; @@ -956,7 +968,7 @@ GUF_FN_KEYWORDS GUF_CAT(GUF_CNT_NAME, _iter) GUF_CAT(GUF_CNT_NAME, _find)(GUF_CN } #endif } - return GUF_CAT(GUF_CNT_NAME, _end)(dbuf); + return dbuf_end_it; } GUF_FN_KEYWORDS GUF_CAT(GUF_CNT_NAME, _iter) GUF_CAT(GUF_CNT_NAME, _find_val)(GUF_CNT_NAME *dbuf, GUF_CAT(GUF_CNT_NAME, _iter) begin, GUF_CAT(GUF_CNT_NAME, _iter) end, GUF_T needle_val) @@ -975,6 +987,7 @@ GUF_FN_KEYWORDS bool GUF_CAT(GUF_CNT_NAME, _contains_val)(GUF_CNT_NAME *dbuf, GU { return GUF_CAT(GUF_CNT_NAME, _contains)(dbuf, &needle); } +#endif GUF_FN_KEYWORDS GUF_CAT(GUF_CNT_NAME, _iter) GUF_CAT(GUF_CNT_NAME, _find_if)(GUF_CNT_NAME *dbuf, GUF_CAT(GUF_CNT_NAME, _iter) begin, GUF_CAT(GUF_CNT_NAME, _iter) end, bool (*predicate)(const GUF_T *)) { @@ -992,14 +1005,13 @@ GUF_FN_KEYWORDS GUF_CAT(GUF_CNT_NAME, _iter) GUF_CAT(GUF_CNT_NAME, _find_if)(GUF return dbuf_end_it; } - for (GUF_CAT(GUF_CNT_NAME, _iter) it = begin; it.ptr != end.ptr; GUF_CAT(GUF_CNT_NAME, _iter_next)(dbuf, it, 1)) { + for (GUF_CAT(GUF_CNT_NAME, _iter) it = begin; it.ptr != end.ptr && it.ptr != NULL; GUF_CAT(GUF_CNT_NAME, _iter_next)(dbuf, it, 1)) { if (predicate(it.ptr)) { return it; } } return GUF_CAT(GUF_CNT_NAME, _end)(dbuf); } -#endif #endif /* end #ifdef GUF_IMPL */ diff --git a/src/guf_dict.h b/src/guf_dict.h index 71f0e19..29151d7 100755 --- a/src/guf_dict.h +++ b/src/guf_dict.h @@ -23,12 +23,16 @@ #define GUF_DICT_H #include "guf_common.h" #include "guf_alloc.h" - #include "guf_hash.h" - typedef struct guf_dict_kv_meta { - guf_hash_size_t kv_idx; // index into the key-value buffer. TODO: uint64 consisting of hash_fragment + idx? - guf_hash_size_t key_hash; - } guf_dict_kv_meta; + typedef struct guf_dict_kv_meta_32 { + uint32_t kv_idx; // Index into the kv_elems dbuf. + uint32_t key_hash; + } guf_dict_kv_meta_32; + + typedef struct guf_dict_kv_meta_64 { + uint64_t kv_idx; // Index into the kv_elems dbuf. + uint64_t key_hash; + } guf_dict_kv_meta_64; #endif #ifndef GUF_DICT_KEY_T @@ -47,6 +51,19 @@ #define GUF_DICT_IS_SET #endif +#ifdef GUF_DICT_32_BIT + #define GUF_DICT_SIZE_T uint32_t + #define GUF_DICT_KV_META_T guf_dict_kv_meta_32 + #define GUF_DICT_KV_IDX_NULL UINT32_MAX +#else + #define GUF_DICT_SIZE_T uint64_t + #define GUF_DICT_KV_META_T guf_dict_kv_meta_64 + #define GUF_DICT_KV_IDX_NULL UINT64_MAX +#endif + +#define GUF_DICT_KV_IDX_TOMBSTONE (GUF_DICT_KV_IDX_NULL - 1) +#define GUF_DICT_MAX_SIZE (GUF_DICT_KV_IDX_TOMBSTONE - 1) + // #ifndef GUF_DICT_KEY_LOOKUP_T // #define GUF_DICT_KEY_LOOKUP_T GUF_DICT_KEY_T // #else @@ -85,10 +102,10 @@ typedef struct GUF_DICT_KV_NAME { #include "guf_dbuf.h" typedef struct GUF_DICT_NAME { - GUF_DICT_KV_DBUF kv_elems; // The actual key-value elements (contiguous in memory) - guf_dict_kv_meta *kv_indices; // Indices into the kv_elems dbuf. + GUF_DICT_KV_DBUF kv_elems; // The actual key-value elements (contiguous in memory) + GUF_DICT_KV_META_T *kv_indices; // Indices into the kv_elems dbuf. ptrdiff_t kv_indices_cap, num_tombstones; - ptrdiff_t max_probelen; // Stores the worst-case probelen. + ptrdiff_t max_probelen; // Stores the worst-case probe-length (for performance measurement) } GUF_DICT_NAME; typedef GUF_CAT(GUF_DICT_KV_DBUF, _iter) GUF_CAT(GUF_DICT_NAME, _iter); @@ -117,27 +134,34 @@ GUF_DICT_FN_KEYWORDS bool GUF_CAT(GUF_DICT_NAME, _contains)(GUF_DICT_NAME *ht, c GUF_DICT_FN_KEYWORDS bool GUF_CAT(GUF_DICT_NAME, _contains_val_arg)(GUF_DICT_NAME *ht, GUF_DICT_KEY_T key); GUF_DICT_FN_KEYWORDS ptrdiff_t GUF_CAT(GUF_DICT_NAME, _size)(const GUF_DICT_NAME *ht); +GUF_DICT_FN_KEYWORDS double GUF_CAT(GUF_DICT_NAME, _load_factor)(const GUF_DICT_NAME *ht); /* Iterator functions */ GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _begin)(const GUF_DICT_NAME* ht); GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _end)(const GUF_DICT_NAME* ht); GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _rbegin)(const GUF_DICT_NAME* ht); GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _rend)(const GUF_DICT_NAME* ht); +GUF_DICT_FN_KEYWORDS bool GUF_CAT(GUF_DICT_NAME, _iter_is_end)(const GUF_DICT_NAME* ht, GUF_CAT(GUF_DICT_NAME, _iter) it); GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _iter_next)(const GUF_DICT_NAME *ht, GUF_CAT(GUF_DICT_NAME, _iter) it, ptrdiff_t step); GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _iter_at_idx)(const GUF_DICT_NAME *ht, ptrdiff_t idx); GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _reverse_iter_at_idx)(const GUF_DICT_NAME *ht, ptrdiff_t idx); GUF_DICT_FN_KEYWORDS ptrdiff_t GUF_CAT(GUF_DICT_NAME, _iter_to_idx)(const GUF_DICT_NAME *ht, GUF_CAT(GUF_DICT_NAME, _iter) it); +#if defined(GUF_DICT_VAL_T) && (defined(GUF_DICT_VAL_T_EQ) || defined(GUF_DICT_VAL_T_IS_INTEGRAL_TYPE)) + GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _find_val)(GUF_DICT_NAME *ht, GUF_CAT(GUF_DICT_NAME, _iter) begin, GUF_CAT(GUF_DICT_NAME, _iter) end, const GUF_DICT_VAL_T *needle_val); + GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _find_val_val_arg)(GUF_DICT_NAME *ht, GUF_CAT(GUF_DICT_NAME, _iter) begin, GUF_CAT(GUF_DICT_NAME, _iter) end, GUF_DICT_VAL_T needle_val); +#endif +#if defined(GUF_DICT_VAL_T) + GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _find_val_if)(GUF_DICT_NAME *ht, GUF_CAT(GUF_DICT_NAME, _iter) begin, GUF_CAT(GUF_DICT_NAME, _iter) end, bool (*predicate)(const GUF_DICT_VAL_T *)); +#endif + // #define GUF_DICT_IMPL /* DEBUGGGGGGGGG */ #if defined(GUF_DICT_IMPL) || defined(GUF_DICT_IMPL_STATIC) #include "guf_assert.h" -#define GUF_DICT_KV_IDX_NULL GUF_HASH_MAX -#define GUF_DICT_KV_IDX_TOMBSTONE (GUF_HASH_MAX - 1) - static inline void GUF_CAT(GUF_DICT_KV_NAME, _free)(GUF_DICT_KV_NAME *kv, void *ctx) { (void)ctx; @@ -158,12 +182,13 @@ static inline void GUF_CAT(GUF_DICT_KV_NAME, _free)(GUF_DICT_KV_NAME *kv, void * #define GUF_IMPL #include "guf_dbuf.h" -static inline double GUF_CAT(GUF_DICT_NAME, _load_factor)(const GUF_DICT_NAME *ht) +GUF_DICT_FN_KEYWORDS double GUF_CAT(GUF_DICT_NAME, _load_factor)(const GUF_DICT_NAME *ht) { if (ht->kv_indices_cap == 0) { return 1; } ptrdiff_t occupied_count = ht->kv_elems.size + ht->num_tombstones; + GUF_ASSERT(occupied_count <= ht->kv_indices_cap); return (double)occupied_count / (double)ht->kv_indices_cap; } @@ -211,7 +236,7 @@ GUF_DICT_FN_KEYWORDS void GUF_CAT(GUF_DICT_NAME, _free)(GUF_DICT_NAME *ht, void guf_allocator *allocator = ht->kv_elems.allocator; if (ht->kv_indices) { - allocator->free(ht->kv_indices, ht->kv_indices_cap * sizeof(guf_dict_kv_meta), allocator->ctx); + allocator->free(ht->kv_indices, ht->kv_indices_cap * sizeof(GUF_DICT_KV_META_T), allocator->ctx); ht->kv_indices = NULL; ht->kv_indices_cap = 0; } @@ -244,16 +269,19 @@ static inline size_t GUF_CAT(GUF_DICT_NAME, _probe_offset)(size_t probe_len) static size_t GUF_CAT(GUF_DICT_NAME, _find_idx)(GUF_DICT_NAME *ht, const GUF_DICT_KEY_T *key, bool *key_exists) { - if (ht->kv_indices_cap == 0) { + if (ht->kv_indices_cap <= 0) { + *key_exists = false; return SIZE_MAX; } - const guf_hash_size_t hash = GUF_DICT_KEY_HASH(key); - size_t idx = (size_t)guf_mod_pow2_hash(hash, ht->kv_indices_cap); + const GUF_DICT_SIZE_T hash = GUF_DICT_KEY_HASH(key); + + #define GUF_MOD_CAP(a) ((size_t)(a) & (size_t)(ht->kv_indices_cap - 1)) // a % ht->kv_indices_cap (kv_indices_cap must be a power of two > 0) + + size_t idx = GUF_MOD_CAP(hash); const size_t start_idx = idx; - size_t probe_len = 1; size_t first_tombstone_idx = SIZE_MAX; + size_t probe_len = 1; do { - // printf("idx : %zu %td\n", idx, ht->kv_meta_buf_cap); if (ht->kv_indices[idx].kv_idx == GUF_DICT_KV_IDX_NULL) { // 1.) Empty. if (first_tombstone_idx != SIZE_MAX) { idx = first_tombstone_idx; @@ -273,20 +301,21 @@ static size_t GUF_CAT(GUF_DICT_NAME, _find_idx)(GUF_DICT_NAME *ht, const GUF_DIC return idx; } else { // 4.) Have to probe due to hash-collision (idx is already occupied, but not by the key). probe: - idx = (idx + GUF_CAT(GUF_DICT_NAME, _probe_offset)(probe_len)) % ht->kv_indices_cap; + idx = GUF_MOD_CAP(idx + GUF_CAT(GUF_DICT_NAME, _probe_offset)(probe_len)); ++probe_len; - GUF_ASSERT_RELEASE(probe_len < UINT32_MAX); + GUF_ASSERT((ptrdiff_t)probe_len <= (ht->kv_elems.size + ht->num_tombstones)); } } while (idx != start_idx); + *key_exists = false; if (first_tombstone_idx != SIZE_MAX) { // Edge case: No empty slots, but found tombstone. ht->max_probelen = GUF_MAX((ptrdiff_t)probe_len, ht->max_probelen); - GUF_ASSERT(ht->kv_indices[first_tombstone_idx].kv_idx == GUF_DICT_KV_IDX_NULL ); - *key_exists = false; + GUF_ASSERT(ht->kv_indices[first_tombstone_idx].kv_idx == GUF_DICT_KV_IDX_NULL); return first_tombstone_idx; + } else { // Failed to find an idx. + return SIZE_MAX; } - *key_exists = false; - return SIZE_MAX; // Failed to find an idx. + #undef GUF_MOD_CAP } GUF_DICT_FN_KEYWORDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, GUF_DICT_KEY_T *key, GUF_DICT_VAL_T *val, guf_cpy_opt key_opt, guf_cpy_opt val_opt, guf_err *err) @@ -297,14 +326,18 @@ GUF_DICT_FN_KEYWORDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, GUF_ERR_MSG("in function dict_try_insert: key or val argument is NULL")); return; } + if ((size_t)ht->kv_elems.size == GUF_DICT_MAX_SIZE) { + guf_err_set_or_panic(err, GUF_ERR_INT_OVERFLOW, GUF_ERR_MSG("in function dict_try_insert: dict has reached its max size (UINT64_MAX - 2 or UINT32_MAX - 2)")); + return; + } - const ptrdiff_t KV_META_START_CAP = 64; // Must be a power of two. - const ptrdiff_t KV_META_GROWTH_FAC = 2; + const ptrdiff_t KV_META_START_CAP = 64; // Must be a power of two > 0. + const ptrdiff_t KV_META_GROWTH_FAC = 2; // Must be a power of two > 0. guf_allocator *allocator = ht->kv_elems.allocator; if (ht->kv_indices_cap == 0) { // 1.a) Allocate initial kv-index-buffer. - guf_dict_kv_meta *new_kv_indices = allocator->alloc(KV_META_START_CAP * sizeof(guf_dict_kv_meta), allocator->ctx); + GUF_DICT_KV_META_T *new_kv_indices = allocator->alloc(KV_META_START_CAP * sizeof(GUF_DICT_KV_META_T), allocator->ctx); if (new_kv_indices == NULL) { guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, GUF_ERR_MSG("in function dict_try_insert: Initial allocation failed")); return; @@ -317,13 +350,13 @@ GUF_DICT_FN_KEYWORDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, } } else if (GUF_CAT(GUF_DICT_NAME, _load_factor)(ht) > 0.6) { // 1.b) Grow kv-index-buffer. GUF_ASSERT(ht->kv_indices); - const ptrdiff_t old_size = ht->kv_indices_cap * sizeof(guf_dict_kv_meta); + const ptrdiff_t old_size = ht->kv_indices_cap * sizeof(GUF_DICT_KV_META_T); ptrdiff_t new_size = 0; if (!guf_size_calc_safe(old_size, KV_META_GROWTH_FAC, &new_size)) { guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, GUF_ERR_MSG("in function dict_try_insert: New capacity would overflow)")); return; } - guf_dict_kv_meta *new_kv_indices = allocator->alloc(new_size, allocator->ctx); + GUF_DICT_KV_META_T *new_kv_indices = allocator->alloc(new_size, allocator->ctx); if (new_kv_indices == NULL) { guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, GUF_ERR_MSG("in function dict_try_insert: allocation failed")); return; @@ -335,7 +368,7 @@ GUF_DICT_FN_KEYWORDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, new_kv_indices[i].key_hash = 0; } - guf_dict_kv_meta *old_kv_indices = ht->kv_indices; + GUF_DICT_KV_META_T *old_kv_indices = ht->kv_indices; ptrdiff_t old_kv_indices_cap = ht->kv_indices_cap; ht->kv_indices = new_kv_indices; @@ -575,6 +608,13 @@ GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _rend) return (GUF_CAT(GUF_DICT_NAME, _iter)){.ptr = kv_it.ptr, .base = kv_it.base}; } +GUF_DICT_FN_KEYWORDS bool GUF_CAT(GUF_DICT_NAME, _iter_is_end)(const GUF_DICT_NAME* ht, GUF_CAT(GUF_DICT_NAME, _iter) it) +{ + const bool is_reverse_it = it.base != NULL; + const GUF_CAT(GUF_DICT_NAME, _iter) dbuf_end_it = is_reverse_it ? GUF_CAT(GUF_DICT_NAME, _rend)(ht) : GUF_CAT(GUF_DICT_NAME, _end)(ht); + return it.ptr == dbuf_end_it.ptr; +} + GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _iter_next)(const GUF_DICT_NAME *ht, GUF_CAT(GUF_DICT_NAME, _iter) it, ptrdiff_t step) { GUF_CAT(GUF_DICT_KV_DBUF, _iter) kv_it = GUF_CAT(GUF_DICT_KV_DBUF, _iter_next)(&ht->kv_elems, it, step); @@ -597,11 +637,77 @@ GUF_DICT_FN_KEYWORDS ptrdiff_t GUF_CAT(GUF_DICT_NAME, _iter_to_idx)(const GUF_DI return GUF_CAT(GUF_DICT_KV_DBUF, _iter_to_idx)(&ht->kv_elems, it); } +#if defined(GUF_DICT_VAL_T) && (defined(GUF_DICT_VAL_T_EQ) || defined(GUF_DICT_VAL_T_IS_INTEGRAL_TYPE)) +GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _find_val)(GUF_DICT_NAME *ht, GUF_CAT(GUF_DICT_NAME, _iter) begin, GUF_CAT(GUF_DICT_NAME, _iter) end, const GUF_DICT_VAL_T *needle) +{ + GUF_ASSERT_RELEASE(GUF_CAT(GUF_DICT_NAME, _valid)(ht)); + GUF_ASSERT_RELEASE(needle); + + const bool is_reverse_it = begin.base != NULL; + GUF_ASSERT_RELEASE(is_reverse_it == (end.base != NULL)); // begin and end must be the same iterator type. + const GUF_CAT(GUF_DICT_NAME, _iter) dbuf_end_it = is_reverse_it ? GUF_CAT(GUF_DICT_NAME, _rend)(ht) : GUF_CAT(GUF_DICT_NAME, _end)(ht); + + if (!ht->kv_elems.data || !ht->kv_elems.size || (!begin.ptr && !begin.base) || (!end.ptr && !end.base)) { + return dbuf_end_it; + } + if ((begin.ptr == dbuf_end_it.ptr) || (!is_reverse_it && begin.ptr >= end.ptr) || (is_reverse_it && begin.base <= end.base)) { + return dbuf_end_it; + } + + for (GUF_CAT(GUF_DICT_NAME, _iter) it = begin; it.ptr != end.ptr && it.ptr != NULL; it = GUF_CAT(GUF_DICT_NAME, _iter_next)(ht, it, 1)) { + #ifdef GUF_DICT_VAL_T_EQ + if (GUF_DICT_VAL_T_EQ(&(it.ptr->val), needle)) { + return it; + } + #else + if (it.ptr->val == *needle) { + return it; + } + #endif + } + return dbuf_end_it; +} + +GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _find_val_val_arg)(GUF_DICT_NAME *ht, GUF_CAT(GUF_DICT_NAME, _iter) begin, GUF_CAT(GUF_DICT_NAME, _iter) end, GUF_DICT_VAL_T needle) +{ + return GUF_CAT(GUF_DICT_NAME, _find_val)(ht, begin, end, &needle); +} +#endif + +#if defined(GUF_DICT_VAL_T) +GUF_DICT_FN_KEYWORDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _find_val_if)(GUF_DICT_NAME *ht, GUF_CAT(GUF_DICT_NAME, _iter) begin, GUF_CAT(GUF_DICT_NAME, _iter) end, bool (*predicate)(const GUF_DICT_VAL_T *)) +{ + GUF_ASSERT_RELEASE(GUF_CAT(GUF_DICT_NAME, _valid)(ht)); + GUF_ASSERT_RELEASE(predicate); + + const bool is_reverse_it = begin.base != NULL; + GUF_ASSERT_RELEASE(is_reverse_it == (end.base != NULL)); // begin and end must be the same iterator type. + const GUF_CAT(GUF_DICT_NAME, _iter) dbuf_end_it = is_reverse_it ? GUF_CAT(GUF_DICT_NAME, _rend)(ht) : GUF_CAT(GUF_DICT_NAME, _end)(ht); + + if (!ht->kv_elems.data || !ht->kv_elems.size || (!begin.ptr && !begin.base) || (!end.ptr && !end.base)) { + return dbuf_end_it; + } + if ((begin.ptr == dbuf_end_it.ptr) || (!is_reverse_it && begin.ptr >= end.ptr) || (is_reverse_it && begin.base <= end.base)) { + return dbuf_end_it; + } + + for (GUF_CAT(GUF_DICT_NAME, _iter) it = begin; it.ptr != end.ptr && it.ptr != NULL; it = GUF_CAT(GUF_DICT_NAME, _iter_next)(ht, it, 1)) { + if (predicate(&(it.ptr->val))) { + return it; + } + } + return dbuf_end_it; +} +#endif #endif /* end GUF_IMPL/GUF_IMPL_STATIC */ #undef GUF_DICT_KV_IDX_NULL #undef GUF_DICT_KV_IDX_TOMBSTONE +#undef GUF_DICT_32_BIT +#undef GUF_DICT_SIZE_T +#undef GUF_DICT_MAX_SIZE +#undef GUF_DICT_KV_META_T #undef GUF_DICT_NAME #undef GUF_DICT_IS_SET @@ -618,6 +724,7 @@ GUF_DICT_FN_KEYWORDS ptrdiff_t GUF_CAT(GUF_DICT_NAME, _iter_to_idx)(const GUF_DI #undef GUF_DICT_KEY_HASH #undef GUF_DICT_VAL_T +#undef GUF_DICT_VAL_T_IS_INTEGRAL_TYPE #undef GUF_DICT_VAL_T_EQ #undef GUF_DICT_VAL_T_FREE #undef GUF_DICT_VAL_T_CMP diff --git a/src/guf_hash.h b/src/guf_hash.h index 3310817..beabac8 100644 --- a/src/guf_hash.h +++ b/src/guf_hash.h @@ -36,7 +36,7 @@ GUF_FN_KEYWORDS uint64_t guf_hash64(const void *data, ptrdiff_t num_bytes, uint6 } #endif -// a mod pow2 (with pow2 being a power of two.) +// Calculate a mod pow2 (with pow2 being a power of two != 0) static inline uint32_t guf_mod_pow2_u32(uint32_t a, uint32_t pow2) {return a & (pow2 - 1);} static inline uint64_t guf_mod_pow2_u64(uint64_t a, uint64_t pow2) {return a & (pow2 - 1);} static inline guf_hash_size_t guf_mod_pow2_hash(guf_hash_size_t a, guf_hash_size_t pow2) {return a & (pow2 - 1);} diff --git a/src/guf_test.c b/src/guf_test.c index bcfd054..6d61b1c 100644 --- a/src/guf_test.c +++ b/src/guf_test.c @@ -51,7 +51,7 @@ int main(void) { printf("libguf test: compiled with C %ld\n", __STDC_VERSION__); - + guf_allocator test_allocator = guf_allocator_libc; guf_libc_alloc_ctx test_allocator_ctx = {.alloc_type_id = 0, .thread_id = 0, .zero_init = true}; test_allocator.ctx = &test_allocator_ctx; @@ -75,16 +75,24 @@ int main(void) printf("key '%s' not found\n", key); } - // GUF_ASSERT(dict_cstr_int_at_val_arg(&ht, "World")); - // GUF_ASSERT(dict_cstr_int_at_val_arg(&ht, "Hello")); - // GUF_ASSERT(dict_cstr_int_at_val_arg(&ht, "hello") == NULL); - // GUF_ASSERT(dict_cstr_int_at_val_arg(&ht, "") == NULL); + GUF_ASSERT(dict_cstr_int_at_val_arg(&ht, "World")); + GUF_ASSERT(dict_cstr_int_at_val_arg(&ht, "Hello")); + GUF_ASSERT(dict_cstr_int_at_val_arg(&ht, "hello") == NULL); + GUF_ASSERT(dict_cstr_int_at_val_arg(&ht, "") == NULL); - // GUF_ASSERT(dict_cstr_int_contains_val_arg(&ht, "World")); - // GUF_ASSERT(dict_cstr_int_contains_val_arg(&ht, "Hello")); + GUF_ASSERT(dict_cstr_int_contains_val_arg(&ht, "World")); + GUF_ASSERT(dict_cstr_int_contains_val_arg(&ht, "Hello")); + + const int ht_needle_val = 64; + const dict_cstr_int_iter ht_it = dict_cstr_int_find_val(&ht, dict_cstr_int_begin(&ht), dict_cstr_int_end(&ht), &ht_needle_val); + + if (!dict_cstr_int_iter_is_end(&ht, ht_it)) { + printf("found value %d (key %s)\n", ht_needle_val, ht_it.ptr->key); + } dict_cstr_int_free(&ht, NULL); + GUF_CNT_LIFETIME_BLOCK(dbuf_float, floats, { floats = dbuf_float_new(&guf_allocator_libc); @@ -118,7 +126,7 @@ int main(void) dbuf_heap_cstr_iter beg = dbuf_heap_cstr_begin(&strings); dbuf_heap_cstr_iter end = dbuf_heap_cstr_end(&strings); dbuf_heap_cstr_iter fnd_it = dbuf_heap_cstr_find(&strings, beg, end, &findme); - if (fnd_it.ptr != dbuf_heap_cstr_end(&strings).ptr) { + if (!dbuf_heap_cstr_iter_is_end(&strings, fnd_it)) { printf("%s found in range [%td, %td) at idx %td\n", findme, dbuf_heap_cstr_iter_to_idx(&strings, beg), dbuf_heap_cstr_iter_to_idx(&strings, end), dbuf_heap_cstr_iter_to_idx(&strings, fnd_it)); } else { printf("%s not found in range [%td, %td) at idx %td\n", findme, dbuf_heap_cstr_iter_to_idx(&strings, beg), dbuf_heap_cstr_iter_to_idx(&strings, end), dbuf_heap_cstr_iter_to_idx(&strings, fnd_it)); diff --git a/src/guf_test_dict_impl.c b/src/guf_test_dict_impl.c index ac33dca..96c59c4 100644 --- a/src/guf_test_dict_impl.c +++ b/src/guf_test_dict_impl.c @@ -4,6 +4,7 @@ #define GUF_DICT_KEY_T_EQ guf_cstr_const_eq #define GUF_DICT_KEY_HASH guf_cstr_const_hash #define GUF_DICT_VAL_T int +#define GUF_DICT_VAL_T_IS_INTEGRAL_TYPE #define GUF_DICT_NAME dict_cstr_int #define GUF_IMPL #include "guf_dict.h" @@ -12,6 +13,7 @@ #define GUF_DICT_KEY_HASH int32_hash #define GUF_DICT_KEY_T_EQ int32_eq #define GUF_DICT_VAL_T bool +#define GUF_DICT_VAL_T_IS_INTEGRAL_TYPE #define GUF_DICT_NAME dict_i32_bool #define GUF_IMPL #include "guf_dict.h" diff --git a/src/guf_test_dict_impl.h b/src/guf_test_dict_impl.h index c6df759..c8245ca 100644 --- a/src/guf_test_dict_impl.h +++ b/src/guf_test_dict_impl.h @@ -8,6 +8,7 @@ #define GUF_DICT_KEY_HASH guf_cstr_const_hash #define GUF_DICT_KEY_T_EQ guf_cstr_const_eq #define GUF_DICT_VAL_T int +#define GUF_DICT_VAL_T_IS_INTEGRAL_TYPE #define GUF_DICT_NAME dict_cstr_int #include "guf_dict.h" @@ -25,6 +26,7 @@ static inline bool int32_eq(const int32_t *a, const int32_t *b) #define GUF_DICT_KEY_HASH int32_hash #define GUF_DICT_KEY_T_EQ int32_eq #define GUF_DICT_VAL_T bool +#define GUF_DICT_VAL_T_IS_INTEGRAL_TYPE #define GUF_DICT_NAME dict_i32_bool #include "guf_dict.h"