From 21d1b04e6b02db0a9618a37dd3c15deeff907350 Mon Sep 17 00:00:00 2001 From: jun <83899451+zeichensystem@users.noreply.github.com> Date: Thu, 20 Mar 2025 20:29:21 +0100 Subject: [PATCH] Add changeable load factor --- src/guf_common.h | 8 ++++ src/guf_dict.h | 98 ++++++++++++++++++++++++++++++++++-------------- src/guf_str.h | 2 +- 3 files changed, 79 insertions(+), 29 deletions(-) diff --git a/src/guf_common.h b/src/guf_common.h index 08ac3cd..fbeb92c 100644 --- a/src/guf_common.h +++ b/src/guf_common.h @@ -26,6 +26,14 @@ #define GUF_HASH_32_BIT #endif +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) + #define GUF_STDC_AT_LEAST_C11 +#endif + +#if (defined(__cplusplus) && __cplusplus >= 201103L) + #define GUF_STDCPP_AT_LEAST_CPP11 +#endif + /* // Copy- and move constructors: GUF_T_COPY: GUF_T *(*copy)(GUF_T *dst, const GUF_T *src, void *ctx); diff --git a/src/guf_dict.h b/src/guf_dict.h index 134477c..b0d51e9 100755 --- a/src/guf_dict.h +++ b/src/guf_dict.h @@ -9,6 +9,8 @@ #include "guf_common.h" #include "guf_alloc.h" #include "guf_hash.h" + // MAX_LOAD_FACTOR must be in range [0.1, 0.9] + #define GUF_DICT_MAX_LOAD_FACTOR_DEFAULT 0.666 #endif #ifndef GUF_DICT_KEY_T @@ -91,6 +93,14 @@ #define GUF_DICT_KV_NAME GUF_CAT(GUF_DICT_NAME, _kv) #endif +#if defined(GUF_DICT_MAX_LOAD_FACTOR) + #if defined(GUF_STDC_AT_LEAST_C11) || defined(GUF_STDCPP_AT_LEAST_CPP11) + static_assert(GUF_DICT_MAX_LOAD_FACTOR >= 0.1 && GUF_DICT_MAX_LOAD_FACTOR <= 0.9, "guf_dict.h: GUF_DICT_MAX_LOAD_FACTOR must be in range [0.1, 0.9]"); + #endif +#else + #define GUF_DICT_MAX_LOAD_FACTOR GUF_DICT_MAX_LOAD_FACTOR_DEFAULT +#endif + #define GUF_DICT_KV_DBUF GUF_CAT(GUF_DICT_KV_NAME, _dbuf) // - GUF_T_COPY: cpy function with signature GUF_T *copy(GUF_T *dst, const GUF_T *src, void *ctx) (default: copy by value) @@ -148,6 +158,7 @@ GUF_DICT_KWRDS bool GUF_CAT(GUF_DICT_NAME, _contains_val_arg)(GUF_DICT_NAME *ht, GUF_DICT_KWRDS ptrdiff_t GUF_CAT(GUF_DICT_NAME, _size)(const GUF_DICT_NAME *ht); GUF_DICT_KWRDS double GUF_CAT(GUF_DICT_NAME, _load_factor)(const GUF_DICT_NAME *ht); +GUF_DICT_KWRDS double GUF_CAT(GUF_DICT_NAME, _load_factor_without_tombstones)(const GUF_DICT_NAME *ht); /* Iterator functions */ GUF_DICT_KWRDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _begin)(const GUF_DICT_NAME* ht); @@ -223,8 +234,19 @@ GUF_DICT_KWRDS double GUF_CAT(GUF_DICT_NAME, _load_factor)(const GUF_DICT_NAME * return (double)occupied_count / (double)ht->kv_indices_cap; } +GUF_DICT_KWRDS double GUF_CAT(GUF_DICT_NAME, _load_factor_without_tombstones)(const GUF_DICT_NAME *ht) +{ + if (ht->kv_indices_cap == 0) { + return 1; + } + GUF_ASSERT(ht->kv_elems.size <= ht->kv_indices_cap); + return (double)ht->kv_elems.size / (double)ht->kv_indices_cap; +} + + GUF_DICT_KWRDS GUF_DICT_NAME *GUF_CAT(GUF_DICT_NAME, _try_init)(GUF_DICT_NAME *ht, guf_allocator *alloc, guf_err *err) { + GUF_ASSERT(GUF_DICT_MAX_LOAD_FACTOR >= 0.1 && GUF_DICT_MAX_LOAD_FACTOR <= 0.9); if (!ht || !alloc) { guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, GUF_ERR_MSG("in dict_try_init: ht or alloc NULL")); return NULL; @@ -440,15 +462,44 @@ static size_t GUF_CAT(GUF_DICT_NAME, _find_idx)(GUF_DICT_NAME *ht, const GUF_DIC #undef GUF_MOD_CAP } +static void GUF_CAT(GUF_DICT_NAME, _reinsert_elems_)(GUF_DICT_NAME *ht) +{ + GUF_ASSERT(GUF_CAT(GUF_DICT_NAME, _valid)(ht)); + GUF_ASSERT_RELEASE(ht->kv_indices && ht->kv_indices_cap > 0); + + for (ptrdiff_t i = 0; i < ht->kv_indices_cap; ++i) { + ht->kv_indices[i] = GUF_DICT_KV_META_IDX_NULL; + } + ht->num_tombstones = 0; + + GUF_ASSERT((size_t)ht->kv_elems.size < GUF_DICT_KV_META_IDX_MAX); + for (ptrdiff_t kv_idx = 0; kv_idx < ht->kv_elems.size; ++kv_idx) { + const GUF_DICT_KV_NAME *kv = GUF_CAT(GUF_DICT_KV_DBUF, _at)(&ht->kv_elems, kv_idx); + GUF_ASSERT(kv); + bool key_exists = false; + const GUF_DICT_HASH_T key_hash = GUF_DICT_KEY_HASH(&kv->key); + const size_t new_idx = GUF_CAT(GUF_DICT_NAME, _find_idx)(ht, &kv->key, key_hash, &key_exists); + GUF_ASSERT(!key_exists); + GUF_ASSERT(new_idx < SIZE_MAX && new_idx < (size_t)ht->kv_indices_cap); + GUF_ASSERT((GUF_DICT_HASH_T_GET_HASHFRAG(key_hash) & (GUF_DICT_KV_META_T)kv_idx) == 0); + ht->kv_indices[new_idx] = GUF_DICT_HASH_T_GET_HASHFRAG(key_hash) | (GUF_DICT_KV_META_T)kv_idx; + } +} + +GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _rehash_without_resize)(GUF_DICT_NAME *ht) +{ + GUF_ASSERT(GUF_CAT(GUF_DICT_NAME, _valid)(ht)); + GUF_CAT(GUF_DICT_NAME, _reinsert_elems_)(ht); + GUF_ASSERT(ht->num_tombstones == 0); +} static void GUF_CAT(GUF_DICT_NAME, _try_grow_if_necessary)(GUF_DICT_NAME *ht, guf_err *err) { - GUF_ASSERT_RELEASE(GUF_CAT(GUF_DICT_NAME, _valid)(ht)); - #ifdef GUF_DICT_PROBE_LINEAR - const double MAX_LOAD_FAC = 0.6; - #else - const double MAX_LOAD_FAC = 0.5; - #endif + GUF_ASSERT(GUF_CAT(GUF_DICT_NAME, _valid)(ht)); + + const double MAX_LOAD_FAC = GUF_DICT_MAX_LOAD_FACTOR; + GUF_ASSERT(MAX_LOAD_FAC >= 0.1 && MAX_LOAD_FAC <= 0.9); + const ptrdiff_t KV_META_START_CAP = 32; // Must be a power of two > 0. const ptrdiff_t KV_META_GROWTH_FAC = (ht->kv_indices_cap <= 128) ? 4 : 2; // Must be a power of two > 1. @@ -465,7 +516,7 @@ static void GUF_CAT(GUF_DICT_NAME, _try_grow_if_necessary)(GUF_DICT_NAME *ht, gu for (ptrdiff_t i = 0; i < ht->kv_indices_cap; ++i) { new_kv_indices[i] = GUF_DICT_KV_META_IDX_NULL; } - } else if (GUF_CAT(GUF_DICT_NAME, _load_factor)(ht) > MAX_LOAD_FAC) { // 1.b) Grow kv-index-buffer if necessary. + } else if (GUF_CAT(GUF_DICT_NAME, _load_factor)(ht) >= MAX_LOAD_FAC) { // 1.b) Grow kv-index-buffer if necessary. GUF_ASSERT(ht->kv_indices); GUF_ASSERT((size_t)ht->kv_indices_cap <= GUF_ALLOC_MAX_CAPACITY(GUF_DICT_KV_META_T)); const ptrdiff_t old_size_bytes = (size_t)ht->kv_indices_cap * sizeof(GUF_DICT_KV_META_T); @@ -474,6 +525,12 @@ static void GUF_CAT(GUF_DICT_NAME, _try_grow_if_necessary)(GUF_DICT_NAME *ht, gu const size_t MAX_SIZE_BYTES = (size_t)GUF_ALLOC_MAX_BYTES(GUF_DICT_KV_META_T); const size_t new_size_bytes_test = (size_t)old_size_bytes * (size_t)KV_META_GROWTH_FAC; if (guf_mul_is_overflow_size_t(old_size_bytes, KV_META_GROWTH_FAC) || new_size_bytes_test > MAX_SIZE_BYTES) { // Handle overflow (Remember: capacities have to be powers of two) + if (GUF_CAT(GUF_DICT_NAME, _load_factor_without_tombstones)(ht) < MAX_LOAD_FAC) { // Check if just removing tombstones without resizing would decrease the load factor enough. + GUF_CAT(GUF_DICT_NAME, _reinsert_elems_)(ht); + GUF_ASSERT(GUF_CAT(GUF_DICT_NAME, _load_factor)(ht) < MAX_LOAD_FAC); + guf_err_set_if_not_null(err, GUF_ERR_NONE); + return; + } guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, GUF_ERR_MSG("in function dict_try_insert: New kv_indices_capacity would overflow)")); return; } else { @@ -494,29 +551,13 @@ static void GUF_CAT(GUF_DICT_NAME, _try_grow_if_necessary)(GUF_DICT_NAME *ht, gu ht->kv_indices_cap = ht->kv_indices_cap * KV_META_GROWTH_FAC;; GUF_ASSERT(guf_is_pow2_size_t(ht->kv_indices_cap)); GUF_ASSERT(new_size_bytes / sizeof(GUF_DICT_KV_META_T) == ht->kv_indices_cap); - ht->num_tombstones = 0; // ht->max_probelen = 0; - - for (ptrdiff_t i = 0; i < ht->kv_indices_cap; ++i) { - ht->kv_indices[i] = GUF_DICT_KV_META_IDX_NULL; - } - - GUF_ASSERT((size_t)ht->kv_elems.size < GUF_DICT_KV_META_IDX_MAX); - for (ptrdiff_t kv_idx = 0; kv_idx < ht->kv_elems.size; ++kv_idx) { // Re-insert keys. - const GUF_DICT_KV_NAME *kv = GUF_CAT(GUF_DICT_KV_DBUF, _at)(&ht->kv_elems, kv_idx); - GUF_ASSERT(kv); - bool key_exists = false; - const GUF_DICT_HASH_T key_hash = GUF_DICT_KEY_HASH(&kv->key); // TODO: might be expensive... - const size_t new_idx = GUF_CAT(GUF_DICT_NAME, _find_idx)(ht, &kv->key, key_hash, &key_exists); - GUF_ASSERT(!key_exists); - GUF_ASSERT(new_idx < SIZE_MAX && new_idx < (size_t)ht->kv_indices_cap); - GUF_ASSERT((GUF_DICT_HASH_T_GET_HASHFRAG(key_hash) & (GUF_DICT_KV_META_T)kv_idx) == 0); - ht->kv_indices[new_idx] = GUF_DICT_HASH_T_GET_HASHFRAG(key_hash) | (GUF_DICT_KV_META_T)kv_idx; - } + GUF_CAT(GUF_DICT_NAME, _reinsert_elems_)(ht); + GUF_ASSERT(ht->num_tombstones == 0); } guf_err_set_if_not_null(err, GUF_ERR_NONE); - GUF_ASSERT(GUF_CAT(GUF_DICT_NAME, _load_factor)(ht) <= MAX_LOAD_FAC); + GUF_ASSERT(GUF_CAT(GUF_DICT_NAME, _load_factor)(ht) < MAX_LOAD_FAC); } GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, GUF_DICT_KEY_T *key, GUF_DICT_VAL_T *val, guf_cpy_opt key_opt, guf_cpy_opt val_opt, guf_err *err) @@ -746,7 +787,7 @@ GUF_DICT_KWRDS bool GUF_CAT(GUF_DICT_NAME, _erase)(GUF_DICT_NAME *ht, const GUF_ // GUF_ASSERT(!GUF_CAT(GUF_DICT_NAME, _contains)(ht, key)); - if (ht->kv_elems.size == 0 && ht->num_tombstones > 0) { // Optimisation: We can delete all tombstones here. + if (ht->kv_elems.size == 0 && ht->num_tombstones > 0) { // Optimisation: We can delete all tombstones here (TODO: not sure if actually a good idea...) ptrdiff_t del_tombstone_cnt = 0; for (ptrdiff_t i = 0; i < ht->kv_indices_cap && del_tombstone_cnt < ht->num_tombstones; ++i) { const GUF_DICT_KV_META_T kv_del_idx = GUF_DICT_META_GET_IDX(ht->kv_indices[i]); @@ -761,7 +802,7 @@ GUF_DICT_KWRDS bool GUF_CAT(GUF_DICT_NAME, _erase)(GUF_DICT_NAME *ht, const GUF_ } GUF_ASSERT(del_tombstone_cnt == ht->num_tombstones); ht->num_tombstones = 0; - } + } return true; } @@ -918,6 +959,7 @@ GUF_DICT_KWRDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _find_val_if #undef GUF_DICT_IS_SET #undef GUF_DICT_PROBE_LINEAR #undef GUF_DICT_PROBE_QUADRATIC +#undef GUF_DICT_MAX_LOAD_FACTOR #undef GUF_DICT_KEY_T #undef GUF_DICT_KEY_T_IS_INTEGRAL_TYPE diff --git a/src/guf_str.h b/src/guf_str.h index 1198515..3f67e63 100644 --- a/src/guf_str.h +++ b/src/guf_str.h @@ -22,7 +22,7 @@ typedef struct guf_str_internal_long_ { #define GUF_STR_SSO_BUF_CAP (sizeof(guf_str_internal_long_) - sizeof(unsigned char)) /* 23 bytes on 64-bit platforms, 11 bytes on 32-bit platforms */ -#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || (defined(__cplusplus) && __cplusplus >= 201103L) +#if defined(GUF_STDC_AT_LEAST_C11) || defined(GUF_STDCPP_AT_LEAST_CPP11) static_assert(GUF_STR_SSO_BUF_CAP > 0, "GUF_STR_SSO_BUF_CAP < 0 (this is very weird)"); // Basically cannot fail. static_assert(GUF_STR_SSO_BUF_CAP < 0x80, "GUF_STR_SSO_BUF_CAP >= 128 (no support for platforms with wordsize >= 512-bits)"); // Could fail on hypothetical platforms with 512-bit wordsize (and above). #endif