Add changeable load factor

This commit is contained in:
jun 2025-03-20 20:29:21 +01:00
parent 534ff261ca
commit 21d1b04e6b
3 changed files with 79 additions and 29 deletions

View File

@ -26,6 +26,14 @@
#define GUF_HASH_32_BIT
#endif
/* Language-standard feature gates. */

/* GUF_STDC_AT_LEAST_C11 is defined when __STDC_VERSION__ reports C11 or newer. */
#if defined(__STDC_VERSION__)
    #if __STDC_VERSION__ >= 201112L
        #define GUF_STDC_AT_LEAST_C11
    #endif
#endif

/* GUF_STDCPP_AT_LEAST_CPP11 is defined when __cplusplus reports C++11 or newer. */
#if defined(__cplusplus)
    #if __cplusplus >= 201103L
        #define GUF_STDCPP_AT_LEAST_CPP11
    #endif
#endif
/*
// Copy- and move constructors:
GUF_T_COPY: GUF_T *(*copy)(GUF_T *dst, const GUF_T *src, void *ctx);

View File

@ -9,6 +9,8 @@
#include "guf_common.h"
#include "guf_alloc.h"
#include "guf_hash.h"
// GUF_DICT_MAX_LOAD_FACTOR must be in the range [0.1, 0.9]; the value below is used when it is not defined by the user.
#define GUF_DICT_MAX_LOAD_FACTOR_DEFAULT 0.666
#endif
#ifndef GUF_DICT_KEY_T
@ -91,6 +93,14 @@
#define GUF_DICT_KV_NAME GUF_CAT(GUF_DICT_NAME, _kv)
#endif
/*
    Maximum load factor of the dict.
    - Not supplied by the user: fall back to GUF_DICT_MAX_LOAD_FACTOR_DEFAULT.
    - Supplied by the user: validate the range [0.1, 0.9] at compile time where static_assert is available (C11/C++11 or newer).
*/
#ifndef GUF_DICT_MAX_LOAD_FACTOR
    #define GUF_DICT_MAX_LOAD_FACTOR GUF_DICT_MAX_LOAD_FACTOR_DEFAULT
#elif defined(GUF_STDC_AT_LEAST_C11) || defined(GUF_STDCPP_AT_LEAST_CPP11)
    static_assert(GUF_DICT_MAX_LOAD_FACTOR >= 0.1 && GUF_DICT_MAX_LOAD_FACTOR <= 0.9, "guf_dict.h: GUF_DICT_MAX_LOAD_FACTOR must be in range [0.1, 0.9]");
#endif
#define GUF_DICT_KV_DBUF GUF_CAT(GUF_DICT_KV_NAME, _dbuf)
// - GUF_T_COPY: copy function with signature GUF_T *copy(GUF_T *dst, const GUF_T *src, void *ctx) (default: copy by value)
@ -148,6 +158,7 @@ GUF_DICT_KWRDS bool GUF_CAT(GUF_DICT_NAME, _contains_val_arg)(GUF_DICT_NAME *ht,
GUF_DICT_KWRDS ptrdiff_t GUF_CAT(GUF_DICT_NAME, _size)(const GUF_DICT_NAME *ht);
GUF_DICT_KWRDS double GUF_CAT(GUF_DICT_NAME, _load_factor)(const GUF_DICT_NAME *ht);
// Returns kv_elems.size divided by kv_indices_cap (returns 1 if kv_indices_cap is 0).
GUF_DICT_KWRDS double GUF_CAT(GUF_DICT_NAME, _load_factor_without_tombstones)(const GUF_DICT_NAME *ht);
/* Iterator functions */
GUF_DICT_KWRDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _begin)(const GUF_DICT_NAME* ht);
@ -223,8 +234,19 @@ GUF_DICT_KWRDS double GUF_CAT(GUF_DICT_NAME, _load_factor)(const GUF_DICT_NAME *
return (double)occupied_count / (double)ht->kv_indices_cap;
}
/*
    Load factor computed from the stored key-value elements only:
    kv_elems.size / kv_indices_cap.
    A dict whose index buffer has capacity 0 reports a load factor of 1.
*/
GUF_DICT_KWRDS double GUF_CAT(GUF_DICT_NAME, _load_factor_without_tombstones)(const GUF_DICT_NAME *ht)
{
    if (ht->kv_indices_cap != 0) {
        GUF_ASSERT(ht->kv_elems.size <= ht->kv_indices_cap);
        const double num_elems = (double)ht->kv_elems.size;
        const double num_slots = (double)ht->kv_indices_cap;
        return num_elems / num_slots;
    }
    return 1.0; // No index buffer: treated as full.
}
GUF_DICT_KWRDS GUF_DICT_NAME *GUF_CAT(GUF_DICT_NAME, _try_init)(GUF_DICT_NAME *ht, guf_allocator *alloc, guf_err *err)
{
GUF_ASSERT(GUF_DICT_MAX_LOAD_FACTOR >= 0.1 && GUF_DICT_MAX_LOAD_FACTOR <= 0.9);
if (!ht || !alloc) {
guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, GUF_ERR_MSG("in dict_try_init: ht or alloc NULL"));
return NULL;
@ -440,15 +462,44 @@ static size_t GUF_CAT(GUF_DICT_NAME, _find_idx)(GUF_DICT_NAME *ht, const GUF_DIC
#undef GUF_MOD_CAP
}
/*
    Rebuild the kv-index buffer in place, keeping its capacity:
    1.) Reset every slot of kv_indices to GUF_DICT_KV_META_IDX_NULL and zero the tombstone counter.
    2.) Hash the key of every element in kv_elems again and store (hash fragment | element index) in the slot found by _find_idx.
    Requires an allocated index buffer (kv_indices non-NULL and kv_indices_cap > 0); this is checked with a release assertion.
*/
static void GUF_CAT(GUF_DICT_NAME, _reinsert_elems_)(GUF_DICT_NAME *ht)
{
    GUF_ASSERT(GUF_CAT(GUF_DICT_NAME, _valid)(ht));
    GUF_ASSERT_RELEASE(ht->kv_indices && ht->kv_indices_cap > 0);

    // 1.) Wipe all slots (this drops every tombstone as well).
    ptrdiff_t slot = 0;
    while (slot < ht->kv_indices_cap) {
        ht->kv_indices[slot] = GUF_DICT_KV_META_IDX_NULL;
        ++slot;
    }
    ht->num_tombstones = 0;

    GUF_ASSERT((size_t)ht->kv_elems.size < GUF_DICT_KV_META_IDX_MAX);

    // 2.) Re-insert the index of every stored element.
    for (ptrdiff_t elem_idx = 0; elem_idx < ht->kv_elems.size; ++elem_idx) {
        const GUF_DICT_KV_NAME *entry = GUF_CAT(GUF_DICT_KV_DBUF, _at)(&ht->kv_elems, elem_idx);
        GUF_ASSERT(entry);

        const GUF_DICT_HASH_T hash = GUF_DICT_KEY_HASH(&entry->key);
        bool already_present = false;
        const size_t target_slot = GUF_CAT(GUF_DICT_NAME, _find_idx)(ht, &entry->key, hash, &already_present);
        GUF_ASSERT(!already_present); // The index buffer was just cleared, so no key may be found twice.
        GUF_ASSERT(target_slot < SIZE_MAX && target_slot < (size_t)ht->kv_indices_cap);

        // Hash-fragment bits and element-index bits must not overlap before they are combined.
        GUF_ASSERT((GUF_DICT_HASH_T_GET_HASHFRAG(hash) & (GUF_DICT_KV_META_T)elem_idx) == 0);
        ht->kv_indices[target_slot] = GUF_DICT_HASH_T_GET_HASHFRAG(hash) | (GUF_DICT_KV_META_T)elem_idx;
    }
}
/*
    Rehash all stored elements into the existing index buffer without changing its capacity.
    All tombstones are removed as a result (num_tombstones is 0 afterwards).
    A dict that has no index buffer yet (kv_indices NULL or kv_indices_cap 0) is left untouched:
    there is nothing to rehash, and the re-insert helper release-asserts a non-empty buffer.
*/
GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _rehash_without_resize)(GUF_DICT_NAME *ht)
{
    GUF_ASSERT(GUF_CAT(GUF_DICT_NAME, _valid)(ht));
    if (!ht->kv_indices || ht->kv_indices_cap <= 0) { // Nothing allocated, nothing to rehash.
        return;
    }
    GUF_CAT(GUF_DICT_NAME, _reinsert_elems_)(ht);
    GUF_ASSERT(ht->num_tombstones == 0);
}
static void GUF_CAT(GUF_DICT_NAME, _try_grow_if_necessary)(GUF_DICT_NAME *ht, guf_err *err)
{
GUF_ASSERT_RELEASE(GUF_CAT(GUF_DICT_NAME, _valid)(ht));
#ifdef GUF_DICT_PROBE_LINEAR
const double MAX_LOAD_FAC = 0.6;
#else
const double MAX_LOAD_FAC = 0.5;
#endif
GUF_ASSERT(GUF_CAT(GUF_DICT_NAME, _valid)(ht));
const double MAX_LOAD_FAC = GUF_DICT_MAX_LOAD_FACTOR;
GUF_ASSERT(MAX_LOAD_FAC >= 0.1 && MAX_LOAD_FAC <= 0.9);
const ptrdiff_t KV_META_START_CAP = 32; // Must be a power of two > 0.
const ptrdiff_t KV_META_GROWTH_FAC = (ht->kv_indices_cap <= 128) ? 4 : 2; // Must be a power of two > 1.
@ -465,7 +516,7 @@ static void GUF_CAT(GUF_DICT_NAME, _try_grow_if_necessary)(GUF_DICT_NAME *ht, gu
for (ptrdiff_t i = 0; i < ht->kv_indices_cap; ++i) {
new_kv_indices[i] = GUF_DICT_KV_META_IDX_NULL;
}
} else if (GUF_CAT(GUF_DICT_NAME, _load_factor)(ht) > MAX_LOAD_FAC) { // 1.b) Grow kv-index-buffer if necessary.
} else if (GUF_CAT(GUF_DICT_NAME, _load_factor)(ht) >= MAX_LOAD_FAC) { // 1.b) Grow kv-index-buffer if necessary.
GUF_ASSERT(ht->kv_indices);
GUF_ASSERT((size_t)ht->kv_indices_cap <= GUF_ALLOC_MAX_CAPACITY(GUF_DICT_KV_META_T));
const ptrdiff_t old_size_bytes = (size_t)ht->kv_indices_cap * sizeof(GUF_DICT_KV_META_T);
@ -474,6 +525,12 @@ static void GUF_CAT(GUF_DICT_NAME, _try_grow_if_necessary)(GUF_DICT_NAME *ht, gu
const size_t MAX_SIZE_BYTES = (size_t)GUF_ALLOC_MAX_BYTES(GUF_DICT_KV_META_T);
const size_t new_size_bytes_test = (size_t)old_size_bytes * (size_t)KV_META_GROWTH_FAC;
if (guf_mul_is_overflow_size_t(old_size_bytes, KV_META_GROWTH_FAC) || new_size_bytes_test > MAX_SIZE_BYTES) { // Handle overflow (Remember: capacities have to be powers of two)
if (GUF_CAT(GUF_DICT_NAME, _load_factor_without_tombstones)(ht) < MAX_LOAD_FAC) { // Check if just removing tombstones without resizing would decrease the load factor enough.
GUF_CAT(GUF_DICT_NAME, _reinsert_elems_)(ht);
GUF_ASSERT(GUF_CAT(GUF_DICT_NAME, _load_factor)(ht) < MAX_LOAD_FAC);
guf_err_set_if_not_null(err, GUF_ERR_NONE);
return;
}
guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, GUF_ERR_MSG("in function dict_try_insert: New kv_indices_capacity would overflow)"));
return;
} else {
@ -494,29 +551,13 @@ static void GUF_CAT(GUF_DICT_NAME, _try_grow_if_necessary)(GUF_DICT_NAME *ht, gu
ht->kv_indices_cap = ht->kv_indices_cap * KV_META_GROWTH_FAC;;
GUF_ASSERT(guf_is_pow2_size_t(ht->kv_indices_cap));
GUF_ASSERT(new_size_bytes / sizeof(GUF_DICT_KV_META_T) == ht->kv_indices_cap);
ht->num_tombstones = 0;
// ht->max_probelen = 0;
for (ptrdiff_t i = 0; i < ht->kv_indices_cap; ++i) {
ht->kv_indices[i] = GUF_DICT_KV_META_IDX_NULL;
}
GUF_ASSERT((size_t)ht->kv_elems.size < GUF_DICT_KV_META_IDX_MAX);
for (ptrdiff_t kv_idx = 0; kv_idx < ht->kv_elems.size; ++kv_idx) { // Re-insert keys.
const GUF_DICT_KV_NAME *kv = GUF_CAT(GUF_DICT_KV_DBUF, _at)(&ht->kv_elems, kv_idx);
GUF_ASSERT(kv);
bool key_exists = false;
const GUF_DICT_HASH_T key_hash = GUF_DICT_KEY_HASH(&kv->key); // TODO: might be expensive...
const size_t new_idx = GUF_CAT(GUF_DICT_NAME, _find_idx)(ht, &kv->key, key_hash, &key_exists);
GUF_ASSERT(!key_exists);
GUF_ASSERT(new_idx < SIZE_MAX && new_idx < (size_t)ht->kv_indices_cap);
GUF_ASSERT((GUF_DICT_HASH_T_GET_HASHFRAG(key_hash) & (GUF_DICT_KV_META_T)kv_idx) == 0);
ht->kv_indices[new_idx] = GUF_DICT_HASH_T_GET_HASHFRAG(key_hash) | (GUF_DICT_KV_META_T)kv_idx;
}
GUF_CAT(GUF_DICT_NAME, _reinsert_elems_)(ht);
GUF_ASSERT(ht->num_tombstones == 0);
}
guf_err_set_if_not_null(err, GUF_ERR_NONE);
GUF_ASSERT(GUF_CAT(GUF_DICT_NAME, _load_factor)(ht) <= MAX_LOAD_FAC);
GUF_ASSERT(GUF_CAT(GUF_DICT_NAME, _load_factor)(ht) < MAX_LOAD_FAC);
}
GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, GUF_DICT_KEY_T *key, GUF_DICT_VAL_T *val, guf_cpy_opt key_opt, guf_cpy_opt val_opt, guf_err *err)
@ -746,7 +787,7 @@ GUF_DICT_KWRDS bool GUF_CAT(GUF_DICT_NAME, _erase)(GUF_DICT_NAME *ht, const GUF_
// GUF_ASSERT(!GUF_CAT(GUF_DICT_NAME, _contains)(ht, key));
if (ht->kv_elems.size == 0 && ht->num_tombstones > 0) { // Optimisation: We can delete all tombstones here.
if (ht->kv_elems.size == 0 && ht->num_tombstones > 0) { // Optimisation: We can delete all tombstones here (TODO: not sure if actually a good idea...)
ptrdiff_t del_tombstone_cnt = 0;
for (ptrdiff_t i = 0; i < ht->kv_indices_cap && del_tombstone_cnt < ht->num_tombstones; ++i) {
const GUF_DICT_KV_META_T kv_del_idx = GUF_DICT_META_GET_IDX(ht->kv_indices[i]);
@ -918,6 +959,7 @@ GUF_DICT_KWRDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _find_val_if
#undef GUF_DICT_IS_SET
#undef GUF_DICT_PROBE_LINEAR
#undef GUF_DICT_PROBE_QUADRATIC
#undef GUF_DICT_MAX_LOAD_FACTOR
#undef GUF_DICT_KEY_T
#undef GUF_DICT_KEY_T_IS_INTEGRAL_TYPE

View File

@ -22,7 +22,7 @@ typedef struct guf_str_internal_long_ {
#define GUF_STR_SSO_BUF_CAP (sizeof(guf_str_internal_long_) - sizeof(unsigned char)) /* 23 bytes on 64-bit platforms, 11 bytes on 32-bit platforms */
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || (defined(__cplusplus) && __cplusplus >= 201103L)
// Compile-time sanity checks on GUF_STR_SSO_BUF_CAP; only compiled where static_assert is available (C11/C++11 or newer).
#if defined(GUF_STDC_AT_LEAST_C11) || defined(GUF_STDCPP_AT_LEAST_CPP11)
static_assert(GUF_STR_SSO_BUF_CAP > 0, "GUF_STR_SSO_BUF_CAP < 0 (this is very weird)"); // Basically cannot fail.
static_assert(GUF_STR_SSO_BUF_CAP < 0x80, "GUF_STR_SSO_BUF_CAP >= 128 (no support for platforms with wordsize >= 512-bits)"); // Could fail on hypothetical platforms with 512-bit wordsize (and above).
#endif