From 3ff4cd7572bfe4f8b1b90d8c944bce594e003b6c Mon Sep 17 00:00:00 2001 From: jun <83899451+zeichensystem@users.noreply.github.com> Date: Fri, 21 Mar 2025 18:29:19 +0100 Subject: [PATCH] Add more dict functions --- src/guf_dbuf.h | 2 +- src/guf_dict.h | 90 ++++++++++++++++++++++++++++++++++++++------------ src/guf_str.h | 41 +++++++---------------- 3 files changed, 81 insertions(+), 52 deletions(-) diff --git a/src/guf_dbuf.h b/src/guf_dbuf.h index 1f63a29..2599934 100644 --- a/src/guf_dbuf.h +++ b/src/guf_dbuf.h @@ -942,7 +942,7 @@ GUF_DBUF_KWRDS GUF_CAT(GUF_DBUF_NAME, _iter) GUF_CAT(GUF_DBUF_NAME, _iter_next)( if (is_reverse_it) { if (step < 0) { - GUF_ASSERT_RELEASE(step > PTRDIFF_MIN); // Catch overflow. + GUF_ASSERT(step > PTRDIFF_MIN); // Catch overflow. } step = -step; } diff --git a/src/guf_dict.h b/src/guf_dict.h index bc9f432..d9fd676 100755 --- a/src/guf_dict.h +++ b/src/guf_dict.h @@ -97,12 +97,8 @@ #define GUF_DICT_KV_NAME GUF_CAT(GUF_DICT_NAME, _kv) #endif -#if defined(GUF_DICT_MAX_LOAD_FACTOR) - #if defined(GUF_STDC_AT_LEAST_C11) || defined(GUF_STDCPP_AT_LEAST_CPP11) - static_assert(GUF_DICT_MAX_LOAD_FACTOR >= 0.1 && GUF_DICT_MAX_LOAD_FACTOR <= 0.9, "guf_dict.h: GUF_DICT_MAX_LOAD_FACTOR must be in range [0.1, 0.9]"); - #endif -#else - #define GUF_DICT_MAX_LOAD_FACTOR GUF_DICT_MAX_LOAD_FACTOR_DEFAULT +#ifndef GUF_DICT_MAX_LOAD_FACTOR + #define GUF_DICT_MAX_LOAD_FACTOR GUF_DICT_MAX_LOAD_FACTOR_DEFAULT #endif #define GUF_DICT_KV_DBUF GUF_CAT(GUF_DICT_KV_NAME, _dbuf) @@ -137,6 +133,8 @@ typedef GUF_CAT(GUF_DICT_KV_DBUF, _iter) GUF_CAT(GUF_DICT_NAME, _iter); #endif +GUF_DICT_KWRDS GUF_DICT_NAME *GUF_CAT(GUF_DICT_NAME, _try_init_with_capacity)(GUF_DICT_NAME *ht, guf_allocator *alloc, ptrdiff_t kv_elem_capacity, guf_err *err); +GUF_DICT_KWRDS GUF_DICT_NAME *GUF_CAT(GUF_DICT_NAME, _init_with_capacity)(GUF_DICT_NAME *ht, guf_allocator *alloc, ptrdiff_t kv_elem_capacity); GUF_DICT_KWRDS GUF_DICT_NAME *GUF_CAT(GUF_DICT_NAME, _try_init)(GUF_DICT_NAME *ht, guf_allocator *alloc, guf_err *err); GUF_DICT_KWRDS GUF_DICT_NAME *GUF_CAT(GUF_DICT_NAME, _init)(GUF_DICT_NAME *ht, guf_allocator *alloc); @@ -164,6 +162,9 @@ GUF_DICT_KWRDS ptrdiff_t GUF_CAT(GUF_DICT_NAME, _size)(const GUF_DICT_NAME *ht); GUF_DICT_KWRDS double GUF_CAT(GUF_DICT_NAME, _load_factor)(const GUF_DICT_NAME *ht); GUF_DICT_KWRDS double GUF_CAT(GUF_DICT_NAME, _load_factor_without_tombstones)(const GUF_DICT_NAME *ht); +GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _try_rehash_and_grow)(GUF_DICT_NAME *ht, guf_err *err); +GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _rehash_without_growth)(GUF_DICT_NAME *ht); + /* Iterator functions */ GUF_DICT_KWRDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _begin)(const GUF_DICT_NAME* ht); GUF_DICT_KWRDS GUF_CAT(GUF_DICT_NAME, _iter) GUF_CAT(GUF_DICT_NAME, _end)(const GUF_DICT_NAME* ht); @@ -248,28 +249,67 @@ GUF_DICT_KWRDS double GUF_CAT(GUF_DICT_NAME, _load_factor_without_tombstones)(co } -GUF_DICT_KWRDS GUF_DICT_NAME *GUF_CAT(GUF_DICT_NAME, _try_init)(GUF_DICT_NAME *ht, guf_allocator *alloc, guf_err *err) +GUF_DICT_KWRDS GUF_DICT_NAME *GUF_CAT(GUF_DICT_NAME, _try_init_with_capacity)(GUF_DICT_NAME *ht, guf_allocator *alloc, ptrdiff_t kv_elem_capacity, guf_err *err) { GUF_ASSERT(GUF_DICT_MAX_LOAD_FACTOR >= 0.1 && GUF_DICT_MAX_LOAD_FACTOR <= 0.9); if (!ht || !alloc) { - guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, GUF_ERR_MSG("in dict_try_init: ht or alloc NULL")); + guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, GUF_ERR_MSG("in dict_try_init_with_capacity: ht or alloc NULL")); return NULL; - } - - ht->kv_elems = (GUF_DICT_KV_DBUF){0}; - GUF_CAT(GUF_DICT_KV_DBUF, _try_init)(&ht->kv_elems, 0, alloc, err); - if (err != GUF_ERR_NONE) { + } else if (kv_elem_capacity < 0 || kv_elem_capacity > GUF_CAT(GUF_DICT_NAME, _max_capacity)()) { + guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, GUF_ERR_MSG("in dict_try_init_with_capacity: kv_elem_capacity < 0 or kv_elem_capacity > max_capacity")); return NULL; - } + } ht->kv_indices = NULL; ht->kv_indices_cap = 0; + ht->num_tombstones = ht->max_probelen = 0; - ht->num_tombstones = 0; - ht->max_probelen = 0; + ht->kv_elems = (GUF_DICT_KV_DBUF){0}; + GUF_CAT(GUF_DICT_KV_DBUF, _try_init)(&ht->kv_elems, kv_elem_capacity, alloc, err); + if (err != GUF_ERR_NONE) { + return NULL; + } + + if (kv_elem_capacity > 0) { + const size_t MAX_IDX_CAP = GUF_ALLOC_MAX_CAPACITY(GUF_DICT_KV_META_T); + const size_t desired_idx_cap = (size_t)guf_min_f64(kv_elem_capacity * 1.0 / GUF_DICT_MAX_LOAD_FACTOR, MAX_IDX_CAP); + // Capacities must be powers of two. + size_t kv_idx_cap = 1; + while ((kv_idx_cap <= MAX_IDX_CAP / 2) && ((kv_idx_cap << 1) <= desired_idx_cap)) { + kv_idx_cap <<= 1; + } + GUF_ASSERT_RELEASE(kv_idx_cap >= (size_t)ht->kv_elems.capacity && kv_idx_cap <= MAX_IDX_CAP); + GUF_ASSERT_RELEASE(guf_is_pow2_size_t(kv_idx_cap)); + const size_t num_bytes = kv_idx_cap * sizeof(GUF_DICT_KV_META_T); + GUF_ASSERT_RELEASE(!guf_mul_is_overflow_size_t(kv_idx_cap, sizeof(GUF_DICT_KV_META_T)) && num_bytes <= PTRDIFF_MAX); + + GUF_DICT_KV_META_T *kv_indices = ht->kv_elems.allocator->alloc(num_bytes, ht->kv_elems.allocator->ctx); + if (!kv_indices) { + guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, GUF_ERR_MSG("in dict_try_init_with_capacity: allocation of ht->kv_indices failed")); + GUF_CAT(GUF_DICT_KV_DBUF, _free)(&ht->kv_elems, NULL); + return NULL; + } + ht->kv_indices = kv_indices; + ht->kv_indices_cap = kv_idx_cap; + GUF_ASSERT(ht->kv_indices_cap > ht->kv_elems.capacity); + for (ptrdiff_t i = 0; i < ht->kv_indices_cap; ++i) { + ht->kv_indices[i] = GUF_DICT_KV_META_IDX_NULL; + } + } return ht; } +GUF_DICT_KWRDS GUF_DICT_NAME *GUF_CAT(GUF_DICT_NAME, _init_with_capacity)(GUF_DICT_NAME *ht, guf_allocator *alloc, ptrdiff_t kv_elem_capacity) +{ + return GUF_CAT(GUF_DICT_NAME, _try_init_with_capacity)(ht, alloc, kv_elem_capacity, NULL); +} + + +GUF_DICT_KWRDS GUF_DICT_NAME *GUF_CAT(GUF_DICT_NAME, _try_init)(GUF_DICT_NAME *ht, guf_allocator *alloc, guf_err *err) +{ + return GUF_CAT(GUF_DICT_NAME, _try_init_with_capacity)(ht, alloc, 0, err); +} + GUF_DICT_KWRDS GUF_DICT_NAME *GUF_CAT(GUF_DICT_NAME, _init)(GUF_DICT_NAME *ht, guf_allocator *alloc) { return GUF_CAT(GUF_DICT_NAME, _try_init)(ht, alloc, NULL); @@ -490,19 +530,19 @@ static void GUF_CAT(GUF_DICT_NAME, _reinsert_elems_)(GUF_DICT_NAME *ht) } } -GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _rehash_without_resize)(GUF_DICT_NAME *ht) +GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _rehash_without_growth)(GUF_DICT_NAME *ht) { - GUF_ASSERT(GUF_CAT(GUF_DICT_NAME, _valid)(ht)); + GUF_ASSERT_RELEASE(GUF_CAT(GUF_DICT_NAME, _valid)(ht)); GUF_CAT(GUF_DICT_NAME, _reinsert_elems_)(ht); GUF_ASSERT(ht->num_tombstones == 0); } -static void GUF_CAT(GUF_DICT_NAME, _try_grow_if_necessary)(GUF_DICT_NAME *ht, guf_err *err) +static void GUF_CAT(GUF_DICT_NAME, _try_grow_if_necessary_)(GUF_DICT_NAME *ht, bool always_grow, guf_err *err) { GUF_ASSERT(GUF_CAT(GUF_DICT_NAME, _valid)(ht)); const double MAX_LOAD_FAC = GUF_DICT_MAX_LOAD_FACTOR; - GUF_ASSERT(MAX_LOAD_FAC >= 0.1 && MAX_LOAD_FAC <= 0.9); + GUF_ASSERT(GUF_DICT_MAX_LOAD_FACTOR >= 0.1 && GUF_DICT_MAX_LOAD_FACTOR <= 0.9); const ptrdiff_t KV_META_START_CAP = 32; // Must be a power of two > 0. const ptrdiff_t KV_META_GROWTH_FAC = (ht->kv_indices_cap <= 128) ? 4 : 2; // Must be a power of two > 1. @@ -520,7 +560,7 @@ static void GUF_CAT(GUF_DICT_NAME, _try_grow_if_necessary)(GUF_DICT_NAME *ht, gu for (ptrdiff_t i = 0; i < ht->kv_indices_cap; ++i) { new_kv_indices[i] = GUF_DICT_KV_META_IDX_NULL; } - } else if (GUF_CAT(GUF_DICT_NAME, _load_factor)(ht) >= MAX_LOAD_FAC) { // 1.b) Grow kv-index-buffer if necessary. + } else if ((GUF_CAT(GUF_DICT_NAME, _load_factor)(ht) >= MAX_LOAD_FAC) || always_grow) { // 1.b) Grow kv-index-buffer if necessary. GUF_ASSERT(ht->kv_indices); GUF_ASSERT((size_t)ht->kv_indices_cap <= GUF_ALLOC_MAX_CAPACITY(GUF_DICT_KV_META_T)); const ptrdiff_t old_size_bytes = (size_t)ht->kv_indices_cap * sizeof(GUF_DICT_KV_META_T); @@ -564,6 +604,12 @@ static void GUF_CAT(GUF_DICT_NAME, _try_grow_if_necessary)(GUF_DICT_NAME *ht, gu GUF_ASSERT(GUF_CAT(GUF_DICT_NAME, _load_factor)(ht) < MAX_LOAD_FAC); } +GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _try_rehash_and_grow)(GUF_DICT_NAME *ht, guf_err *err) +{ + GUF_ASSERT_RELEASE(GUF_CAT(GUF_DICT_NAME, _valid)(ht)); + GUF_CAT(GUF_DICT_NAME, _try_grow_if_necessary_)(ht, true, err); +} + GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, GUF_DICT_KEY_T *key, GUF_DICT_VAL_T *val, guf_cpy_opt key_opt, guf_cpy_opt val_opt, guf_err *err) { GUF_ASSERT_RELEASE(GUF_CAT(GUF_DICT_NAME, _valid)(ht)); @@ -580,7 +626,7 @@ GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, GUF_D } // 1.) Grow kv-index-buffer if neccessary (or make the initial allocation.) - GUF_CAT(GUF_DICT_NAME, _try_grow_if_necessary)(ht, err); + GUF_CAT(GUF_DICT_NAME, _try_grow_if_necessary_)(ht, false, err); if (err != NULL && *err != GUF_ERR_NONE) { guf_err_set_or_panic(err, *err, GUF_ERR_MSG("in function dict_try_insert: try_grow failed.")); return; diff --git a/src/guf_str.h b/src/guf_str.h index 1ff2cc7..bcabcf2 100644 --- a/src/guf_str.h +++ b/src/guf_str.h @@ -66,9 +66,6 @@ GUF_STR_KWRDS bool guf_str_view_is_valid(guf_str_view sv); GUF_STR_KWRDS guf_str guf_str_substr_cpy(guf_str_view str, ptrdiff_t pos, size_t count); // not necessary GUF_STR_KWRDS guf_str_view guf_substr_view(guf_str_view str, ptrdiff_t pos, ptrdiff_t count); -GUF_STR_KWRDS guf_str_view guf_str_view_trim_left(guf_str_view str); -GUF_STR_KWRDS guf_str_view guf_str_view_trim_right(guf_str_view str); - GUF_STR_KWRDS guf_hash_size_t guf_str_view_hash(const guf_str_view *sv); GUF_STR_KWRDS uint64_t guf_str_view_hash64(const guf_str_view *sv); GUF_STR_KWRDS uint32_t guf_str_view_hash32(const guf_str_view *sv); @@ -77,6 +74,9 @@ GUF_STR_KWRDS bool guf_str_view_equal(const guf_str_view* a, const guf_str_view* GUF_STR_KWRDS bool guf_str_view_equal_val_arg(guf_str_view a_val, guf_str_view b_val); GUF_STR_KWRDS int guf_str_view_cmp(const void *str_view_a, const void *str_view_b); // For qsort etc. +GUF_STR_KWRDS guf_str_view guf_str_view_trim_right_ascii(guf_str_view sv); +GUF_STR_KWRDS guf_str_view guf_str_view_trim_left_ascii(guf_str_view sv); + GUF_STR_KWRDS guf_str_view guf_str_next_tok(guf_str_view *input, const guf_str_view *delims, ptrdiff_t num_delims, const guf_str_view *preserved_delims, ptrdiff_t num_preserved_delims); // guf_str: @@ -99,8 +99,9 @@ GUF_STR_KWRDS bool guf_str_equal(const guf_str *a, const guf_str *b); GUF_STR_KWRDS int guf_str_cmp(const guf_str *a, const guf_str *b); // TODO: -GUF_STR_KWRDS guf_str guf_str_try_new_substr(guf_str_view str_view, ptrdiff_t pos, ptrdiff_t len, guf_allocator *alloc, guf_err *err); -GUF_STR_KWRDS guf_str guf_str_new_substr(guf_str_view str_view, ptrdiff_t pos, ptrdiff_t len, guf_allocator *alloc); +GUF_STR_KWRDS char *guf_str_at(guf_str *str, ptrdiff_t idx); +GUF_STR_KWRDS char *guf_str_back(guf_str *str); +GUF_STR_KWRDS char *guf_str_front(guf_str *str); // DONE: GUF_STR_KWRDS guf_str *guf_str_try_append_char(guf_str *str, char c, ptrdiff_t times, guf_err *err); @@ -114,6 +115,10 @@ GUF_STR_KWRDS guf_str *guf_str_append(guf_str *str, guf_str_view sv); GUF_STR_KWRDS guf_str *guf_str_try_append_cstr(guf_str *str, const char *c_str, guf_err *err); GUF_STR_KWRDS guf_str *guf_str_append_cstr(guf_str *str, const char *c_str); +// TODO: +GUF_STR_KWRDS char guf_str_pop_back(guf_str *str); +GUF_STR_KWRDS char guf_str_pop_front(guf_str *str); + // DONE: GUF_STR_KWRDS guf_str *guf_str_try_reserve(guf_str *str, ptrdiff_t min_capacity, guf_err *err); GUF_STR_KWRDS guf_str *guf_str_reserve(guf_str *str, ptrdiff_t min_capacity); @@ -122,15 +127,6 @@ GUF_STR_KWRDS guf_str *guf_str_reserve(guf_str *str, ptrdiff_t min_capacity); GUF_STR_KWRDS guf_str *guf_str_try_shrink_to_fit(guf_str *str, guf_err *err); GUF_STR_KWRDS guf_str *guf_str_shrink_to_fit(guf_str *str); -// TODO: -GUF_STR_KWRDS char guf_str_pop_back(guf_str *str); -GUF_STR_KWRDS char guf_str_pop_front(guf_str *str); - -// TODO: -GUF_STR_KWRDS char *guf_str_at(guf_str *str, size_t idx); -GUF_STR_KWRDS char *guf_str_back(guf_str *str); -GUF_STR_KWRDS char *guf_str_front(guf_str *str); - // DONE: GUF_STR_KWRDS const char *guf_str_const_cstr(const guf_str *str); GUF_STR_KWRDS char *guf_str_try_get_cstr(guf_str *str, guf_err *err); // Error if str is readonly. @@ -350,7 +346,7 @@ GUF_STR_KWRDS guf_str *guf_str_try_reserve(guf_str *str, ptrdiff_t new_cap_min, if (times_two_cap > new_cap_min_with_null) { new_cap_min_with_null = times_two_cap; } - GUF_ASSERT(new_cap_min_with_null >= len_with_null && new_cap_min_with_null <= PTRDIFF_MAX); + GUF_ASSERT(new_cap_min_with_null > len_with_null && new_cap_min_with_null <= PTRDIFF_MAX); const size_t space_remaining = (new_cap_min_with_null - len_with_null); if (new_cap_min_with_null < (PTRDIFF_MAX - 8) && space_remaining < 4) { @@ -879,6 +875,7 @@ GUF_STR_KWRDS guf_str_view guf_str_next_tok(guf_str_view *input, const guf_str_v return tok; } + GUF_STR_KWRDS guf_str_view guf_str_view_trim_left_ascii(guf_str_view sv) { if (sv.len <= 0 || sv.str == NULL) { @@ -909,20 +906,6 @@ GUF_STR_KWRDS guf_str_view guf_str_view_trim_right_ascii(guf_str_view sv) return sv; } -GUF_STR_KWRDS guf_str_view guf_str_view_trim_right(guf_str_view sv) -{ - if (sv.len <= 0 || sv.str == NULL) { - return sv; - } - char c = sv.str[sv.len - 1]; - while (sv.len > 0 && sv.str && c != ' ' && c != '\n' && c != '\t' && c != '\v' && c != '\f' && c != '\r') { - --sv.len; - ++sv.str; - c = sv.str[0]; - } - return sv; -} - GUF_STR_KWRDS guf_str_view guf_substr_view(guf_str_view str, ptrdiff_t pos, ptrdiff_t count) { GUF_ASSERT(str.str);