Fix guf_dict growth bug
This commit is contained in:
parent
cd1c1cd5db
commit
4d97725bba
@ -18,10 +18,10 @@ if (NOT DEFINED CMAKE_RUNTIME_OUTPUT_DIRECTORY)
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/bin)
|
||||
endif ()
|
||||
|
||||
add_executable(libguf_example src/test/example.c src/test/guf_dict_impl.c)
|
||||
add_executable(libguf_example src/test/example.c src/test/guf_str_impl.c src/test/guf_dict_impl.c)
|
||||
target_include_directories(libguf_example PRIVATE src src/test)
|
||||
|
||||
add_executable(libguf_test src/test/test.cpp src/test/guf_dbuf_impl.c src/test/guf_dict_impl.c src/test/guf_rand_impl.c src/test/guf_sort_impl.c src/test/guf_str_impl.c)
|
||||
add_executable(libguf_test src/test/test.cpp src/test/guf_dbuf_impl.c src/test/guf_str_impl.c src/test/guf_dict_impl.c src/test/guf_rand_impl.c src/test/guf_sort_impl.c)
|
||||
target_include_directories(libguf_test PRIVATE src src/test)
|
||||
|
||||
if (NOT DEFINED MSVC)
|
||||
|
||||
@ -300,6 +300,7 @@ static size_t GUF_CAT(GUF_DICT_NAME, _find_idx)(GUF_DICT_NAME *ht, const GUF_DIC
|
||||
probe:
|
||||
idx = GUF_MOD_CAP(idx + GUF_CAT(GUF_DICT_NAME, _probe_offset)(probe_len));
|
||||
++probe_len;
|
||||
// printf("sz %td cap %td proble-len %td\n", ht->kv_elems.size, ht->kv_indices_cap, probe_len);
|
||||
GUF_ASSERT((ptrdiff_t)probe_len <= (ht->kv_elems.size + ht->num_tombstones));
|
||||
}
|
||||
} while (idx != start_idx);
|
||||
@ -328,7 +329,8 @@ GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, GUF_D
|
||||
return;
|
||||
}
|
||||
|
||||
const ptrdiff_t KV_META_START_CAP = 64; // Must be a power of two > 0.
|
||||
const double MAX_LOAD_FAC = 0.66;
|
||||
const ptrdiff_t KV_META_START_CAP = 32; // Must be a power of two > 0.
|
||||
const ptrdiff_t KV_META_GROWTH_FAC = 2; // Must be a power of two > 0.
|
||||
|
||||
guf_allocator *allocator = ht->kv_elems.allocator;
|
||||
@ -345,7 +347,7 @@ GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, GUF_D
|
||||
new_kv_indices[i].kv_idx = GUF_DICT_KV_IDX_NULL;
|
||||
new_kv_indices[i].key_hash = 0;
|
||||
}
|
||||
} else if (GUF_CAT(GUF_DICT_NAME, _load_factor)(ht) > 0.6) { // 1.b) Grow kv-index-buffer.
|
||||
} else if (GUF_CAT(GUF_DICT_NAME, _load_factor)(ht) > MAX_LOAD_FAC) { // 1.b) Grow kv-index-buffer.
|
||||
GUF_ASSERT(ht->kv_indices);
|
||||
const ptrdiff_t old_size = ht->kv_indices_cap * sizeof(GUF_DICT_KV_META_T);
|
||||
ptrdiff_t new_size = 0;
|
||||
@ -371,12 +373,15 @@ GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, GUF_D
|
||||
ht->kv_indices = new_kv_indices;
|
||||
ht->kv_indices_cap = new_kv_meta_cap;
|
||||
ht->num_tombstones = 0;
|
||||
ht->max_probelen = 0;
|
||||
|
||||
ptrdiff_t cnt = 0;
|
||||
for (ptrdiff_t i = 0; i < old_kv_indices_cap; ++i) { // Copy old kv-indices into new kv-index-buffer.
|
||||
if (old_kv_indices[i].kv_idx != GUF_DICT_KV_IDX_NULL && old_kv_indices[i].kv_idx != GUF_DICT_KV_IDX_TOMBSTONE) {
|
||||
bool key_exists = false;
|
||||
size_t new_idx = GUF_CAT(GUF_DICT_NAME, _find_idx)(ht, key, &key_exists);
|
||||
const GUF_DICT_KV_NAME *kv = GUF_CAT(GUF_DICT_KV_DBUF, _at)(&ht->kv_elems, old_kv_indices[i].kv_idx);
|
||||
GUF_ASSERT(kv);
|
||||
size_t new_idx = GUF_CAT(GUF_DICT_NAME, _find_idx)(ht, &kv->key, &key_exists);
|
||||
GUF_ASSERT(!key_exists);
|
||||
GUF_ASSERT(new_idx < SIZE_MAX);
|
||||
ht->kv_indices[new_idx] = old_kv_indices[i];
|
||||
|
||||
@ -10,6 +10,7 @@
|
||||
#include "guf_alloc.h"
|
||||
#include "guf_str_view_type.h"
|
||||
#include "guf_utf8.h"
|
||||
#include "guf_hash.h"
|
||||
|
||||
typedef enum guf_str_state {
|
||||
GUF_STR_STATE_INIT = 0,
|
||||
@ -85,6 +86,8 @@ GUF_STR_KWRDS bool guf_str_is_stack_allocated(const guf_str *str);
|
||||
GUF_STR_KWRDS bool guf_str_is_valid(const guf_str *str);
|
||||
GUF_STR_KWRDS bool guf_str_alloc_success(const guf_str *str);
|
||||
|
||||
GUF_STR_KWRDS guf_hash_size_t guf_str_view_hash(const guf_str_view *sv);
|
||||
|
||||
// Comparison:
|
||||
GUF_STR_KWRDS bool guf_str_view_equal(const guf_str_view* a, const guf_str_view* b);
|
||||
GUF_STR_KWRDS bool guf_str_equal(const guf_str *a, const guf_str *b);
|
||||
@ -194,20 +197,32 @@ GUF_STR_KWRDS guf_str_view guf_substr_view(guf_str_view str, ptrdiff_t pos, ptrd
|
||||
return (guf_str_view){.str = str.str + pos, .len = substr_len};
|
||||
}
|
||||
|
||||
GUF_STR_KWRDS guf_hash_size_t guf_str_view_hash(const guf_str_view *sv)
|
||||
{
|
||||
GUF_ASSERT(sv);
|
||||
if (!sv->str || sv->len <= 0) {
|
||||
return GUF_HASH_INIT;
|
||||
}
|
||||
|
||||
return guf_hash(sv->str, sv->len, GUF_HASH_INIT);
|
||||
}
|
||||
|
||||
// Comparison:
|
||||
GUF_STR_KWRDS bool guf_str_view_equal(const guf_str_view* a, const guf_str_view* b)
|
||||
{
|
||||
GUF_ASSERT_RELEASE(a && b);
|
||||
GUF_ASSERT_RELEASE(a->str && b->str);
|
||||
if (a->len != b->len) {
|
||||
return false;
|
||||
}
|
||||
|
||||
GUF_ASSERT_RELEASE(a->len >= 0);
|
||||
if (a->len == 0) {
|
||||
return a->str == b->str; // Compare pointers by value here.
|
||||
if ((!a->str && b->str) || (!b->str && a->str)) {
|
||||
return false;
|
||||
} else if (!a->str && !b->str) {
|
||||
return a->len == b->len;
|
||||
}
|
||||
|
||||
GUF_ASSERT_RELEASE(a->len >= 0);
|
||||
|
||||
return 0 == memcmp(a->str, b->str, a->len);
|
||||
}
|
||||
|
||||
|
||||
@ -133,7 +133,7 @@ GUF_UTF8_KWRDS bool guf_utf8_encode(guf_utf8_char *result, uint32_t cp)
|
||||
for (int byte_n = num_bytes - 1; byte_n >= 0 && cp > 0; --byte_n) {
|
||||
const int bits = (byte_n == 0) ? first_byte_bits : tail_byte_bits;
|
||||
const uint32_t cp_mask = (UINT32_C(1) << bits) - 1;
|
||||
result->bytes[byte_n] = (unsigned char)result->bytes[byte_n] | (cp & cp_mask);
|
||||
result->bytes[byte_n] = (char)((unsigned char)result->bytes[byte_n] | (cp & cp_mask));
|
||||
cp = cp >> bits;
|
||||
cp_bits += bits;
|
||||
}
|
||||
|
||||
@ -9,6 +9,15 @@
|
||||
#define GUF_DICT_IMPL
|
||||
#include "guf_dict.h"
|
||||
|
||||
#define GUF_DICT_KEY_T guf_str_view
|
||||
#define GUF_DICT_KEY_HASH guf_str_view_hash
|
||||
#define GUF_DICT_KEY_T_EQ guf_str_view_equal
|
||||
#define GUF_DICT_VAL_T int32_t
|
||||
#define GUF_DICT_VAL_T_IS_INTEGRAL_TYPE
|
||||
#define GUF_DICT_NAME dict_sv_i32
|
||||
#define GUF_DICT_IMPL
|
||||
#include "guf_dict.h"
|
||||
|
||||
#define GUF_DICT_KEY_T int32_t
|
||||
#define GUF_DICT_KEY_HASH int32_hash
|
||||
#define GUF_DICT_KEY_T_EQ int32_eq
|
||||
|
||||
@ -3,6 +3,7 @@
|
||||
|
||||
#include "guf_common.h"
|
||||
#include "guf_cstr.h"
|
||||
#include "guf_str.h"
|
||||
|
||||
#define GUF_DICT_KEY_T guf_cstr_const
|
||||
#define GUF_DICT_KEY_HASH guf_cstr_const_hash
|
||||
@ -12,6 +13,14 @@
|
||||
#define GUF_DICT_NAME dict_cstr_int
|
||||
#include "guf_dict.h"
|
||||
|
||||
#define GUF_DICT_KEY_T guf_str_view
|
||||
#define GUF_DICT_KEY_HASH guf_str_view_hash
|
||||
#define GUF_DICT_KEY_T_EQ guf_str_view_equal
|
||||
#define GUF_DICT_VAL_T int32_t
|
||||
#define GUF_DICT_VAL_T_IS_INTEGRAL_TYPE
|
||||
#define GUF_DICT_NAME dict_sv_i32
|
||||
#include "guf_dict.h"
|
||||
|
||||
static inline guf_hash_size_t int32_hash(const int32_t *a)
|
||||
{
|
||||
return guf_hash(a, sizeof(int32_t), GUF_HASH_INIT);
|
||||
|
||||
@ -23,7 +23,7 @@ void init_tests()
|
||||
GUF_ASSERT_RELEASE(test.get());
|
||||
g_tests.insert(std::move(test));
|
||||
|
||||
test = std::make_unique<DictCstrToIntTest>("DictCstrToIntTest");
|
||||
test = std::make_unique<DictSvToIntTest>("DictSvToIntTest");
|
||||
GUF_ASSERT_RELEASE(test.get());
|
||||
g_tests.insert(std::move(test));
|
||||
|
||||
|
||||
@ -10,7 +10,7 @@
|
||||
|
||||
#include "guf_common.h"
|
||||
|
||||
#define TEST_CHECK(COND) check((COND), GUF_STRINGIFY(COND), __LINE__, __FILE__)
|
||||
#define TEST_CHECK(COND) (check((COND), GUF_STRINGIFY(COND), __LINE__, __FILE__))
|
||||
|
||||
struct Test
|
||||
{
|
||||
|
||||
@ -9,10 +9,10 @@ extern "C"
|
||||
#include "guf_str.h"
|
||||
}
|
||||
|
||||
struct DictCstrToIntTest : public Test
|
||||
struct DictSvToIntTest : public Test
|
||||
{
|
||||
|
||||
DictCstrToIntTest(const std::string& name) : Test(name) {};
|
||||
DictSvToIntTest(const std::string& name) : Test(name) {};
|
||||
|
||||
private:
|
||||
|
||||
@ -21,9 +21,9 @@ struct DictCstrToIntTest : public Test
|
||||
|
||||
void insert_lookup()
|
||||
{
|
||||
std::unordered_map<std::string, int> word_cnt_map {};
|
||||
dict_cstr_int word_cnt_dict {};
|
||||
dict_cstr_int_init(&word_cnt_dict, &guf_allocator_libc);
|
||||
std::unordered_map<std::string_view, int32_t> word_cnt_map {};
|
||||
dict_sv_i32 word_cnt_dict {};
|
||||
dict_sv_i32_init(&word_cnt_dict, &guf_allocator_libc);
|
||||
|
||||
dbuf_str_view delims = dbuf_str_view_new(&guf_allocator_libc);
|
||||
for (size_t i = 0; i < GUF_STATIC_BUF_SIZE(GUF_UTF8_WHITESPACE); ++i) {
|
||||
@ -37,14 +37,56 @@ struct DictCstrToIntTest : public Test
|
||||
guf_str_view input_str = {.str = text_buf.data, .len = text_buf.size};
|
||||
guf_str_view tok;
|
||||
while ((tok = guf_str_next_tok(&input_str, delims.data, delims.size, NULL, -1)).len) {
|
||||
if (tok.len <= 0) {
|
||||
continue;
|
||||
}
|
||||
std::string_view sv(tok.str, tok.len);
|
||||
TEST_CHECK(dict_sv_i32_contains(&word_cnt_dict, &tok) == word_cnt_map.contains(sv));
|
||||
if (!dict_sv_i32_contains(&word_cnt_dict, &tok)) {
|
||||
dict_sv_i32_insert_val_arg(&word_cnt_dict, tok, 1, GUF_CPY_VALUE, GUF_CPY_VALUE);
|
||||
word_cnt_map.insert({sv, 1});
|
||||
} else {
|
||||
int32_t *cnt = dict_sv_i32_at_val_arg(&word_cnt_dict, tok);
|
||||
if (TEST_CHECK(cnt)) {
|
||||
*cnt += 1;
|
||||
}
|
||||
word_cnt_map.at(sv) += 1;
|
||||
}
|
||||
// printf("tok_len: %td ", tok.len);
|
||||
// printf("'%.*s'\n", (int)tok.len, tok.str);
|
||||
}
|
||||
dbuf_str_view_free(&delims, NULL);
|
||||
|
||||
dict_cstr_int_free(&word_cnt_dict, NULL);
|
||||
TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == std::ssize(word_cnt_map));
|
||||
|
||||
for (const auto & [word, cnt] : word_cnt_map ) {
|
||||
guf_str_view sv = {.str = word.data(), .len = (ptrdiff_t)word.size()};
|
||||
int32_t *res = dict_sv_i32_at(&word_cnt_dict, &sv);
|
||||
TEST_CHECK(res && *res == cnt);
|
||||
}
|
||||
|
||||
ptrdiff_t i = 0;
|
||||
GUF_CNT_FOREACH(&word_cnt_dict, dict_sv_i32, kv_it) {
|
||||
const dict_sv_i32_kv *kv = kv_it.ptr;
|
||||
if (TEST_CHECK(kv)) {
|
||||
const int32_t cnt = kv->val;
|
||||
// printf("%.*s: %d\n", (int)kv->key.len, kv->key.str, cnt);
|
||||
const std::string_view sv(kv->key.str, kv->key.len);
|
||||
if (TEST_CHECK(word_cnt_map.contains(sv))) {
|
||||
TEST_CHECK(word_cnt_map.at(sv) == cnt);
|
||||
}
|
||||
}
|
||||
++i;
|
||||
}
|
||||
TEST_CHECK(i == dict_sv_i32_size(&word_cnt_dict));
|
||||
TEST_CHECK(i == std::ssize(word_cnt_map));
|
||||
|
||||
// std::cout << "load fac: " << dict_sv_i32_load_factor(&word_cnt_dict) << ", cap: " << word_cnt_dict.kv_indices_cap << "\n";
|
||||
// std::cout << "size: " << dict_sv_i32_size(&word_cnt_dict) << ", max probelen: " << word_cnt_dict.max_probelen << "\n";
|
||||
|
||||
dict_sv_i32_free(&word_cnt_dict, NULL);
|
||||
bool dbuf_null = !word_cnt_dict.kv_elems.data && !word_cnt_dict.kv_elems.allocator && !word_cnt_dict.kv_elems.capacity && !word_cnt_dict.kv_elems.size;
|
||||
TEST_CHECK(!dbuf_null && !word_cnt_dict.kv_indices && !word_cnt_dict.kv_indices_cap && !word_cnt_dict.max_probelen && !word_cnt_dict.num_tombstones);
|
||||
TEST_CHECK(dbuf_null && !word_cnt_dict.kv_indices && !word_cnt_dict.kv_indices_cap && !word_cnt_dict.max_probelen && !word_cnt_dict.num_tombstones);
|
||||
}
|
||||
|
||||
bool load_file()
|
||||
|
||||
@ -386,5 +386,4 @@ struct UTF8Test : public Test
|
||||
passed = (num_failed_checks == 0);
|
||||
return passed;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user