Fix guf_dict growth bug
This commit is contained in:
parent
cd1c1cd5db
commit
4d97725bba
@ -18,10 +18,10 @@ if (NOT DEFINED CMAKE_RUNTIME_OUTPUT_DIRECTORY)
|
|||||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/bin)
|
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/bin)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
add_executable(libguf_example src/test/example.c src/test/guf_dict_impl.c)
|
add_executable(libguf_example src/test/example.c src/test/guf_str_impl.c src/test/guf_dict_impl.c)
|
||||||
target_include_directories(libguf_example PRIVATE src src/test)
|
target_include_directories(libguf_example PRIVATE src src/test)
|
||||||
|
|
||||||
add_executable(libguf_test src/test/test.cpp src/test/guf_dbuf_impl.c src/test/guf_dict_impl.c src/test/guf_rand_impl.c src/test/guf_sort_impl.c src/test/guf_str_impl.c)
|
add_executable(libguf_test src/test/test.cpp src/test/guf_dbuf_impl.c src/test/guf_str_impl.c src/test/guf_dict_impl.c src/test/guf_rand_impl.c src/test/guf_sort_impl.c)
|
||||||
target_include_directories(libguf_test PRIVATE src src/test)
|
target_include_directories(libguf_test PRIVATE src src/test)
|
||||||
|
|
||||||
if (NOT DEFINED MSVC)
|
if (NOT DEFINED MSVC)
|
||||||
|
|||||||
@ -300,6 +300,7 @@ static size_t GUF_CAT(GUF_DICT_NAME, _find_idx)(GUF_DICT_NAME *ht, const GUF_DIC
|
|||||||
probe:
|
probe:
|
||||||
idx = GUF_MOD_CAP(idx + GUF_CAT(GUF_DICT_NAME, _probe_offset)(probe_len));
|
idx = GUF_MOD_CAP(idx + GUF_CAT(GUF_DICT_NAME, _probe_offset)(probe_len));
|
||||||
++probe_len;
|
++probe_len;
|
||||||
|
// printf("sz %td cap %td proble-len %td\n", ht->kv_elems.size, ht->kv_indices_cap, probe_len);
|
||||||
GUF_ASSERT((ptrdiff_t)probe_len <= (ht->kv_elems.size + ht->num_tombstones));
|
GUF_ASSERT((ptrdiff_t)probe_len <= (ht->kv_elems.size + ht->num_tombstones));
|
||||||
}
|
}
|
||||||
} while (idx != start_idx);
|
} while (idx != start_idx);
|
||||||
@ -328,7 +329,8 @@ GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, GUF_D
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const ptrdiff_t KV_META_START_CAP = 64; // Must be a power of two > 0.
|
const double MAX_LOAD_FAC = 0.66;
|
||||||
|
const ptrdiff_t KV_META_START_CAP = 32; // Must be a power of two > 0.
|
||||||
const ptrdiff_t KV_META_GROWTH_FAC = 2; // Must be a power of two > 0.
|
const ptrdiff_t KV_META_GROWTH_FAC = 2; // Must be a power of two > 0.
|
||||||
|
|
||||||
guf_allocator *allocator = ht->kv_elems.allocator;
|
guf_allocator *allocator = ht->kv_elems.allocator;
|
||||||
@ -345,7 +347,7 @@ GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, GUF_D
|
|||||||
new_kv_indices[i].kv_idx = GUF_DICT_KV_IDX_NULL;
|
new_kv_indices[i].kv_idx = GUF_DICT_KV_IDX_NULL;
|
||||||
new_kv_indices[i].key_hash = 0;
|
new_kv_indices[i].key_hash = 0;
|
||||||
}
|
}
|
||||||
} else if (GUF_CAT(GUF_DICT_NAME, _load_factor)(ht) > 0.6) { // 1.b) Grow kv-index-buffer.
|
} else if (GUF_CAT(GUF_DICT_NAME, _load_factor)(ht) > MAX_LOAD_FAC) { // 1.b) Grow kv-index-buffer.
|
||||||
GUF_ASSERT(ht->kv_indices);
|
GUF_ASSERT(ht->kv_indices);
|
||||||
const ptrdiff_t old_size = ht->kv_indices_cap * sizeof(GUF_DICT_KV_META_T);
|
const ptrdiff_t old_size = ht->kv_indices_cap * sizeof(GUF_DICT_KV_META_T);
|
||||||
ptrdiff_t new_size = 0;
|
ptrdiff_t new_size = 0;
|
||||||
@ -371,12 +373,15 @@ GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _try_insert)(GUF_DICT_NAME *ht, GUF_D
|
|||||||
ht->kv_indices = new_kv_indices;
|
ht->kv_indices = new_kv_indices;
|
||||||
ht->kv_indices_cap = new_kv_meta_cap;
|
ht->kv_indices_cap = new_kv_meta_cap;
|
||||||
ht->num_tombstones = 0;
|
ht->num_tombstones = 0;
|
||||||
|
ht->max_probelen = 0;
|
||||||
|
|
||||||
ptrdiff_t cnt = 0;
|
ptrdiff_t cnt = 0;
|
||||||
for (ptrdiff_t i = 0; i < old_kv_indices_cap; ++i) { // Copy old kv-indices into new kv-index-buffer.
|
for (ptrdiff_t i = 0; i < old_kv_indices_cap; ++i) { // Copy old kv-indices into new kv-index-buffer.
|
||||||
if (old_kv_indices[i].kv_idx != GUF_DICT_KV_IDX_NULL && old_kv_indices[i].kv_idx != GUF_DICT_KV_IDX_TOMBSTONE) {
|
if (old_kv_indices[i].kv_idx != GUF_DICT_KV_IDX_NULL && old_kv_indices[i].kv_idx != GUF_DICT_KV_IDX_TOMBSTONE) {
|
||||||
bool key_exists = false;
|
bool key_exists = false;
|
||||||
size_t new_idx = GUF_CAT(GUF_DICT_NAME, _find_idx)(ht, key, &key_exists);
|
const GUF_DICT_KV_NAME *kv = GUF_CAT(GUF_DICT_KV_DBUF, _at)(&ht->kv_elems, old_kv_indices[i].kv_idx);
|
||||||
|
GUF_ASSERT(kv);
|
||||||
|
size_t new_idx = GUF_CAT(GUF_DICT_NAME, _find_idx)(ht, &kv->key, &key_exists);
|
||||||
GUF_ASSERT(!key_exists);
|
GUF_ASSERT(!key_exists);
|
||||||
GUF_ASSERT(new_idx < SIZE_MAX);
|
GUF_ASSERT(new_idx < SIZE_MAX);
|
||||||
ht->kv_indices[new_idx] = old_kv_indices[i];
|
ht->kv_indices[new_idx] = old_kv_indices[i];
|
||||||
|
|||||||
@ -10,6 +10,7 @@
|
|||||||
#include "guf_alloc.h"
|
#include "guf_alloc.h"
|
||||||
#include "guf_str_view_type.h"
|
#include "guf_str_view_type.h"
|
||||||
#include "guf_utf8.h"
|
#include "guf_utf8.h"
|
||||||
|
#include "guf_hash.h"
|
||||||
|
|
||||||
typedef enum guf_str_state {
|
typedef enum guf_str_state {
|
||||||
GUF_STR_STATE_INIT = 0,
|
GUF_STR_STATE_INIT = 0,
|
||||||
@ -85,6 +86,8 @@ GUF_STR_KWRDS bool guf_str_is_stack_allocated(const guf_str *str);
|
|||||||
GUF_STR_KWRDS bool guf_str_is_valid(const guf_str *str);
|
GUF_STR_KWRDS bool guf_str_is_valid(const guf_str *str);
|
||||||
GUF_STR_KWRDS bool guf_str_alloc_success(const guf_str *str);
|
GUF_STR_KWRDS bool guf_str_alloc_success(const guf_str *str);
|
||||||
|
|
||||||
|
GUF_STR_KWRDS guf_hash_size_t guf_str_view_hash(const guf_str_view *sv);
|
||||||
|
|
||||||
// Comparison:
|
// Comparison:
|
||||||
GUF_STR_KWRDS bool guf_str_view_equal(const guf_str_view* a, const guf_str_view* b);
|
GUF_STR_KWRDS bool guf_str_view_equal(const guf_str_view* a, const guf_str_view* b);
|
||||||
GUF_STR_KWRDS bool guf_str_equal(const guf_str *a, const guf_str *b);
|
GUF_STR_KWRDS bool guf_str_equal(const guf_str *a, const guf_str *b);
|
||||||
@ -194,20 +197,32 @@ GUF_STR_KWRDS guf_str_view guf_substr_view(guf_str_view str, ptrdiff_t pos, ptrd
|
|||||||
return (guf_str_view){.str = str.str + pos, .len = substr_len};
|
return (guf_str_view){.str = str.str + pos, .len = substr_len};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GUF_STR_KWRDS guf_hash_size_t guf_str_view_hash(const guf_str_view *sv)
|
||||||
|
{
|
||||||
|
GUF_ASSERT(sv);
|
||||||
|
if (!sv->str || sv->len <= 0) {
|
||||||
|
return GUF_HASH_INIT;
|
||||||
|
}
|
||||||
|
|
||||||
|
return guf_hash(sv->str, sv->len, GUF_HASH_INIT);
|
||||||
|
}
|
||||||
|
|
||||||
// Comparison:
|
// Comparison:
|
||||||
GUF_STR_KWRDS bool guf_str_view_equal(const guf_str_view* a, const guf_str_view* b)
|
GUF_STR_KWRDS bool guf_str_view_equal(const guf_str_view* a, const guf_str_view* b)
|
||||||
{
|
{
|
||||||
GUF_ASSERT_RELEASE(a && b);
|
GUF_ASSERT_RELEASE(a && b);
|
||||||
GUF_ASSERT_RELEASE(a->str && b->str);
|
|
||||||
if (a->len != b->len) {
|
if (a->len != b->len) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
GUF_ASSERT_RELEASE(a->len >= 0);
|
if ((!a->str && b->str) || (!b->str && a->str)) {
|
||||||
if (a->len == 0) {
|
return false;
|
||||||
return a->str == b->str; // Compare pointers by value here.
|
} else if (!a->str && !b->str) {
|
||||||
|
return a->len == b->len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GUF_ASSERT_RELEASE(a->len >= 0);
|
||||||
|
|
||||||
return 0 == memcmp(a->str, b->str, a->len);
|
return 0 == memcmp(a->str, b->str, a->len);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -133,7 +133,7 @@ GUF_UTF8_KWRDS bool guf_utf8_encode(guf_utf8_char *result, uint32_t cp)
|
|||||||
for (int byte_n = num_bytes - 1; byte_n >= 0 && cp > 0; --byte_n) {
|
for (int byte_n = num_bytes - 1; byte_n >= 0 && cp > 0; --byte_n) {
|
||||||
const int bits = (byte_n == 0) ? first_byte_bits : tail_byte_bits;
|
const int bits = (byte_n == 0) ? first_byte_bits : tail_byte_bits;
|
||||||
const uint32_t cp_mask = (UINT32_C(1) << bits) - 1;
|
const uint32_t cp_mask = (UINT32_C(1) << bits) - 1;
|
||||||
result->bytes[byte_n] = (unsigned char)result->bytes[byte_n] | (cp & cp_mask);
|
result->bytes[byte_n] = (char)((unsigned char)result->bytes[byte_n] | (cp & cp_mask));
|
||||||
cp = cp >> bits;
|
cp = cp >> bits;
|
||||||
cp_bits += bits;
|
cp_bits += bits;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -9,6 +9,15 @@
|
|||||||
#define GUF_DICT_IMPL
|
#define GUF_DICT_IMPL
|
||||||
#include "guf_dict.h"
|
#include "guf_dict.h"
|
||||||
|
|
||||||
|
#define GUF_DICT_KEY_T guf_str_view
|
||||||
|
#define GUF_DICT_KEY_HASH guf_str_view_hash
|
||||||
|
#define GUF_DICT_KEY_T_EQ guf_str_view_equal
|
||||||
|
#define GUF_DICT_VAL_T int32_t
|
||||||
|
#define GUF_DICT_VAL_T_IS_INTEGRAL_TYPE
|
||||||
|
#define GUF_DICT_NAME dict_sv_i32
|
||||||
|
#define GUF_DICT_IMPL
|
||||||
|
#include "guf_dict.h"
|
||||||
|
|
||||||
#define GUF_DICT_KEY_T int32_t
|
#define GUF_DICT_KEY_T int32_t
|
||||||
#define GUF_DICT_KEY_HASH int32_hash
|
#define GUF_DICT_KEY_HASH int32_hash
|
||||||
#define GUF_DICT_KEY_T_EQ int32_eq
|
#define GUF_DICT_KEY_T_EQ int32_eq
|
||||||
|
|||||||
@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
#include "guf_common.h"
|
#include "guf_common.h"
|
||||||
#include "guf_cstr.h"
|
#include "guf_cstr.h"
|
||||||
|
#include "guf_str.h"
|
||||||
|
|
||||||
#define GUF_DICT_KEY_T guf_cstr_const
|
#define GUF_DICT_KEY_T guf_cstr_const
|
||||||
#define GUF_DICT_KEY_HASH guf_cstr_const_hash
|
#define GUF_DICT_KEY_HASH guf_cstr_const_hash
|
||||||
@ -12,6 +13,14 @@
|
|||||||
#define GUF_DICT_NAME dict_cstr_int
|
#define GUF_DICT_NAME dict_cstr_int
|
||||||
#include "guf_dict.h"
|
#include "guf_dict.h"
|
||||||
|
|
||||||
|
#define GUF_DICT_KEY_T guf_str_view
|
||||||
|
#define GUF_DICT_KEY_HASH guf_str_view_hash
|
||||||
|
#define GUF_DICT_KEY_T_EQ guf_str_view_equal
|
||||||
|
#define GUF_DICT_VAL_T int32_t
|
||||||
|
#define GUF_DICT_VAL_T_IS_INTEGRAL_TYPE
|
||||||
|
#define GUF_DICT_NAME dict_sv_i32
|
||||||
|
#include "guf_dict.h"
|
||||||
|
|
||||||
static inline guf_hash_size_t int32_hash(const int32_t *a)
|
static inline guf_hash_size_t int32_hash(const int32_t *a)
|
||||||
{
|
{
|
||||||
return guf_hash(a, sizeof(int32_t), GUF_HASH_INIT);
|
return guf_hash(a, sizeof(int32_t), GUF_HASH_INIT);
|
||||||
|
|||||||
@ -23,7 +23,7 @@ void init_tests()
|
|||||||
GUF_ASSERT_RELEASE(test.get());
|
GUF_ASSERT_RELEASE(test.get());
|
||||||
g_tests.insert(std::move(test));
|
g_tests.insert(std::move(test));
|
||||||
|
|
||||||
test = std::make_unique<DictCstrToIntTest>("DictCstrToIntTest");
|
test = std::make_unique<DictSvToIntTest>("DictSvToIntTest");
|
||||||
GUF_ASSERT_RELEASE(test.get());
|
GUF_ASSERT_RELEASE(test.get());
|
||||||
g_tests.insert(std::move(test));
|
g_tests.insert(std::move(test));
|
||||||
|
|
||||||
|
|||||||
@ -10,7 +10,7 @@
|
|||||||
|
|
||||||
#include "guf_common.h"
|
#include "guf_common.h"
|
||||||
|
|
||||||
#define TEST_CHECK(COND) check((COND), GUF_STRINGIFY(COND), __LINE__, __FILE__)
|
#define TEST_CHECK(COND) (check((COND), GUF_STRINGIFY(COND), __LINE__, __FILE__))
|
||||||
|
|
||||||
struct Test
|
struct Test
|
||||||
{
|
{
|
||||||
|
|||||||
@ -9,10 +9,10 @@ extern "C"
|
|||||||
#include "guf_str.h"
|
#include "guf_str.h"
|
||||||
}
|
}
|
||||||
|
|
||||||
struct DictCstrToIntTest : public Test
|
struct DictSvToIntTest : public Test
|
||||||
{
|
{
|
||||||
|
|
||||||
DictCstrToIntTest(const std::string& name) : Test(name) {};
|
DictSvToIntTest(const std::string& name) : Test(name) {};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
@ -21,9 +21,9 @@ struct DictCstrToIntTest : public Test
|
|||||||
|
|
||||||
void insert_lookup()
|
void insert_lookup()
|
||||||
{
|
{
|
||||||
std::unordered_map<std::string, int> word_cnt_map {};
|
std::unordered_map<std::string_view, int32_t> word_cnt_map {};
|
||||||
dict_cstr_int word_cnt_dict {};
|
dict_sv_i32 word_cnt_dict {};
|
||||||
dict_cstr_int_init(&word_cnt_dict, &guf_allocator_libc);
|
dict_sv_i32_init(&word_cnt_dict, &guf_allocator_libc);
|
||||||
|
|
||||||
dbuf_str_view delims = dbuf_str_view_new(&guf_allocator_libc);
|
dbuf_str_view delims = dbuf_str_view_new(&guf_allocator_libc);
|
||||||
for (size_t i = 0; i < GUF_STATIC_BUF_SIZE(GUF_UTF8_WHITESPACE); ++i) {
|
for (size_t i = 0; i < GUF_STATIC_BUF_SIZE(GUF_UTF8_WHITESPACE); ++i) {
|
||||||
@ -37,14 +37,56 @@ struct DictCstrToIntTest : public Test
|
|||||||
guf_str_view input_str = {.str = text_buf.data, .len = text_buf.size};
|
guf_str_view input_str = {.str = text_buf.data, .len = text_buf.size};
|
||||||
guf_str_view tok;
|
guf_str_view tok;
|
||||||
while ((tok = guf_str_next_tok(&input_str, delims.data, delims.size, NULL, -1)).len) {
|
while ((tok = guf_str_next_tok(&input_str, delims.data, delims.size, NULL, -1)).len) {
|
||||||
|
if (tok.len <= 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
std::string_view sv(tok.str, tok.len);
|
||||||
|
TEST_CHECK(dict_sv_i32_contains(&word_cnt_dict, &tok) == word_cnt_map.contains(sv));
|
||||||
|
if (!dict_sv_i32_contains(&word_cnt_dict, &tok)) {
|
||||||
|
dict_sv_i32_insert_val_arg(&word_cnt_dict, tok, 1, GUF_CPY_VALUE, GUF_CPY_VALUE);
|
||||||
|
word_cnt_map.insert({sv, 1});
|
||||||
|
} else {
|
||||||
|
int32_t *cnt = dict_sv_i32_at_val_arg(&word_cnt_dict, tok);
|
||||||
|
if (TEST_CHECK(cnt)) {
|
||||||
|
*cnt += 1;
|
||||||
|
}
|
||||||
|
word_cnt_map.at(sv) += 1;
|
||||||
|
}
|
||||||
// printf("tok_len: %td ", tok.len);
|
// printf("tok_len: %td ", tok.len);
|
||||||
// printf("'%.*s'\n", (int)tok.len, tok.str);
|
// printf("'%.*s'\n", (int)tok.len, tok.str);
|
||||||
}
|
}
|
||||||
dbuf_str_view_free(&delims, NULL);
|
dbuf_str_view_free(&delims, NULL);
|
||||||
|
|
||||||
dict_cstr_int_free(&word_cnt_dict, NULL);
|
TEST_CHECK(dict_sv_i32_size(&word_cnt_dict) == std::ssize(word_cnt_map));
|
||||||
|
|
||||||
|
for (const auto & [word, cnt] : word_cnt_map ) {
|
||||||
|
guf_str_view sv = {.str = word.data(), .len = (ptrdiff_t)word.size()};
|
||||||
|
int32_t *res = dict_sv_i32_at(&word_cnt_dict, &sv);
|
||||||
|
TEST_CHECK(res && *res == cnt);
|
||||||
|
}
|
||||||
|
|
||||||
|
ptrdiff_t i = 0;
|
||||||
|
GUF_CNT_FOREACH(&word_cnt_dict, dict_sv_i32, kv_it) {
|
||||||
|
const dict_sv_i32_kv *kv = kv_it.ptr;
|
||||||
|
if (TEST_CHECK(kv)) {
|
||||||
|
const int32_t cnt = kv->val;
|
||||||
|
// printf("%.*s: %d\n", (int)kv->key.len, kv->key.str, cnt);
|
||||||
|
const std::string_view sv(kv->key.str, kv->key.len);
|
||||||
|
if (TEST_CHECK(word_cnt_map.contains(sv))) {
|
||||||
|
TEST_CHECK(word_cnt_map.at(sv) == cnt);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
++i;
|
||||||
|
}
|
||||||
|
TEST_CHECK(i == dict_sv_i32_size(&word_cnt_dict));
|
||||||
|
TEST_CHECK(i == std::ssize(word_cnt_map));
|
||||||
|
|
||||||
|
// std::cout << "load fac: " << dict_sv_i32_load_factor(&word_cnt_dict) << ", cap: " << word_cnt_dict.kv_indices_cap << "\n";
|
||||||
|
// std::cout << "size: " << dict_sv_i32_size(&word_cnt_dict) << ", max probelen: " << word_cnt_dict.max_probelen << "\n";
|
||||||
|
|
||||||
|
dict_sv_i32_free(&word_cnt_dict, NULL);
|
||||||
bool dbuf_null = !word_cnt_dict.kv_elems.data && !word_cnt_dict.kv_elems.allocator && !word_cnt_dict.kv_elems.capacity && !word_cnt_dict.kv_elems.size;
|
bool dbuf_null = !word_cnt_dict.kv_elems.data && !word_cnt_dict.kv_elems.allocator && !word_cnt_dict.kv_elems.capacity && !word_cnt_dict.kv_elems.size;
|
||||||
TEST_CHECK(!dbuf_null && !word_cnt_dict.kv_indices && !word_cnt_dict.kv_indices_cap && !word_cnt_dict.max_probelen && !word_cnt_dict.num_tombstones);
|
TEST_CHECK(dbuf_null && !word_cnt_dict.kv_indices && !word_cnt_dict.kv_indices_cap && !word_cnt_dict.max_probelen && !word_cnt_dict.num_tombstones);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool load_file()
|
bool load_file()
|
||||||
|
|||||||
@ -386,5 +386,4 @@ struct UTF8Test : public Test
|
|||||||
passed = (num_failed_checks == 0);
|
passed = (num_failed_checks == 0);
|
||||||
return passed;
|
return passed;
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user