Initial commit

This commit is contained in:
jun 2025-01-03 10:38:57 +01:00
commit 0751726fc5
20 changed files with 3409 additions and 0 deletions

5
.gitignore vendored Normal file
View File

@ -0,0 +1,5 @@
bin/
build/
.vscode/
**/.DS_Store
**/__pycache__

51
CMakeLists.txt Executable file
View File

@ -0,0 +1,51 @@
# Build script for libguf: a static library plus the libguf_test executable.
#
# target_link_options() was added in CMake 3.13, so that is the real minimum.
cmake_minimum_required(VERSION 3.13)

project(libguf LANGUAGES C)

# The CMAKE_C_* variables only initialise the properties of targets created
# AFTER they are set, so they must come before add_library()/add_executable().
set(CMAKE_C_STANDARD 99)
set(CMAKE_C_STANDARD_REQUIRED ON)
set(CMAKE_C_EXTENSIONS OFF)

set(SOURCES
  src/guf_common.c
  src/guf_str.c
  src/guf_dict.c
  src/guf_dbuf.c
  src/guf_obj.c
)

add_library(${PROJECT_NAME} STATIC ${SOURCES})
# NOTE(review): the original path contained a stray '}' ("...}}/lib/guf").
# The headers visible in this commit live in src/ -- confirm lib/guf is intended.
target_include_directories(${PROJECT_NAME} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/lib/guf")
# target_include_directories(${PROJECT_NAME} PRIVATE src)

add_executable(libguf_test ${SOURCES} src/guf_test.c)
target_include_directories(libguf_test PRIVATE src)

if(TARGET libguf_test)
  message("-- Configure libguf_test...")

  # Set as target properties: the CMAKE_* variables would have no effect here
  # because libguf_test has already been created.
  set(CMAKE_DEBUG_POSTFIX _dbg)
  set_target_properties(libguf_test PROPERTIES
    DEBUG_POSTFIX "${CMAKE_DEBUG_POSTFIX}"
    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/bin"
  )

  # Flags are chosen by compiler front end rather than by platform, so that
  # e.g. MinGW on Windows does not receive MSVC-style options.
  if(MSVC)
    set(WARNING_FLAGS_C "")
    set(DBG_COMPILE_FLAGS /fsanitize=address)
    set(DBG_LINK_FLAGS "")
  else()
    set(WARNING_FLAGS_C
      -Wall -Wextra -Wpedantic -Wvla -Wshadow -Wundef -Wmisleading-indentation
      -Wnull-dereference -Wswitch-default -Wno-newline-eof -Wno-unused-function
      -Wno-unused-parameter
    )
    # Note: no higher optimisations at all for debugger to work...
    set(DBG_COMPILE_FLAGS -fsanitize=undefined,address -g3 -Og)
    if(CMAKE_C_COMPILER_ID MATCHES "Clang")
      # -glldb is understood by Clang only.
      list(APPEND DBG_COMPILE_FLAGS -glldb)
    endif()
    # The sanitizer runtimes must also be requested at link time.
    set(DBG_LINK_FLAGS -fsanitize=undefined,address)
  endif()

  # The generator expressions are quoted so the ';' separated flag lists stay
  # inside a single $<...> expression. Warning flags are compile-only.
  target_compile_options(libguf_test PRIVATE
    ${WARNING_FLAGS_C}
    "$<$<CONFIG:Debug>:${DBG_COMPILE_FLAGS}>"
  )
  target_link_options(libguf_test PRIVATE
    "$<$<CONFIG:Debug>:${DBG_LINK_FLAGS}>"
  )

  # Enable LTO per configuration; CMAKE_BUILD_TYPE is empty under multi-config
  # generators, so the per-config properties are used instead of testing it.
  include(CheckIPOSupported)
  check_ipo_supported(RESULT ipo_available)
  if(ipo_available)
    message(STATUS "LTO enabled (Release and RelWithDebInfo)")
    set_target_properties(${PROJECT_NAME} PROPERTIES
      INTERPROCEDURAL_OPTIMIZATION_RELEASE TRUE
      INTERPROCEDURAL_OPTIMIZATION_RELWITHDEBINFO TRUE
    )
  else()
    message(STATUS "LTO disabled")
  endif()

  message("-- Configured libguf_test")
endif()

190
dbuf_tests.py Normal file
View File

@ -0,0 +1,190 @@
from functools import partial
from testgen import gen_test_struct, gen_res_str
# Default number of elements each generated test case works with
# (used as the default for the `n` parameter of the test functions below).
DEFAULT_N = 4
# Registry of callables that can be invoked without arguments, one per test
# case; filled by the test_funs.append(...) calls below and returned by all_tests().
test_funs = list()
def test_push(is_str = False, n = DEFAULT_N):
    """Build the expected result of pushing n elements onto an empty buffer.

    is_str selects string elements ("element at index i") instead of the
    integers 0..n-1. The result is passed through gen_res_str/gen_test_struct.
    """
    if is_str:
        expected = ["element at index " + str(i) for i in range(n)]
    else:
        expected = list(range(n))
    # The int variant deliberately has an empty suffix here.
    name = "str" if is_str else ""
    return gen_test_struct(f"tst_push{name}", f"guf_dbuf_test: push {name}", gen_res_str(expected))


test_funs.append(partial(test_push, False))
test_funs.append(partial(test_push, True))
def test_insert_empty_front():
    """Expected result of inserting 3141 at index 0 of an empty buffer."""
    expected = []
    expected.insert(0, 3141)
    result = gen_res_str(expected)
    return gen_test_struct("tst_insert_empty_front", "guf_dbuf_test: insert empty front", result)


test_funs.append(test_insert_empty_front)
def test_insert_empty_back():
    """Expected result of inserting 3141 at index 1 of an empty buffer.

    list.insert clamps the index, so the element simply ends up as the only one.
    """
    expected = []
    expected.insert(1, 3141)
    result = gen_res_str(expected)
    return gen_test_struct("tst_insert_empty_back", "guf_dbuf_test: insert empty back", result)


test_funs.append(test_insert_empty_back)
def test_insert(is_str = False, n = DEFAULT_N):
    """Build the expected result of a mixed sequence of insertions.

    For each step the insertion position is the end (every 7th step),
    index 1 (other even steps) or the last index (odd steps). Afterwards two
    sentinel values and their doubled versions are inserted near both ends.
    """
    expected = []
    for step in range(n):
        if step % 7 == 0:
            pos = len(expected)
        elif step % 2 == 0:
            pos = 1
        else:
            assert(len(expected) > 0)
            pos = len(expected) - 1
        expected.insert(pos, f"element at index {pos}" if is_str else step)

    # Sentinels: long strings for the str variant, plain ints otherwise.
    start, end = ("pi" * 64, "euler" * 64) if is_str else (3141, 2718)
    expected.insert(0, start)
    expected.insert(len(expected), end)
    expected.insert(1, start * 2)
    expected.insert(len(expected) - 1, end * 2)

    name = "str" if is_str else "int"
    return gen_test_struct(f"tst_insert{name}", f"guf_dbuf_test: insert {name}", gen_res_str(expected))


test_funs.append(partial(test_insert, False))
test_funs.append(partial(test_insert, True))
def test_erase(is_str = False, n = DEFAULT_N):
    """Build the expected result of erasing every other element of a buffer.

    The buffer is first filled with n elements (strings if is_str, else ints).
    The original loop used `del elem`, which only unbinds the loop variable and
    never modified the list, so the "erase" expectation equalled the input.
    """
    if is_str:
        buf = ["element at index " + str(i) for i in range(n)]
    else:
        buf = list(range(n))

    # Drop the elements whose ORIGINAL index is even.
    # NOTE(review): assumes the C test erases by original index -- confirm.
    buf = [elem for i, elem in enumerate(buf) if i % 2 != 0]

    name = "str" if is_str else "int"
    return gen_test_struct(f"tst_erase{name}", f"guf_dbuf_test: erase {name}", gen_res_str(buf))


test_funs.append(partial(test_erase, False))
test_funs.append(partial(test_erase, True))
def test_erase_all(is_str = False, n = DEFAULT_N):
    """Build the expected result of erasing every element of a buffer.

    The original loop used `del elem`, which only unbinds the loop variable;
    the list was left untouched. The expected buffer is now really empty.
    """
    if is_str:
        buf = ["element at index " + str(i) for i in range(n)]
    else:
        buf = list(range(n))

    buf.clear()  # every element erased

    name = "str" if is_str else "int"
    # The struct name keeps its historical "tst_remove" prefix.
    return gen_test_struct(f"tst_remove{name}", f"guf_dbuf_test: erase {name} all", gen_res_str(buf))


test_funs.append(partial(test_erase_all, False))
test_funs.append(partial(test_erase_all, True))
def test_pop(is_str = False, n = DEFAULT_N):
    """Build the expected result of popping every element off a buffer.

    The expectation is the popped values in pop order (last pushed first),
    followed by the remaining length of the source buffer (0).

    The original computed this list in `new_buf` but then passed the emptied
    `buf` to gen_res_str, leaving `new_buf` unused.
    NOTE(review): assumes the C test records the popped values -- confirm.
    """
    if is_str:
        buf = ["element at index " + str(i) for i in range(n)]
    else:
        buf = list(range(n))

    new_buf = [buf.pop() for _ in range(len(buf))]
    new_buf.append(len(buf))

    name = "str" if is_str else "int"
    return gen_test_struct(f"tst_pop{name}", f"guf_dbuf_test: pop {name}", gen_res_str(new_buf))


test_funs.append(partial(test_pop, False))
test_funs.append(partial(test_pop, True))
def test_front_back(is_str = False, n = DEFAULT_N):
    """Build the expected result of reading front/back while pushing.

    After every push the front (odd steps) or back (even steps) element is
    recorded. The first and last recorded values are then overwritten with
    sentinels, and the number of recorded values is appended.
    """
    source = []
    seen = []
    for step in range(n):
        source.append("element at index " + str(step) if is_str else step)
        # Odd step: front(); even step: back().
        seen.append(source[0] if step % 2 else source[-1])

    first, last = ("first elem", "last elem") if is_str else (12345, 54321)
    seen[0] = first
    seen[-1] = last
    seen.append(len(seen))

    name = "str" if is_str else "int"
    return gen_test_struct(f"tst_front_back{name}", f"guf_dbuf_test: front() back() {name}", gen_res_str(seen))


test_funs.append(partial(test_front_back, False))
test_funs.append(partial(test_front_back, True))
def test_at(is_str = False, n = DEFAULT_N):
    """Build the expected result of reading all elements back to front.

    Each element is doubled with `* 2` (integer doubling, or string
    repetition for the str variant) and collected in reverse order.
    """
    if is_str:
        source = ["element at index " + str(i) for i in range(n)]
    else:
        source = list(range(n))

    doubled_reversed = [elem * 2 for elem in reversed(source)]

    name = "str" if is_str else "int"
    return gen_test_struct(f"tst_at{name}", f"guf_dbuf_test: at() {name}", gen_res_str(doubled_reversed))


test_funs.append(partial(test_at, False))
test_funs.append(partial(test_at, True))
def all_tests():
    """Return the module-level registry of test-case callables (not a copy)."""
    registered = test_funs
    return registered

BIN
doc/guf_dict-diagram.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 133 KiB

3
src/guf_assert.h Normal file
View File

@ -0,0 +1,3 @@
#include <stdlib.h>
#include <stdio.h>

177
src/guf_common.c Normal file
View File

@ -0,0 +1,177 @@
#include "guf_common.h"
#include <stdlib.h>
#include <stdarg.h>
#include "guf_dict.h"
#include "guf_str.h"
/*
    Reports whether the lowest-addressed byte of an unsigned int holding the
    value 1 is something other than 1, i.e. whether the host is not
    little-endian.
*/
bool guf_is_big_endian(void)
{
    const unsigned probe = 1;
    const char *first_byte = (const char*)&probe;
    return *first_byte != 1;
}
// typedef struct alloc_info {
// size_t num_alloc, num_free;
// } alloc_info;
// static bool init = false;
// static guf_dict alloc_table;
// static guf_dict pointer_cnt;
// bool guf_alloc_init(void)
// {
// alloc_table = GUF_DICT_UNINITIALISED;
// pointer_cnt = GUF_DICT_UNINITIALISED;
// guf_dict_kv_funcs alloc_info_funcs = GUF_DICT_FUNCS_NULL;
// alloc_info_funcs.type_size = sizeof(alloc_info);
// bool success = guf_dict_init(&alloc_table, 64, &GUF_DICT_FUNCS_GUF_STR, &alloc_info_funcs);
// if (!success) {
// return false;
// }
// guf_dict_kv_funcs void_ptr_funcs = GUF_DICT_FUNCS_NULL;
// void_ptr_funcs.type_size = sizeof(void*);
// guf_dict_kv_funcs size_t_funcs = GUF_DICT_FUNCS_NULL;
// size_t_funcs.type_size = sizeof(size_t);
// success = guf_dict_init(&pointer_cnt, 128, &void_ptr_funcs, &size_t_funcs);
// if (!success) {
// return false;
// }
// if (success) {
// init = true;
// }
// return success;
// }
// static void track_alloc(void *ptr, const char *name)
// {
// if (!init) {
// return;
// }
// if (guf_dict_contains_key(&pointer_cnt, &ptr)) {
// GUF_ASSERT_RELEASE(false);
// } else {
// size_t cnt = 1;
// bool succ = guf_dict_insert(&pointer_cnt, &ptr, &cnt, GUF_DICT_CPY_KEY_VAL);
// GUF_ASSERT_RELEASE(succ);
// }
// guf_str name_str = guf_str_new_view_from_cstr(name);
// if (guf_dict_contains_key(&alloc_table, &name_str)) {
// alloc_info *ai = guf_dict_get_val(&alloc_table, &name_str);
// GUF_ASSERT_RELEASE(ai);
// ai->num_alloc += 1;
// return;
// } else {
// guf_str new_str = guf_str_new(name);
// GUF_ASSERT_RELEASE(guf_str_is_valid(&new_str));
// alloc_info ai = {.num_alloc = 1, .num_free = 0};
// bool succ = guf_dict_insert(&alloc_table, &new_str, &ai, 0);
// GUF_ASSERT_RELEASE(succ);
// GUF_ASSERT(guf_dict_contains_key(&alloc_table, &name_str));
// }
// }
// static void track_free(void *ptr, const char *name)
// {
// if (!init) {
// return;
// }
// GUF_ASSERT_RELEASE(guf_dict_contains_key(&pointer_cnt, &ptr));
// size_t *cnt = guf_dict_get_val(&pointer_cnt, &ptr);
// GUF_ASSERT_RELEASE(cnt);
// if (*cnt == 0) {
// fprintf(stderr, "Double free for %s\n", name);
// GUF_ASSERT_RELEASE(false);
// } else{
// GUF_ASSERT(*cnt == 1);
// *cnt = 0;
// }
// const guf_str name_str = guf_str_new_view_from_cstr(name);
// if (guf_dict_contains_key(&alloc_table, &name_str)) {
// alloc_info *ai = guf_dict_get_val(&alloc_table, &name_str);
// GUF_ASSERT_RELEASE(ai);
// GUF_ASSERT_RELEASE(ai->num_alloc > 0);
// ai->num_free += 1;
// }
// }
// void *guf_malloc(size_t size, const char *name)
// {
// void *ptr = malloc(size);
// if (!ptr) {
// return ptr;
// }
// track_alloc(ptr, name);
// return ptr;
// }
// void *guf_calloc(size_t count, size_t size, const char *name)
// {
// void *ptr = calloc(count, size);
// if (!ptr) {
// return ptr;
// }
// track_alloc(ptr, name);
// return ptr;
// }
// void *guf_realloc(void *ptr, size_t size, const char *name)
// {
// void *new_ptr = realloc(ptr, size);
// if (!ptr) {
// return new_ptr;
// }
// track_alloc(ptr, name);
// return new_ptr;
// }
// void guf_free(void *ptr, const char *name)
// {
// if (!ptr) {
// return;
// }
// track_free(ptr, name);
// free(ptr);
// return;
// }
// void guf_alloc_print(void)
// {
// if (!init) {
// printf("guf_alloc_print: guf_alloc not initialised\n");
// return;
// }
// printf("size: %zu\n", alloc_table.size);
// for (guf_dict_iter it = guf_dict_iter_begin(&alloc_table); !guf_dict_iter_is_end(&it); guf_dict_iter_advance(&it)) {
// const guf_str *key = it.key;
// alloc_info *val = it.val;
// // printf("idx: %zu elem %zu\n", it.idx, it.elems_seen);
// printf("'%s':\n - %zu alloc(s)\n - %zu free(s)\n\n", guf_str_get_const_c_str(key), val->num_alloc, val->num_free);
// }
// }

72
src/guf_common.h Normal file
View File

@ -0,0 +1,72 @@
#ifndef GUF_COMMON_H
#define GUF_COMMON_H
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include "guf_assert.h"
// Selects the width of guf_hash_size_t: uint32_t while GUF_DICT_USE_32_BIT_HASH
// is defined (as it is here), uint64_t otherwise.
#define GUF_DICT_USE_32_BIT_HASH
#ifdef GUF_DICT_USE_32_BIT_HASH
typedef uint32_t guf_hash_size_t;
#else
typedef uint64_t guf_hash_size_t;
#endif
// Debug-only assertion: forwards to the standard assert().
#define GUF_ASSERT(COND) assert(COND)
// Assertion that does not depend on assert(): if COND is false, prints the
// stringified condition, file and line to stderr and calls exit(EXIT_FAILURE).
// NOTE(review): the expansion ends in "while (0);" INCLUDING the semicolon, so
// "if (x) GUF_ASSERT_RELEASE(y); else ..." does not compile, and call sites
// that omit their own ';' currently compile by accident. Removing the trailing
// ';' requires auditing every call site first.
#define GUF_ASSERT_RELEASE(COND) do { \
if (!(COND)) { \
fprintf(stderr, "libguf release assertion failed: " #COND ", file " __FILE__ ", line %d\n", __LINE__); \
exit(EXIT_FAILURE); \
} \
} while (0);
// Number of elements of an array object BUF (sizeof of the array divided by
// sizeof of its first element).
#define GUF_STATIC_BUF_SIZE(BUF) (sizeof((BUF)) / (sizeof((BUF)[0])))
// Arithmetic helpers. Each argument may be evaluated more than once, so do not
// pass expressions with side effects.
#define GUF_ABS(X) ((X) >= 0 ? (X) : -(X))
#define GUF_MIN(X, Y) ((X) <= (Y) ? (X) : (Y))
#define GUF_MAX(X, Y) ((X) >= (Y) ? (X) : (Y))
// Limits X to at most MAX first, then to at least MIN.
#define GUF_CLAMP(X, MIN, MAX) GUF_MAX(GUF_MIN((X), (MAX)), (MIN))
/*
    Returns true if a * b does not fit into a size_t.
    Detected by multiplying (unsigned arithmetic wraps) and checking whether
    dividing the product by a gives b back. a == 0 can never overflow.
*/
static inline bool guf_is_mul_overflow_size_t(size_t a, size_t b)
{
    if (a == 0) {
        return false;
    }
    const size_t product = a * b;
    return product / a != b;
}
/*
    Returns a * b. Terminates the program through GUF_ASSERT_RELEASE if the
    product would overflow size_t.
*/
static inline size_t guf_safe_mul_size_t(size_t a, size_t b)
{
    GUF_ASSERT_RELEASE(!guf_is_mul_overflow_size_t(a, b));
    const size_t product = a * b;
    return product;
}
/*
    Returns true if count * sizeof_elem is a valid byte size, i.e. count is
    non-negative, sizeof_elem is positive and the product does not exceed
    PTRDIFF_MAX. Returns false otherwise.

    This is a pure predicate: it never terminates the program. (The previous
    implementation multiplied via guf_safe_mul_size_t, which exits on overflow
    instead of letting this function report false.)
*/
static inline bool guf_is_safe_size_calc(ptrdiff_t count, ptrdiff_t sizeof_elem)
{
    if (count < 0 || sizeof_elem <= 0) {
        return false;
    }
    /* For count >= 0 and sizeof_elem > 0:
       count * sizeof_elem <= PTRDIFF_MAX  <=>  count <= PTRDIFF_MAX / sizeof_elem,
       and the division cannot overflow. */
    return count <= PTRDIFF_MAX / sizeof_elem;
}
/*
    Returns count * sizeof_elem as a ptrdiff_t.
    Terminates the program through GUF_ASSERT_RELEASE if count is negative,
    sizeof_elem is not positive, the multiplication overflows size_t, or the
    product exceeds PTRDIFF_MAX.
*/
static inline ptrdiff_t guf_safe_size_calc(ptrdiff_t count, ptrdiff_t sizeof_elem)
{
    GUF_ASSERT_RELEASE(count >= 0);
    GUF_ASSERT_RELEASE(sizeof_elem > 0);
    /* Both operands are known to be non-negative here, so the casts are lossless. */
    const size_t size = guf_safe_mul_size_t((size_t)count, (size_t)sizeof_elem);
    GUF_ASSERT_RELEASE(size <= PTRDIFF_MAX);
    return (ptrdiff_t)size;
}
// Host byte-order probe; defined in guf_common.c.
bool guf_is_big_endian(void);
// Allocation-tracking API.
// NOTE(review): in the guf_common.c visible in this commit, the definitions of
// the functions below exist only as commented-out code, so calling them would
// presumably fail at link time -- confirm before use.
bool guf_alloc_init(void);
void *guf_malloc(size_t size, const char *name);
void *guf_calloc(size_t count, size_t size, const char *name);
void *guf_realloc(void *ptr, size_t size, const char *name);
void guf_free(void *ptr, const char *name);
void guf_alloc_print(void);
#endif

234
src/guf_darr.h Normal file
View File

@ -0,0 +1,234 @@
#ifndef GUF_DARR_H
#define GUF_DARR_H
#include <stddef.h>
#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
#include "guf_assert.h"
// Growth policy: the capacity is doubled whenever a full array has to grow.
#define GUF_DARR_NEW_CAPACITY(CAP) ((CAP) * 2)
// Iterates over all elements of ARR (a guf_darr_* struct, not a pointer) with
// ELEM_PTR of type ELEM_T* pointing at the current element.
// The expansion is an assert statement followed by a for loop, so it is not a
// single statement (do not use it as the unbraced body of an if/else).
// It declares a loop variable named `end`, which shadows any outer `end`.
#define GUF_DARR_FOREACH(ARR, ELEM_T, ELEM_PTR) assert((ARR).capacity); for (ELEM_T *ELEM_PTR = (ARR).data, *end = (ARR).data + (ARR).size; ELEM_PTR != end; ++ELEM_PTR)
// TODO: move and copy semantics? (TYPE vs pointer to TYPE); append_val vs append_ptr
// cpy makes only sense for ptrs or ref/handle types
/*
    GUF_DARR_DEFINE(TYPE, TYPENAME, ELEM_CPY, ELEM_FREE)

    Defines the struct guf_darr_<TYPENAME>, a growable array of TYPE, together
    with its functions guf_darr_<TYPENAME>_{init, append, insert_at, pop_back,
    back, front, at, erase_at, shrink_to_fit, free}.

    ELEM_CPY  : TYPE (*)(const TYPE) or NULL. If set, elements are stored
                through it by append/insert_at instead of plain assignment.
    ELEM_FREE : void (*)(TYPE) or NULL. If set, it is called on every element
                that is removed (pop_back, erase_at, free).

    All functions assert that the array is valid and, when assertions are
    disabled, report an invalid array through their return value
    (false / NULL). Byte-size calculations are checked against overflow by
    dividing (size_t)-1 by sizeof(TYPE); the previous "product < factor" test
    does not detect every wrap-around.

    Only block comments may be used inside the macro body, and its last line
    must not end in a backslash (otherwise the following line becomes part of
    the macro).
*/
#define GUF_DARR_DEFINE(TYPE, TYPENAME, ELEM_CPY, ELEM_FREE) \
typedef struct guf_darr_##TYPENAME { \
    TYPE *data; \
    size_t size, capacity; \
    TYPE (*elem_cpy)(const TYPE elem); /* Can be NULL */ \
    void (*elem_free)(TYPE elem); /* Can be NULL */ \
} guf_darr_##TYPENAME; \
\
/* Private helper: grows a full array to GUF_DARR_NEW_CAPACITY(capacity). */ \
/* Returns false (array unchanged) on overflow or allocation failure. */ \
static bool guf_darr_##TYPENAME##_grow_if_full_(guf_darr_##TYPENAME *arr) { \
    if (arr->size != arr->capacity) { \
        return true; \
    } \
    const size_t new_cap = GUF_DARR_NEW_CAPACITY(arr->capacity); \
    if (new_cap <= arr->capacity) { /* Overflow */ \
        return false; \
    } \
    if (new_cap > (size_t)-1 / sizeof(TYPE)) { /* Byte size would overflow */ \
        return false; \
    } \
    TYPE *data_new = realloc(arr->data, new_cap * sizeof(TYPE)); \
    if (!data_new) { \
        return false; \
    } \
    arr->data = data_new; \
    arr->capacity = new_cap; \
    return true; \
} \
\
/* Initialises arr with room for start_cap elements (at least 1). */ \
bool guf_darr_##TYPENAME##_init(guf_darr_##TYPENAME *arr, size_t start_cap) { \
    assert(arr); \
    if (!arr) { \
        return false; \
    } \
    if (start_cap < 1) { \
        start_cap = 1; \
    } \
    if (start_cap > (size_t)-1 / sizeof(TYPE)) { /* Byte size would overflow */ \
        return false; \
    } \
    arr->data = malloc(start_cap * sizeof(TYPE)); \
    if (!arr->data) { \
        arr->size = arr->capacity = 0; \
        return false; \
    } \
    arr->size = 0; \
    arr->capacity = start_cap; \
    arr->elem_cpy = ELEM_CPY; \
    arr->elem_free = ELEM_FREE; \
    return true; \
} \
\
/* Appends elem at the end, growing the array if it is full. */ \
bool guf_darr_##TYPENAME##_append(guf_darr_##TYPENAME *arr, TYPE elem) { \
    bool valid = arr && arr->data && arr->capacity && arr->size <= arr->capacity; \
    assert(valid); \
    if (!valid) { \
        return false; \
    } \
    if (!guf_darr_##TYPENAME##_grow_if_full_(arr)) { \
        return false; \
    } \
    assert(arr->size < arr->capacity); \
    if (arr->elem_cpy) { \
        arr->data[arr->size++] = arr->elem_cpy(elem); \
    } else { \
        arr->data[arr->size++] = elem; \
    } \
    return true; \
} \
\
/* Inserts elem before the existing element at idx (idx must be < size). */ \
bool guf_darr_##TYPENAME##_insert_at(guf_darr_##TYPENAME *arr, TYPE elem, size_t idx) { \
    bool valid = arr && arr->data && arr->capacity && arr->size <= arr->capacity; \
    assert(valid); \
    if (!valid) { \
        return false; \
    } \
    assert(idx < arr->size); \
    if (idx >= arr->size) { \
        return false; \
    } \
    assert(arr->size != 0); \
    if (!guf_darr_##TYPENAME##_grow_if_full_(arr)) { \
        return false; \
    } \
    assert(arr->size < arr->capacity); \
    /* Shift [idx, size) one slot to the right, back to front. */ \
    for (size_t i = arr->size; i > idx; --i) { \
        arr->data[i] = arr->data[i - 1]; \
    } \
    if (arr->elem_cpy) { \
        arr->data[idx] = arr->elem_cpy(elem); \
    } else { \
        arr->data[idx] = elem; \
    } \
    ++arr->size; \
    return true; \
} \
\
/* Removes the last element (no-op on an empty array). */ \
void guf_darr_##TYPENAME##_pop_back(guf_darr_##TYPENAME *arr) { \
    bool valid = arr && arr->data && arr->capacity && arr->size <= arr->capacity; \
    assert(valid); \
    if (!valid) { \
        return; \
    } \
    if (arr->size == 0) { \
        return; \
    } \
    if (arr->elem_free) { \
        arr->elem_free(arr->data[arr->size - 1]); \
    } \
    --arr->size; \
} \
\
/* Pointer to the last element, or NULL if the array is empty/invalid. */ \
TYPE *guf_darr_##TYPENAME##_back(const guf_darr_##TYPENAME *arr) { \
    bool valid = arr && arr->data && arr->capacity && arr->size <= arr->capacity; \
    assert(valid); \
    if (!valid) { \
        return NULL; \
    } \
    if (arr->size == 0) { \
        return NULL; \
    } \
    return arr->data + (arr->size - 1); \
} \
\
/* Pointer to the first element, or NULL if the array is empty/invalid. */ \
TYPE *guf_darr_##TYPENAME##_front(const guf_darr_##TYPENAME *arr) { \
    bool valid = arr && arr->data && arr->capacity && arr->size <= arr->capacity; \
    assert(valid); \
    if (!valid) { \
        return NULL; \
    } \
    if (arr->size == 0) { \
        return NULL; \
    } \
    return arr->data + 0; \
} \
\
/* Pointer to the element at idx, or NULL if idx is out of range. */ \
TYPE *guf_darr_##TYPENAME##_at(const guf_darr_##TYPENAME *arr, size_t idx) { \
    bool valid = arr && arr->data && arr->capacity && arr->size <= arr->capacity; \
    assert(valid); \
    if (!valid) { \
        return NULL; \
    } \
    if (idx >= arr->size) { \
        return NULL; \
    } \
    assert(arr->size != 0); \
    return arr->data + idx; \
} \
\
/* Removes the element at idx and closes the gap; false if idx is out of range. */ \
bool guf_darr_##TYPENAME##_erase_at(guf_darr_##TYPENAME *arr, size_t idx) { \
    bool valid = arr && arr->data && arr->capacity && arr->size <= arr->capacity; \
    assert(valid); \
    if (!valid) { \
        return false; \
    } \
    if (idx >= arr->size) { \
        return false; \
    } \
    assert(arr->size != 0); \
    if (arr->elem_free) { \
        arr->elem_free(arr->data[idx]); \
    } \
    /* idx < size, so idx + 1 cannot overflow; the loop is empty for the last element. */ \
    for (size_t i = idx + 1; i < arr->size; ++i) { \
        arr->data[i - 1] = arr->data[i]; \
    } \
    --arr->size; \
    return true; \
} \
\
/* Shrinks the capacity to the current size (but never below 1). */ \
bool guf_darr_##TYPENAME##_shrink_to_fit(guf_darr_##TYPENAME *arr) { \
    bool valid = arr && arr->data && arr->capacity && arr->size <= arr->capacity; \
    assert(valid); \
    if (!valid) { \
        return false; \
    } \
    if (arr->size == arr->capacity) { \
        return true; \
    } \
    const size_t new_cap = arr->size == 0 ? 1 : arr->size; \
    TYPE *data_new = realloc(arr->data, sizeof(TYPE) * new_cap); \
    if (!data_new) { \
        return false; \
    } \
    arr->data = data_new; \
    arr->capacity = new_cap; \
    return true; \
} \
\
/* Calls elem_free (if set) on every element, then releases the storage. */ \
bool guf_darr_##TYPENAME##_free(guf_darr_##TYPENAME *arr) { \
    bool valid = arr && arr->data && arr->capacity && arr->size <= arr->capacity; \
    assert(valid); \
    if (!valid) { \
        return false; \
    } \
    if (arr->elem_free) { \
        for (size_t i = 0; i < arr->size; ++i) { \
            arr->elem_free(arr->data[i]); \
        } \
    } \
    free(arr->data); \
    arr->data = NULL; \
    arr->capacity = arr->size = 0; \
    return true; \
}
#endif

260
src/guf_dbuf.c Normal file
View File

@ -0,0 +1,260 @@
#include <string.h>
#include "guf_dbuf.h"
#include "guf_common.h"
/*
    True if dbuf is non-NULL, has a positive element size, owns storage and
    holds at least one element (0 < size <= capacity).
*/
static inline bool dbuf_valid_and_not_empty(const guf_dbuf* dbuf) {
    if (!dbuf) {
        return false;
    }
    if (!(guf_obj_meta_sizeof_obj(dbuf->elem_meta) > 0)) {
        return false;
    }
    if (!dbuf->data || !(dbuf->capacity > 0)) {
        return false;
    }
    return dbuf->size > 0 && dbuf->size <= dbuf->capacity;
}
/*
    True if dbuf is non-NULL, has a positive element size and consistent
    bookkeeping (0 <= size <= capacity). The buffer may be empty and may own no
    storage at all.

    Terminates through GUF_ASSERT_RELEASE if data and capacity disagree
    (exactly one of them is zero/NULL).

    The NULL check now happens before dbuf is dereferenced; previously the
    release assertion read dbuf->data first.
*/
static inline bool dbuf_valid_and_maybe_empty(const guf_dbuf* dbuf) {
    if (!dbuf) {
        return false;
    }
    GUF_ASSERT_RELEASE((!dbuf->data && !dbuf->capacity) || (dbuf->data && dbuf->capacity));
    return guf_obj_meta_sizeof_obj(dbuf->elem_meta) > 0 && dbuf->capacity >= 0 && dbuf->size >= 0 && dbuf->size <= dbuf->capacity;
}
/*
    Ensures that dbuf can hold at least min_capacity elements.
    Returns true if the capacity already suffices or the (re)allocation
    succeeded, false if the allocation failed (dbuf is left unchanged).
    A buffer without storage is allocated with calloc, an existing one is
    grown with realloc. Invalid arguments and size overflows terminate the
    program through GUF_ASSERT_RELEASE.
*/
bool guf_dbuf_reserve(guf_dbuf *dbuf, ptrdiff_t min_capacity)
{
    GUF_ASSERT_RELEASE(dbuf_valid_and_maybe_empty(dbuf));
    GUF_ASSERT_RELEASE(min_capacity >= 0);

    const ptrdiff_t sizeof_elem = guf_obj_meta_sizeof_obj(dbuf->elem_meta);
    if (min_capacity <= dbuf->capacity) {
        return true;
    }

    void *data = NULL;
    if (dbuf->data) {
        data = realloc(dbuf->data, guf_safe_size_calc(min_capacity, sizeof_elem));
    } else {
        GUF_ASSERT_RELEASE(guf_is_safe_size_calc(min_capacity, sizeof_elem));
        data = calloc(min_capacity, sizeof_elem);
    }

    GUF_ASSERT(data);
    if (!data) {
        return false;
    }
    dbuf->data = data;
    dbuf->capacity = min_capacity;
    return true;
}
/*
    Initialises *dbuf as an empty buffer for elements described by elem_meta
    and reserves room for start_cap elements (none if start_cap is 0).
    Returns false if the initial allocation failed.

    dbuf may point to uninitialised memory: data is now reset to NULL before
    guf_dbuf_reserve runs. Previously data was only reset when start_cap was 0,
    so for start_cap > 0 an indeterminate pointer could reach the consistency
    assertion or realloc().
*/
bool guf_dbuf_init(guf_dbuf *dbuf, guf_obj_meta elem_meta, ptrdiff_t start_cap)
{
    GUF_ASSERT_RELEASE(dbuf);
    GUF_ASSERT_RELEASE(start_cap >= 0);
    const ptrdiff_t sizeof_elem = guf_obj_meta_sizeof_obj(elem_meta);
    GUF_ASSERT_RELEASE(sizeof_elem > 0);

    dbuf->elem_meta = elem_meta;
    dbuf->data = NULL;
    dbuf->size = dbuf->capacity = 0;
    if (start_cap == 0) {
        return true;
    }

    /* guf_dbuf_reserve sets dbuf->capacity to start_cap on success. */
    bool success = guf_dbuf_reserve(dbuf, start_cap);
    GUF_ASSERT(success);
    return success;
}
/*
    Returns an empty buffer (no storage allocated) for elements described by
    elem_meta. Terminates through GUF_ASSERT_RELEASE if initialisation fails.
*/
guf_dbuf guf_dbuf_new(guf_obj_meta elem_meta)
{
    guf_dbuf dbuf = {0};
    const bool success = guf_dbuf_init(&dbuf, elem_meta, 0);
    GUF_ASSERT_RELEASE(success);
    return dbuf;
}
/*
    Returns an empty buffer with room for `capacity` elements described by
    elem_meta. Terminates through GUF_ASSERT_RELEASE if capacity is negative
    or the initialisation (including the allocation) fails.
*/
guf_dbuf guf_dbuf_new_with_capacity(guf_obj_meta elem_meta, ptrdiff_t capacity)
{
    GUF_ASSERT_RELEASE(capacity >= 0);
    guf_dbuf dbuf = {0};
    const bool success = guf_dbuf_init(&dbuf, elem_meta, capacity);
    GUF_ASSERT_RELEASE(success);
    return dbuf;
}
/*
    Returns the address of the element at idx.
    The buffer must be non-empty and idx must lie in [0, size); violations
    terminate the program through GUF_ASSERT_RELEASE.
*/
static inline void *get_elem(guf_dbuf *dbuf, ptrdiff_t idx)
{
    GUF_ASSERT_RELEASE(dbuf_valid_and_not_empty(dbuf));
    GUF_ASSERT_RELEASE(idx >= 0);
    GUF_ASSERT_RELEASE(idx < dbuf->size && idx < dbuf->capacity);
    /* Byte offset of the element, computed with overflow checking. */
    const ptrdiff_t offset = guf_safe_size_calc(idx, guf_obj_meta_sizeof_obj(dbuf->elem_meta));
    char *base = (char*)dbuf->data;
    return base + offset;
}
/*
    Growth policy: GUF_DBUF_INITIAL_CAP for an empty buffer, doubling while the
    capacity is below 8, and a factor of 3/2 from then on.
    Terminates through GUF_ASSERT_RELEASE if the result does not exceed old_cap
    or does not fit into a ptrdiff_t.
*/
static inline ptrdiff_t next_capacity(ptrdiff_t old_cap)
{
    GUF_ASSERT_RELEASE(old_cap >= 0);
    size_t new_cap = GUF_DBUF_INITIAL_CAP;
    if (old_cap >= 8) {
        new_cap = (size_t)old_cap * 3ull / 2ull;
    } else if (old_cap != 0) {
        new_cap = (size_t)old_cap * 2ull;
    }
    GUF_ASSERT_RELEASE(new_cap > (size_t)old_cap); // Fail on overflow.
    GUF_ASSERT_RELEASE(new_cap <= PTRDIFF_MAX);
    return (ptrdiff_t)new_cap;
}
/*
    Makes sure there is room for one more element: if size == capacity the
    buffer is grown to next_capacity(capacity).
    Returns false if that reservation failed, true otherwise.
*/
static inline bool grow_if_full(guf_dbuf *dbuf)
{
    GUF_ASSERT_RELEASE(dbuf->capacity >= 0 && dbuf->size >= 0);
    const bool is_full = dbuf->size == dbuf->capacity;
    if (is_full && !guf_dbuf_reserve(dbuf, next_capacity(dbuf->capacity))) {
        return false;
    }
    GUF_ASSERT_RELEASE(dbuf->size < dbuf->capacity);
    return true;
}
/*
    Copies *elem into the slot at idx using guf_cpy with the buffer's element
    metadata and the requested copy option; returns what guf_cpy returned.
    idx must address an existing slot (idx < size). A NULL elem or a NULL
    result from guf_cpy terminates the program through GUF_ASSERT_RELEASE.
*/
static inline void *cpy_to(guf_dbuf *dbuf, ptrdiff_t idx, void *elem, guf_obj_cpy_opt cpy_opt)
{
    GUF_ASSERT_RELEASE(dbuf_valid_and_not_empty(dbuf));
    GUF_ASSERT_RELEASE(elem);
    GUF_ASSERT_RELEASE(idx >= 0 && idx < dbuf->capacity && idx < dbuf->size);
    void *slot = get_elem(dbuf, idx);
    void *dst = guf_cpy(slot, elem, dbuf->elem_meta, cpy_opt);
    GUF_ASSERT_RELEASE(dst);
    return dst;
}
/*
    Appends *elem to the end of the buffer, growing it first if necessary.
    Returns the result of copying into the new last slot, or NULL if the
    buffer could not grow.
*/
void *guf_dbuf_push(guf_dbuf *dbuf, void *elem, guf_obj_cpy_opt cpy_opt)
{
    GUF_ASSERT_RELEASE(dbuf_valid_and_maybe_empty(dbuf));
    bool success = grow_if_full(dbuf);
    GUF_ASSERT(success);
    if (!success) {
        return NULL;
    }
    /* The size is bumped before copying because cpy_to requires idx < size. */
    const ptrdiff_t new_idx = dbuf->size;
    dbuf->size += 1;
    return cpy_to(dbuf, new_idx, elem, cpy_opt);
}
/*
    Inserts *elem before position idx (0 <= idx <= size); idx == size behaves
    like guf_dbuf_push. The elements from idx onwards are moved one slot to the
    right with value copies. Returns the result of copying into the slot at
    idx, or NULL if the buffer could not grow.
*/
void *guf_dbuf_insert(guf_dbuf *dbuf, void *elem, ptrdiff_t idx, guf_obj_cpy_opt cpy_opt)
{
    GUF_ASSERT_RELEASE(dbuf_valid_and_maybe_empty(dbuf));
    GUF_ASSERT_RELEASE(idx >= 0 && idx <= dbuf->size);
    if (idx == dbuf->size) {
        return guf_dbuf_push(dbuf, elem, cpy_opt);
    }
    GUF_ASSERT_RELEASE(idx < dbuf->size);

    bool success = grow_if_full(dbuf);
    GUF_ASSERT(success);
    if (!success) {
        return NULL;
    }

    /* Open a gap at idx: start at the new last slot and walk towards idx. */
    ptrdiff_t free_idx = dbuf->size;
    dbuf->size += 1;
    while (free_idx > idx) {
        void *dst = get_elem(dbuf, free_idx);
        void *src = get_elem(dbuf, free_idx - 1);
        guf_cpy(dst, src, dbuf->elem_meta, GUF_CPY_VALUE);
        --free_idx;
    }
    return cpy_to(dbuf, idx, elem, cpy_opt);
}
/*
    Removes the element at idx (0 <= idx < size): calls the element's free
    operation if the metadata provides one, moves all following elements one
    slot to the left with value copies and shrinks the size by one.

    The size was previously never decremented, so the buffer kept a stale
    duplicate of its last element (which guf_dbuf_free would later free again).
*/
void guf_dbuf_erase(guf_dbuf *dbuf, ptrdiff_t idx)
{
    GUF_ASSERT_RELEASE(dbuf_valid_and_not_empty(dbuf));
    GUF_ASSERT_RELEASE(idx >= 0);
    GUF_ASSERT_RELEASE(idx < dbuf->size);

    void *to_erase = get_elem(dbuf, idx);
    if (dbuf->elem_meta.has_ops && dbuf->elem_meta.data.ops->free) {
        dbuf->elem_meta.data.ops->free(to_erase);
    }

    /* Close the gap left by the erased element. */
    for (ptrdiff_t free_idx = idx; free_idx < dbuf->size - 1; ++free_idx) {
        void *dst = get_elem(dbuf, free_idx);
        void *src = get_elem(dbuf, free_idx + 1);
        guf_cpy(dst, src, dbuf->elem_meta, GUF_CPY_VALUE);
    }
    dbuf->size -= 1;
}
/*
    Removes the last element from a non-empty buffer and returns its address.
    The storage is not released and no free operation is called here; the
    returned pointer refers to the slot just past the new end of the buffer.
*/
void *guf_dbuf_pop(guf_dbuf *dbuf)
{
    GUF_ASSERT_RELEASE(dbuf_valid_and_not_empty(dbuf));
    const ptrdiff_t last_idx = dbuf->size - 1;
    void *popped = get_elem(dbuf, last_idx);
    dbuf->size = last_idx;
    return popped;
}
/*
    Returns the address of the element at idx of a non-empty buffer.
    An out-of-range idx terminates the program through GUF_ASSERT_RELEASE.
*/
void *guf_dbuf_at(guf_dbuf *dbuf, ptrdiff_t idx)
{
    GUF_ASSERT_RELEASE(dbuf_valid_and_not_empty(dbuf));
    /* The ';' was missing here and the line only compiled because the macro
       expansion happens to end in a semicolon. */
    GUF_ASSERT_RELEASE(idx >= 0 && idx < dbuf->size);
    return get_elem(dbuf, idx);
}
/*
    Returns the address of the first element of a non-empty buffer.
*/
void *guf_dbuf_front(guf_dbuf *dbuf)
{
    GUF_ASSERT_RELEASE(dbuf_valid_and_not_empty(dbuf));
    const ptrdiff_t first_idx = 0;
    return get_elem(dbuf, first_idx);
}
/*
    Returns the address of the last element of a non-empty buffer.
*/
void *guf_dbuf_back(guf_dbuf *dbuf)
{
    GUF_ASSERT_RELEASE(dbuf_valid_and_not_empty(dbuf));
    const ptrdiff_t last_idx = (ptrdiff_t)dbuf->size - 1;
    return get_elem(dbuf, last_idx);
}
/*
    Reduces the capacity to the current size.
    Returns true on success (or if nothing had to be done) and false if the
    reallocation failed, in which case the buffer is left unchanged.

    An empty buffer releases its storage completely (data == NULL,
    capacity == 0). The previous code called realloc(data, 0) in that case,
    whose behaviour is implementation-defined: it may free the block and return
    NULL, which was then reported as a failure while dbuf->data kept pointing
    at the freed memory.
*/
bool guf_dbuf_shrink_to_fit(guf_dbuf *dbuf)
{
    GUF_ASSERT_RELEASE(dbuf_valid_and_maybe_empty(dbuf));
    const ptrdiff_t new_capacity = dbuf->size;
    if (new_capacity == dbuf->capacity) {
        return true;
    }
    GUF_ASSERT_RELEASE(dbuf->data);

    if (new_capacity == 0) {
        free(dbuf->data);
        dbuf->data = NULL;
        dbuf->capacity = 0;
        return true;
    }

    void *data = realloc(dbuf->data, guf_safe_size_calc(new_capacity, guf_obj_meta_sizeof_obj(dbuf->elem_meta)));
    GUF_ASSERT(data);
    if (!data) {
        return false;
    }
    dbuf->data = data;
    dbuf->capacity = new_capacity;
    return true;
}
/*
    Releases the buffer: calls the element free operation (if the metadata
    provides one) on every element, frees the storage and resets data, size
    and capacity. A buffer without storage is left untouched.
*/
void guf_dbuf_free(guf_dbuf *dbuf)
{
    GUF_ASSERT_RELEASE(dbuf_valid_and_maybe_empty(dbuf));
    if (dbuf->capacity == 0) {
        GUF_ASSERT_RELEASE(!dbuf->data);
        GUF_ASSERT_RELEASE(dbuf->size == 0);
        return;
    }
    GUF_ASSERT_RELEASE(dbuf->data);

    const bool has_elem_free = dbuf->elem_meta.has_ops && dbuf->elem_meta.data.ops->free;
    if (has_elem_free) {
        ptrdiff_t idx = 0;
        while (idx < dbuf->size) {
            dbuf->elem_meta.data.ops->free(get_elem(dbuf, idx));
            ++idx;
        }
    }

    free(dbuf->data);
    dbuf->data = NULL;
    dbuf->capacity = dbuf->size = 0;
}

81
src/guf_dbuf.h Normal file
View File

@ -0,0 +1,81 @@
#ifndef GUF_DBUF_H
#define GUF_DBUF_H
#include <stddef.h>
#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
#include "guf_assert.h"
#include "guf_obj.h"
// Used for the first growth if dbuf->capacity is zero.
#define GUF_DBUF_INITIAL_CAP 8
// Type-erased growable buffer; elements are described by elem_meta.
typedef struct guf_dbuf {
// NOTE(review): not read or written by any function visible in guf_dbuf.c -- confirm purpose.
bool is_init;
// Element storage; NULL exactly when capacity is 0 (asserted in guf_dbuf.c).
void *data;
// Number of stored elements and number of allocated element slots (0 <= size <= capacity).
ptrdiff_t size, capacity;
// Element description; its object size is queried with guf_obj_meta_sizeof_obj().
guf_obj_meta elem_meta;
} guf_dbuf;
// Construction and destruction.
// guf_dbuf_init returns false if the initial allocation fails; the _new
// variants terminate through GUF_ASSERT_RELEASE instead.
bool guf_dbuf_init(guf_dbuf *dbuf, guf_obj_meta elem_meta, ptrdiff_t start_cap);
guf_dbuf guf_dbuf_new_with_capacity(guf_obj_meta elem_meta, ptrdiff_t start_cap);
guf_dbuf guf_dbuf_new(guf_obj_meta elem_meta);
void guf_dbuf_free(guf_dbuf *dbuf);
// Capacity management; both return false if the (re)allocation fails.
bool guf_dbuf_reserve(guf_dbuf *dbuf, ptrdiff_t min_capacity);
bool guf_dbuf_shrink_to_fit(guf_dbuf *dbuf);
// Modifiers. push/insert copy *elem into the buffer according to cpy_opt and
// return NULL if the buffer could not grow.
void *guf_dbuf_push(guf_dbuf *dbuf, void *elem, guf_obj_cpy_opt cpy_opt);
void *guf_dbuf_insert(guf_dbuf *dbuf, void *elem, ptrdiff_t idx, guf_obj_cpy_opt cpy_opt);
void guf_dbuf_erase(guf_dbuf *dbuf, ptrdiff_t idx);
void *guf_dbuf_pop(guf_dbuf *dbuf);
// Element access; these require a non-empty buffer and a valid index
// (checked with GUF_ASSERT_RELEASE in guf_dbuf.c).
void *guf_dbuf_at(guf_dbuf *dbuf, ptrdiff_t idx);
void *guf_dbuf_front(guf_dbuf *dbuf);
void *guf_dbuf_back(guf_dbuf *dbuf);
// NOTE(review): no definitions of the three functions below are visible in
// guf_dbuf.c; also, `cmp` is declared as returning void -- confirm intent.
void guf_dbuf_sort(guf_dbuf *dbuf, void (*cmp)(const void *a, const void *b));
void guf_dbuf_ascending(guf_dbuf *dbuf);
void guf_dbuf_descending(guf_dbuf *dbuf);
// Convenience macros:
//
// The statement-like macros below use GUF_ASSERT_RELEASE, which is defined in
// guf_common.h (not included by this header), so include guf_common.h before
// using them.
//
// Fixes compared to the previous version:
//  - no macro ends in a line continuation any more (a trailing backslash
//    splices the following line, here the next #define, into the macro);
//  - the stray ';' after the parameter list of GUF_DBUF_TRY_PUSH_VAL_CPY is gone;
//  - the expression macros are fully parenthesised.
// The "while (0);" semicolon is kept so that existing call sites written
// without their own ';' continue to compile.

// Create a buffer for plain elements of type elem_type (no object operations).
#define GUF_DBUF_NEW(elem_type) guf_dbuf_new((guf_obj_meta){.has_ops = false, .data.sizeof_obj = sizeof(elem_type)})
#define GUF_DBUF_NEW_WITH_CAP(elem_type, capacity) guf_dbuf_new_with_capacity((guf_obj_meta){.has_ops = false, .data.sizeof_obj = sizeof(elem_type)}, (capacity))
// #define GUF_DBUF_NEW_FROM_OPS(obj_ops_ptr) guf_dbuf_new((guf_obj_meta){.has_ops = true, .data = obj_ops_ptr})
// #define GUF_DBUF_NEW_FROM_OPS_WITH_CAP(obj_ops_ptr, capacity) guf_dbuf_new_with_capacity(-1, (obj_ops_ptr), (capacity))

// Push the value elem_val (copied with GUF_CPY_VALUE); terminates on failure.
#define GUF_DBUF_PUSH_VAL(dbuf_ptr, elem_type, elem_val) do { \
    elem_type tmp_lvalue = elem_val; \
    void *res = guf_dbuf_push(dbuf_ptr, &tmp_lvalue, GUF_CPY_VALUE); \
    GUF_ASSERT_RELEASE(res); \
} while (0);

// Push the value elem_val (copied with GUF_CPY_DEEP); terminates on failure.
#define GUF_DBUF_PUSH_VAL_CPY(dbuf_ptr, elem_type, elem_val) do { \
    elem_type tmp_lvalue = elem_val; \
    void *res = guf_dbuf_push(dbuf_ptr, &tmp_lvalue, GUF_CPY_DEEP); \
    GUF_ASSERT_RELEASE(res); \
} while (0);

// Like GUF_DBUF_PUSH_VAL, but stores the outcome in success_bool_name.
#define GUF_DBUF_TRY_PUSH_VAL(dbuf_ptr, elem_type, elem_val, success_bool_name) do { \
    elem_type tmp_lvalue = elem_val; \
    void *res = guf_dbuf_push(dbuf_ptr, &tmp_lvalue, GUF_CPY_VALUE); \
    success_bool_name = res != NULL; \
} while (0);

// Like GUF_DBUF_PUSH_VAL_CPY, but stores the outcome in success_bool_name.
#define GUF_DBUF_TRY_PUSH_VAL_CPY(dbuf_ptr, elem_type, elem_val, success_bool_name) do { \
    elem_type tmp_lvalue = elem_val; \
    void *res = guf_dbuf_push(dbuf_ptr, &tmp_lvalue, GUF_CPY_DEEP); \
    success_bool_name = res != NULL; \
} while (0);

// Typed element access; each expands to an lvalue of type elem_type.
#define GUF_DBUF_AT_VAL(dbuf_ptr, elem_type, idx) (*(elem_type*)guf_dbuf_at((dbuf_ptr), (idx)))
#define GUF_DBUF_POP_VAL(dbuf_ptr, elem_type) (*(elem_type*)guf_dbuf_pop(dbuf_ptr))
#define GUF_DBUF_LAST_VAL(dbuf_ptr, elem_type) (*(elem_type*)guf_dbuf_back(dbuf_ptr))
#define GUF_DBUF_FIRST_VAL(dbuf_ptr, elem_type) (*(elem_type*)guf_dbuf_front(dbuf_ptr))

// Iterate over all elements of DBUF (a guf_dbuf, not a pointer) with
// ELEM_PTR_NAME of type ELEM_TYPE*. Declares a loop variable named `end`.
#define GUF_DBUF_FOREACH(DBUF, ELEM_TYPE, ELEM_PTR_NAME) for (ELEM_TYPE *ELEM_PTR_NAME = (ELEM_TYPE*)(DBUF).data, *end = ((ELEM_TYPE*)(DBUF).data) + (DBUF).size; ELEM_PTR_NAME != end; ++ELEM_PTR_NAME)
#endif

649
src/guf_dict.c Executable file
View File

@ -0,0 +1,649 @@
// #include <stdint.h>
// #include <stdbool.h>
// #include <stdlib.h>
// #include <string.h>
// #include <stdio.h>
// #include "guf_common.h"
// #include "guf_dict.h"
// /*
// FNV-1a (32 bit) hash function.
// Generally, you should always call guf_hash32/guf_hash64 with GUF_HASH_INIT as the hash argument, unless you want to create "chains" of hashes.
// cf. http://www.isthe.com/chongo/tech/comp/fnv/ (last retrieved: 2023-11-30)
// */
// uint32_t guf_hash32(const void *data, size_t num_bytes, uint32_t hash)
// {
// GUF_ASSERT_RELEASE(data);
// const unsigned char *data_bytes = (const unsigned char*)data; // This does not break strict-aliasing rules I think...
// const uint32_t FNV_32_PRIME = 16777619ul;
// for (size_t i = 0; i < num_bytes; ++i) {
// hash ^= data_bytes[i];
// hash *= FNV_32_PRIME;
// }
// return hash;
// }
// uint64_t guf_hash64(const void *data, size_t num_bytes, uint64_t hash)
// {
// GUF_ASSERT_RELEASE(data);
// const unsigned char *data_bytes = (const unsigned char*)data; // This does not break strict-aliasing rules I think...
// const uint64_t FNV_64_PRIME = 1099511628211ull;
// for (size_t i = 0; i < num_bytes; ++i) {
// hash ^= data_bytes[i];
// hash *= FNV_64_PRIME;
// }
// return hash;
// }
// static inline size_t find_next_power_of_two(size_t num)
// {
// GUF_ASSERT_RELEASE(num > 0);
// size_t pof2 = 1;
// while (pof2 < num) {
// GUF_ASSERT_RELEASE(pof2 * 2 > pof2);
// pof2 *= 2;
// }
// return pof2;
// }
// static const guf_dict_kv_id KV_ID_NULL = GUF_DICT_HASH_MAX;
// static const guf_dict_kv_id KV_ID_TOMBSTONE = GUF_DICT_HASH_MAX - 1;
// static const guf_dict_kv_id KV_ID_MAX = GUF_DICT_HASH_MAX - 2;
// const guf_dict GUF_DICT_UNINITIALISED = {
// .capacity_kv_status = 0,
// .size = 0,
// .num_tombstones = 0,
// .keys = NULL,
// .vals = NULL,
// .val_funcs = {.eq = NULL, .hash = NULL, .cpy = NULL, .move = NULL, .free = NULL, .type_size = 0},
// .kv_status = NULL,
// .probe_t = 0,
// .max_load_fac_fx10 = 0,
// .max_probelen = 0,
// };
// static inline void *cpy_key(guf_dict *ht, void *dst, const void *src, bool default_cpy)
// {
// if (default_cpy || ht->key_funcs.cpy == NULL) { // Default copy.
// return memcpy(dst, src, ht->key_funcs.type_size);
// } else {
// return ht->key_funcs.cpy(dst, src);
// }
// }
// static inline void *cpy_val(guf_dict *ht, void *dst, const void *src, bool default_cpy)
// {
// if (dst == NULL || src == NULL) {
// GUF_ASSERT_RELEASE(ht->val_funcs.type_size == 0);
// return NULL;
// }
// if (default_cpy || ht->val_funcs.cpy == NULL) { // Default copy.
// return memcpy(dst, src, ht->val_funcs.type_size);
// } else {
// return ht->val_funcs.cpy(dst, src);
// }
// }
// static inline void *cpy_or_move_val(guf_dict *ht, void *dst, void *src, guf_dict_insert_opt opts)
// {
// if (dst == NULL || src == NULL) {
// GUF_ASSERT_RELEASE(ht->val_funcs.type_size == 0);
// return NULL;
// }
// if ((opts & GUF_DICT_MOVE_VAL)) {
// GUF_ASSERT_RELEASE(ht->val_funcs.move);
// return ht->val_funcs.move(dst, src);
// } else { // Default copy.
// return cpy_val(ht, dst, src, false);
// }
// }
// static inline void *cpy_or_move_key(guf_dict *ht, void *dst, void *src, guf_dict_insert_opt opts)
// {
// if ((opts & GUF_DICT_MOVE_KEY)) {
// GUF_ASSERT_RELEASE(ht->key_funcs.move);
// return ht->key_funcs.move(dst, src);
// } else {
// return cpy_key(ht, dst, src, false);
// }
// }
// static inline guf_hash_size_t key_hash(const guf_dict *ht, const void *key)
// {
// if (ht->key_funcs.hash) {
// return ht->key_funcs.hash(key);
// } else { // Default hash function.
// return guf_hash(key, ht->key_funcs.type_size, GUF_HASH_INIT);
// }
// }
// static inline bool kv_stat_occupied(guf_dict_kv_status kv_stat) {
// return kv_stat.kv_id != KV_ID_NULL && kv_stat.kv_id != KV_ID_TOMBSTONE;
// }
// static void *key_get(guf_dict *ht, guf_dict_kv_id idx)
// {
// GUF_ASSERT_RELEASE(idx <= KV_ID_MAX);
// char *ptr = (char*)ht->keys;
// return ptr + idx * ht->key_funcs.type_size;
// }
// static void *val_get(guf_dict *ht, guf_dict_kv_id idx)
// {
// if (ht->val_funcs.type_size == 0) {
// return NULL;
// }
// GUF_ASSERT_RELEASE(idx <= KV_ID_MAX);
// char *ptr = (char*)ht->vals;
// return ptr + idx * ht->val_funcs.type_size ;
// }
// static inline bool key_eq(guf_dict *ht, const void *key_a, const void *key_b)
// {
// if (ht->key_funcs.eq) {
// return ht->key_funcs.eq(key_a, key_b);
// } else { // Default equality function.
// return 0 == memcmp(key_a, key_b, ht->key_funcs.type_size);
// }
// }
// static inline bool key_eq_at(guf_dict *ht, size_t idx, const void *key, guf_hash_size_t hash_of_key)
// {
// GUF_ASSERT(idx < ht->capacity_kv_status);
// GUF_ASSERT(kv_stat_occupied(ht->kv_status[idx]));
// if (ht->kv_status[idx].k_hash == hash_of_key) { // Hashes match -> we check if the keys are actually equal.
// return key_eq(ht, key_get(ht, ht->kv_status[idx].kv_id), key);
// } else {
// // Hashes don't match -> early exit (we save a memory load from ht->kv_elems).
// return false;
// }
// }
// static inline size_t probe_offset(size_t probe_len, guf_dict_probe_type probe_t)
// {
// GUF_ASSERT(probe_len > 0);
// switch (probe_t)
// {
// case GUF_DICT_PROBE_LINEAR:
// default:
// return 1;
// case GUF_DICT_PROBE_QUADRATIC:
// /*
// Guaranteed to visit each index once for capacities which are powers of two.
// cf. https://fgiesen.wordpress.com/2015/02/22/triangular-numbers-mod-2n/ (last-retrieved 2024-07-29)
// */
// return probe_len * (probe_len + 1) / 2; // 1, 3, 6, 10, 15, ... (starting from probe_len == 1)
// }
// }
// static inline size_t mod_pow2(size_t a, size_t b) // a mod b (with b being a power of two.)
// {
// GUF_ASSERT(b > 0);
// return a & (b - 1);
// }
// static size_t find_idx(guf_dict *ht, const void *key, bool *key_exists, bool find_first_free)
// {
// const guf_hash_size_t hash = key_hash(ht, key);
// size_t idx = mod_pow2(hash, ht->capacity_kv_status); // hash % ht->capacity
// const size_t start_idx = idx;
// size_t probe_len = 1;
// size_t first_tombstone_idx = SIZE_MAX;
// do {
// if (ht->kv_status[idx].kv_id == KV_ID_NULL) { // 1.) Empty.
// if (first_tombstone_idx != SIZE_MAX) {
// idx = first_tombstone_idx;
// }
// ht->max_probelen = GUF_MAX(probe_len, ht->max_probelen);
// GUF_ASSERT(!kv_stat_occupied(ht->kv_status[idx]));
// *key_exists = false;
// return idx;
// } else if (ht->kv_status[idx].kv_id == KV_ID_TOMBSTONE) { // 2.) Tombstone.
// if (first_tombstone_idx == SIZE_MAX) {
// first_tombstone_idx = idx;
// }
// if (find_first_free) {
// goto end;
// } else {
// goto probe;
// }
// } else if (key_eq_at(ht, idx, key, hash)) { // 3.) Key already exists.
// ht->max_probelen = GUF_MAX(probe_len, ht->max_probelen);
// GUF_ASSERT(kv_stat_occupied(ht->kv_status[idx]));
// *key_exists = true;
// return idx;
// } else { // 4.) Have to probe due to hash-collision (idx is already occupied, but not by the key).
// probe:
// idx = mod_pow2(idx + probe_offset(probe_len, ht->probe_t), ht->capacity_kv_status);
// ++probe_len;
// GUF_ASSERT_RELEASE(probe_len < UINT32_MAX);
// }
// } while (idx != start_idx);
// end:
// if (first_tombstone_idx != SIZE_MAX) { // Edge case: No empty slots, but found tombstone.
// ht->max_probelen = GUF_MAX(probe_len, ht->max_probelen);
// GUF_ASSERT(!kv_stat_occupied(ht->kv_status[first_tombstone_idx]));
// *key_exists = false;
// return first_tombstone_idx;
// }
// *key_exists = false;
// return SIZE_MAX; // Failed to find an idx.
// }
// static void insert_kv(guf_dict *ht, void *key, void *val, size_t idx, guf_dict_insert_opt opts, bool default_cpy_key, bool default_cpy_val)
// {
// GUF_ASSERT_RELEASE(idx < ht->capacity_kv_status);
// GUF_ASSERT_RELEASE(ht->kv_status[idx].kv_id == KV_ID_NULL || ht->kv_status[idx].kv_id == KV_ID_TOMBSTONE);
// GUF_ASSERT_RELEASE(!kv_stat_occupied(ht->kv_status[idx]));
// if (!default_cpy_key) {
// if (!cpy_or_move_key(ht, key_get(ht, idx), key, opts)) {
// cpy_key(ht, key_get(ht, idx), key, true);
// }
// } else {
// cpy_key(ht, key_get(ht, idx), key, true);
// }
// if (!default_cpy_val) {
// if (!cpy_or_move_val(ht, val_get(ht, idx), val, opts)) {
// cpy_val(ht, val_get(ht, idx), val, true);
// }
// } else {
// cpy_val(ht, val_get(ht, idx), val, true);
// }
// if (ht->kv_status[idx].kv_id == KV_ID_TOMBSTONE) {
// GUF_ASSERT_RELEASE(ht->num_tombstones > 0);
// --ht->num_tombstones;
// }
// ht->kv_status[idx].k_hash = key_hash(ht, key_get(ht, idx));
// ++ht->size;
// }
// static void update_v(guf_dict *ht, void *val, size_t idx, guf_dict_insert_opt opts)
// {
// GUF_ASSERT_RELEASE(idx < ht->capacity_kv_status);
// GUF_ASSERT_RELEASE(kv_stat_occupied(ht->kv_status[idx]));
// if (ht->val_funcs.free) {
// ht->val_funcs.free(val_get(ht, idx));
// }
// cpy_or_move_val(ht, val_get(ht, idx), val, opts);
// }
// bool guf_dict_init(guf_dict *ht, size_t start_capacity, const guf_dict_kv_funcs *key_funcs, const guf_dict_kv_funcs *val_funcs)
// {
// GUF_ASSERT_RELEASE(ht);
// GUF_ASSERT_RELEASE(ht->capacity_kv_status == 0 && ht->size == 0 && ht->num_tombstones == 0 && ht->max_probelen == 0);
// GUF_ASSERT_RELEASE(ht->keys == NULL && ht->vals == NULL);
// GUF_ASSERT_RELEASE(key_funcs && val_funcs);
// ht->key_funcs = *key_funcs;
// ht->val_funcs = *val_funcs;
// if (val_funcs->type_size == 0) {
// // TODO: is a set!
// }
// if (start_capacity < 1) {
// start_capacity = 1;
// }
// ht->capacity_kv_status = find_next_power_of_two(start_capacity);
// ht->size = ht->num_tombstones = 0;
// ht->max_probelen = 0;
// GUF_ASSERT_RELEASE(ht->key_funcs.type_size > 0);
// // GUF_ASSERT_RELEASE(ht->val_funcs.type_size > 0);
// ht->probe_t = GUF_DICT_PROBE_QUADRATIC;
// ht->max_load_fac_fx10 = GUF_DICT_MAX_LOAD_FAC_FX10_DEFAULT;
// GUF_ASSERT_RELEASE(ht->max_load_fac_fx10 != 0 && ht->max_load_fac_fx10 <= 1024);
// ht->keys = calloc(ht->capacity_kv_status, ht->val_funcs.type_size);
// if (!ht->keys) {
// return false;
// }
// ht->vals = NULL;
// if (ht->val_funcs.type_size > 0) {
// ht->vals = calloc(ht->capacity_kv_status, ht->val_funcs.type_size);
// if (!ht->vals) {
// free(ht->keys);
// return false;
// }
// }
// ht->kv_status = calloc(ht->capacity_kv_status, sizeof(uint8_t));
// if (!ht->kv_status) {
// free(ht->keys);
// free(ht->vals);
// return false;
// }
// for (size_t i = 0; i < ht->capacity_kv_status; ++i) {
// GUF_ASSERT(ht->kv_status[i].kv_id == KV_ID_NULL);
// }
// return ht;
// }
// bool guf_dict_insert(guf_dict *ht, void *key, void *val, guf_dict_insert_opt opts)
// {
// GUF_ASSERT_RELEASE(ht);
// GUF_ASSERT_RELEASE(ht->capacity_kv_status > 0 && ht->size <= ht->capacity_kv_status);
// GUF_ASSERT_RELEASE(ht->keys && ht->vals && ht->kv_status);
// if ((opts & GUF_DICT_MOVE_KEY) && ht->key_funcs.move == NULL) {
// // Ignore -Wunused-value.
// fprintf(stderr, "guf_dict_insert: key_funcs.move is NULL while GUF_DICT_MOVE_KEY is set\n");
// GUF_ASSERT(false);
// return false;
// }
// if ((opts & GUF_DICT_MOVE_VAL) && ht->val_funcs.move == NULL) {
// // Ignore -Wunused-value.
// fprintf(stderr, "guf_dict_insert: val_funcs.move is NULL while GUF_DICT_MOVE_VAL is set\n");
// GUF_ASSERT(false);
// return false;
// }
// if (guf_dict_load_factor_fx10(ht) > ht->max_load_fac_fx10 || ht->capacity_kv_status == ht->size) { // Handle growth:
// const size_t new_cap = 2 * ht->capacity_kv_status; // TODO: Limit to MAX_CAPACITY
// bool overflow = new_cap <= ht->capacity_kv_status;
// GUF_ASSERT(!overflow);
// if (overflow) {
// return false;
// }
// void *new_keys = calloc(new_cap, ht->key_funcs.type_size);
// if (!new_keys) {
// return false;
// }
// void *new_vals = NULL;
// if (ht->val_funcs.type_size > 0) {
// new_vals = calloc(new_cap, ht->val_funcs.type_size);
// if (!new_vals) {
// free(new_keys);
// return false;
// }
// } else {
// GUF_ASSERT_RELEASE(ht->vals == NULL);
// }
// guf_dict_kv_status *new_kv_status = calloc(new_cap, sizeof(guf_dict_kv_status)); // No realloc here!
// if (!new_kv_status) {
// free(new_keys);
// free(new_vals);
// return false;
// }
// for (size_t i = 0; i < new_cap; ++i) {
// new_kv_status[i].kv_id = KV_ID_NULL;
// new_kv_status[i].k_hash = 0;
// }
// guf_dict ht_new = *ht;
// ht_new.kv_status = new_kv_status;
// ht_new.keys = new_keys;
// ht_new.vals = new_vals;
// ht_new.size = 0;
// ht_new.capacity_kv_status = new_cap;
// ht_new.max_probelen = 0;
// size_t new_size = 0;
// for (size_t i = 0; i < ht->capacity_kv_status; ++i) {
// if (kv_stat_occupied(ht->kv_status[i])) {
// bool key_exists = false;
// const size_t new_idx = find_idx(&ht_new, key_get(ht, i), &key_exists, true);
// GUF_ASSERT_RELEASE(new_idx != SIZE_MAX);
// GUF_ASSERT_RELEASE(!key_exists);
// bool dumb_copy_key = ht->key_funcs.move == NULL;
// bool dumb_copy_val = ht->val_funcs.move == NULL;
// insert_kv(&ht_new, key_get(ht, i), val_get(ht, i), new_idx, GUF_DICT_MOVE_KEY | GUF_DICT_MOVE_VAL, dumb_copy_key, dumb_copy_val);
// ++new_size;
// }
// }
// GUF_ASSERT_RELEASE(new_size == ht->size);
// free(ht->kv_status);
// free(ht->keys);
// free(ht->vals);
// *ht = ht_new;
// }
// bool key_exists = false;
// const size_t idx = find_idx(ht, key, &key_exists, true);
// GUF_ASSERT_RELEASE(idx != SIZE_MAX);
// GUF_ASSERT_RELEASE(!kv_stat_occupied(ht->kv_status[idx]));
// if (key_exists) {
// update_v(ht, val, idx, opts);
// } else {
// insert_kv(ht, key, val, idx, opts, false, false);
// }
// return true;
// }
// bool guf_dict_contains_key(const guf_dict *ht, const void *key)
// {
// GUF_ASSERT_RELEASE(ht);
// GUF_ASSERT_RELEASE(ht->capacity_kv_status > 0 && ht->size <= ht->capacity_kv_status);
// GUF_ASSERT_RELEASE (ht->keys && ht->vals && ht->kv_status);
// bool key_exists = false;
// const size_t idx = find_idx((guf_dict*)ht, key, &key_exists, false); // TODO: const cast
// GUF_ASSERT_RELEASE(!(key_exists && idx == SIZE_MAX));
// return key_exists;
// }
// void *guf_dict_get_val(guf_dict *ht, const void *key)
// {
// GUF_ASSERT_RELEASE(ht);
// GUF_ASSERT_RELEASE(ht->capacity_kv_status > 0 && ht->size <= ht->capacity_kv_status);
// GUF_ASSERT_RELEASE(ht->keys && ht->vals && ht->kv_status);
// if (ht->capacity_kv_status == 0 || ht->size == 0) {
// return NULL;
// }
// bool key_exists = false;
// const size_t idx = find_idx(ht, key, &key_exists, false);
// GUF_ASSERT_RELEASE(idx != SIZE_MAX);
// if (!key_exists) {
// return NULL;
// } else {
// GUF_ASSERT(kv_stat_occupied(ht->kv_status[idx]));
// return val_get(ht, idx);
// }
// }
// bool guf_dict_remove(guf_dict *ht, const void *key)
// {
// GUF_ASSERT_RELEASE(ht);
// GUF_ASSERT_RELEASE(ht->capacity_kv_status > 0 && ht->size <= ht->capacity_kv_status);
// GUF_ASSERT_RELEASE(ht->keys && ht->vals && ht->kv_status);
// if (ht->size == 0) {
// return false;
// }
// bool key_exists = false;
// const size_t idx = find_idx(ht, key, &key_exists, false);
// if (!key_exists) {
// return false;
// }
// if (ht->key_funcs.free) {
// ht->key_funcs.free(key_get(ht, idx));
// }
// if (ht->val_funcs.free) {
// ht->val_funcs.free(val_get(ht, idx));
// }
// ht->kv_status[idx].kv_id = KV_ID_TOMBSTONE;
// --ht->size;
// ++ht->num_tombstones;
// GUF_ASSERT(ht->size + ht->num_tombstones <= ht->capacity_kv_status);
// return true;
// }
// uint32_t guf_dict_load_factor_fx10(const guf_dict *ht)
// {
// const uint64_t fx10_scale = 1024; // 2^10 (represents 1 in fx10 fixed point format).
// GUF_ASSERT_RELEASE(ht->capacity_kv_status >= ht->size);
// if (ht->capacity_kv_status == 0) {
// return 0;
// }
// size_t size = ht->size + ht->num_tombstones;
// // <=> size * fx_scale * fx_scale) / (ht->capacity * fx_scale)
// GUF_ASSERT(size <= ht->capacity_kv_status);
// uint64_t load_fac = (size * fx10_scale) / (uint64_t)(ht->capacity_kv_status);
// GUF_ASSERT_RELEASE(load_fac <= fx10_scale);
// return (uint32_t)load_fac;
// }
// double guf_dict_load_factor_double(const guf_dict *ht)
// {
// GUF_ASSERT_RELEASE(ht->capacity_kv_status >= ht->size);
// if (ht->capacity_kv_status == 0 || ht->capacity_kv_status == 0) {
// return 0.f;
// }
// size_t size = ht->size + ht->num_tombstones;
// GUF_ASSERT(size <= ht->capacity_kv_status);
// return size / (double)(ht->capacity_kv_status);
// }
// void guf_dict_free(guf_dict *ht)
// {
// if (!ht) {
// return;
// }
// if (!ht->kv_status && !ht->keys && !ht->vals && ht->size == 0) {
// return;
// }
// for (size_t i = 0; i < ht->size; ++i) { // TODO: ht->size, not ht->capacity ?
// if (ht->keys && ht->key_funcs.free) {
// ht->key_funcs.free(key_get(ht, i));
// }
// if (ht->vals && ht->val_funcs.free) {
// ht->val_funcs.free(val_get(ht, i));
// }
// }
// free(ht->keys);
// free(ht->vals);
// free(ht->kv_status);
// ht->keys = NULL;
// ht->vals = NULL;
// ht->kv_status = NULL;
// ht->capacity_kv_status = ht->size = ht->num_tombstones = ht->max_probelen = 0;
// *ht = GUF_DICT_UNINITIALISED;
// }
// guf_dict_iter guf_dict_iter_begin(guf_dict *ht)
// {
// guf_dict_iter iter = {.elems_seen = ht->size + 1, .idx = 0, .ht = ht, .key = NULL, .val = NULL};
// if (ht->size == 0) {
// return iter; // end iter
// }
// for (size_t idx = 0; idx < ht->capacity_kv_status; ++idx) {
// if (kv_stat_occupied(ht->kv_status[idx])) {
// iter.idx = idx;
// iter.elems_seen = 1;
// iter.key = key_get(iter.ht, iter.idx);
// iter.val = val_get(iter.ht, iter.idx);
// GUF_ASSERT_RELEASE(iter.key != NULL);
// return iter;
// }
// }
// return iter; // end iter
// }
// bool guf_dict_iter_is_end(guf_dict_iter *iter)
// {
// return iter->elems_seen == iter->ht->size + 1;
// }
// void guf_dict_iter_advance(guf_dict_iter *iter)
// {
// if (guf_dict_iter_is_end(iter)) {
// return;
// }
// if (iter->elems_seen == iter->ht->size) {
// ++iter->elems_seen;
// iter->key = NULL;
// iter->val = NULL;
// return;
// }
// GUF_ASSERT_RELEASE(iter->elems_seen < iter->ht->size);
// GUF_ASSERT_RELEASE(iter->idx < iter->ht->capacity_kv_status);
// GUF_ASSERT_RELEASE(iter->key);
// for (size_t idx = iter->idx + 1; idx < iter->ht->capacity_kv_status; ++idx) {
// if (kv_stat_occupied(iter->ht->kv_status[idx])) {
// iter->idx = idx;
// iter->key = key_get(iter->ht, iter->idx);
// iter->val = val_get(iter->ht, iter->idx);
// ++iter->elems_seen;
// return;
// }
// }
// GUF_ASSERT_RELEASE(false);
// iter->elems_seen = iter->ht->size + 1;
// }
// /*
// Removal of keys without tombstones (would only work for linear probing, I think).
// cf. https://stackoverflow.com/questions/9127207/hash-table-why-deletion-is-difficult-in-open-addressing-scheme/24886657#24886657 (last-retrieved 2024-07-26)
// The following del function from https://github.com/attractivechaos/klib/blob/6f73c80c6409d6f91cdf66ec1a002177274da2e7/cpp/khashl.hpp#L142-L150 (last-retrieved 2024-07-26)
// int del(khint_t i) {
// khint_t j = i, k, mask, nb = n_buckets();
// if (keys == 0 || i >= nb) return 0;
// mask = nb - khint_t(1);
// while (1) {
// j = (j + khint_t(1)) & mask;
// if (j == i || !__kh_used(used, j)) break; //j==i only when the table is completely full
// k = __kh_h2b(Hash()(keys[j]), bits);
// if (k <= i || k > j)
// keys[i] = keys[j], i = j;
// }
// __kh_set_unused(used, i);
// --count;
// return 1;
// }
// cf. https://en.wikipedia.org/w/index.php?title=Hash_table&oldid=95275577 (last-retrieved 2024-07-26)
// Note:
// - For all records in a cluster, there must be no vacant slots between their natural hash position
// and their current position (else lookups will terminate before finding the record).
// - i is a vacant slot that might be invalidating this property for subsequent records in the cluster.
// - j is such a subsequent record.
// - k is the raw hash where the record at j would naturally land in the hash table if there were no collisions.
// - This test is asking if the record at j is invalidly positioned with respect
// to the required properties of a cluster now that i is vacant.
// */

111
src/guf_dict.h Executable file
View File

@ -0,0 +1,111 @@
#ifndef GUF_DICT_H
#define GUF_DICT_H
#include <stddef.h>
#include <stdint.h>
#include <assert.h>
#include "guf_common.h"
// Probing strategy a guf_dict uses to pick the next slot after a collision.
typedef enum guf_dict_probe_type {
    GUF_DICT_PROBE_LINEAR = 0,
    GUF_DICT_PROBE_QUADRATIC
} guf_dict_probe_type;
// ~0.65 in fx10 fixed point (0.65 * 2^10)
#define GUF_DICT_MAX_LOAD_FAC_FX10_DEFAULT 666ul
#define GUF_DICT_PROBE_TYPE_DEFAULT GUF_DICT_PROBE_QUADRATIC
#define GUF_HASH32_INIT 2166136261ul
#define GUF_HASH64_INIT 14695981039346656037ull
uint32_t guf_hash32(const void *data, size_t num_bytes, uint32_t hash);
uint64_t guf_hash64(const void *data, size_t num_bytes, uint64_t hash);
// Compile-time selection of the generic hash width:
// with GUF_DICT_USE_32_BIT_HASH defined, guf_hash / GUF_HASH_INIT / GUF_DICT_HASH_MAX
// map to the 32-bit variants; otherwise they map to the 64-bit variants.
#ifdef GUF_DICT_USE_32_BIT_HASH
#define GUF_HASH_INIT GUF_HASH32_INIT
// Thin wrapper that forwards all arguments to guf_hash32.
// Callers in this project pass GUF_HASH_INIT as the `hash` argument.
static inline uint32_t guf_hash(const void *data, size_t num_bytes, uint32_t hash) {
return guf_hash32(data, num_bytes, hash);
}
#define GUF_DICT_HASH_MAX UINT32_MAX
#else
#define GUF_HASH_INIT GUF_HASH64_INIT
// Thin wrapper that forwards all arguments to guf_hash64.
// Callers in this project pass GUF_HASH_INIT as the `hash` argument.
static inline uint64_t guf_hash(const void *data, size_t num_bytes, uint64_t hash) {
return guf_hash64(data, num_bytes, hash);
}
#define GUF_DICT_HASH_MAX UINT64_MAX
#endif
// Table of per-type callbacks plus the element size; a guf_dict holds one table
// for its keys and one for its values (see guf_dict.key_funcs / guf_dict.val_funcs).
// NOTE(review): `bool` is used here but <stdbool.h> is not included by this header
// directly; presumably guf_common.h provides it - confirm.
typedef struct guf_dict_kv_funcs {
// Only used for keys:
bool (*eq)(const void *key_a, const void *key_b); // Can be NULL for keys and vals. Can be left uninitialised for vals.
guf_hash_size_t (*hash)(const void *key); // Can be NULL for keys and vals. Can be left uninitialised for vals.
// Used for keys and vals:
void *(*cpy)(void *key_or_val_dst, const void *key_or_val_src); // Can be NULL for keys and vals. Never leave uninitialised.
void *(*move)(void *dst, void *key_or_val_src); // Can be NULL for keys and vals. Never leave uninitialised.
void (*free)(void *key_or_val); // Can be NULL for keys and vals. Never leave uninitialised.
size_t type_size; // Must always be set to sizeof(key_or_val).
} guf_dict_kv_funcs;
// Identifier type stored in each status slot; it shares the width of the hash type.
typedef guf_hash_size_t guf_dict_kv_id;
// Per-slot bookkeeping record of a guf_dict (guf_dict.kv_status points to an array of these).
typedef struct guf_dict_kv_status {
guf_dict_kv_id kv_id; // Slot identifier. NOTE(review): presumably also encodes "empty"/"tombstone" sentinels - confirm against guf_dict.c.
guf_hash_size_t k_hash; // Hash value associated with the slot's key.
} guf_dict_kv_status;
// Hash table (dictionary) handle.
// Keys and values live in separate untyped arrays; element sizes and callbacks
// come from key_funcs / val_funcs.
typedef struct guf_dict {
guf_dict_kv_status *kv_status; // Array of per-slot status records.
void *keys, *vals; // Untyped storage for keys and for values.
guf_dict_kv_funcs key_funcs, val_funcs; // Callback tables and element sizes for keys / values.
guf_dict_probe_type probe_t; // Collision probing strategy.
uint32_t max_load_fac_fx10; // Maximum load factor in fx10 fixed point (1024 represents 1.0).
// size: number of stored entries; capacity_kv_status: length of kv_status;
// num_tombstones / max_probelen: bookkeeping counters.
// NOTE(review): capacity_key_val is presumably the capacity of keys/vals - confirm.
size_t size, capacity_kv_status, capacity_key_val, num_tombstones, max_probelen;
} guf_dict;
extern const guf_dict GUF_DICT_UNINITIALISED;
// Options describing how an inserted key/value is transferred into the dict.
// The non-zero values are distinct bits (1 and 2), so they can be OR-ed together.
typedef enum guf_dict_insert_opt {
GUF_DICT_CPY_KEY_VAL = 0, // No flag set: copy both key and value.
GUF_DICT_MOVE_KEY = 1, // Move the key instead of copying it.
GUF_DICT_MOVE_VAL = 2, // Move the value instead of copying it.
} guf_dict_insert_opt;
bool guf_dict_init(guf_dict *ht, size_t start_capacity, const guf_dict_kv_funcs *key_funcs, const guf_dict_kv_funcs *val_funcs);
void guf_dict_free(guf_dict *ht);
bool guf_dict_insert(guf_dict *ht, void *key, void *val); // bool copy_by_value
bool guf_dict_remove(guf_dict *ht, const void *key);
void *guf_dict_get_val(guf_dict *ht, const void *key);
bool guf_dict_contains_key(const guf_dict *ht, const void *key);
uint32_t guf_dict_load_factor_fx10(const guf_dict *ht);
double guf_dict_load_factor_double(const guf_dict *ht);
// Converts a load factor given as a double into fx10 fixed point (1024 == 1.0).
// The input is first clamped with GUF_CLAMP(n, 0, 1); the scaled result is
// converted to uint32_t on return, which discards the fractional part.
static inline uint32_t guf_dict_dbl_to_fx10_load_fac(double n)
{
    const uint32_t one_fx10 = 1024; // 2^10
    const double clamped = GUF_CLAMP(n, 0, 1);
    return clamped * one_fx10;
}
// Iterator over the entries of a guf_dict; obtained from guf_dict_iter_begin and
// stepped with guf_dict_iter_advance until guf_dict_iter_is_end reports true.
typedef struct guf_dict_iter {
guf_dict *ht; // Dictionary being iterated.
size_t idx; // Current slot index within the dictionary.
size_t elems_seen; // Count of entries visited so far.
const void *key; // Current key (read-only through the iterator).
void *val; // Current value.
} guf_dict_iter;
guf_dict_iter guf_dict_iter_begin(guf_dict *ht);
bool guf_dict_iter_is_end(guf_dict_iter *iter);
void guf_dict_iter_advance(guf_dict_iter *iter);
// Ready-made callback tables for common key/value types (defined in guf_dict_impls.c).
// The 8- and 16-bit integer tables are defined there with external linkage as well,
// so they are declared here to make them usable by including this header.
extern const guf_dict_kv_funcs GUF_DICT_FUNCS_CSTR;
extern const guf_dict_kv_funcs GUF_DICT_FUNCS_GUF_STR;
extern const guf_dict_kv_funcs GUF_DICT_FUNCS_i8;
extern const guf_dict_kv_funcs GUF_DICT_FUNCS_i16;
extern const guf_dict_kv_funcs GUF_DICT_FUNCS_i32;
extern const guf_dict_kv_funcs GUF_DICT_FUNCS_i64;
extern const guf_dict_kv_funcs GUF_DICT_FUNCS_u8;
extern const guf_dict_kv_funcs GUF_DICT_FUNCS_u16;
extern const guf_dict_kv_funcs GUF_DICT_FUNCS_u32;
extern const guf_dict_kv_funcs GUF_DICT_FUNCS_u64;
extern const guf_dict_kv_funcs GUF_DICT_FUNCS_float;
extern const guf_dict_kv_funcs GUF_DICT_FUNCS_double;
extern const guf_dict_kv_funcs GUF_DICT_FUNCS_NULL;
#endif

348
src/guf_dict_impls.c Normal file
View File

@ -0,0 +1,348 @@
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include "guf_common.h"
#include "guf_dict.h"
#include "guf_str.h"
// Guf_str
// Equality callback for guf_str elements: both arguments point to guf_str
// objects, which are compared with guf_str_equals.
static bool guf_dict_eq_guf_str(const void *guf_str_a, const void *guf_str_b)
{
    const guf_str *lhs = (const guf_str*)guf_str_a;
    const guf_str *rhs = (const guf_str*)guf_str_b;
    return guf_str_equals(lhs, rhs);
}
// Hash callback for guf_str elements: hashes guf_str_len(str) bytes of the
// string's character data, starting from GUF_HASH_INIT.
static guf_hash_size_t guf_dict_hash_guf_str(const void *str)
{
    const guf_str *s = (const guf_str*)str;
    const char *chars = guf_str_get_const_c_str_non_zero_term(s);
    size_t len = guf_str_len(s);
    return guf_hash(chars, len, GUF_HASH_INIT);
}
static void *guf_dict_cpy_guf_str(void *dst, const void *key_or_val)
{
GUF_ASSERT_RELEASE(dst && key_or_val);
const guf_str *src_ptr = (guf_str*)key_or_val;
guf_str *dst_ptr = (guf_str*)dst;
guf_str cpy = guf_str_cpy(src_ptr, false);
*dst_ptr = cpy;
return dst_ptr;
}
// Move callback for guf_str elements.
// Returns NULL without touching dst when the source is a view that is not
// stack allocated (presumably because such a view cannot be moved - confirm
// against guf_str); otherwise stores guf_str_move(src) into *dst and returns dst.
// Library code must not write debug output to stdout, so the refusal is
// reported through the NULL return value only.
static void *guf_dict_move_guf_str(void *dst, void *key_or_val)
{
    GUF_ASSERT_RELEASE(dst && key_or_val);
    guf_str *src_ptr = (guf_str*)key_or_val;
    guf_str *dst_ptr = (guf_str*)dst;
    if (guf_str_is_view(src_ptr) && !guf_str_is_stack_allocated(src_ptr)) {
        return NULL;
    }
    guf_str moved = guf_str_move(src_ptr);
    *dst_ptr = moved;
    return dst_ptr;
}
static void guf_dict_free_guf_str(void *key_or_val)
{
GUF_ASSERT_RELEASE(key_or_val);
guf_str_free((guf_str*)key_or_val);
}
// Regular cstr
// Equality callback for C-string elements. Each argument points to a
// `const char *`; the two strings are equal when strcmp yields 0.
static bool guf_dict_eq_cstr(const void *str_a, const void *str_b)
{
    const char *lhs = *(const char**)str_a;
    const char *rhs = *(const char**)str_b;
    return strcmp(lhs, rhs) == 0;
}
// Hash callback for C-string elements. `str` points to a `const char *`;
// the strlen() bytes of that string (terminator excluded) are hashed
// starting from GUF_HASH_INIT.
static guf_hash_size_t guf_dict_hash_cstr(const void *str)
{
    const char *chars = *(const char**)str;
    size_t len = strlen(chars);
    return guf_hash(chars, len, GUF_HASH_INIT);
}
// Copy callback for C-string elements: duplicates the source string with
// strdup and stores the new pointer in *dst. Always returns dst; a failed
// strdup is only caught by GUF_ASSERT.
static void *guf_dict_cpy_cstr(void *dst, const void *key_or_val)
{
    GUF_ASSERT_RELEASE(dst && key_or_val);
    const char *original = *(const char**)key_or_val;
    char **dst_ptr = (char**)dst;
    *dst_ptr = strdup(original);
    GUF_ASSERT(*dst_ptr);
    return dst_ptr;
}
// Move callback for C-string elements: transfers the pointer held in *src
// to *dst and sets *src to NULL. Returns dst.
static void *guf_dict_move_cstr(void *dst, void *src)
{
    GUF_ASSERT_RELEASE(dst && src);
    char **from = (char**)src;
    char **to = (char**)dst;
    char *taken = *from;
    *to = taken;
    *from = NULL;
    return to;
}
// Free callback for C-string elements: `key_or_val` points to a `char *`,
// and the string it refers to is released with free().
static void guf_dict_free_cstr(void *key_or_val)
{
    GUF_ASSERT_RELEASE(key_or_val);
    char **slot = (char**)key_or_val;
    free(*slot);
}
// Signed ints.
// Hash callback for int8_t elements; `n` points to the value.
// When guf_is_big_endian() reports true, the value's bytes are reversed before
// hashing (presumably so the hash matches the one computed on little-endian hosts).
static guf_hash_size_t guf_dict_hash_i8(const void *n)
{
    if (!guf_is_big_endian()) {
        return guf_hash(n, sizeof(int8_t), GUF_HASH_INIT);
    } else {
        const unsigned char *bytes = (const unsigned char*)n;
        unsigned char bytes_reversed[sizeof(int8_t)];
        // Bound by the array size: sizeof(bytes) would be the size of the pointer,
        // which overruns bytes_reversed and reads past the value.
        const size_t num_bytes = sizeof(bytes_reversed);
        for (size_t i = 0; i < num_bytes; ++i) {
            bytes_reversed[i] = bytes[num_bytes - 1 - i];
        }
        return guf_hash(bytes_reversed, sizeof(int8_t), GUF_HASH_INIT);
    }
}
// Hash callback for int16_t elements; `n` points to the value.
// When guf_is_big_endian() reports true, the value's bytes are reversed before
// hashing (presumably so the hash matches the one computed on little-endian hosts).
static guf_hash_size_t guf_dict_hash_i16(const void *n)
{
    if (!guf_is_big_endian()) {
        return guf_hash(n, sizeof(int16_t), GUF_HASH_INIT);
    } else {
        const unsigned char *bytes = (const unsigned char*)n;
        unsigned char bytes_reversed[sizeof(int16_t)];
        // Bound by the array size: sizeof(bytes) would be the size of the pointer,
        // which overruns bytes_reversed and reads past the value.
        const size_t num_bytes = sizeof(bytes_reversed);
        for (size_t i = 0; i < num_bytes; ++i) {
            bytes_reversed[i] = bytes[num_bytes - 1 - i];
        }
        return guf_hash(bytes_reversed, sizeof(int16_t), GUF_HASH_INIT);
    }
}
// Hash callback for int32_t elements; `n` points to the value.
// When guf_is_big_endian() reports true, the value's bytes are reversed before
// hashing (presumably so the hash matches the one computed on little-endian hosts).
static guf_hash_size_t guf_dict_hash_i32(const void *n)
{
    if (!guf_is_big_endian()) {
        return guf_hash(n, sizeof(int32_t), GUF_HASH_INIT);
    } else {
        const unsigned char *bytes = (const unsigned char*)n;
        unsigned char bytes_reversed[sizeof(int32_t)];
        // Bound by the array size: sizeof(bytes) would be the size of the pointer,
        // which can overrun bytes_reversed and read past the value.
        const size_t num_bytes = sizeof(bytes_reversed);
        for (size_t i = 0; i < num_bytes; ++i) {
            bytes_reversed[i] = bytes[num_bytes - 1 - i];
        }
        return guf_hash(bytes_reversed, sizeof(int32_t), GUF_HASH_INIT);
    }
}
// Hash callback for int64_t elements; `n` points to the value.
// When guf_is_big_endian() reports true, the value's bytes are reversed before
// hashing (presumably so the hash matches the one computed on little-endian hosts).
static guf_hash_size_t guf_dict_hash_i64(const void *n)
{
    if (!guf_is_big_endian()) {
        return guf_hash(n, sizeof(int64_t), GUF_HASH_INIT);
    } else {
        const unsigned char *bytes = (const unsigned char*)n;
        unsigned char bytes_reversed[sizeof(int64_t)];
        // Bound by the array size: sizeof(bytes) would be the size of the pointer,
        // which is not guaranteed to equal the size of the value.
        const size_t num_bytes = sizeof(bytes_reversed);
        for (size_t i = 0; i < num_bytes; ++i) {
            bytes_reversed[i] = bytes[num_bytes - 1 - i];
        }
        return guf_hash(bytes_reversed, sizeof(int64_t), GUF_HASH_INIT);
    }
}
// Unsigned ints.
// Hash callback for uint8_t elements; `n` points to the value.
// When guf_is_big_endian() reports true, the value's bytes are reversed before
// hashing (presumably so the hash matches the one computed on little-endian hosts).
static guf_hash_size_t guf_dict_hash_u8(const void *n)
{
    if (!guf_is_big_endian()) {
        return guf_hash(n, sizeof(uint8_t), GUF_HASH_INIT);
    } else {
        const unsigned char *bytes = (const unsigned char*)n;
        unsigned char bytes_reversed[sizeof(uint8_t)];
        // Bound by the array size: sizeof(bytes) would be the size of the pointer,
        // which overruns bytes_reversed and reads past the value.
        const size_t num_bytes = sizeof(bytes_reversed);
        for (size_t i = 0; i < num_bytes; ++i) {
            bytes_reversed[i] = bytes[num_bytes - 1 - i];
        }
        return guf_hash(bytes_reversed, sizeof(uint8_t), GUF_HASH_INIT);
    }
}
// Hash callback for uint16_t elements; `n` points to the value.
// When guf_is_big_endian() reports true, the value's bytes are reversed before
// hashing (presumably so the hash matches the one computed on little-endian hosts).
static guf_hash_size_t guf_dict_hash_u16(const void *n)
{
    if (!guf_is_big_endian()) {
        return guf_hash(n, sizeof(uint16_t), GUF_HASH_INIT);
    } else {
        const unsigned char *bytes = (const unsigned char*)n;
        unsigned char bytes_reversed[sizeof(uint16_t)];
        // Bound by the array size: sizeof(bytes) would be the size of the pointer,
        // which overruns bytes_reversed and reads past the value.
        const size_t num_bytes = sizeof(bytes_reversed);
        for (size_t i = 0; i < num_bytes; ++i) {
            bytes_reversed[i] = bytes[num_bytes - 1 - i];
        }
        return guf_hash(bytes_reversed, sizeof(uint16_t), GUF_HASH_INIT);
    }
}
// Hash callback for uint32_t elements; `n` points to the value.
// When guf_is_big_endian() reports true, the value's bytes are reversed before
// hashing (presumably so the hash matches the one computed on little-endian hosts).
static guf_hash_size_t guf_dict_hash_u32(const void *n)
{
    if (!guf_is_big_endian()) {
        return guf_hash(n, sizeof(uint32_t), GUF_HASH_INIT);
    } else {
        const unsigned char *bytes = (const unsigned char*)n;
        unsigned char bytes_reversed[sizeof(uint32_t)];
        // Bound by the array size: sizeof(bytes) would be the size of the pointer,
        // which can overrun bytes_reversed and read past the value.
        const size_t num_bytes = sizeof(bytes_reversed);
        for (size_t i = 0; i < num_bytes; ++i) {
            bytes_reversed[i] = bytes[num_bytes - 1 - i];
        }
        return guf_hash(bytes_reversed, sizeof(uint32_t), GUF_HASH_INIT);
    }
}
// Hash callback for uint64_t elements; `n` points to the value.
// When guf_is_big_endian() reports true, the value's bytes are reversed before
// hashing (presumably so the hash matches the one computed on little-endian hosts).
static guf_hash_size_t guf_dict_hash_u64(const void *n)
{
    if (!guf_is_big_endian()) {
        return guf_hash(n, sizeof(uint64_t), GUF_HASH_INIT);
    } else {
        const unsigned char *bytes = (const unsigned char*)n;
        unsigned char bytes_reversed[sizeof(uint64_t)];
        // Bound by the array size: sizeof(bytes) would be the size of the pointer,
        // which is not guaranteed to equal the size of the value.
        const size_t num_bytes = sizeof(bytes_reversed);
        for (size_t i = 0; i < num_bytes; ++i) {
            bytes_reversed[i] = bytes[num_bytes - 1 - i];
        }
        return guf_hash(bytes_reversed, sizeof(uint64_t), GUF_HASH_INIT);
    }
}
// Floats.
// Hash callback for float elements; `n` points to the value, whose object
// representation (raw bytes) is hashed.
// When guf_is_big_endian() reports true, the bytes are reversed before
// hashing (presumably so the hash matches the one computed on little-endian hosts).
static guf_hash_size_t guf_dict_hash_float(const void *n)
{
    if (!guf_is_big_endian()) {
        return guf_hash(n, sizeof(float), GUF_HASH_INIT);
    } else {
        const unsigned char *bytes = (const unsigned char*)n;
        unsigned char bytes_reversed[sizeof(float)];
        // Bound by the array size: sizeof(bytes) would be the size of the pointer,
        // which can overrun bytes_reversed and read past the value.
        const size_t num_bytes = sizeof(bytes_reversed);
        for (size_t i = 0; i < num_bytes; ++i) {
            bytes_reversed[i] = bytes[num_bytes - 1 - i];
        }
        return guf_hash(bytes_reversed, sizeof(float), GUF_HASH_INIT);
    }
}
// Hash callback for double elements; `n` points to the value, whose object
// representation (raw bytes) is hashed.
// When guf_is_big_endian() reports true, the bytes are reversed before
// hashing (presumably so the hash matches the one computed on little-endian hosts).
static guf_hash_size_t guf_dict_hash_double(const void *n)
{
    if (!guf_is_big_endian()) {
        return guf_hash(n, sizeof(double), GUF_HASH_INIT);
    } else {
        const unsigned char *bytes = (const unsigned char*)n;
        unsigned char bytes_reversed[sizeof(double)];
        // Bound by the array size: sizeof(bytes) would be the size of the pointer,
        // which is not guaranteed to equal the size of the value.
        const size_t num_bytes = sizeof(bytes_reversed);
        for (size_t i = 0; i < num_bytes; ++i) {
            bytes_reversed[i] = bytes[num_bytes - 1 - i];
        }
        return guf_hash(bytes_reversed, sizeof(double), GUF_HASH_INIT);
    }
}
// Callback table for heap-allocated C strings. The stored element is a `char *`
// (type_size is sizeof(char*)); copying duplicates the string with strdup and
// freeing releases it with free().
const guf_dict_kv_funcs GUF_DICT_FUNCS_CSTR = {
.type_size = sizeof(char*),
.eq = guf_dict_eq_cstr,
.hash = guf_dict_hash_cstr,
.cpy = guf_dict_cpy_cstr,
.move = guf_dict_move_cstr,
.free = guf_dict_free_cstr,
};
// Callback table for guf_str elements stored by value (type_size is sizeof(guf_str)).
const guf_dict_kv_funcs GUF_DICT_FUNCS_GUF_STR = {
.type_size = sizeof(guf_str),
.eq = guf_dict_eq_guf_str,
.hash = guf_dict_hash_guf_str,
.cpy = guf_dict_cpy_guf_str,
.move = guf_dict_move_guf_str,
.free = guf_dict_free_guf_str,
};
// Callback tables for the fixed-width integer types.
// Each table supplies only the element size and a type-specific hash callback;
// eq, cpy, move and free are NULL (the guf_dict_kv_funcs field comments state
// that NULL is permitted for these members).

// int8_t elements.
const guf_dict_kv_funcs GUF_DICT_FUNCS_i8 = {
.type_size = sizeof(int8_t),
.eq = NULL,
.hash = guf_dict_hash_i8,
.cpy = NULL,
.move = NULL,
.free = NULL,
};
// int16_t elements.
const guf_dict_kv_funcs GUF_DICT_FUNCS_i16 = {
.type_size = sizeof(int16_t),
.eq = NULL,
.hash = guf_dict_hash_i16,
.cpy = NULL,
.move = NULL,
.free = NULL,
};
// int32_t elements.
const guf_dict_kv_funcs GUF_DICT_FUNCS_i32 = {
.type_size = sizeof(int32_t),
.eq = NULL,
.hash = guf_dict_hash_i32,
.cpy = NULL,
.move = NULL,
.free = NULL,
};
// int64_t elements.
const guf_dict_kv_funcs GUF_DICT_FUNCS_i64 = {
.type_size = sizeof(int64_t),
.eq = NULL,
.hash = guf_dict_hash_i64,
.cpy = NULL,
.move = NULL,
.free = NULL,
};
// uint8_t elements.
const guf_dict_kv_funcs GUF_DICT_FUNCS_u8 = {
.type_size = sizeof(uint8_t),
.eq = NULL,
.hash = guf_dict_hash_u8,
.cpy = NULL,
.move = NULL,
.free = NULL,
};
// uint16_t elements.
const guf_dict_kv_funcs GUF_DICT_FUNCS_u16 = {
.type_size = sizeof(uint16_t),
.eq = NULL,
.hash = guf_dict_hash_u16,
.cpy = NULL,
.move = NULL,
.free = NULL,
};
// uint32_t elements.
const guf_dict_kv_funcs GUF_DICT_FUNCS_u32 = {
.type_size = sizeof(uint32_t),
.eq = NULL,
.hash = guf_dict_hash_u32,
.cpy = NULL,
.move = NULL,
.free = NULL,
};
// uint64_t elements.
const guf_dict_kv_funcs GUF_DICT_FUNCS_u64 = {
.type_size = sizeof(uint64_t),
.eq = NULL,
.hash = guf_dict_hash_u64,
.cpy = NULL,
.move = NULL,
.free = NULL,
};
// Callback table for float elements: element size plus a hash callback; all other callbacks are NULL.
const guf_dict_kv_funcs GUF_DICT_FUNCS_float = {
.type_size = sizeof(float),
.eq = NULL,
.hash = guf_dict_hash_float,
.cpy = NULL,
.move = NULL,
.free = NULL,
};
// Callback table for double elements: element size plus a hash callback; all other callbacks are NULL.
const guf_dict_kv_funcs GUF_DICT_FUNCS_double = {
.type_size = sizeof(double),
.eq = NULL,
.hash = guf_dict_hash_double,
.cpy = NULL,
.move = NULL,
.free = NULL,
};
// "No element" table: zero element size and no callbacks.
// NOTE(review): presumably passed as val_funcs to use a dict as a set - confirm against guf_dict.c.
const guf_dict_kv_funcs GUF_DICT_FUNCS_NULL = {
.type_size = 0,
.eq = NULL,
.hash = NULL,
.cpy = NULL,
.move = NULL,
.free = NULL,
};

74
src/guf_obj.c Normal file
View File

@ -0,0 +1,74 @@
#include "guf_obj.h"
// Three-way comparison of two C strings. Both arguments point to a
// `const char *`; the result is whatever strcmp returns for the two strings.
static int cstr_ptr_cmp(const void *a, const void *b)
{
    GUF_ASSERT_RELEASE(a && b);
    const char *lhs = *(const char**)a;
    const char *rhs = *(const char**)b;
    return strcmp(lhs, rhs);
}
// Defines the file-local comparator cstr_ptr_cmp_desc, the reverse-order
// counterpart of cstr_ptr_cmp (see GUF_OBJ_DEFINE_CMP_DESC in guf_obj.h).
static GUF_OBJ_DEFINE_CMP_DESC(cstr_ptr_cmp, cstr_ptr_cmp_desc)
// Equality predicate for C strings: true when cstr_ptr_cmp reports 0.
static bool cstr_ptr_eq(const void *a, const void *b)
{
    GUF_ASSERT_RELEASE(a && b);
    const int order = cstr_ptr_cmp(a, b);
    return order == 0;
}
// Default initialiser for a C-string object: `dst` points to a `char *`,
// which is set to NULL. Returns dst.
static void *cstr_default_init(void *dst)
{
    GUF_ASSERT_RELEASE(dst);
    *(char**)dst = NULL;
    return dst;
}
// Copy initialiser for a C-string object: duplicates the string referenced
// by *src with strdup and stores the duplicate in *dst. A failed strdup
// trips GUF_ASSERT_RELEASE. Returns dst.
static void *cstr_cpy_init(void *dst, const void *src)
{
    GUF_ASSERT_RELEASE(dst && src);
    const char *original = *(const char**)src;
    char *cpy = strdup(original);
    GUF_ASSERT_RELEASE(cpy);
    *(char**)dst = cpy;
    return dst;
}
// Move initialiser for a C-string object: hands the pointer stored in *src
// over to *dst and resets *src to NULL. Returns dst.
// NOTE(review): unlike its siblings this function is not `static`; kept as is.
void *cstr_move_init(void *dst, void *src)
{
    GUF_ASSERT_RELEASE(dst && src);
    char **from = (char**)src;
    char **to = (char**)dst;
    char *taken = *from;
    *to = taken;
    *from = NULL;
    return dst;
}
// Destructor for a C-string object: `ptr` points to a `char *`, and the
// string it refers to is released with free().
// NOTE(review): unlike its siblings this function is not `static`; kept as is.
void cstr_ptr_free(void *ptr)
{
    GUF_ASSERT_RELEASE(ptr);
    char **slot = (char**)ptr;
    free(*slot);
}
// Operations table for owned C strings (guf_cstr_type, i.e. `char *`).
// No hash callback is provided, and type_str is not listed, so it is
// zero-initialised (NULL).
static const guf_obj_ops guf_cstr_ops = {
.sizeof_obj = sizeof(guf_cstr_type),
.cmp = cstr_ptr_cmp,
.cmp_desc = cstr_ptr_cmp_desc,
.eq = cstr_ptr_eq ,
.default_init = cstr_default_init,
.copy_init = cstr_cpy_init,
.move_init = cstr_move_init,
.free = cstr_ptr_free,
.hash = NULL,
};
// Object metadata for owned C strings: carries the guf_cstr_ops operations table.
const guf_obj_meta guf_cstr_obj_meta = {
.has_ops = true,
.data.ops = &guf_cstr_ops
};
// Object metadata for `const char *` strings: no operations table, only the
// object size, so guf_cpy can only value-copy (memcpy) such objects.
const guf_obj_meta guf_const_cstr_obj_meta = {
.has_ops = false,
.data.sizeof_obj = sizeof(guf_const_cstr_type)
};

127
src/guf_obj.h Normal file
View File

@ -0,0 +1,127 @@
#ifndef GUF_OBJ_H
#define GUF_OBJ_H
#include <string.h>
#include "guf_common.h"
// Selects how guf_cpy transfers an object from source to destination.
typedef enum guf_obj_cpy_opt {
GUF_CPY_VALUE = 0, // Byte-wise copy with memcpy.
GUF_CPY_DEEP = 1, // Copy through the type's copy_init callback.
GUF_CPY_MOVE = 2, // Transfer through the type's move_init callback.
} guf_obj_cpy_opt;
// Table of type-specific operations for a generic object type.
typedef struct guf_obj_ops {
ptrdiff_t sizeof_obj; // Size of one object in bytes; guf_cpy and guf_obj_meta_sizeof_obj assert it is > 0.
void *(*default_init)(void *dst_obj); // Default constructor.
void *(*copy_init)(void *dst_obj, const void *src_obj); // Copy constructor (deep copies src).
void *(*move_init)(void *dst_obj, void *src_obj); // Move constructor ("steals" from src).
void (*free)(void *obj); // Destructor; the GUF_OBJ_FREE macros skip the call when this is NULL.
bool (*eq)(const void *obj_a, const void *obj_b); // Equality predicate.
int (*cmp)(const void *obj_a, const void *obj_b); // Three-way comparison.
int (*cmp_desc)(const void *obj_a, const void *obj_b); // Define with GUF_OBJ_DEFINE_CMP_DESC
guf_hash_size_t (*hash)(const void *obj); // Hash callback.
const char *type_str; // NOTE(review): presumably a human-readable type name - confirm.
} guf_obj_ops;
// Defines a function CMP_DESC_FN_NAME(a, b) that orders elements in the reverse
// order of the comparator CMP_FN. The arguments are swapped rather than the
// result negated: -CMP_FN(a, b) would be signed overflow if CMP_FN returned INT_MIN.
#define GUF_OBJ_DEFINE_CMP_DESC(CMP_FN, CMP_DESC_FN_NAME) int CMP_DESC_FN_NAME(const void *a, const void *b) {return CMP_FN(b, a);}
// Metadata describing an object type: either a full operations table or just
// the object's size. has_ops tells which union member is valid.
typedef struct guf_obj_meta {
bool has_ops; // true: data.ops is valid; false: data.sizeof_obj is valid.
union {
const guf_obj_ops *ops; // Operations table (used when has_ops is true).
ptrdiff_t sizeof_obj; // Object size in bytes (used when has_ops is false).
} data;
} guf_obj_meta;
// Returns the object size recorded in `meta`: taken from the operations table
// when one is present, otherwise from the size stored directly in the metadata.
// The size is asserted to be positive.
static inline ptrdiff_t guf_obj_meta_sizeof_obj(guf_obj_meta meta)
{
    ptrdiff_t size;
    if (meta.has_ops) {
        size = meta.data.ops->sizeof_obj;
    } else {
        size = meta.data.sizeof_obj;
    }
    GUF_ASSERT_RELEASE(size > 0);
    return size;
}
// Reports whether two metadata records describe the same type: both must agree
// on has_ops, and then either share the same operations table pointer or
// record the same object size.
static inline bool guf_obj_meta_same(guf_obj_meta a, guf_obj_meta b)
{
    if (a.has_ops != b.has_ops) {
        return false;
    }
    return a.has_ops ? (a.data.ops == b.data.ops)
                     : (a.data.sizeof_obj == b.data.sizeof_obj);
}
// Copies or moves one object from src_ptr to dst_ptr according to cpy_opt.
//   GUF_CPY_VALUE: byte-wise memcpy of sizeof_obj bytes.
//   GUF_CPY_DEEP:  calls the type's copy_init callback.
//   GUF_CPY_MOVE:  calls the type's move_init callback.
// DEEP and MOVE require obj_meta to carry an operations table with the
// corresponding callback; this is asserted before the callback is used.
// Returns the destination pointer as reported by memcpy / the callback.
static inline void *guf_cpy(void *dst_ptr, void *src_ptr, guf_obj_meta obj_meta, guf_obj_cpy_opt cpy_opt)
{
    GUF_ASSERT_RELEASE(dst_ptr);
    GUF_ASSERT_RELEASE(src_ptr);
    if (obj_meta.has_ops) {
        GUF_ASSERT_RELEASE(obj_meta.data.ops != NULL);
    }
    const ptrdiff_t sizeof_obj = obj_meta.has_ops ? obj_meta.data.ops->sizeof_obj : obj_meta.data.sizeof_obj;
    const guf_obj_ops *ops = obj_meta.has_ops ? obj_meta.data.ops : NULL;
    GUF_ASSERT_RELEASE(sizeof_obj > 0);
    switch (cpy_opt) {
    case GUF_CPY_VALUE:
        dst_ptr = memcpy(dst_ptr, src_ptr, (size_t)sizeof_obj);
        GUF_ASSERT_RELEASE(dst_ptr); // This should never fail.
        break;
    case GUF_CPY_DEEP:
        // ops is NULL for metadata without an operations table; check it
        // before dereferencing instead of crashing inside the assertion.
        GUF_ASSERT_RELEASE(ops != NULL && ops->copy_init != NULL);
        dst_ptr = ops->copy_init(dst_ptr, src_ptr);
        break;
    case GUF_CPY_MOVE:
        GUF_ASSERT_RELEASE(ops != NULL && ops->move_init != NULL);
        dst_ptr = ops->move_init(dst_ptr, src_ptr);
        break;
    default:
        GUF_ASSERT_RELEASE(false);
    }
    GUF_ASSERT(dst_ptr);
    return dst_ptr;
}
static inline guf_obj_meta guf_obj_get_meta(void *obj)
{
guf_obj_meta *meta_ptr = (guf_obj_meta*)obj; // IMPORTANT: Assumes the obj's first member is of type guf_obj_meta, otherwise this is undefined behaviour.
GUF_ASSERT_RELEASE(meta_ptr);
return *meta_ptr;
}
// Copies src_obj into dst_obj using the metadata embedded at the start of both objects.
// Both objects must carry identical metadata (release assert); the copy itself is delegated to guf_cpy.
static inline void *guf_obj_copy(void *dst_obj, void *src_obj, guf_obj_cpy_opt cpy_opts)
{
    const guf_obj_meta dst_meta = guf_obj_get_meta(dst_obj);
    const guf_obj_meta src_meta = guf_obj_get_meta(src_obj);
    const bool same_type = guf_obj_meta_same(dst_meta, src_meta);
    GUF_ASSERT_RELEASE(same_type);
    return guf_cpy(dst_obj, src_obj, dst_meta, cpy_opts);
}
// Name of the guf_obj_meta member that GUF_OBJ_DECLARE_OBJ_META() adds to a struct.
#define GUF_OBJ_META_NAME guf_obj_meta_member
// Declares the metadata member inside a struct definition (the trailing ';' is part of the macro).
#define GUF_OBJ_DECLARE_OBJ_META() guf_obj_meta GUF_OBJ_META_NAME;
// Accesses the metadata through the named member, so PTR must point to a struct that declares it.
#define GUF_OBJ_GET_META_TYPESAFE(PTR) (PTR)->GUF_OBJ_META_NAME
// Calls the object's free operation if it has an ops table with a non-NULL free.
// NOTE: both FREE macros expand to a bare `if` statement and evaluate their argument several times,
// so pass a side-effect-free expression and do not use them as the body of an unbraced if/else.
#define GUF_OBJ_FREE_TYPESAFE(ptr) if (GUF_OBJ_GET_META_TYPESAFE(ptr).has_ops && GUF_OBJ_GET_META_TYPESAFE(ptr).data.ops->free) {GUF_OBJ_GET_META_TYPESAFE(ptr).data.ops->free(ptr);}
// Same as above, but reads the metadata via guf_obj_get_meta (metadata must be the first member).
#define GUF_OBJ_FREE(PTR) if (guf_obj_get_meta(PTR).has_ops && guf_obj_get_meta(PTR).data.ops->free) { guf_obj_get_meta(PTR).data.ops->free(PTR); }
// Runs code_block and then frees the object behind ptr.
// The free is skipped if code_block leaves the block early (return / break / goto).
#define GUF_OBJ_LIFETIME_BLOCK_TYPESAFE(ptr, code_block) \
    do { \
        code_block \
        GUF_OBJ_FREE_TYPESAFE((ptr)); \
    } while (0);
// No line continuation after the final line: a trailing backslash there would splice
// whatever follows this macro in the file into the macro body.
#define GUF_OBJ_LIFETIME_BLOCK(ptr, code_block) \
    do { \
        code_block \
        GUF_OBJ_FREE((ptr)); \
    } while (0);
// Element types for storing C strings in generic containers, plus the metadata objects
// describing them (both are declared here and defined in a separate translation unit).
typedef const char* guf_const_cstr_type;
typedef char* guf_cstr_type;
extern const guf_obj_meta guf_cstr_obj_meta;
extern const guf_obj_meta guf_const_cstr_obj_meta;
#endif

756
src/guf_str.c Normal file
View File

@ -0,0 +1,756 @@
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include "guf_common.h"
#include "guf_str.h"
// Growth policy for string buffers: the new capacity is twice the requested size
// (wraps around on overflow, like any size_t arithmetic).
static inline size_t capacity_grow(size_t size)
{
    const size_t doubled = size + size;
    return doubled;
}
// Sets the given state bit(s) on the string.
static inline void set_flag(guf_str *str, guf_str_state flag)
{
    GUF_ASSERT(str);
    str->state = str->state | flag;
}
// Clears the given state bit(s) on the string.
static inline void unset_flag(guf_str *str, guf_str_state flag)
{
    GUF_ASSERT(str);
    str->state &= ~flag;
}
// Returns true if at least one of the bits in `flag` is set in the string's state.
static inline bool has_state(const guf_str *str, guf_str_state flag)
{
    GUF_ASSERT(str);
    return (str->state & flag) != 0;
}
// Returns true if the string currently uses its inline (short-string) buffer.
static inline bool is_short(const guf_str *str)
{
    GUF_ASSERT(str);
    const bool uses_inline_buf = has_state(str, GUF_STR_STATE_SHORT);
    return uses_inline_buf;
}
// Stores `len` (excluding the null terminator) in whichever representation is active.
// The length must not exceed the capacity of that representation: GUF_STR_SSO_BUFCAP
// for short strings (one byte of the inline buffer is needed for the terminator),
// heap.capacity for heap strings. Violations are caught by debug asserts only.
static inline void set_len(guf_str *str, size_t len)
{
    GUF_ASSERT(str);
    if (is_short(str)) {
        GUF_ASSERT(len <= GUF_STR_SSO_BUFCAP);
        str->data.stack.len = (unsigned char)len;
    } else {
        GUF_ASSERT(len <= str->data.heap.capacity);
        str->data.heap.len = len;
    }
}
// Returns a mutable pointer to the character buffer of the active representation.
static inline char *get_cstr(guf_str *str)
{
    GUF_ASSERT(str);
    if (!is_short(str)) {
        GUF_ASSERT(str->data.heap.c_str);
        return str->data.heap.c_str;
    }
    GUF_ASSERT(str->data.stack.c_str);
    return str->data.stack.c_str;
}
// Returns a read-only pointer to the character buffer of the active representation.
static inline const char *get_const_cstr(const guf_str *str)
{
    GUF_ASSERT(str);
    if (!is_short(str)) {
        GUF_ASSERT(str->data.heap.c_str);
        return str->data.heap.c_str;
    }
    GUF_ASSERT(str->data.stack.c_str);
    return str->data.stack.c_str;
}
static inline bool integrity_check(const guf_str *str)
{
GUF_ASSERT(str);
bool good_len_cap = guf_str_len(str) <= guf_str_capacity(str);
GUF_ASSERT(good_len_cap);
if (!good_len_cap) {
return false;
}
const char *c_str = get_const_cstr(str);
GUF_ASSERT(c_str);
if (!c_str) {
return false;
}
bool good_null_term = c_str[guf_str_len(str)] == '\0';
GUF_ASSERT(good_null_term);
return good_len_cap && c_str != NULL && good_null_term;
}
// Reports whether the string is free of allocation errors.
// Returns true when GUF_STR_STATE_ALLOC_ERR is NOT set (i.e. true means "allocation OK").
// If the flag is set, this trips a release assert when GUF_STR_ABORT_ON_ALLOC_FAILURE
// is defined, and a debug assert otherwise.
static inline bool handle_alloc_fail(const guf_str *str)
{
    GUF_ASSERT(str);
    bool good_alloc = !has_state(str, GUF_STR_STATE_ALLOC_ERR);
#ifdef GUF_STR_ABORT_ON_ALLOC_FAILURE
    GUF_ASSERT_RELEASE(good_alloc);
#else
    GUF_ASSERT(good_alloc);
#endif
    return good_alloc;
}
// Returns true if `str` is non-NULL, passes integrity_check and has no allocation error.
// Every failed condition additionally trips a debug assert.
bool guf_str_is_valid(const guf_str *str)
{
    GUF_ASSERT(str);
    if (str == NULL) {
        return false;
    }

    const bool intact = integrity_check(str);
    GUF_ASSERT(intact);
    if (!intact) {
        return false;
    }

    const bool alloc_ok = handle_alloc_fail(str);
    GUF_ASSERT(alloc_ok);
    return alloc_ok;
}
// Empty short string that additionally carries the allocation-error flag.
const guf_str GUF_STR_UNINITIALISED_FAILED_ALLOC = {
    .state = GUF_STR_STATE_INIT | GUF_STR_STATE_SHORT | GUF_STR_STATE_ALLOC_ERR,
    .data = { .stack = { .c_str = {'\0'}, .len = 0 } }
};
// Empty short string (zero length, zero-filled inline buffer); the starting point for new strings.
const guf_str GUF_STR_UNINITIALISED = {
    .state = GUF_STR_STATE_INIT | GUF_STR_STATE_SHORT,
    .data = { .stack = { .c_str = {'\0'}, .len = 0 } }
};
// Returns true if the string's allocation-error flag is not set. `str` must not be NULL.
bool guf_str_alloc_success(const guf_str *str)
{
    return (str->state & GUF_STR_STATE_ALLOC_ERR) == 0;
}
// Creation:
// Ensures the string can hold at least new_cap characters (excluding the null terminator).
// Never shrinks: returns str unchanged when new_cap <= current capacity.
// A short string that needs more room is moved to a freshly allocated heap buffer and
// its GUF_STR_STATE_SHORT flag is cleared; a heap string is grown with realloc.
// Returns str on success. On allocation failure the string's contents are left untouched,
// GUF_STR_STATE_ALLOC_ERR is set, handle_alloc_fail is invoked (which may abort), and NULL is returned.
guf_str *guf_str_reserve(guf_str *str, size_t new_cap)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    const size_t str_len = guf_str_len(str);
    const size_t current_cap = guf_str_capacity(str);
    if (new_cap <= current_cap) {
        return str;
    }
    GUF_ASSERT(new_cap > GUF_STR_SSO_BUFCAP);

    if (new_cap == (size_t)-1) { // new_cap + 1 (room for the terminator) would wrap to 0.
        set_flag(str, GUF_STR_STATE_ALLOC_ERR);
        handle_alloc_fail(str);
        return NULL;
    }

    if (is_short(str)) { // a) Was short string.
        // Allocate first and only touch the union once we know we succeeded,
        // so a failed allocation leaves the inline buffer intact.
        char *heap_buf = calloc(new_cap + 1, sizeof(heap_buf[0]));
        if (!heap_buf) {
            set_flag(str, GUF_STR_STATE_ALLOC_ERR);
            handle_alloc_fail(str);
            return NULL;
        }
        memcpy(heap_buf, str->data.stack.c_str, str_len + 1); // Includes the terminator.
        unset_flag(str, GUF_STR_STATE_SHORT); // From now on data.heap is the active member.
        str->data.heap.c_str = heap_buf;
        str->data.heap.len = str_len;
        str->data.heap.capacity = new_cap;
        return str;
    }

    // b) Was already heap allocated.
    GUF_ASSERT_RELEASE(str->data.heap.c_str);
    char *new_cstr = realloc(str->data.heap.c_str, new_cap + 1);
    if (!new_cstr) { // The old buffer is still valid and still owned by str.
        set_flag(str, GUF_STR_STATE_ALLOC_ERR);
        handle_alloc_fail(str);
        return NULL;
    }
    str->data.heap.c_str = new_cstr;
    str->data.heap.capacity = new_cap;
    return str;
}
// Creates a new string holding a copy of the characters in str_view.
// Returns GUF_STR_UNINITIALISED (an empty string) when str_view.str is NULL.
// If reserving the buffer fails, the string is returned as guf_str_reserve left it
// (check with guf_str_alloc_success).
guf_str guf_str_new(guf_str_view str_view)
{
    GUF_ASSERT(str_view.str);
    if (!str_view.str) {
        return GUF_STR_UNINITIALISED;
    }
    guf_str str = GUF_STR_UNINITIALISED;
    if (!guf_str_reserve(&str, str_view.len)) {
        return str;
    }
    // Short strings keep the fixed inline capacity, so the capacity may exceed the request.
    GUF_ASSERT_RELEASE(guf_str_capacity(&str) >= str_view.len);
    char *c_str = get_cstr(&str);
    memcpy(c_str, str_view.str, str_view.len);
    c_str[str_view.len] = '\0';
    set_len(&str, str_view.len); // Without this the new string would report length 0.
    GUF_ASSERT_RELEASE(guf_str_is_valid(&str));
    return str;
}
// Like guf_str_new, but reserves room for extra_capacity additional characters up front.
// Returns GUF_STR_UNINITIALISED when str_view.str is NULL. A capacity request that would
// overflow size_t is treated like an allocation failure.
guf_str guf_str_new_with_extra_cap(guf_str_view str_view, size_t extra_capacity)
{
    GUF_ASSERT(str_view.str);
    if (!str_view.str) {
        return GUF_STR_UNINITIALISED;
    }
    if (extra_capacity > (size_t)-1 - str_view.len) { // len + extra would wrap around.
        guf_str failed = GUF_STR_UNINITIALISED_FAILED_ALLOC;
        handle_alloc_fail(&failed);
        return failed;
    }
    const size_t capacity = str_view.len + extra_capacity;

    guf_str str = GUF_STR_UNINITIALISED;
    if (!guf_str_reserve(&str, capacity)) {
        return str;
    }
    // Short strings keep the fixed inline capacity, so the capacity may exceed the request.
    GUF_ASSERT_RELEASE(guf_str_capacity(&str) >= capacity);
    char *c_str = get_cstr(&str);
    memcpy(c_str, str_view.str, str_view.len);
    c_str[str_view.len] = '\0';
    set_len(&str, str_view.len); // Without this the new string would report length 0.
    GUF_ASSERT_RELEASE(guf_str_is_valid(&str));
    return str;
}
guf_str guf_str_new_from_cstr(const char *c_str)
{
return guf_str_new(GUF_CSTR_TO_VIEW(c_str));
}
// Initialises *str with a copy of str_view (see guf_str_new).
// Returns str on success and NULL if the allocation failed.
guf_str *guf_str_init(guf_str *str, guf_str_view str_view)
{
    GUF_ASSERT_RELEASE(str);
    *str = guf_str_new(str_view);
    // handle_alloc_fail returns true when the allocation SUCCEEDED.
    const bool alloc_ok = handle_alloc_fail(str);
    if (!alloc_ok) {
        return NULL;
    }
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    return str;
}
// Initialises *str with a copy of the null-terminated string c_str.
// A NULL c_str trips a debug assert and otherwise produces an empty string.
// Returns str on success and NULL if the allocation failed.
guf_str *guf_str_init_from_cstr(guf_str *str, const char* c_str)
{
    GUF_ASSERT_RELEASE(str);
    GUF_ASSERT(c_str);
    if (c_str) {
        *str = guf_str_new(GUF_CSTR_TO_VIEW(c_str));
    } else {
        *str = GUF_STR_UNINITIALISED; // Avoid strlen(NULL).
    }
    // handle_alloc_fail returns true when the allocation SUCCEEDED.
    const bool alloc_ok = handle_alloc_fail(str);
    if (!alloc_ok) {
        return NULL;
    }
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    return str;
}
// Creates an empty string with room for at least `capacity` characters.
// On allocation failure the returned string carries GUF_STR_STATE_ALLOC_ERR
// (check with guf_str_alloc_success); validity is only asserted on success.
guf_str guf_str_new_empty_with_capacity(size_t capacity)
{
    guf_str str = guf_str_new_from_cstr("");
    // handle_alloc_fail returns true when the allocation SUCCEEDED.
    if (!handle_alloc_fail(&str)) {
        return str;
    }
    if (!guf_str_reserve(&str, capacity)) {
        return str; // guf_str_reserve has already flagged the failure.
    }
    GUF_ASSERT_RELEASE(guf_str_is_valid(&str));
    return str;
}
// Initialises *str as an empty string with room for at least `capacity` characters.
// Returns str on success and NULL if the allocation failed.
guf_str *guf_str_init_empty_with_capacity(guf_str *str, size_t capacity)
{
    GUF_ASSERT_RELEASE(str);
    *str = guf_str_new_empty_with_capacity(capacity);
    // handle_alloc_fail returns true when the allocation SUCCEEDED.
    const bool alloc_ok = handle_alloc_fail(str);
    if (!alloc_ok) {
        return NULL;
    }
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    return str;
}
// Copying:
// Returns a new string containing up to `count` characters of `str`, starting at `pos`.
// The range is clamped to the end of the view. An empty string is returned when the
// view is NULL/empty, count is 0, or pos is past the end.
guf_str guf_str_substr_cpy(guf_str_view str, size_t pos, size_t count)
{
    GUF_ASSERT(str.str);
    if (str.str == NULL || str.len == 0 || count == 0 || pos >= str.len) {
        return guf_str_new_from_cstr("");
    }
    // pos < str.len here, so the subtraction cannot underflow; comparing against the
    // remainder (instead of computing pos + count) cannot overflow for huge counts.
    const size_t remaining = str.len - pos;
    const size_t substr_len = count < remaining ? count : remaining;
    GUF_ASSERT(substr_len >= 1);
    GUF_ASSERT(substr_len <= str.len);

    guf_str substr = GUF_STR_UNINITIALISED;
    if (!guf_str_reserve(&substr, substr_len)) {
        return substr;
    }
    // Short strings keep the fixed inline capacity, so the capacity may exceed the request.
    GUF_ASSERT_RELEASE(guf_str_capacity(&substr) >= substr_len);
    char *c_str = get_cstr(&substr);
    memcpy(c_str, str.str + pos, substr_len);
    c_str[substr_len] = '\0';
    set_len(&substr, substr_len); // Without this the copy would report length 0.
    GUF_ASSERT_RELEASE(guf_str_is_valid(&substr));
    return substr;
}
// Returns a view of up to `count` characters of `str`, starting at `pos`, without copying.
// The range is clamped to the end of the view. A zero-length view (pointing at str.str)
// is returned when the view is NULL/empty, count is 0, or pos is past the end.
guf_str_view guf_str_substr_view(guf_str_view str, size_t pos, size_t count)
{
    GUF_ASSERT(str.str);
    if (str.str == NULL || str.len == 0 || count == 0 || pos >= str.len) {
        return (guf_str_view){.str = str.str, .len = 0};
    }
    // Compare against the remainder rather than computing pos + count, which would wrap
    // for very large counts and produce a view that runs past the end of the data.
    const size_t remaining = str.len - pos;
    const size_t substr_len = count < remaining ? count : remaining;
    GUF_ASSERT(substr_len >= 1);
    GUF_ASSERT(substr_len <= str.len);
    return (guf_str_view){.str = str.str + pos, .len = substr_len};
}
// Modifying:
// Replaces the contents of str, in place, with up to `count` of its characters starting at `pos`.
// The range is clamped to the end of the string and the capacity is never reduced.
// The string is returned unchanged when it is empty, count is 0, or pos is past the end.
guf_str *guf_str_substr(guf_str* str, size_t pos, size_t count)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    const size_t len = guf_str_len(str);
    char *c_str = get_cstr(str);
    if (len == 0 || count == 0 || pos >= len || c_str == NULL) {
        return str;
    }
    // pos < len here; comparing against the remainder avoids overflow in pos + count.
    const size_t remaining = len - pos;
    const size_t substr_len = count < remaining ? count : remaining;
    GUF_ASSERT(substr_len >= 1);
    GUF_ASSERT(substr_len <= len);

    // Source and destination lie in the same buffer and may overlap, so memmove is required.
    // This works identically for the inline and the heap representation.
    memmove(c_str, c_str + pos, substr_len);
    c_str[substr_len] = '\0';
    set_len(str, substr_len);
    return str;
}
// Appends the characters of to_append to str, growing the buffer when necessary.
// Returns str. Appending an empty view is a no-op. Aborts (release assert) if the
// source pointer is NULL, the combined length overflows, or growing the buffer fails.
// NOTE(review): to_append should not point into str's own buffer, since growing
// may reallocate that buffer before the copy — confirm no caller relies on this.
guf_str *guf_str_append(guf_str *str, guf_str_view to_append)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    if (to_append.len == 0) {
        return str;
    }
    GUF_ASSERT_RELEASE(to_append.str);

    const size_t str_len = guf_str_len(str);
    GUF_ASSERT_RELEASE(to_append.len <= (size_t)-1 - str_len); // str_len + to_append.len must not wrap.
    const size_t total_len = str_len + to_append.len;

    if (guf_str_capacity(str) < total_len) { // The capacity of the destination string is too small -> grow.
        size_t grown_cap = capacity_grow(total_len);
        if (grown_cap < total_len) { // Doubling wrapped around; fall back to the exact size.
            grown_cap = total_len;
        }
        str = guf_str_reserve(str, grown_cap);
        GUF_ASSERT_RELEASE(str != NULL);
        GUF_ASSERT_RELEASE(guf_str_capacity(str) >= total_len);
    }

    char *dst_ptr = get_cstr(str);
    memcpy(dst_ptr + str_len, to_append.str, to_append.len);
    dst_ptr[total_len] = '\0';
    set_len(str, total_len);
    return str;
}
// Appends a null-terminated C string to str; thin wrapper around guf_str_append.
guf_str *guf_str_append_cstr(guf_str *str, const char *cstr_to_append)
{
    const guf_str_view tail = GUF_CSTR_TO_VIEW(cstr_to_append);
    return guf_str_append(str, tail);
}
// Reduces the capacity of str to its current length.
// Short strings and heap strings whose capacity already equals their length are left alone.
// A heap string that fits the inline buffer is moved back into it and its heap buffer freed;
// otherwise the heap buffer is shrunk with realloc. Always returns str; if realloc fails,
// GUF_STR_STATE_ALLOC_ERR is set and handle_alloc_fail is invoked.
guf_str *guf_str_shrink_to_fit(guf_str *str)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    if (is_short(str)) {
        return str;
    }

    const size_t len = guf_str_len(str);
    GUF_ASSERT(str->data.heap.c_str);
    GUF_ASSERT(str->data.heap.capacity >= len);
    if (str->data.heap.capacity == len) {
        return str;
    }

    if (len > GUF_STR_SSO_BUFCAP) { // b) Stays a long string.
        char *shrunk = realloc(str->data.heap.c_str, len + 1);
        GUF_ASSERT(shrunk);
        if (shrunk == NULL) {
            set_flag(str, GUF_STR_STATE_ALLOC_ERR);
            handle_alloc_fail(str);
            return str;
        }
        str->data.heap.c_str = shrunk;
        str->data.heap.capacity = len;
        GUF_ASSERT(str->data.heap.c_str[len] == '\0');
        return str;
    }

    // a) Fits into the inline buffer: remember the heap pointer before the union is overwritten.
    char *heap_buf = str->data.heap.c_str;
    GUF_ASSERT(heap_buf);
    str->data.heap.c_str = NULL;
    set_flag(str, GUF_STR_STATE_SHORT);
    str->data.stack.len = len;
    memcpy(str->data.stack.c_str, heap_buf, len);
    str->data.stack.c_str[len] = '\0';
    free(heap_buf);
    return str;
}
// Removes the last character of str and returns it; returns '\0' if the string is empty.
char guf_str_pop_back(guf_str *str)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    const size_t old_len = guf_str_len(str);
    if (old_len == 0) {
        return '\0';
    }
    const size_t last_idx = old_len - 1;
    char *tail = guf_str_at(str, last_idx);
    GUF_ASSERT(tail);
    const char removed = *tail;
    *tail = '\0'; // The former last character becomes the terminator.
    set_len(str, last_idx);
    return removed;
}
// Removes the first character of str and returns it; returns '\0' if the string is empty.
// The remaining characters (and the terminator) are shifted one position to the left.
char guf_str_pop_front(guf_str *str)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    const size_t old_len = guf_str_len(str);
    if (old_len == 0) {
        return '\0';
    }
    char *head = guf_str_at(str, 0);
    GUF_ASSERT(head);
    const char removed = *head;

    char *buf = get_cstr(str);
    // Moves buf[1..old_len] (old_len bytes, terminator included) down to buf[0..old_len-1].
    memmove(buf, buf + 1, old_len);
    GUF_ASSERT_RELEASE(buf[old_len - 1] == '\0');
    set_len(str, old_len - 1);
    return removed;
}
// Non-modifying:
// The size (in chars) without the final null-terminator.
// Returns the number of chars in str, not counting the final null terminator.
// Aborts (release assert) on NULL, on a failed integrity check, or when a heap string
// reports a capacity that is not larger than the inline capacity.
size_t guf_str_len(const guf_str *str)
{
    GUF_ASSERT_RELEASE(str);
    GUF_ASSERT_RELEASE(integrity_check(str));
    if (!is_short(str)) {
        GUF_ASSERT_RELEASE(str->data.heap.capacity > GUF_STR_SSO_BUFCAP);
        return str->data.heap.len;
    }
    return str->data.stack.len;
}
// Returns how many chars str can hold without reallocating (terminator not counted):
// GUF_STR_SSO_BUFCAP for short strings, heap.capacity otherwise.
size_t guf_str_capacity(const guf_str *str)
{
    GUF_ASSERT_RELEASE(str);
    GUF_ASSERT_RELEASE(integrity_check(str));
    // TODO (kept from the original): not sure whether heap.capacity > GUF_STR_SSO_BUFCAP should be asserted here.
    if (!is_short(str)) {
        return str->data.heap.capacity;
    }
    return GUF_STR_SSO_BUFCAP;
}
// Returns true if both views have the same length and identical bytes.
// Aborts (release assert) if either view has a NULL data pointer.
bool guf_str_view_equal(guf_str_view a, guf_str_view b)
{
    GUF_ASSERT_RELEASE(a.str && b.str);
    return a.len == b.len && memcmp(a.str, b.str, a.len) == 0;
}
bool guf_str_equal(const guf_str *a, const guf_str *b)
{
GUF_ASSERT_RELEASE(guf_str_is_valid(a) && guf_str_is_valid(b));
return guf_str_view_equal(GUF_STR_TO_VIEW(a), GUF_STR_TO_VIEW(b));
}
bool guf_str_equals_cstr(const guf_str *a, const char *c_str)
{
GUF_ASSERT_RELEASE(a && c_str);
GUF_ASSERT_RELEASE(guf_str_is_valid(a));
return guf_str_view_equal(GUF_STR_TO_VIEW(a), GUF_CSTR_TO_VIEW(c_str));
}
// Returns true if the string holds exactly the characters referenced by view b.
// Aborts (release assert) on a NULL string, a NULL view pointer, or an invalid string.
bool guf_str_equals_strview(const guf_str *a, guf_str_view b)
{
    GUF_ASSERT_RELEASE(a && b.str);
    GUF_ASSERT_RELEASE(guf_str_is_valid(a));
    const guf_str_view lhs = GUF_STR_TO_VIEW(a);
    return guf_str_view_equal(lhs, b);
}
// Three-way comparison of two guf_str_view objects, usable as a qsort/bsearch comparator.
// Both arguments must point to guf_str_view values with non-NULL data (release assert).
// Views are ordered byte-wise (memcmp) over their common prefix; if that prefix is equal,
// the shorter view sorts first. Returns <0, 0 or >0.
int guf_str_view_cmp(const void *str_view_a, const void *str_view_b)
{ // For qsort etc.
    GUF_ASSERT_RELEASE(str_view_a && str_view_b);
    const guf_str_view *a = (const guf_str_view*)str_view_a;
    const guf_str_view *b = (const guf_str_view*)str_view_b;
    GUF_ASSERT_RELEASE(a->str && b->str);

    const size_t common_len = a->len < b->len ? a->len : b->len;
    const int prefix_cmp = memcmp(a->str, b->str, common_len);
    if (prefix_cmp != 0) {
        return prefix_cmp;
    }
    // Differing lengths must never compare as equal, or sorting becomes inconsistent.
    return (a->len > b->len) - (a->len < b->len);
}
// Returns true if str currently uses its inline (short-string) buffer.
bool guf_str_is_stack_allocated(const guf_str *str)
{
    GUF_ASSERT_RELEASE(str);
    const bool uses_inline_buf = is_short(str);
    return uses_inline_buf;
}
// Indexing operations:
// Public read-only accessor for the string's null-terminated character buffer.
const char *guf_str_const_cstr(const guf_str *str)
{
    const char *chars = get_const_cstr(str);
    return chars;
}
// Public mutable accessor for the string's null-terminated character buffer.
char *guf_str_cstr(guf_str *str)
{
    char *chars = get_cstr(str);
    return chars;
}
// Returns a pointer to the character at index idx.
// An out-of-range index trips a debug assert and otherwise returns NULL.
char *guf_str_at(guf_str *str, size_t idx)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    const size_t len = guf_str_len(str);
    GUF_ASSERT(idx < len);
    if (idx >= len) {
        return NULL;
    }
    char *base = get_cstr(str);
    GUF_ASSERT(base != NULL);
    return &base[idx];
}
// Returns a pointer to the last character of str.
// Aborts (release assert) if the string is invalid or empty.
char *guf_str_back(guf_str *str)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    const size_t len = guf_str_len(str);
    GUF_ASSERT_RELEASE(len > 0);
    // A completely full string (len == capacity) is valid; the terminator lives outside the capacity.
    GUF_ASSERT_RELEASE(len <= guf_str_capacity(str));
    return guf_str_at(str, len - 1);
}
// Returns a pointer to the first character of str.
// Aborts (release assert) if the string is invalid or empty.
char *guf_str_front(guf_str *str)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    const size_t char_count = guf_str_len(str);
    GUF_ASSERT_RELEASE(char_count > 0);
    char *first = guf_str_at(str, 0);
    return first;
}
// Destruction:
// Releases any heap buffer owned by str and resets it to an empty short string.
// Short strings are simply emptied. Other state flags are left as they are.
void guf_str_free(guf_str *str)
{
    GUF_ASSERT(integrity_check(str));
    if (is_short(str)) {
        GUF_ASSERT_RELEASE(str->data.stack.len <= GUF_STR_SSO_BUFCAP);
    } else {
        free(str->data.heap.c_str); // free(NULL) is a no-op, so no explicit NULL check is needed.
        str->data.heap.c_str = NULL;
        set_flag(str, GUF_STR_STATE_SHORT);
    }
    str->data.stack.len = 0;
    str->data.stack.c_str[0] = '\0';
}
// UTF-8
// Returns true if c is a 7-bit ASCII character (value 0..127),
// regardless of whether plain char is signed or unsigned on this platform.
bool guf_str_char_is_ascii(char c)
{
    const unsigned char byte = (unsigned char)c;
    return byte < 128u;
}
// Returns true if every character of str is 7-bit ASCII (trivially true for an empty string).
bool guf_str_is_ascii(const guf_str *str)
{
    const char *chars = get_const_cstr(str);
    const size_t len = guf_str_len(str);
    size_t pos = 0;
    while (pos < len) {
        if (!guf_str_char_is_ascii(chars[pos])) {
            return false;
        }
        ++pos;
    }
    GUF_ASSERT(chars[len] == '\0');
    return true;
}
// One encoded code point as produced by guf_str_iterate_utf8.
//   num_bytes: number of bytes the sequence is expected to occupy (1 to 4).
//   bytes:     buffer for the encoded bytes; zero-initialised by guf_str_iterate_utf8.
//   valid:     false when guf_str_iterate_utf8 rejected the sequence.
typedef struct guf_str_codepoint_utf8 {
unsigned char num_bytes;
unsigned char bytes[5];
bool valid;
} guf_str_codepoint_utf8;
// Returns true if cp is the end-of-string marker: a valid one-byte code point holding '\0'.
bool guf_str_iter_done(const guf_str_codepoint_utf8 *cp)
{
    if (!cp->valid) {
        return false;
    }
    if (cp->num_bytes != 1) {
        return false;
    }
    return cp->bytes[0] == '\0';
}
// Decodes the UTF-8 sequence that starts at byte offset *idx of str.
//
// On success, cp.valid is true, cp.num_bytes is the sequence length (1 to 4), cp.bytes holds
// the encoded bytes, and *idx is advanced past the sequence. Reading the terminator at
// *idx == len yields the one-byte code point '\0' (see guf_str_iter_done).
// On failure (offset past the terminator, invalid lead byte, truncated sequence, or a bad
// continuation byte) cp.valid is false and *idx is left unchanged.
//
// Only the byte structure is checked (lead-byte pattern plus 10xxxxxx continuation bytes);
// overlong encodings, surrogates and values above U+10FFFF are not rejected.
guf_str_codepoint_utf8 guf_str_iterate_utf8(const guf_str *str, size_t *idx)
{
    GUF_ASSERT_RELEASE(idx);
    const unsigned char *bytes = (const unsigned char*)get_const_cstr(str);
    const size_t len = guf_str_len(str);
    guf_str_codepoint_utf8 cp = {.num_bytes = 1, .bytes = {'\0', '\0', '\0', '\0', '\0'}, .valid = true};

    const size_t i = *idx;
    if (i > len) { // Already past the terminator: nothing left to read.
        cp.valid = false;
        return cp;
    }

    const unsigned char lead = bytes[i];
    if (lead < 0x80u) { // 0xxxxxxx: ASCII (includes the terminator at i == len).
        cp.bytes[0] = lead;
        *idx = i + 1;
        return cp;
    }

    // The lead byte's high bits encode the sequence length; compare the masked prefix exactly.
    if ((lead & 0xF8u) == 0xF0u) {        // 11110xxx
        cp.num_bytes = 4;
    } else if ((lead & 0xF0u) == 0xE0u) { // 1110xxxx
        cp.num_bytes = 3;
    } else if ((lead & 0xE0u) == 0xC0u) { // 110xxxxx
        cp.num_bytes = 2;
    } else {                              // Stray continuation byte or invalid lead byte.
        cp.valid = false;
        return cp;
    }

    // i < len here (the terminator took the ASCII path), so len - i cannot underflow.
    if ((size_t)cp.num_bytes > len - i) { // Sequence would run past the end of the string.
        cp.valid = false;
        return cp;
    }

    cp.bytes[0] = lead;
    for (size_t j = 1; j < cp.num_bytes; ++j) {
        const unsigned char byte = bytes[i + j];
        if ((byte & 0xC0u) != 0x80u) { // Continuation bytes must look like 10xxxxxx.
            cp.valid = false;
            return cp;
        }
        cp.bytes[j] = byte; // Index within the code point, not within the string.
    }
    *idx = i + cp.num_bytes;
    return cp;
}
// Length without null-terminator.
size_t guf_str_len_utf8(const guf_str *str)
{
size_t idx = 0;
size_t n = 0;
for (guf_str_codepoint_utf8 cp = guf_str_iterate_utf8(str, &idx); !guf_str_iter_done(&cp); cp = guf_str_iterate_utf8(str, &idx)) {
++n;
}
assert(n >= 1);
return n - 1;
}
// guf_str_tokenise (const guf_str *str, const char *delims, const char *preserved_delims, )

136
src/guf_str.h Normal file
View File

@ -0,0 +1,136 @@
#ifndef GUF_STR_H
#define GUF_STR_H
#include <stddef.h>
#include <stdbool.h>
#include "guf_common.h"
#include "guf_obj.h"
// When defined, an allocation failure detected by the string code trips a release assert
// instead of a debug-only assert (see handle_alloc_fail in guf_str.c).
#define GUF_STR_ABORT_ON_ALLOC_FAILURE 1
// TODO: don't allocate self but take allocator?
// Bit flags stored in guf_str.state; they are combined with bitwise OR.
//   GUF_STR_STATE_INIT:      no flag set.
//   GUF_STR_STATE_SHORT:     the inline buffer (data.stack) is the active representation.
//   GUF_STR_STATE_ALLOC_ERR: a memory allocation for this string failed.
typedef enum guf_str_state {
GUF_STR_STATE_INIT = 0,
GUF_STR_STATE_SHORT = 1,
GUF_STR_STATE_ALLOC_ERR = 2
} guf_str_state;
// String with a short-string optimisation: the characters live either in a heap buffer
// (data.heap) or inline in the object itself (data.stack). The GUF_STR_STATE_SHORT bit
// in `state` tells which union member is active.
typedef struct guf_str {
guf_str_state state;
union {
struct heap {
char *c_str;
size_t len, capacity; // len and capacity do not include the null-terminator.
} heap;
struct stack { // Short-string optimisation.
// The inline buffer is sized so that buffer plus the one-byte length equals sizeof(struct heap).
#define GUF_STR_SSO_BUFSIZE (sizeof(struct heap) - sizeof(unsigned char))
// Usable characters in the inline buffer; one byte is presumably reserved for the null terminator.
#define GUF_STR_SSO_BUFCAP (GUF_STR_SSO_BUFSIZE - 1)
char c_str[GUF_STR_SSO_BUFSIZE];
unsigned char len;
} stack;
} data;
} guf_str;
// Non-owning reference to `len` characters starting at `str`.
typedef struct guf_str_view {
const char *str;
size_t len;
} guf_str_view;
// Builds a view over a null-terminated C string. Evaluates its argument twice and calls
// strlen on it, so the argument must be non-NULL and free of side effects.
#define GUF_CSTR_TO_VIEW(C_STR_PTR) ((guf_str_view){.str = (C_STR_PTR), .len = strlen((C_STR_PTR))})
// Builds a view over the contents of a guf_str. Evaluates its argument twice.
#define GUF_STR_TO_VIEW(GUF_STR_PTR) ((guf_str_view){.str = guf_str_const_cstr((GUF_STR_PTR)), .len = guf_str_len((GUF_STR_PTR))})
// Empty short string; the starting state of every new string.
extern const guf_str GUF_STR_UNINITIALISED;
// Empty short string that carries the allocation-error flag.
extern const guf_str GUF_STR_UNINITIALISED_FAILED_ALLOC;
// TODO: find_first_of and tokenise -> for parsing, see aoclib.
// Creation:
// The guf_str_init functions initialise *str in place.
guf_str *guf_str_init(guf_str *str, guf_str_view str_view);
guf_str *guf_str_init_from_cstr(guf_str *str, const char* c_str);
guf_str *guf_str_init_empty_with_capacity(guf_str *str, size_t capacity);
// guf_str_new functions return GUF_STR_UNINITIALISED or GUF_STR_UNINITIALISED_FAILED_ALLOC on failure (can be checked with guf_str_alloc_success)
guf_str guf_str_new(guf_str_view str_view);
guf_str guf_str_new_with_extra_cap(guf_str_view str_view, size_t extra_capacity);
guf_str guf_str_new_from_cstr(const char *c_str);
guf_str guf_str_new_empty_with_capacity(size_t capacity);
// Destruction:
void guf_str_free(guf_str *str);
// Modification:
guf_str *guf_str_append(guf_str *str, guf_str_view to_append);
guf_str *guf_str_append_cstr(guf_str *str, const char *cstr_to_append);
guf_str *guf_str_substr(guf_str* str, size_t pos, size_t count);
guf_str *guf_str_reserve(guf_str *str, size_t new_cap); // new_cap does not include the null-terminator.
guf_str *guf_str_shrink_to_fit(guf_str *str);
// NOTE(review): guf_str_shrink_capacity is declared here but no definition is visible in guf_str.c — confirm whether it is still planned.
guf_str *guf_str_shrink_capacity(guf_str *str, size_t shrink_trigger_fac, bool shrink_exact);
char guf_str_pop_back(guf_str *str);
char guf_str_pop_front(guf_str *str);
// Copying and viewing:
guf_str guf_str_substr_cpy(guf_str_view str, size_t pos, size_t count);
guf_str_view guf_str_substr_view(guf_str_view str, size_t pos, size_t count);
// Indexing:
char *guf_str_at(guf_str *str, size_t idx);
char *guf_str_back(guf_str *str);
char *guf_str_front(guf_str *str);
char *guf_str_cstr(guf_str *str);
const char *guf_str_const_cstr(const guf_str *str);
// Metadata retrieval:
size_t guf_str_len(const guf_str *str); // The size (in chars) without the final zero-terminator (size - 1).
size_t guf_str_capacity(const guf_str *str);
bool guf_str_is_stack_allocated(const guf_str *str);
bool guf_str_is_valid(const guf_str *str);
bool guf_str_alloc_success(const guf_str *str);
// Comparison:
bool guf_str_view_equal(guf_str_view a, guf_str_view b);
bool guf_str_equal(const guf_str *a, const guf_str *b);
bool guf_str_equals_cstr(const guf_str *a, const char *c_str);
bool guf_str_equals_strview(const guf_str *a, guf_str_view b);
int guf_str_view_cmp(const void *str_view_a, const void *str_view_b); // For qsort etc.
// UTF-8 operations.
bool guf_str_char_is_ascii(char c);
bool guf_str_is_ascii(const guf_str *str);
// TODO:
// typedef struct guf_str_pool_elem {
// guf_str str;
// bool in_use;
// struct guf_str_pool_elem *next_free;
// } guf_str_pool_elem;
// typedef struct guf_str_pool {
// guf_str_pool_elem *elems;
// size_t capacity, num_in_use;
// size_t min_str_bufsize;
// guf_str_pool_elem *first_free;
// } guf_str_pool;
// void guf_str_pool_init(guf_str_pool *pool, size_t capacity, size_t str_initial_size)
// {
// if (capacity < 1) {
// capacity = 1;
// }
// pool->num_in_use = 0;
// pool->capacity = capacity;
// pool->elems = calloc(capacity, sizeof(guf_str_pool_elem));
// pool->min_str_bufsize = str_initial_size;
// GUF_ASSERT_RELEASE(pool->elems);
// pool->first_free = pool->elems + 0;
// for (size_t i = 0; i < pool->capacity; ++i) {
// pool->elems[i].in_use = false;
// pool->elems[i].str = guf_str_new_empty_with_capacity(pool->min_str_bufsize);
// pool->elems[i].next_free = i + 1 < pool->capacity ? pool->elems + i + 1 : NULL;
// }
// }
// find_free and find_free_with_cap
#endif

93
src/guf_test.c Normal file
View File

@ -0,0 +1,93 @@
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "guf_dbuf.h"
#include "guf_str.h"
// Descriptor for a single test case: its name, the output it is expected to produce,
// the output it actually produced, the function that runs it, and result bookkeeping.
// NOTE(review): none of the fields are read or written in this file yet — the test
// runner loop in main is still commented out.
typedef struct guf_test {
const char *name, *expected_output;
char *output;
void (*test_fn)(struct guf_test *test);
uint64_t runtime_ms;
bool passed;
} guf_test;
// Smoke test for guf_dbuf: fills buffers with ints, const C strings and owned (mutable)
// C strings, prints a few elements, and releases every buffer again.
// The `test` descriptor is currently unused (main passes NULL).
static void guf_dbuf_test(guf_test *test)
{
    (void)test;

    guf_dbuf ints = GUF_DBUF_NEW(int);
    for (int i = 0; i < 2048; ++i) {
        GUF_DBUF_PUSH_VAL(&ints, int, i);
    }
    int last_int = GUF_DBUF_LAST_VAL(&ints, int);
    int first_int = GUF_DBUF_FIRST_VAL(&ints, int);
    int nth = GUF_DBUF_AT_VAL(&ints, int, 14);
    printf("First: %d, Last: %d, 14th: %d\n", first_int, last_int, nth);

    guf_dbuf strings = GUF_DBUF_NEW(guf_const_cstr_type);
    GUF_DBUF_PUSH_VAL(&strings, guf_const_cstr_type, "First elem");
    GUF_DBUF_PUSH_VAL(&strings, guf_const_cstr_type, "Second elem");
    GUF_DBUF_PUSH_VAL(&strings, guf_const_cstr_type, "Third elem");
    int i = 0;
    GUF_DBUF_FOREACH(strings, guf_const_cstr_type, elem) {
        printf("elem %d: %s\n", i++, *elem);
    }

    guf_dbuf mut_strings = guf_dbuf_new(guf_cstr_obj_meta);
    char foo[] = "Hello, world!";
    char bar[] = "Hej, verden!";
    char *baz = calloc(64, sizeof(char));
    GUF_ASSERT_RELEASE(baz != NULL); // strcpy below must not write through a NULL pointer.
    strcpy(baz, "Farvel, I forbandede hundehoveder!");
    GUF_DBUF_PUSH_VAL_CPY(&mut_strings, guf_cstr_type, &foo[0]);
    GUF_DBUF_PUSH_VAL_CPY(&mut_strings, guf_cstr_type, &bar[0]);
    guf_dbuf_push(&mut_strings, &baz, GUF_CPY_MOVE);
    GUF_ASSERT_RELEASE(baz == NULL); // The move is expected to have taken ownership and cleared baz.
    // char *popped = GUF_DBUF_POP_VAL(&mut_strings, cstr_type);
    // printf("popped: %s\n", popped);
    // free(popped);
    printf("First str_mut: %s, second: %s, last: %s\n", GUF_DBUF_FIRST_VAL(&mut_strings, guf_cstr_type), GUF_DBUF_AT_VAL(&mut_strings, guf_cstr_type, 1), GUF_DBUF_LAST_VAL(&mut_strings, guf_cstr_type));

    // Release all three buffers, not just the last one, so the test runs leak-free
    // (the debug build enables AddressSanitizer).
    guf_dbuf_free(&mut_strings);
    guf_dbuf_free(&strings);
    guf_dbuf_free(&ints);
}
// Test driver entry point. Currently it only runs guf_dbuf_test; `success` is never
// modified, so the process always exits with EXIT_SUCCESS unless an assert aborts it.
// The commented-out code below is scaffolding for an allocator check and a table-driven
// test loop that are not wired up yet.
int main(void)
{
bool success = true;
guf_dbuf_test(NULL);
// guf_test_arr_register();
// bool alloc_init = guf_alloc_init();
// GUF_ASSERT_RELEASE(alloc_init);
// void *ptr = guf_malloc(sizeof(int) * 42, "int alloc");
// void *ptr2 = guf_malloc(sizeof(int) * 4, "int alloc 2 ");
// void *ptr3 = guf_malloc(sizeof(int) * 4, "int alloc 3aaaaaaaaaafsjfjsdkfjksldjflssdfsdfjjjsdflkdjflsd");
// guf_free(ptr, "int alloc");
// guf_free(ptr3, "int alloc 3");
// guf_free(ptr2, "int alloc 2");
// guf_alloc_print();
// GUF_ARR_FOREACH(test_arr, guf_test, test) {
// test->test_fn(test);
// if (guf_str_equals(&test->expected_output, &test->output)) {
// printf("Test %s passed!\n", guf_str_get_const_c_str(&test->name));
// } else {
// printf("Test %s failed!\n", guf_str_get_const_c_str(&test->name));
// }
// }
return success ? EXIT_SUCCESS : EXIT_FAILURE;
}

13
test/data_01.txt Normal file
View File

@ -0,0 +1,13 @@
„Ich weiß nicht“, rief ich ohne Klang „ich weiß ja nicht. Wenn
niemand kommt, dann kommt eben niemand. Ich habe niemandem etwas
Böses getan, niemand hat mir etwas Böses getan, niemand aber will
mir helfen. Lauter niemand. Aber so ist es doch nicht. Nur daß mir
niemand hilft —, sonst wäre lauter niemand hübsch. Ich würde ganz
gern — warum denn nicht — einen Ausflug mit einer Gesellschaft von
lauter Niemand machen. Natürlich ins Gebirge, wohin denn sonst? Wie
sich diese Niemand aneinander drängen, diese vielen quer gestreckten
und eingehängten Arme, diese vielen Füße, durch winzige Schritte
getrennt! Versteht sich, daß alle in Frack sind. Wir gehen so lala,
der Wind fährt durch die Lücken, die wir und unsere Gliedmaßen offen
lassen. Die Hälse werden im Gebirge frei! Es ist ein Wunder, daß
wir nicht singen.“

29
testgen.py Normal file
View File

@ -0,0 +1,29 @@
import textwrap
import dbuf_tests
def gen_test_struct(test_fn_name: str, name: str, expected_output: str) -> str:
    """Return the C source of a `guf_test` compound literal for one test case.

    `test_fn_name` is inserted verbatim as the test function identifier;
    `name` and `expected_output` are placed inside C string literals as-is
    (no escaping is applied). The result starts with a newline.
    """
    initialiser = f"""
(guf_test) {{
.test_fn = {test_fn_name}, .name = "{name}",
.expected_output = "{expected_output}",
.output = NULL,
.passed = false,
.runtime_ms = 0,
}}"""
    return textwrap.dedent(initialiser)
def gen_res_str(buf):
    """Return the elements of `buf` converted with str() and joined by commas.

    An empty `buf` yields an empty string; there is no trailing comma.
    """
    return ",".join(str(elem) for elem in buf)
if __name__ == "__main__":
    # Emit a C array definition containing one initialiser per test generator
    # exposed by dbuf_tests.all_tests(); each generator returns the C source
    # of a single entry.
    pieces = ["static const guf_test dbuf_tests[] = {"]
    for make_entry in dbuf_tests.all_tests():
        pieces.append(textwrap.indent(make_entry() + ",", " "))
    pieces.append("\n};")
    print("".join(pieces))