Add comments

This commit is contained in:
jun 2025-03-21 00:51:29 +01:00
parent 7630ecfdcf
commit 3ee07078f0
18 changed files with 74 additions and 762 deletions

View File

@ -1,3 +1,7 @@
/*
is parametrized: no
*/
#ifndef GUF_ALLOC_H
#define GUF_ALLOC_H
#include "guf_common.h"

View File

@ -1,3 +1,7 @@
/*
is parametrized: no
*/
#ifndef GUF_ALLOC_LIBC_H
#define GUF_ALLOC_LIBC_H
#include <memory.h>

View File

@ -1,3 +1,10 @@
/*
is parametrized: no, but needs to be included with GUF_INIT in the init implementation
TODO: - Thread safety?
- Maybe allow user defined guf_errs?
*/
#ifndef GUF_ASSERT_H
#define GUF_ASSERT_H
#include <stdio.h>

View File

@ -1,3 +1,7 @@
/*
is parametrized: no
*/
#ifndef GUF_COMMON_H
#define GUF_COMMON_H
#include <stdlib.h>

View File

@ -1,3 +1,7 @@
/*
is parametrized: no
*/
#ifndef GUF_CSTR_H
#define GUF_CSTR_H
#include <string.h>

View File

@ -1,3 +1,7 @@
/*
is parametrized: yes
*/
#if defined(GUF_DBUF_STATIC_IMPL)
#define GUF_DBUF_KWRDS static
#else

View File

@ -1,3 +1,7 @@
/*
is parametrized: yes
*/
#if defined(GUF_DICT_IMPL_STATIC)
#define GUF_DICT_KWRDS static
#else

View File

@ -1,3 +1,7 @@
/*
is parametrized: no (but receives GUF_HASH_32_BIT from guf_common.h to set guf_hash_size_t depending on platform)
*/
#if defined(GUF_HASH_IMPL_STATIC)
#define GUF_HASH_KWRDS static
#else

View File

@ -1,3 +1,7 @@
/*
is parametrized: yes
*/
#if defined(GUF_ID_POOL_IMPL_STATIC)
#define GUF_ID_POOL_KWRDS static
#else

View File

@ -1,3 +1,7 @@
/*
is parametrized: no, but must be implemented once (libguf always requires implementing guf_assert and guf_hash)
*/
#ifndef GUF_INIT_H
#define GUF_INIT_H

View File

@ -1,3 +1,7 @@
/*
is parametrized: no
*/
#if defined(GUF_LINALG_IMPL_STATIC)
#define GUF_LINALG_KWRDS static
#else

View File

@ -1,3 +1,8 @@
/*
is parametrized: yes
TODO: - maybe allow 64- and 32-bit implementations to co-exist...
*/
#if defined(GUF_RAND_IMPL_STATIC)
#define GUF_RAND_KWRDS static
#else

View File

@ -1,3 +1,7 @@
/*
is parametrized: yes
*/
#ifndef GUF_SORT_H
#define GUF_SORT_H
#include "guf_common.h"

View File

@ -1,762 +0,0 @@
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include "guf_common.h"
#include "guf_str.h"
/*
    Growth policy used when a string has to be enlarged: twice the requested size.
    The result saturates at one below the largest size_t instead of wrapping around
    to a small value; one below the maximum leaves room for adding 1 (null-terminator).
*/
static inline size_t capacity_grow(size_t size)
{
    const size_t max_cap = (size_t)-1 - 1;
    if (size > max_cap / 2) { // size * 2 would exceed max_cap (or wrap around).
        return max_cap;
    }
    return size * 2;
}
// Switches on the given state bit(s) in str->state; all other bits are left as they are.
static inline void set_flag(guf_str *str, guf_str_state flag)
{
    GUF_ASSERT(str);
    str->state = str->state | flag;
}
// Clears the given state bit(s) in str->state; all other bits are left as they are.
static inline void unset_flag(guf_str *str, guf_str_state flag)
{
    GUF_ASSERT(str);
    str->state &= ~flag;
}
// Returns true if at least one of the bits in flag is set in str->state.
static inline bool has_state(const guf_str *str, guf_str_state flag)
{
    GUF_ASSERT(str);
    const bool any_bit_set = (str->state & flag) != 0;
    return any_bit_set;
}
// Returns true if the GUF_STR_STATE_SHORT bit is set, i.e. the string uses data.stack rather than data.heap.
static inline bool is_short(const guf_str *str)
{
    GUF_ASSERT(str);
    const bool short_bit_set = has_state(str, GUF_STR_STATE_SHORT);
    return short_bit_set;
}
/*
    Stores a new length in whichever representation str currently uses.
    Only the length field is written; the character data and the terminator are not touched.
*/
static inline void set_len(guf_str *str, size_t len)
{
    GUF_ASSERT(str);
    if (!is_short(str)) { // Heap representation.
        GUF_ASSERT(len <= str->data.heap.capacity);
        str->data.heap.len = len;
        return;
    }
    // Short representation: the length is stored in a single unsigned char.
    GUF_ASSERT(len <= GUF_STR_SSO_BUFSIZE);
    GUF_ASSERT(len <= UCHAR_MAX);
    str->data.stack.len = (unsigned char)len;
}
// Returns a mutable pointer to the character buffer of str (heap buffer or in-struct short buffer).
static inline char *get_cstr(guf_str *str)
{
    GUF_ASSERT(str);
    if (!is_short(str)) {
        GUF_ASSERT(str->data.heap.c_str);
        return str->data.heap.c_str;
    }
    GUF_ASSERT(str->data.stack.c_str);
    return str->data.stack.c_str;
}
// Read-only counterpart of get_cstr: returns the character buffer of str without allowing writes.
static inline const char *get_const_cstr(const guf_str *str)
{
    GUF_ASSERT(str);
    if (!is_short(str)) {
        GUF_ASSERT(str->data.heap.c_str);
        return str->data.heap.c_str;
    }
    GUF_ASSERT(str->data.stack.c_str);
    return str->data.stack.c_str;
}
static inline bool integrity_check(const guf_str *str)
{
GUF_ASSERT(str);
bool good_len_cap = guf_str_len(str) <= guf_str_capacity(str);
GUF_ASSERT(good_len_cap);
if (!good_len_cap) {
return false;
}
const char *c_str = get_const_cstr(str);
GUF_ASSERT(c_str);
if (!c_str) {
return false;
}
bool good_null_term = c_str[guf_str_len(str)] == '\0';
GUF_ASSERT(good_null_term);
return good_len_cap && c_str != NULL && good_null_term;
}
/*
    Reports (and, depending on the build, asserts on) the allocation-error state of str.
    Returns true if GUF_STR_STATE_ALLOC_ERR is NOT set, i.e. true means "allocation is fine".
    With GUF_STR_ABORT_ON_ALLOC_FAILURE defined the check uses GUF_ASSERT_RELEASE,
    otherwise GUF_ASSERT.
*/
static inline bool handle_alloc_fail(const guf_str *str)
{
    GUF_ASSERT(str);
    const bool good_alloc = !has_state(str, GUF_STR_STATE_ALLOC_ERR);
#ifdef GUF_STR_ABORT_ON_ALLOC_FAILURE
    GUF_ASSERT_RELEASE(good_alloc); // The terminating semicolon was missing here.
#else
    GUF_ASSERT(good_alloc);
#endif
    return good_alloc;
}
/*
    Returns true if str is non-NULL, passes integrity_check() and has no allocation error flagged.
    Every failed condition is also reported through GUF_ASSERT.
*/
bool guf_str_is_valid(const guf_str *str)
{
    GUF_ASSERT(str);
    if (str == NULL) {
        return false;
    }

    const bool intact = integrity_check(str);
    GUF_ASSERT(intact);
    if (!intact) {
        return false;
    }

    const bool alloc_ok = handle_alloc_fail(str);
    GUF_ASSERT(alloc_ok);
    return alloc_ok;
}
// Empty short string (length 0, first byte '\0') with the allocation-error flag already set.
const guf_str GUF_STR_UNINITIALISED_FAILED_ALLOC = {
.state = GUF_STR_STATE_INIT | GUF_STR_STATE_SHORT | GUF_STR_STATE_ALLOC_ERR,
.data.stack.len = 0,
.data.stack.c_str = {'\0'}
};
// Empty short string (length 0, first byte '\0'); used in this file as the starting value for new strings.
const guf_str GUF_STR_UNINITIALISED = {
.state = GUF_STR_STATE_INIT | GUF_STR_STATE_SHORT,
.data.stack.len = 0,
.data.stack.c_str = {'\0'}
};
// Returns true if the allocation-error flag of str is not set. str must not be NULL (it is dereferenced unchecked).
bool guf_str_alloc_success(const guf_str *str)
{
    return (str->state & GUF_STR_STATE_ALLOC_ERR) == 0;
}
// Creation:
/*
    Makes sure str can hold at least new_cap chars (not counting the null-terminator).
    Does nothing if the current capacity is already sufficient; never shrinks.

    Returns str on success. On allocation failure GUF_STR_STATE_ALLOC_ERR is set,
    handle_alloc_fail() is invoked and NULL is returned; the contents of str are left
    exactly as they were before the call.

    Fixes compared to the previous version:
        - The new buffer is allocated into a local pointer first, so a failed calloc no
          longer overwrites the data of a still-short string.
        - GUF_STR_STATE_SHORT is cleared once the string lives on the heap (before, the flag
          stayed set and the string kept being treated as short).
        - new_cap + 1 can no longer wrap around to 0.
*/
guf_str *guf_str_reserve(guf_str *str, size_t new_cap)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    const size_t str_len = guf_str_len(str);
    const size_t current_cap = guf_str_capacity(str);
    if (new_cap <= current_cap) {
        return str;
    }
    GUF_ASSERT(new_cap > GUF_STR_SSO_BUFCAP);

    if (new_cap == (size_t)-1) { // new_cap + 1 (room for the terminator) would overflow.
        set_flag(str, GUF_STR_STATE_ALLOC_ERR);
        handle_alloc_fail(str);
        return NULL;
    }

    if (is_short(str)) { // a) Was short string.
        char *heap_buf = calloc(new_cap + 1, sizeof *heap_buf);
        if (!heap_buf) {
            set_flag(str, GUF_STR_STATE_ALLOC_ERR);
            handle_alloc_fail(str);
            return NULL;
        }
        // Copy the chars (plus terminator) out of the short buffer before any heap field is written.
        memcpy(heap_buf, str->data.stack.c_str, str_len + 1);
        unset_flag(str, GUF_STR_STATE_SHORT);
        str->data.heap.c_str = heap_buf;
        str->data.heap.capacity = new_cap;
        str->data.heap.len = str_len;
        return str;
    }

    // b) Was already heap allocated.
    GUF_ASSERT_RELEASE(str->data.heap.c_str);
    char *new_cstr = realloc(str->data.heap.c_str, new_cap + 1);
    if (!new_cstr) { // The old buffer is still valid and still owned by str.
        set_flag(str, GUF_STR_STATE_ALLOC_ERR);
        handle_alloc_fail(str);
        return NULL;
    }
    str->data.heap.c_str = new_cstr;
    str->data.heap.capacity = new_cap;
    return str;
}
/*
    Creates a new string holding a copy of the str_view.len chars of str_view.

    Returns GUF_STR_UNINITIALISED if str_view.str is NULL. If reserving memory fails, the
    string is returned as guf_str_reserve() left it (allocation-error flag set).

    Fixes compared to the previous version:
        - The length is now stored via set_len(); before, the chars were copied but the
          length stayed 0.
        - The capacity check is ">=" instead of "==": a short string always reports
          GUF_STR_SSO_BUFCAP, so "==" failed for every input shorter than that.
        - The temporary debug loop (marked "TODO: remove") is gone.
*/
guf_str guf_str_new(guf_str_view str_view)
{
    GUF_ASSERT(str_view.str);
    if (!str_view.str) {
        return GUF_STR_UNINITIALISED;
    }

    guf_str str = GUF_STR_UNINITIALISED;
    if (!guf_str_reserve(&str, str_view.len)) {
        return str;
    }
    GUF_ASSERT_RELEASE(guf_str_capacity(&str) >= str_view.len);

    char *c_str = get_cstr(&str);
    memcpy(c_str, str_view.str, str_view.len);
    c_str[str_view.len] = '\0';
    set_len(&str, str_view.len);

    GUF_ASSERT_RELEASE(guf_str_is_valid(&str));
    return str;
}
/*
    Creates a new string holding a copy of str_view, with room for at least
    str_view.len + extra_capacity chars.

    Returns GUF_STR_UNINITIALISED if str_view.str is NULL, and
    GUF_STR_UNINITIALISED_FAILED_ALLOC if str_view.len + extra_capacity does not fit in a
    size_t. If reserving memory fails, the string is returned as guf_str_reserve() left it.

    Fixes compared to the previous version:
        - The length is now stored via set_len() (it used to stay 0).
        - The capacity check is ">=" instead of "==" (a short string always reports
          GUF_STR_SSO_BUFCAP).
        - The capacity sum is checked for overflow.
        - The temporary debug loop (marked "TODO: remove") is gone.
*/
guf_str guf_str_new_with_extra_cap(guf_str_view str_view, size_t extra_capacity)
{
    GUF_ASSERT(str_view.str);
    if (!str_view.str) {
        return GUF_STR_UNINITIALISED;
    }
    if (extra_capacity > (size_t)-1 - str_view.len) { // Sum would wrap around.
        return GUF_STR_UNINITIALISED_FAILED_ALLOC;
    }

    guf_str str = GUF_STR_UNINITIALISED;
    const size_t capacity = str_view.len + extra_capacity;
    if (!guf_str_reserve(&str, capacity)) {
        return str;
    }
    GUF_ASSERT_RELEASE(guf_str_capacity(&str) >= capacity);

    char *c_str = get_cstr(&str);
    memcpy(c_str, str_view.str, str_view.len);
    c_str[str_view.len] = '\0';
    set_len(&str, str_view.len);

    GUF_ASSERT_RELEASE(guf_str_is_valid(&str));
    return str;
}
guf_str guf_str_new_from_cstr(const char *c_str)
{
return guf_str_new(GUF_CSTR_TO_VIEW(c_str));
}
/*
    Initialises *str with a copy of str_view (see guf_str_new).
    Returns str on success and NULL if the allocation-error flag is set afterwards.

    Fix: handle_alloc_fail() returns true when the allocation is FINE. The previous version
    stored that value in a variable called "fail" and therefore returned NULL on success
    and str on failure.
*/
guf_str *guf_str_init(guf_str *str, guf_str_view str_view)
{
    GUF_ASSERT_RELEASE(str);
    *str = guf_str_new(str_view);

    const bool alloc_ok = handle_alloc_fail(str);
    if (!alloc_ok) {
        return NULL;
    }
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    return str;
}
/*
    Initialises *str with a copy of the C string c_str (converted with GUF_CSTR_TO_VIEW).
    Returns str on success and NULL if the allocation-error flag is set afterwards.

    Fix: handle_alloc_fail() returns true when the allocation is FINE. The previous version
    treated that value as "fail" and therefore returned NULL on success and str on failure.
*/
guf_str *guf_str_init_from_cstr(guf_str *str, const char* c_str)
{
    GUF_ASSERT_RELEASE(str);
    *str = guf_str_new(GUF_CSTR_TO_VIEW(c_str));

    const bool alloc_ok = handle_alloc_fail(str);
    if (!alloc_ok) {
        return NULL;
    }
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    return str;
}
guf_str guf_str_new_empty_with_capacity(size_t capacity)
{
guf_str str = guf_str_new_from_cstr("");
bool fail = handle_alloc_fail(&str);
if (!fail) {
guf_str_reserve(&str, capacity);
fail = handle_alloc_fail(&str);
}
GUF_ASSERT_RELEASE(guf_str_is_valid(&str));
return str;
}
/*
    Initialises *str as an empty string with room for at least `capacity` chars.
    Returns str on success and NULL if the allocation-error flag is set afterwards.

    Fix: handle_alloc_fail() returns true when the allocation is FINE. The previous version
    treated that value as "fail" and therefore returned NULL on success and str on failure.
*/
guf_str *guf_str_init_empty_with_capacity(guf_str *str, size_t capacity)
{
    GUF_ASSERT_RELEASE(str);
    *str = guf_str_new_empty_with_capacity(capacity);

    const bool alloc_ok = handle_alloc_fail(str);
    if (!alloc_ok) {
        return NULL;
    }
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    return str;
}
// Copying:
/*
    Returns a new string holding a copy of up to `count` chars of `str`, starting at `pos`.
    The range is clamped to the end of `str`. An empty string is returned if `str` is
    empty or NULL, if count is 0, or if pos is past the end.
    If reserving memory fails, the string is returned as guf_str_reserve() left it.

    Fixes compared to the previous version:
        - Clamping no longer computes pos + count, which wrapped around for large counts.
        - The length is now stored via set_len() (it used to stay 0).
        - The capacity check is ">=" instead of "==" (a short string always reports
          GUF_STR_SSO_BUFCAP).
*/
guf_str guf_str_substr_cpy(guf_str_view str, size_t pos, size_t count)
{
    GUF_ASSERT(str.str);
    if (str.len == 0 || count == 0 || pos >= str.len || str.str == NULL) {
        return guf_str_new_from_cstr("");
    }

    const size_t remaining = str.len - pos; // >= 1 because pos < str.len
    const size_t substr_len = count > remaining ? remaining : count;
    GUF_ASSERT(substr_len >= 1);
    GUF_ASSERT(substr_len <= str.len);
    GUF_ASSERT(substr_len <= count);

    guf_str substr = GUF_STR_UNINITIALISED;
    if (!guf_str_reserve(&substr, substr_len)) {
        return substr;
    }
    GUF_ASSERT_RELEASE(guf_str_capacity(&substr) >= substr_len);

    char *c_str = get_cstr(&substr);
    memcpy(c_str, str.str + pos, substr_len);
    c_str[substr_len] = '\0';
    set_len(&substr, substr_len);

    GUF_ASSERT_RELEASE(guf_str_is_valid(&substr));
    return substr;
}
/*
    Returns a view onto up to `count` chars of `str`, starting at `pos`; nothing is copied.
    The range is clamped to the end of `str`. A zero-length view (pointing at str.str) is
    returned if `str` is empty or NULL, if count is 0, or if pos is past the end.

    Fix: clamping no longer computes pos + count, which wrapped around for large counts
    and could yield a view that extends past the end of `str`.
*/
guf_str_view guf_str_substr_view(guf_str_view str, size_t pos, size_t count)
{
    GUF_ASSERT(str.str);
    if (str.len == 0 || count == 0 || pos >= str.len || str.str == NULL) {
        return (guf_str_view){.str = str.str, .len = 0};
    }

    const size_t remaining = str.len - pos; // >= 1 because pos < str.len
    const size_t substr_len = count > remaining ? remaining : count;
    GUF_ASSERT(substr_len >= 1);
    GUF_ASSERT(substr_len <= str.len);
    return (guf_str_view){.str = str.str + pos, .len = substr_len};
}
// Modifying:
/*
    Reduces str in place to the substring of up to `count` chars starting at `pos`
    (clamped to the end of the string). The capacity is not changed.
    str is returned unchanged if it is empty, if count is 0, or if pos is past the end.

    Fixes compared to the previous version:
        - The short-string path copied within one buffer using memcpy, which is undefined
          for overlapping ranges; memmove is used now.
        - The heap path asserted num_moved == len and wrote the terminator at [len]
          instead of [substr_len], leaving the result unterminated.
        - Clamping no longer computes pos + count, which wrapped around for large counts.
*/
guf_str *guf_str_substr(guf_str* str, size_t pos, size_t count)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    const size_t len = guf_str_len(str);
    if (len == 0 || count == 0 || pos >= len) {
        return str;
    }

    const size_t remaining = len - pos; // >= 1 because pos < len
    const size_t substr_len = count > remaining ? remaining : count;
    GUF_ASSERT(substr_len >= 1);
    GUF_ASSERT(substr_len <= len);

    // The same steps work for both representations: shift left, terminate, shrink length.
    char *c_str = get_cstr(str);
    GUF_ASSERT(c_str);
    memmove(c_str, c_str + pos, substr_len);
    c_str[substr_len] = '\0';
    set_len(str, substr_len);
    return str;
}
/*
    Appends the chars of to_append to str, growing the capacity (via capacity_grow) if needed.
    Returns str. A failed reservation trips GUF_ASSERT_RELEASE, as before.

    Fixes compared to the previous version:
        - The copy loop called guf_str_capacity() on every iteration; that runs the integrity
          check while the old terminator is already overwritten and the length not yet
          updated, so the check failed from the second char on. The copy is a single
          memcpy now and the string is only inspected while it is consistent.
        - str_len + to_append.len is checked for overflow.
        - A NULL to_append.str with non-zero length is rejected.

    NOTE(review): if to_append points into str's own buffer and the buffer has to be
    reallocated, the view is presumably left dangling - confirm whether self-append
    must be supported.
*/
guf_str *guf_str_append(guf_str *str, guf_str_view to_append)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    if (to_append.len == 0) {
        return str;
    }
    GUF_ASSERT_RELEASE(to_append.str);

    const size_t str_len = guf_str_len(str);
    GUF_ASSERT_RELEASE(to_append.len <= (size_t)-1 - str_len); // Total length must fit in size_t.
    const size_t total_len = str_len + to_append.len;

    if (guf_str_capacity(str) < total_len) { // The capacity of the destination string is too small -> grow.
        str = guf_str_reserve(str, capacity_grow(total_len));
        GUF_ASSERT_RELEASE(str != NULL);
        GUF_ASSERT_RELEASE(guf_str_capacity(str) >= total_len);
    }

    char *dst_ptr = get_cstr(str);
    memcpy(dst_ptr + str_len, to_append.str, to_append.len);
    dst_ptr[total_len] = '\0';
    set_len(str, total_len);
    return str;
}
// Convenience wrapper: converts cstr_to_append with GUF_CSTR_TO_VIEW and forwards to guf_str_append().
guf_str *guf_str_append_cstr(guf_str *str, const char *cstr_to_append)
{
    const guf_str_view view = GUF_CSTR_TO_VIEW(cstr_to_append);
    return guf_str_append(str, view);
}
/*
    Reduces the capacity of str to its current length.
        - Short strings and strings whose capacity already equals their length are left alone.
        - A heap string that fits into GUF_STR_SSO_BUFCAP is moved back into the short buffer
          and its heap buffer is freed.
        - Otherwise the heap buffer is shrunk with realloc. If that fails, the
          allocation-error flag is set, handle_alloc_fail() is invoked, and str is returned
          with its old buffer.
    Always returns str.
*/
guf_str *guf_str_shrink_to_fit(guf_str *str)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    if (is_short(str)) {
        return str;
    }

    const size_t len = guf_str_len(str);
    GUF_ASSERT(str->data.heap.c_str);
    GUF_ASSERT(str->data.heap.capacity >= len);
    if (str->data.heap.capacity == len) {
        return str;
    }

    const size_t target_cap = len;
    GUF_ASSERT(len <= target_cap);

    if (target_cap > GUF_STR_SSO_BUFCAP) { // b) Long string: stays on the heap.
        char *shrunk = realloc(str->data.heap.c_str, target_cap + 1);
        GUF_ASSERT(shrunk);
        if (shrunk == NULL) {
            set_flag(str, GUF_STR_STATE_ALLOC_ERR);
            handle_alloc_fail(str);
            return str;
        }
        str->data.heap.c_str = shrunk;
        str->data.heap.capacity = target_cap;
        GUF_ASSERT(str->data.heap.c_str[len] == '\0');
        return str;
    }

    // a) Short string: move the chars into the short buffer, then release the heap buffer.
    char *heap_buf = str->data.heap.c_str;
    GUF_ASSERT(heap_buf);
    str->data.heap.c_str = NULL;
    set_flag(str, GUF_STR_STATE_SHORT);
    GUF_ASSERT(len < UCHAR_MAX);
    str->data.stack.len = (unsigned char)len;
    memcpy(str->data.stack.c_str, heap_buf, len);
    str->data.stack.c_str[len] = '\0';
    free(heap_buf);
    return str;
}
/*
    Removes the last char of str and returns it.
    Returns '\0' (and leaves str untouched) if str is empty.
*/
char guf_str_pop_back(guf_str *str)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    const size_t old_len = guf_str_len(str);
    if (old_len == 0) {
        return '\0';
    }

    char *tail = guf_str_at(str, old_len - 1);
    GUF_ASSERT(tail);
    const char removed = *tail;
    *tail = '\0'; // The removed slot becomes the new terminator.
    set_len(str, old_len - 1);
    return removed;
}
/*
    Removes the first char of str and returns it; the remaining chars (including the
    terminator) are shifted one position to the left.
    Returns '\0' (and leaves str untouched) if str is empty.
*/
char guf_str_pop_front(guf_str *str)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    const size_t old_len = guf_str_len(str);
    if (old_len == 0) {
        return '\0';
    }

    char *head = guf_str_at(str, 0);
    GUF_ASSERT(head);
    const char removed = *head;

    char *buf = get_cstr(str);
    // Moves buf[1..old_len] (the last one being the terminator) to buf[0..old_len - 1].
    memmove(buf, buf + 1, old_len);
    GUF_ASSERT_RELEASE(buf[old_len - 1] == '\0');
    set_len(str, old_len - 1);
    return removed;
}
// Non-modifying:
// The size (in chars) without the final null-terminator.
// Returns the stored length of str, read from data.stack.len or data.heap.len depending on the representation.
size_t guf_str_len(const guf_str *str)
{
    GUF_ASSERT_RELEASE(str);
    GUF_ASSERT_RELEASE(integrity_check(str));
    if (!is_short(str)) {
        GUF_ASSERT_RELEASE(str->data.heap.capacity > GUF_STR_SSO_BUFCAP);
        return str->data.heap.len;
    }
    return str->data.stack.len;
}
/*
    Returns how many chars str can hold: GUF_STR_SSO_BUFCAP for a short string,
    data.heap.capacity for a heap string.
*/
size_t guf_str_capacity(const guf_str *str)
{
    GUF_ASSERT_RELEASE(str);
    GUF_ASSERT_RELEASE(integrity_check(str));
    if (!is_short(str)) {
        // GUF_ASSERT(str->data.heap.capacity > GUF_STR_SSO_BUFCAP); // TODO: Not sure...
        return str->data.heap.capacity;
    }
    return GUF_STR_SSO_BUFCAP;
}
// Returns true if both views have the same length and identical bytes. Both .str pointers must be non-NULL.
bool guf_str_view_equal(guf_str_view a, guf_str_view b)
{
    GUF_ASSERT_RELEASE(a.str && b.str);
    const bool same_len = a.len == b.len;
    return same_len && memcmp(a.str, b.str, a.len) == 0;
}
bool guf_str_equal(const guf_str *a, const guf_str *b)
{
GUF_ASSERT_RELEASE(guf_str_is_valid(a) && guf_str_is_valid(b));
return guf_str_view_equal(GUF_STR_TO_VIEW(a), GUF_STR_TO_VIEW(b));
}
bool guf_str_equals_cstr(const guf_str *a, const char *c_str)
{
GUF_ASSERT_RELEASE(a && c_str);
GUF_ASSERT_RELEASE(guf_str_is_valid(a));
return guf_str_view_equal(GUF_STR_TO_VIEW(a), GUF_CSTR_TO_VIEW(c_str));
}
// Returns true if the (valid) string a has the same contents as the view b. a and b.str must be non-NULL.
bool guf_str_equals_strview(const guf_str *a, guf_str_view b)
{
    GUF_ASSERT_RELEASE(a && b.str);
    GUF_ASSERT_RELEASE(guf_str_is_valid(a));
    const guf_str_view lhs = GUF_STR_TO_VIEW(a);
    return guf_str_view_equal(lhs, b);
}
/*
    Three-way comparison of two guf_str_view objects, usable as a comparator for qsort etc.
    Both arguments must point to guf_str_view objects with non-NULL .str.

    Returns a value < 0, == 0 or > 0 if a orders before, equal to, or after b.
    The common prefix is compared byte-wise with memcmp; if it is identical, the shorter
    view orders first.

    Fix: the previous version returned `false` (i.e. 0, "equal") whenever the lengths
    differed, so views of different length compared as equal.
*/
int guf_str_view_cmp(const void *str_view_a, const void *str_view_b)
{ // For qsort etc.
    GUF_ASSERT_RELEASE(str_view_a && str_view_b);
    const guf_str_view *a = (const guf_str_view*)str_view_a;
    const guf_str_view *b = (const guf_str_view*)str_view_b;
    GUF_ASSERT_RELEASE(a->str && b->str);

    const size_t common_len = a->len < b->len ? a->len : b->len;
    const int prefix_cmp = memcmp(a->str, b->str, common_len);
    if (prefix_cmp != 0) {
        return prefix_cmp;
    }
    if (a->len == b->len) {
        return 0;
    }
    return a->len < b->len ? -1 : 1;
}
// Returns true if str currently uses the short (in-struct) representation.
bool guf_str_is_stack_allocated(const guf_str *str)
{
    GUF_ASSERT_RELEASE(str);
    const bool uses_short_buffer = is_short(str);
    return uses_short_buffer;
}
// Indexing operations:
// Public read-only access to the character buffer of str (forwards to get_const_cstr).
const char *guf_str_const_cstr(const guf_str *str)
{
    const char *buf = get_const_cstr(str);
    return buf;
}
// Public mutable access to the character buffer of str (forwards to get_cstr).
char *guf_str_cstr(guf_str *str)
{
    char *buf = get_cstr(str);
    return buf;
}
/*
    Returns a pointer to the char at index idx of str.
    Returns NULL if idx is not smaller than the length (this also trips GUF_ASSERT).
*/
char *guf_str_at(guf_str *str, size_t idx)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    const bool in_range = idx < guf_str_len(str);
    GUF_ASSERT(in_range);
    if (!in_range) {
        return NULL;
    }

    char *base = get_cstr(str);
    GUF_ASSERT(base != NULL);
    return &base[idx];
}
/*
    Returns a pointer to the last char of str. str must be valid and non-empty
    (both are enforced with GUF_ASSERT_RELEASE).

    Fix: the previous version asserted len < capacity. The capacity does not include the
    null-terminator (guf_str_reserve allocates capacity + 1 bytes and the integrity check
    accepts len <= capacity), so a completely full but valid string tripped the assertion.
*/
char *guf_str_back(guf_str *str)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    const size_t len = guf_str_len(str);
    GUF_ASSERT_RELEASE(len > 0);
    GUF_ASSERT_RELEASE(len <= guf_str_capacity(str));
    return guf_str_at(str, len - 1);
}
// Returns a pointer to the first char of str. str must be valid and non-empty (enforced with GUF_ASSERT_RELEASE).
char *guf_str_front(guf_str *str)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    const size_t num_chars = guf_str_len(str);
    GUF_ASSERT_RELEASE(num_chars > 0);
    return guf_str_at(str, 0);
}
// Destruction:
/*
    Releases the heap buffer of str (if it has one) and resets str to an empty short string.
    The guf_str object itself is not freed.
*/
void guf_str_free(guf_str *str)
{
    GUF_ASSERT(integrity_check(str));
    if (!is_short(str)) {
        // GUF_ASSERT_RELEASE(str->data.heap.capacity > GUF_STR_SSO_BUFCAP);
        free(str->data.heap.c_str); // free(NULL) is a no-op, so no guard is needed.
        str->data.heap.c_str = NULL;
        set_flag(str, GUF_STR_STATE_SHORT);
    } else {
        GUF_ASSERT_RELEASE(str->data.stack.len <= GUF_STR_SSO_BUFCAP);
    }
    // In both cases the string ends up as an empty short string.
    str->data.stack.len = 0;
    str->data.stack.c_str[0] = '\0';
}
// UTF-8
// Returns true if c lies in the 7-bit ASCII range (0..127), regardless of whether plain char is signed.
bool guf_str_char_is_ascii(char c)
{
    const unsigned char byte = (unsigned char)c;
    return (byte & 0x80u) == 0; // High bit clear <=> value is 0..127.
}
// Returns true if every char of str is 7-bit ASCII (an empty string counts as ASCII).
bool guf_str_is_ascii(const guf_str *str)
{
    const char *chars = get_const_cstr(str);
    const size_t num_chars = guf_str_len(str);
    size_t pos = 0;
    while (pos < num_chars) {
        if (!guf_str_char_is_ascii(chars[pos])) {
            return false;
        }
        ++pos;
    }
    GUF_ASSERT(chars[guf_str_len(str)] == '\0');
    return true;
}
// One UTF-8 encoded code point, as produced by guf_str_iterate_utf8().
typedef struct guf_str_codepoint_utf8 {
unsigned char num_bytes; // Number of bytes of the encoded code point (guf_str_iterate_utf8 sets 1 to 4).
unsigned char bytes[5]; // The encoded bytes; NOTE(review): presumably 4 bytes plus a terminating 0 - confirm.
bool valid; // false if guf_str_iterate_utf8 could not decode a code point.
} guf_str_codepoint_utf8;
// Returns true if cp is the end marker of an iteration: a valid, single-byte code point whose byte is '\0'.
bool guf_str_iter_done(const guf_str_codepoint_utf8 *cp)
{
    if (!cp->valid) {
        return false;
    }
    if (cp->num_bytes != 1) {
        return false;
    }
    return cp->bytes[0] == '\0';
}
/*
    Decodes the UTF-8 sequence that starts at byte index *idx of str.

    On success the returned code point has valid == true, and *idx is advanced past the
    sequence. The null-terminator at index len is returned as a valid one-byte code point
    (see guf_str_iter_done), after which *idx is len + 1.

    On failure valid == false is returned and *idx is left unchanged, so callers must
    check `valid` to avoid looping forever. Failure cases:
        - *idx is past the terminator
        - the byte at *idx is not a valid lead byte (e.g. a stray continuation byte)
        - the sequence would extend past the end of the string
        - a continuation byte is not of the form 0b10xx.xxxx
    Overlong encodings and surrogate code points are not checked.

    Fixes compared to the previous version:
        - Lead bytes were tested with `byte & mask` (non-zero) instead of
          `(byte & mask) == value`, so every non-ASCII byte was treated as the start of a
          four-byte sequence.
        - Continuation bytes were stored at cp.bytes[i + j] instead of cp.bytes[j], which
          writes outside the five-element array for any i > 3.
        - The truncation check compared against len - 1 (wrong bound, and underflows for
          len == 0).
        - *idx values beyond the terminator were read without a bounds check.
*/
guf_str_codepoint_utf8 guf_str_iterate_utf8(const guf_str *str, size_t *idx)
{
    GUF_ASSERT(idx);
    const char *c_str = get_const_cstr(str);
    const size_t len = guf_str_len(str);
    guf_str_codepoint_utf8 cp = {.num_bytes = 1, .bytes = {'\0', '\0', '\0', '\0', '\0'}, .valid = true};

    const unsigned char four_bytes_val = 240;   // 0b1111.0xxx
    const unsigned char four_bytes_mask = 248;  // 0b1111.1000
    const unsigned char three_bytes_val = 224;  // 0b1110.xxxx
    const unsigned char three_bytes_mask = 240; // 0b1111.0000
    const unsigned char two_bytes_val = 192;    // 0b110x.xxxx
    const unsigned char two_bytes_mask = 224;   // 0b1110.0000
    const unsigned char cont_val = 128;         // 0b10xx.xxxx
    const unsigned char cont_mask = 192;        // 0b1100.0000

    const size_t i = *idx;
    if (i > len) { // Index len (the terminator) is still readable, anything beyond is not.
        cp.valid = false;
        return cp;
    }

    const unsigned char lead = (unsigned char)c_str[i];
    if (guf_str_char_is_ascii(c_str[i])) {
        cp.num_bytes = 1;
        cp.bytes[0] = lead;
        *idx = i + 1;
        if (i == len) {
            GUF_ASSERT(c_str[i] == '\0');
        }
        return cp;
    }

    if ((lead & four_bytes_mask) == four_bytes_val) {
        cp.num_bytes = 4;
    } else if ((lead & three_bytes_mask) == three_bytes_val) {
        cp.num_bytes = 3;
    } else if ((lead & two_bytes_mask) == two_bytes_val) {
        cp.num_bytes = 2;
    } else { // Stray continuation byte or a byte that never occurs as a lead byte.
        cp.valid = false;
        return cp;
    }

    if (cp.num_bytes > len - i) { // Sequence would run past the last char (i <= len holds here).
        cp.valid = false;
        return cp;
    }

    cp.bytes[0] = lead;
    for (size_t j = 1; j < cp.num_bytes; ++j) {
        GUF_ASSERT(i + j < len);
        const unsigned char byte = (unsigned char)c_str[i + j];
        if ((byte & cont_mask) != cont_val) {
            cp.valid = false;
            return cp;
        }
        cp.bytes[j] = byte;
    }

    *idx = i + cp.num_bytes;
    return cp;
}
// Length without null-terminator.
size_t guf_str_len_utf8(const guf_str *str)
{
size_t idx = 0;
size_t n = 0;
for (guf_str_codepoint_utf8 cp = guf_str_iterate_utf8(str, &idx); !guf_str_iter_done(&cp); cp = guf_str_iterate_utf8(str, &idx)) {
++n;
}
assert(n >= 1);
return n - 1;
}

View File

@ -1,3 +1,8 @@
/*
is parametrized: no
NOTE: automatically includes/implements guf_utf8.h
*/
#if defined(GUF_STR_IMPL_STATIC)
#define GUF_STR_KWRDS static
#else

View File

@ -1,3 +1,7 @@
/*
is parametrized: no
*/
#ifndef GUF_STR_VIEW_TYPE_H
#define GUF_STR_VIEW_TYPE_H
#include <stddef.h>

View File

@ -1,3 +1,8 @@
/*
is parametrized: no
NOTE: don't include if you already use guf_str.h
*/
#if defined(GUF_UTF8_IMPL_STATIC)
#define GUF_UTF8_KWRDS static
#else

View File

@ -1,3 +1,7 @@
/*
is parametrized: no
*/
#ifndef GUF_UTILS_H
#define GUF_UTILS_H
#include "guf_assert.h"