|
|
|
|
@ -1,762 +0,0 @@
|
|
|
|
|
#include <string.h>
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
#include <assert.h>
|
|
|
|
|
#include <stdio.h>
|
|
|
|
|
|
|
|
|
|
#include "guf_common.h"
|
|
|
|
|
#include "guf_str.h"
|
|
|
|
|
|
|
|
|
|
// Growth policy for buffers: returns twice the given size.
// If doubling would overflow size_t, the size is returned unchanged, so the
// result is never smaller than the input.
static inline size_t capacity_grow(size_t size)
{
    const size_t max_size = (size_t)-1;
    if (size > max_size / 2) {
        return size; // size * 2 would wrap around.
    }
    return size * 2;
}
|
|
|
|
|
|
|
|
|
|
// Turns on the bit(s) of `flag` in the state field of `str`.
static inline void set_flag(guf_str *str, guf_str_state flag)
{
    GUF_ASSERT(str);
    str->state = str->state | flag;
}
|
|
|
|
|
|
|
|
|
|
// Clears the bit(s) of `flag` in the state field of `str`; other bits are kept.
static inline void unset_flag(guf_str *str, guf_str_state flag)
{
    GUF_ASSERT(str);
    str->state &= ~flag;
}
|
|
|
|
|
|
|
|
|
|
// Returns true if at least one bit of `flag` is set in the state field of `str`.
static inline bool has_state(const guf_str *str, guf_str_state flag)
{
    GUF_ASSERT(str);
    const bool is_set = (str->state & flag) != 0;
    return is_set;
}
|
|
|
|
|
|
|
|
|
|
static inline bool is_short(const guf_str *str)
|
|
|
|
|
{
|
|
|
|
|
GUF_ASSERT(str);
|
|
|
|
|
return has_state(str, GUF_STR_STATE_SHORT);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Stores `len` as the length of `str` in whichever representation is active.
// Only the length field is written; the character data and the null-terminator
// are the caller's responsibility.
static inline void set_len(guf_str *str, size_t len)
{
    GUF_ASSERT(str);
    if (is_short(str)) {
        // Short strings report GUF_STR_SSO_BUFCAP as their capacity, so the length
        // must not exceed it (the terminator needs the remaining byte).
        GUF_ASSERT(len <= GUF_STR_SSO_BUFCAP);
        GUF_ASSERT(len <= UCHAR_MAX); // The short length is stored as an unsigned char.
        str->data.stack.len = (unsigned char)len;
    } else {
        GUF_ASSERT(len <= str->data.heap.capacity);
        str->data.heap.len = len;
    }
}
|
|
|
|
|
|
|
|
|
|
// Returns a mutable pointer to the character buffer of `str`:
// the heap pointer for long strings, the inline buffer for short ones.
static inline char *get_cstr(guf_str *str)
{
    GUF_ASSERT(str);
    if (!is_short(str)) {
        GUF_ASSERT(str->data.heap.c_str);
        return str->data.heap.c_str;
    }
    GUF_ASSERT(str->data.stack.c_str);
    return str->data.stack.c_str;
}
|
|
|
|
|
|
|
|
|
|
static inline const char *get_const_cstr(const guf_str *str)
|
|
|
|
|
{
|
|
|
|
|
GUF_ASSERT(str);
|
|
|
|
|
if (is_short(str)) {
|
|
|
|
|
GUF_ASSERT(str->data.stack.c_str);
|
|
|
|
|
return str->data.stack.c_str;
|
|
|
|
|
} else {
|
|
|
|
|
GUF_ASSERT(str->data.heap.c_str);
|
|
|
|
|
return str->data.heap.c_str;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline bool integrity_check(const guf_str *str)
|
|
|
|
|
{
|
|
|
|
|
GUF_ASSERT(str);
|
|
|
|
|
bool good_len_cap = guf_str_len(str) <= guf_str_capacity(str);
|
|
|
|
|
|
|
|
|
|
GUF_ASSERT(good_len_cap);
|
|
|
|
|
if (!good_len_cap) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const char *c_str = get_const_cstr(str);
|
|
|
|
|
GUF_ASSERT(c_str);
|
|
|
|
|
if (!c_str) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
bool good_null_term = c_str[guf_str_len(str)] == '\0';
|
|
|
|
|
GUF_ASSERT(good_null_term);
|
|
|
|
|
|
|
|
|
|
return good_len_cap && c_str != NULL && good_null_term;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline bool handle_alloc_fail(const guf_str *str)
|
|
|
|
|
{
|
|
|
|
|
GUF_ASSERT(str);
|
|
|
|
|
bool good_alloc = !has_state(str, GUF_STR_STATE_ALLOC_ERR);
|
|
|
|
|
#ifdef GUF_STR_ABORT_ON_ALLOC_FAILURE
|
|
|
|
|
GUF_ASSERT_RELEASE(good_alloc)
|
|
|
|
|
#else
|
|
|
|
|
GUF_ASSERT(good_alloc);
|
|
|
|
|
#endif
|
|
|
|
|
return good_alloc;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Returns true if `str` is non-NULL, passes integrity_check() and has no
// allocation-error flag set. Each failed condition also fires GUF_ASSERT.
bool guf_str_is_valid(const guf_str *str)
{
    GUF_ASSERT(str);
    if (str == NULL) {
        return false;
    }

    const bool intact = integrity_check(str);
    GUF_ASSERT(intact);
    if (!intact) {
        return false;
    }

    const bool alloc_ok = handle_alloc_fail(str);
    GUF_ASSERT(alloc_ok);
    return alloc_ok;
}
|
|
|
|
|
|
|
|
|
|
const guf_str GUF_STR_UNINITIALISED_FAILED_ALLOC = {
|
|
|
|
|
.state = GUF_STR_STATE_INIT | GUF_STR_STATE_SHORT | GUF_STR_STATE_ALLOC_ERR,
|
|
|
|
|
.data.stack.len = 0,
|
|
|
|
|
.data.stack.c_str = {'\0'}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
const guf_str GUF_STR_UNINITIALISED = {
|
|
|
|
|
.state = GUF_STR_STATE_INIT | GUF_STR_STATE_SHORT,
|
|
|
|
|
.data.stack.len = 0,
|
|
|
|
|
.data.stack.c_str = {'\0'}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
bool guf_str_alloc_success(const guf_str *str)
|
|
|
|
|
{
|
|
|
|
|
bool fail = str->state & GUF_STR_STATE_ALLOC_ERR;
|
|
|
|
|
return !fail;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Creation:
|
|
|
|
|
|
|
|
|
|
// Ensures `str` can hold at least `new_cap` chars (plus the null-terminator).
//
// Returns `str` if the capacity was already sufficient or was grown successfully.
// Returns NULL if the allocation failed (or `new_cap + 1` is not representable);
// in that case GUF_STR_STATE_ALLOC_ERR is set on `str` and its previous contents
// are left untouched. The capacity is never reduced here.
guf_str *guf_str_reserve(guf_str *str, size_t new_cap)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));

    const size_t str_len = guf_str_len(str);
    const size_t current_cap = guf_str_capacity(str);

    if (new_cap <= current_cap) {
        return str;
    }

    if (new_cap == (size_t)-1) { // new_cap + 1 would wrap to 0; treat like a failed allocation.
        set_flag(str, GUF_STR_STATE_ALLOC_ERR);
        handle_alloc_fail(str);
        return NULL;
    }
    GUF_ASSERT(new_cap > GUF_STR_SSO_BUFCAP);
    GUF_ASSERT(new_cap + 1 > GUF_STR_SSO_BUFSIZE);

    if (is_short(str)) { // a) Was short string.
        // Allocate into a local first: data.heap and data.stack share storage, so
        // writing the heap fields before the copy would clobber the inline chars.
        char *heap_buf = calloc(new_cap + 1, sizeof heap_buf[0]);
        if (!heap_buf) {
            set_flag(str, GUF_STR_STATE_ALLOC_ERR);
            handle_alloc_fail(str);
            return NULL;
        }
        memcpy(heap_buf, str->data.stack.c_str, str_len + 1); // Includes the terminator.

        // Switch representation: the string is no longer short.
        unset_flag(str, GUF_STR_STATE_SHORT);
        str->data.heap.c_str = heap_buf;
        str->data.heap.capacity = new_cap;
        str->data.heap.len = str_len;
        return str;
    }

    // b) Was already heap allocated.
    GUF_ASSERT_RELEASE(str->data.heap.c_str);
    char *new_cstr = realloc(str->data.heap.c_str, new_cap + 1);
    if (!new_cstr) { // The old buffer is still valid and still owned by str.
        set_flag(str, GUF_STR_STATE_ALLOC_ERR);
        handle_alloc_fail(str);
        return NULL;
    }
    str->data.heap.c_str = new_cstr;
    str->data.heap.capacity = new_cap;
    return str;
}
|
|
|
|
|
|
|
|
|
|
// Creates a new string holding a copy of the `str_view.len` chars at `str_view.str`.
//
// Returns GUF_STR_UNINITIALISED if `str_view.str` is NULL (after firing GUF_ASSERT).
// If reserving memory fails, the string produced by the failed guf_str_reserve()
// call is returned as is (guf_str_reserve sets GUF_STR_STATE_ALLOC_ERR on failure).
guf_str guf_str_new(guf_str_view str_view)
{
    GUF_ASSERT(str_view.str);
    if (!str_view.str) {
        return GUF_STR_UNINITIALISED;
    }

    guf_str str = GUF_STR_UNINITIALISED;

    // Temporary debug; TODO: remove
    GUF_ASSERT_RELEASE(GUF_ARR_SIZE(str.data.stack.c_str) == GUF_STR_SSO_BUFSIZE);
    for (size_t i = 0; i < GUF_ARR_SIZE(str.data.stack.c_str); ++i) {
        GUF_ASSERT_RELEASE(str.data.stack.c_str[i] == '\0');
    }

    if (!guf_str_reserve(&str, str_view.len)) {
        return str;
    }
    // guf_str_reserve never shrinks, so the capacity may exceed the requested
    // length (e.g. for views that fit into the inline buffer).
    GUF_ASSERT_RELEASE(guf_str_capacity(&str) >= str_view.len);

    char *c_str = get_cstr(&str);
    memcpy(c_str, str_view.str, str_view.len);
    c_str[str_view.len] = '\0';
    set_len(&str, str_view.len); // Without this the string would still report length 0.

    GUF_ASSERT_RELEASE(guf_str_is_valid(&str));
    return str;
}
|
|
|
|
|
|
|
|
|
|
// Like guf_str_new(), but requests room for `extra_capacity` additional chars
// beyond the copied contents.
//
// Returns GUF_STR_UNINITIALISED if `str_view.str` is NULL (after firing GUF_ASSERT).
// If `str_view.len + extra_capacity` overflows size_t, the request is treated as a
// failed allocation and a copy of GUF_STR_UNINITIALISED_FAILED_ALLOC is returned.
// If reserving memory fails, the string produced by the failed guf_str_reserve()
// call is returned as is.
guf_str guf_str_new_with_extra_cap(guf_str_view str_view, size_t extra_capacity)
{
    GUF_ASSERT(str_view.str);
    if (!str_view.str) {
        return GUF_STR_UNINITIALISED;
    }

    if (extra_capacity > (size_t)-1 - str_view.len) { // Sum would wrap around.
        guf_str failed = GUF_STR_UNINITIALISED_FAILED_ALLOC;
        handle_alloc_fail(&failed);
        return failed;
    }

    guf_str str = GUF_STR_UNINITIALISED;

    // Temporary debug; TODO: remove
    GUF_ASSERT_RELEASE(GUF_ARR_SIZE(str.data.stack.c_str) == GUF_STR_SSO_BUFSIZE);
    for (size_t i = 0; i < GUF_ARR_SIZE(str.data.stack.c_str); ++i) {
        GUF_ASSERT_RELEASE(str.data.stack.c_str[i] == '\0');
    }

    const size_t capacity = str_view.len + extra_capacity;
    if (!guf_str_reserve(&str, capacity)) {
        return str;
    }
    // guf_str_reserve never shrinks, so the capacity may exceed the request.
    GUF_ASSERT_RELEASE(guf_str_capacity(&str) >= capacity);

    char *c_str = get_cstr(&str);
    memcpy(c_str, str_view.str, str_view.len);
    c_str[str_view.len] = '\0';
    set_len(&str, str_view.len); // Without this the string would still report length 0.

    GUF_ASSERT_RELEASE(guf_str_is_valid(&str));
    return str;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
guf_str guf_str_new_from_cstr(const char *c_str)
|
|
|
|
|
{
|
|
|
|
|
return guf_str_new(GUF_CSTR_TO_VIEW(c_str));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Initialises `*str` with a copy of `str_view` (see guf_str_new()).
// Returns `str` on success, NULL if the new string carries an allocation error.
guf_str *guf_str_init(guf_str *str, guf_str_view str_view)
{
    GUF_ASSERT_RELEASE(str);
    *str = guf_str_new(str_view);

    // handle_alloc_fail() returns true when the allocation is GOOD.
    const bool alloc_ok = handle_alloc_fail(str);
    if (!alloc_ok) {
        return NULL;
    }
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    return str;
}
|
|
|
|
|
|
|
|
|
|
// Initialises `*str` with a copy of the C string `c_str` (converted with
// GUF_CSTR_TO_VIEW and passed to guf_str_new()).
// Returns `str` on success, NULL if the new string carries an allocation error.
guf_str *guf_str_init_from_cstr(guf_str *str, const char* c_str)
{
    GUF_ASSERT_RELEASE(str);
    *str = guf_str_new(GUF_CSTR_TO_VIEW(c_str));

    // handle_alloc_fail() returns true when the allocation is GOOD.
    const bool alloc_ok = handle_alloc_fail(str);
    if (!alloc_ok) {
        return NULL;
    }
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    return str;
}
|
|
|
|
|
|
|
|
|
|
// Creates an empty string and then reserves room for `capacity` chars.
// If either step records an allocation error, the string is returned as is
// (with GUF_STR_STATE_ALLOC_ERR set), matching what guf_str_new() does when
// reserving fails.
guf_str guf_str_new_empty_with_capacity(size_t capacity)
{
    guf_str str = guf_str_new_from_cstr("");

    // handle_alloc_fail() returns true when the allocation is GOOD.
    if (!handle_alloc_fail(&str)) {
        return str;
    }
    if (!guf_str_reserve(&str, capacity)) {
        return str;
    }

    GUF_ASSERT_RELEASE(guf_str_is_valid(&str));
    return str;
}
|
|
|
|
|
|
|
|
|
|
// Initialises `*str` as an empty string with room for `capacity` chars
// (see guf_str_new_empty_with_capacity()).
// Returns `str` on success, NULL if the new string carries an allocation error.
guf_str *guf_str_init_empty_with_capacity(guf_str *str, size_t capacity)
{
    GUF_ASSERT_RELEASE(str);
    *str = guf_str_new_empty_with_capacity(capacity);

    // handle_alloc_fail() returns true when the allocation is GOOD.
    const bool alloc_ok = handle_alloc_fail(str);
    if (!alloc_ok) {
        return NULL;
    }
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    return str;
}
|
|
|
|
|
|
|
|
|
|
// Copying:
|
|
|
|
|
|
|
|
|
|
// Returns a newly created string holding a copy of up to `count` chars of `str`,
// starting at index `pos`. The copy is clamped to the end of `str`.
//
// An empty string is returned if `str` is empty, `count` is 0, `pos` is out of
// range, or `str.str` is NULL. If reserving memory fails, the string produced by
// the failed guf_str_reserve() call is returned as is.
guf_str guf_str_substr_cpy(guf_str_view str, size_t pos, size_t count)
{
    GUF_ASSERT(str.str);

    if (str.len == 0 || count == 0 || pos >= str.len || str.str == NULL) {
        return guf_str_new_from_cstr("");
    }

    guf_str substr = GUF_STR_UNINITIALISED;

    // pos < str.len holds here, so the subtraction cannot underflow. Comparing
    // against the remainder avoids the overflow of `pos + count` for huge counts.
    const size_t remaining = str.len - pos;
    const size_t substr_len = count > remaining ? remaining : count;
    GUF_ASSERT(substr_len >= 1);
    GUF_ASSERT(substr_len <= str.len);
    GUF_ASSERT(substr_len <= count);

    if (!guf_str_reserve(&substr, substr_len)) {
        return substr;
    }
    // guf_str_reserve never shrinks, so the capacity may exceed the request.
    GUF_ASSERT_RELEASE(guf_str_capacity(&substr) >= substr_len);

    char *c_str = get_cstr(&substr);
    memcpy(c_str, str.str + pos, substr_len);
    c_str[substr_len] = '\0';
    set_len(&substr, substr_len); // Without this the string would still report length 0.

    GUF_ASSERT_RELEASE(guf_str_is_valid(&substr));
    return substr;
}
|
|
|
|
|
|
|
|
|
|
// Returns a view of up to `count` chars of `str`, starting at index `pos`.
// The view is clamped to the end of `str` and refers to the same memory.
//
// If `str` is empty, `count` is 0, `pos` is out of range, or `str.str` is NULL,
// a zero-length view with the original `str.str` pointer is returned.
guf_str_view guf_str_substr_view(guf_str_view str, size_t pos, size_t count)
{
    GUF_ASSERT(str.str);

    if (str.len == 0 || count == 0 || pos >= str.len || str.str == NULL) {
        return (guf_str_view){.str = str.str, .len = 0};
    }

    // pos < str.len holds here, so the subtraction cannot underflow. Comparing
    // against the remainder avoids the overflow of `pos + count` for huge counts.
    const size_t remaining = str.len - pos;
    const size_t substr_len = count > remaining ? remaining : count;
    GUF_ASSERT(substr_len >= 1);
    GUF_ASSERT(substr_len <= str.len);

    return (guf_str_view){.str = str.str + pos, .len = substr_len};
}
|
|
|
|
|
|
|
|
|
|
// Modifying:
|
|
|
|
|
|
|
|
|
|
// Replaces the contents of `str`, in place, with up to `count` of its own chars
// starting at index `pos` (clamped to the end of the string). The capacity is
// not changed. Returns `str`.
//
// `str` is returned unchanged if it is empty, `count` is 0, or `pos` is out of range.
// NOTE(review): guf_str_substr_cpy() yields an empty string in those cases; confirm
// whether leaving the string untouched here is intended.
guf_str *guf_str_substr(guf_str* str, size_t pos, size_t count)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));
    const size_t len = guf_str_len(str);

    char *c_str = get_cstr(str);
    if (len == 0 || count == 0 || pos >= len || c_str == NULL) {
        return str;
    }

    // pos < len holds here, so the subtraction cannot underflow. Comparing against
    // the remainder avoids the overflow of `pos + count` for huge counts.
    const size_t remaining = len - pos;
    const size_t substr_len = count > remaining ? remaining : count;
    GUF_ASSERT(substr_len >= 1);
    GUF_ASSERT(substr_len <= len);
    GUF_ASSERT(pos + substr_len <= len);

    // Source and destination are the same buffer and may overlap -> memmove.
    // The same code works for the short and the heap representation.
    memmove(c_str, c_str + pos, substr_len);
    c_str[substr_len] = '\0'; // Terminate at the new length, not the old one.
    set_len(str, substr_len);
    return str;
}
|
|
|
|
|
|
|
|
|
|
// Appends the `to_append.len` chars at `to_append.str` to the end of `str`,
// growing the capacity if necessary. Returns `str`.
//
// A failed growth (guf_str_reserve() returning NULL) or a combined length that
// does not fit into size_t trips GUF_ASSERT_RELEASE.
//
// NOTE(review): `to_append.str` is read after guf_str_reserve() may have
// reallocated the buffer of `str`, so `to_append` should not point into `str`
// itself.
guf_str *guf_str_append(guf_str *str, guf_str_view to_append)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));

    if (to_append.len == 0) {
        return str;
    }
    GUF_ASSERT(to_append.str);

    const size_t str_len = guf_str_len(str);
    GUF_ASSERT_RELEASE(to_append.len <= (size_t)-1 - str_len); // Sum must not wrap around.
    const size_t total_len = str_len + to_append.len;

    if (guf_str_capacity(str) < total_len) { // The capacity of the destination string is too small -> grow.
        size_t grown_cap = capacity_grow(total_len);
        if (grown_cap < total_len) { // Growth computation wrapped around; fall back to the exact size.
            grown_cap = total_len;
        }
        str = guf_str_reserve(str, grown_cap);
        GUF_ASSERT_RELEASE(str != NULL);
        GUF_ASSERT_RELEASE(guf_str_capacity(str) >= total_len);
    }

    char *dst = get_cstr(str);
    memcpy(dst + str_len, to_append.str, to_append.len);
    dst[total_len] = '\0';
    set_len(str, total_len);
    return str;
}
|
|
|
|
|
|
|
|
|
|
// Appends a C string to `str` by converting it to a view with GUF_CSTR_TO_VIEW
// and forwarding to guf_str_append().
guf_str *guf_str_append_cstr(guf_str *str, const char *cstr_to_append)
{
    const guf_str_view suffix = GUF_CSTR_TO_VIEW(cstr_to_append);
    return guf_str_append(str, suffix);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Reduces the capacity of `str` to its current length. Always returns `str`.
//
// - Short strings and heap strings whose capacity already equals their length
//   are returned unchanged.
// - If the length fits into the inline buffer (<= GUF_STR_SSO_BUFCAP), the chars
//   are copied into it, the heap buffer is freed and the SHORT flag is set.
// - Otherwise the heap buffer is realloc'ed to length + 1 bytes. If realloc fails,
//   GUF_STR_STATE_ALLOC_ERR is set and the old buffer is kept.
guf_str *guf_str_shrink_to_fit(guf_str *str)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));

    if (is_short(str)) {
        return str;
    }

    const size_t len = guf_str_len(str);
    GUF_ASSERT(str->data.heap.c_str);
    GUF_ASSERT(str->data.heap.capacity >= len);

    if (str->data.heap.capacity == len) {
        return str;
    }

    const size_t target_cap = len;
    GUF_ASSERT(len <= target_cap);

    if (target_cap > GUF_STR_SSO_BUFCAP) { // b) Long string.
        char *shrunk = realloc(str->data.heap.c_str, target_cap + 1);
        GUF_ASSERT(shrunk);
        if (shrunk == NULL) {
            set_flag(str, GUF_STR_STATE_ALLOC_ERR);
            handle_alloc_fail(str);
            return str;
        }
        str->data.heap.c_str = shrunk;
        str->data.heap.capacity = target_cap;
        GUF_ASSERT(str->data.heap.c_str[len] == '\0');
        return str;
    }

    // a) Short string: move the chars into the inline buffer.
    // The heap pointer is saved first because the stack fields written below
    // belong to the same `data` member.
    char *heap_buf = str->data.heap.c_str;
    GUF_ASSERT(heap_buf);
    str->data.heap.c_str = NULL;
    set_flag(str, GUF_STR_STATE_SHORT);
    GUF_ASSERT(len < UCHAR_MAX);
    str->data.stack.len = (unsigned char)len;
    memcpy(str->data.stack.c_str, heap_buf, len);
    str->data.stack.c_str[len] = '\0';
    free(heap_buf);
    return str;
}
|
|
|
|
|
|
|
|
|
|
// Removes the last char of `str` and returns it.
// Returns '\0' (and leaves `str` unchanged) if the string is empty.
char guf_str_pop_back(guf_str *str)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));

    const size_t old_len = guf_str_len(str);
    if (old_len == 0) {
        return '\0';
    }

    char *tail = guf_str_at(str, old_len - 1);
    GUF_ASSERT(tail);
    const char removed = *tail;
    *tail = '\0'; // The removed position becomes the new terminator.
    set_len(str, old_len - 1);
    return removed;
}
|
|
|
|
|
|
|
|
|
|
// Removes the first char of `str` and returns it; the remaining chars are
// shifted one position to the left.
// Returns '\0' (and leaves `str` unchanged) if the string is empty.
char guf_str_pop_front(guf_str *str)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));

    const size_t old_len = guf_str_len(str);
    if (old_len == 0) {
        return '\0';
    }

    char *head = guf_str_at(str, 0);
    GUF_ASSERT(head);
    const char removed = *head;

    // Shift chars 1..old_len (the last one being the terminator) down by one.
    char *buf = get_cstr(str);
    memmove(buf, buf + 1, old_len);
    GUF_ASSERT_RELEASE(buf[old_len - 1] == '\0');

    set_len(str, old_len - 1);
    return removed;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Non-modifying:
|
|
|
|
|
|
|
|
|
|
// The size (in chars) without the final null-terminator.
|
|
|
|
|
size_t guf_str_len(const guf_str *str)
|
|
|
|
|
{
|
|
|
|
|
GUF_ASSERT_RELEASE(str);
|
|
|
|
|
GUF_ASSERT_RELEASE(integrity_check(str));
|
|
|
|
|
|
|
|
|
|
if (is_short(str)) {
|
|
|
|
|
return str->data.stack.len;
|
|
|
|
|
} else {
|
|
|
|
|
GUF_ASSERT_RELEASE(str->data.heap.capacity > GUF_STR_SSO_BUFCAP);
|
|
|
|
|
return str->data.heap.len;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
size_t guf_str_capacity(const guf_str *str)
|
|
|
|
|
{
|
|
|
|
|
GUF_ASSERT_RELEASE(str);
|
|
|
|
|
GUF_ASSERT_RELEASE(integrity_check(str));
|
|
|
|
|
|
|
|
|
|
if (is_short(str)) {
|
|
|
|
|
return GUF_STR_SSO_BUFCAP;
|
|
|
|
|
} else {
|
|
|
|
|
// GUF_ASSERT(str->data.heap.capacity > GUF_STR_SSO_BUFCAP); // TODO: Not sure...
|
|
|
|
|
return str->data.heap.capacity;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Returns true if both views have the same length and the same bytes.
// Both `a.str` and `b.str` must be non-NULL (release-asserted).
bool guf_str_view_equal(guf_str_view a, guf_str_view b)
{
    GUF_ASSERT_RELEASE(a.str && b.str);
    const bool same_len = a.len == b.len;
    // memcmp only runs when the lengths match.
    return same_len && memcmp(a.str, b.str, a.len) == 0;
}
|
|
|
|
|
|
|
|
|
|
bool guf_str_equal(const guf_str *a, const guf_str *b)
|
|
|
|
|
{
|
|
|
|
|
GUF_ASSERT_RELEASE(guf_str_is_valid(a) && guf_str_is_valid(b));
|
|
|
|
|
return guf_str_view_equal(GUF_STR_TO_VIEW(a), GUF_STR_TO_VIEW(b));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool guf_str_equals_cstr(const guf_str *a, const char *c_str)
|
|
|
|
|
{
|
|
|
|
|
GUF_ASSERT_RELEASE(a && c_str);
|
|
|
|
|
GUF_ASSERT_RELEASE(guf_str_is_valid(a));
|
|
|
|
|
return guf_str_view_equal(GUF_STR_TO_VIEW(a), GUF_CSTR_TO_VIEW(c_str));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Returns true if the contents of `a` equal the view `b`.
// `a` must be valid and `b.str` non-NULL (release-asserted).
bool guf_str_equals_strview(const guf_str *a, guf_str_view b)
{
    GUF_ASSERT_RELEASE(a && b.str);
    GUF_ASSERT_RELEASE(guf_str_is_valid(a));
    const guf_str_view lhs = GUF_STR_TO_VIEW(a);
    return guf_str_view_equal(lhs, b);
}
|
|
|
|
|
|
|
|
|
|
int guf_str_view_cmp(const void *str_view_a, const void *str_view_b)
|
|
|
|
|
{ // For qsort etc.
|
|
|
|
|
GUF_ASSERT_RELEASE(str_view_a && str_view_b);
|
|
|
|
|
const guf_str_view *a = (const guf_str_view*)str_view_a;
|
|
|
|
|
const guf_str_view *b = (const guf_str_view*)str_view_b;
|
|
|
|
|
GUF_ASSERT_RELEASE(a->str && b->str);
|
|
|
|
|
|
|
|
|
|
if (a->len != b->len) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
return memcmp(a->str, b->str, a->len);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Returns true if `str` currently uses the short (`data.stack`) representation,
// i.e. its GUF_STR_STATE_SHORT flag is set. `str` must be non-NULL.
bool guf_str_is_stack_allocated(const guf_str *str)
{
    GUF_ASSERT_RELEASE(str);
    const bool uses_inline_buffer = is_short(str);
    return uses_inline_buffer;
}
|
|
|
|
|
|
|
|
|
|
// Indexing operations:
|
|
|
|
|
|
|
|
|
|
// Public read-only access to the character buffer of `str` (see get_const_cstr()).
const char *guf_str_const_cstr(const guf_str *str)
{
    const char *buf = get_const_cstr(str);
    return buf;
}
|
|
|
|
|
|
|
|
|
|
// Public mutable access to the character buffer of `str` (see get_cstr()).
char *guf_str_cstr(guf_str *str)
{
    char *buf = get_cstr(str);
    return buf;
}
|
|
|
|
|
|
|
|
|
|
// Returns a pointer to the char at index `idx` of `str`.
// Returns NULL (after firing GUF_ASSERT) if `idx` is not less than the length.
// `str` must be valid (release-asserted).
char *guf_str_at(guf_str *str, size_t idx)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));

    const bool in_range = idx < guf_str_len(str);
    GUF_ASSERT(in_range);
    if (!in_range) {
        return NULL;
    }

    char *buf = get_cstr(str);
    GUF_ASSERT(buf != NULL);
    return &buf[idx];
}
|
|
|
|
|
|
|
|
|
|
// Returns a pointer to the last char of `str`.
// `str` must be valid and non-empty (both release-asserted).
char *guf_str_back(guf_str *str)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));

    const size_t len = guf_str_len(str);
    GUF_ASSERT_RELEASE(len > 0);
    // A completely full string (len == capacity) is valid, e.g. after
    // guf_str_shrink_to_fit(), so equality must be allowed here.
    GUF_ASSERT_RELEASE(len <= guf_str_capacity(str));
    return guf_str_at(str, len - 1);
}
|
|
|
|
|
|
|
|
|
|
// Returns a pointer to the first char of `str`.
// `str` must be valid and non-empty (both release-asserted).
char *guf_str_front(guf_str *str)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));

    const size_t n_chars = guf_str_len(str);
    GUF_ASSERT_RELEASE(n_chars > 0);

    char *first = guf_str_at(str, 0);
    return first;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Destruction:
|
|
|
|
|
|
|
|
|
|
// Releases the heap buffer of `str` (if it has one) and resets it to an empty
// short string. Short strings are simply reset to length 0.
// Only the SHORT flag is set here; no other state flag is modified.
void guf_str_free(guf_str *str)
{
    GUF_ASSERT(integrity_check(str));

    if (is_short(str)) {
        GUF_ASSERT_RELEASE(str->data.stack.len <= GUF_STR_SSO_BUFCAP);
    } else {
        // GUF_ASSERT_RELEASE(str->data.heap.capacity > GUF_STR_SSO_BUFCAP);
        free(str->data.heap.c_str); // free(NULL) is a no-op.
        str->data.heap.c_str = NULL;
        set_flag(str, GUF_STR_STATE_SHORT);
    }

    // Common tail: leave an empty, null-terminated inline buffer behind.
    str->data.stack.len = 0;
    str->data.stack.c_str[0] = '\0';
}
|
|
|
|
|
|
|
|
|
|
// UTF-8
|
|
|
|
|
|
|
|
|
|
// Returns true if `c` is in the ASCII range 0..127.
// The value is inspected as an unsigned byte so the result does not depend on
// whether plain char is signed.
bool guf_str_char_is_ascii(char c)
{
    const unsigned char byte = (unsigned char)c;
    return byte < 128;
}
|
|
|
|
|
|
|
|
|
|
// Returns true if every char of `str` is in the ASCII range (see
// guf_str_char_is_ascii()); an empty string yields true.
bool guf_str_is_ascii(const guf_str *str)
{
    const char *chars = get_const_cstr(str);
    const size_t n_chars = guf_str_len(str);

    size_t pos = 0;
    while (pos < n_chars) {
        if (!guf_str_char_is_ascii(chars[pos])) {
            return false;
        }
        ++pos;
    }
    GUF_ASSERT(chars[n_chars] == '\0');
    return true;
}
|
|
|
|
|
|
|
|
|
|
// Result of one step of guf_str_iterate_utf8().
struct guf_str_codepoint_utf8 {
    unsigned char num_bytes; // Number of bytes in the encoded sequence.
    unsigned char bytes[5];  // The encoded bytes, starting at index 0; unused entries are '\0'.
    bool valid;              // false if the sequence could not be decoded.
};
typedef struct guf_str_codepoint_utf8 guf_str_codepoint_utf8;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bool guf_str_iter_done(const guf_str_codepoint_utf8 *cp)
|
|
|
|
|
{
|
|
|
|
|
return cp->valid && cp->num_bytes == 1 && cp->bytes[0] == '\0';
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
guf_str_codepoint_utf8 guf_str_iterate_utf8(const guf_str *str, size_t *idx)
|
|
|
|
|
{
|
|
|
|
|
GUF_ASSERT(idx);
|
|
|
|
|
const char *c_str = get_const_cstr(str);
|
|
|
|
|
size_t len = guf_str_len(str);
|
|
|
|
|
|
|
|
|
|
guf_str_codepoint_utf8 cp = {.num_bytes = 1, .bytes = {'\0', '\0', '\0', '\0', '\0'}, .valid = true};
|
|
|
|
|
|
|
|
|
|
const unsigned char four_bytes_val = 240; // 0b1111.0xxx
|
|
|
|
|
const unsigned char four_bytes_mask = 248; // 0b1111.1000
|
|
|
|
|
|
|
|
|
|
const unsigned char three_bytes_val = 224; // 0b1110.xxxx
|
|
|
|
|
const unsigned char three_bytes_mask = 240; // 0b1111.0000
|
|
|
|
|
|
|
|
|
|
const unsigned char two_bytes_val = 192; // 0b110x.xxxx
|
|
|
|
|
const unsigned char two_bytes_mask = 224 ; // 0b1110.0000
|
|
|
|
|
|
|
|
|
|
size_t i = *idx;
|
|
|
|
|
if (guf_str_char_is_ascii(c_str[i])) {
|
|
|
|
|
cp.num_bytes = 1;
|
|
|
|
|
cp.bytes[0] = c_str[i];
|
|
|
|
|
*idx = i + 1;
|
|
|
|
|
if (i == len) {
|
|
|
|
|
GUF_ASSERT(c_str[i] == '\0');
|
|
|
|
|
}
|
|
|
|
|
return cp;
|
|
|
|
|
}
|
|
|
|
|
else if ((unsigned char)c_str[i] & four_bytes_mask) {
|
|
|
|
|
cp.num_bytes = 4;
|
|
|
|
|
if (i + cp.num_bytes >= len - 1) {
|
|
|
|
|
cp.valid = false;
|
|
|
|
|
return cp;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if ((unsigned char)c_str[i] & three_bytes_mask) {
|
|
|
|
|
cp.num_bytes = 3;
|
|
|
|
|
if (i + cp.num_bytes >= len - 1) {
|
|
|
|
|
cp.valid = false;
|
|
|
|
|
return cp;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if ((unsigned char)c_str[i] & two_bytes_mask) {
|
|
|
|
|
cp.num_bytes = 2;
|
|
|
|
|
if (i + cp.num_bytes >= len - 1) {
|
|
|
|
|
cp.valid = false;
|
|
|
|
|
return cp;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
cp.valid = false;
|
|
|
|
|
return cp;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
cp.bytes[0] = c_str[i];
|
|
|
|
|
for (size_t j = 1; j < cp.num_bytes; ++j) {
|
|
|
|
|
size_t id = i + j;
|
|
|
|
|
assert(id < len);
|
|
|
|
|
unsigned char byte = c_str[id];
|
|
|
|
|
if (byte >= 128 && byte < 192) { // 0b10xx.xxxx
|
|
|
|
|
cp.bytes[id] = byte;
|
|
|
|
|
} else {
|
|
|
|
|
cp.valid = false;
|
|
|
|
|
return cp;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
*idx = i + cp.num_bytes;
|
|
|
|
|
return cp;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Length without null-terminator.
|
|
|
|
|
size_t guf_str_len_utf8(const guf_str *str)
|
|
|
|
|
{
|
|
|
|
|
size_t idx = 0;
|
|
|
|
|
size_t n = 0;
|
|
|
|
|
|
|
|
|
|
for (guf_str_codepoint_utf8 cp = guf_str_iterate_utf8(str, &idx); !guf_str_iter_done(&cp); cp = guf_str_iterate_utf8(str, &idx)) {
|
|
|
|
|
++n;
|
|
|
|
|
}
|
|
|
|
|
assert(n >= 1);
|
|
|
|
|
return n - 1;
|
|
|
|
|
}
|