Add guf_str_substr

This commit is contained in:
jun 2025-03-28 20:26:43 +01:00
parent 9e0cd79ca0
commit 461006746e
2 changed files with 216 additions and 32 deletions

View File

@ -498,7 +498,7 @@ GUF_DBUF_KWRDS void GUF_CAT(GUF_DBUF_NAME, _try_grow_if_full)(GUF_DBUF_NAME *dbu
guf_err_set_if_not_null(err, GUF_ERR_NONE);
}
static inline bool GUF_CAT(GUF_DBUF_NAME, _copy_opt_available)(guf_cpy_opt cpy_opt)
static inline bool GUF_CAT(GUF_DBUF_NAME, _copy_opt_available_)(guf_cpy_opt cpy_opt)
{
if (cpy_opt == GUF_CPY_DEEP) {
#ifdef GUF_T_COPY
@ -541,8 +541,8 @@ GUF_DBUF_KWRDS GUF_T *GUF_CAT(GUF_DBUF_NAME, _try_insert)(GUF_DBUF_NAME *dbuf, G
GUF_T *dst = dbuf->data + idx;
if (!GUF_CAT(GUF_DBUF_NAME, _copy_opt_available)(cpy_opt)) {
guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, GUF_ERR_MSG("in function " GUF_STRINGIFY(GUF_CAT(GUF_DBUF_NAME, _copy_opt_available)) ": cpy_opt unavailable"));
if (!GUF_CAT(GUF_DBUF_NAME, _copy_opt_available_)(cpy_opt)) {
guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, GUF_ERR_MSG("in function " GUF_STRINGIFY(GUF_CAT(GUF_DBUF_NAME, _copy_opt_available_)) ": cpy_opt unavailable"));
return NULL;
} else if (cpy_opt == GUF_CPY_DEEP) {
#ifdef GUF_T_COPY

View File

@ -55,16 +55,20 @@ typedef struct guf_str {
#define GUF_CSTR_LIT_TO_VIEW(CSTR) ((guf_str_view){.str = (CSTR), .len = (ptrdiff_t)sizeof((CSTR)) - 1})
#define GUF_STR_TO_VIEW(GUF_STR_PTR) ((guf_str_view){.str = guf_str_const_cstr((GUF_STR_PTR)), .len = (ptrdiff_t)guf_str_len((GUF_STR_PTR))})
#define GUF_CSTR_TO_READONLY_STR(CSTR) ((guf_str){.allocator = NULL, .data.lng.c_str = (CSTR), .data.lng.size = strlen(CSTR) + 1, .data.lng.capacity = 0})
#define GUF_STR_UNINITIALISED (guf_str){.allocator = NULL, .data.shrt.size = 0, .data.shrt.c_str[0] = '\0'}
#ifdef __cplusplus
// Standard C++ does not have compound literals like C99...
#define GUF_CSTR_TO_VIEW_CPP(CSTR) guf_str_view{.str = (CSTR), .len = (ptrdiff_t)strlen(CSTR)}
#define GUF_CSTR_LIT_TO_VIEW_CPP(CSTR) guf_str_view{.str = (CSTR), .len = (ptrdiff_t)sizeof(CSTR) - 1}
#define GUF_STR_UNINITIALISED_CPP guf_str{.allocator = NULL, .data.shrt.size = 0, .data.shrt.c_str[0] = '\0'}
#endif
// guf_str_view:
GUF_STR_KWRDS bool guf_str_view_is_valid(guf_str_view sv);
GUF_STR_KWRDS guf_str guf_str_substr_cpy(guf_str_view str, ptrdiff_t pos, size_t count); // not necessary
GUF_STR_KWRDS guf_str_view guf_substr_view(guf_str_view str, ptrdiff_t pos, ptrdiff_t count);
GUF_STR_KWRDS guf_str_view guf_str_view_substr(guf_str_view str, ptrdiff_t pos, ptrdiff_t count);
GUF_STR_KWRDS guf_hash_size_t guf_str_view_hash(const guf_str_view *sv);
GUF_STR_KWRDS uint64_t guf_str_view_hash64(const guf_str_view *sv);
@ -98,11 +102,30 @@ GUF_STR_KWRDS guf_str *guf_str_move(guf_str *dst, guf_str *src, void *ctx);
GUF_STR_KWRDS bool guf_str_equal(const guf_str *a, const guf_str *b);
GUF_STR_KWRDS int guf_str_cmp(const guf_str *a, const guf_str *b);
// TODO:
// DONE:
GUF_STR_KWRDS char *guf_str_try_at(guf_str *str, ptrdiff_t idx, guf_err *err);
GUF_STR_KWRDS char *guf_str_at(guf_str *str, ptrdiff_t idx);
GUF_STR_KWRDS char *guf_str_try_back(guf_str *str, guf_err *err);
GUF_STR_KWRDS char *guf_str_back(guf_str *str);
GUF_STR_KWRDS char *guf_str_try_front(guf_str *str, guf_err *err);
GUF_STR_KWRDS char *guf_str_front(guf_str *str);
GUF_STR_KWRDS char guf_str_try_at_cpy(const guf_str *str, ptrdiff_t idx, guf_err *err);
GUF_STR_KWRDS char guf_str_at_cpy(const guf_str *str, ptrdiff_t idx);
GUF_STR_KWRDS char guf_str_try_back_cpy(const guf_str *str, guf_err *err);
GUF_STR_KWRDS char guf_str_back_cpy(const guf_str *str);
GUF_STR_KWRDS char guf_str_try_front_cpy(const guf_str *str, guf_err *err);
GUF_STR_KWRDS char guf_str_front_cpy(const guf_str *str);
// DONE:
// Shrink str in-place to its substring starting at pos with at most count chars (clamped to the end of str). Constant time if pos == 0; otherwise the substring's chars are moved to the beginning of str, i.e. linear time.
GUF_STR_KWRDS guf_str *guf_str_try_substr(guf_str *str, ptrdiff_t pos, ptrdiff_t count, guf_err *err);
GUF_STR_KWRDS guf_str *guf_str_substr(guf_str *str, ptrdiff_t pos, ptrdiff_t count);
// TODO:
GUF_STR_KWRDS char guf_str_pop_back(guf_str *str);
GUF_STR_KWRDS char guf_str_pop_front(guf_str *str);
// DONE:
GUF_STR_KWRDS guf_str *guf_str_try_append_char(guf_str *str, char c, ptrdiff_t times, guf_err *err);
GUF_STR_KWRDS guf_str *guf_str_append_char(guf_str *str, char c, ptrdiff_t times);
@ -115,10 +138,6 @@ GUF_STR_KWRDS guf_str *guf_str_append(guf_str *str, guf_str_view sv);
GUF_STR_KWRDS guf_str *guf_str_try_append_cstr(guf_str *str, const char *c_str, guf_err *err);
GUF_STR_KWRDS guf_str *guf_str_append_cstr(guf_str *str, const char *c_str);
// TODO:
GUF_STR_KWRDS char guf_str_pop_back(guf_str *str);
GUF_STR_KWRDS char guf_str_pop_front(guf_str *str);
// DONE:
GUF_STR_KWRDS guf_str *guf_str_try_reserve(guf_str *str, ptrdiff_t min_capacity, guf_err *err);
GUF_STR_KWRDS guf_str *guf_str_reserve(guf_str *str, ptrdiff_t min_capacity);
@ -328,7 +347,7 @@ GUF_STR_KWRDS guf_str *guf_str_try_reserve(guf_str *str, ptrdiff_t new_cap_min,
}
if (new_cap_min >= PTRDIFF_MAX - 1) {
guf_err_set_or_panic(err, GUF_ERR_INT_OVERFLOW, "in guf_str_try_reserve: new_cap_min >= PTRDIFF_MAX - 1");
guf_err_set_or_panic(err, GUF_ERR_INT_OVERFLOW, GUF_ERR_MSG("in guf_str_try_reserve: new_cap_min >= PTRDIFF_MAX - 1"));
return NULL;
}
@ -358,7 +377,7 @@ GUF_STR_KWRDS guf_str *guf_str_try_reserve(guf_str *str, ptrdiff_t new_cap_min,
if (guf_str_is_short_internal_(str)) { // a.) Was short string -> need initial allocation.
char *c_str_new = str->allocator->alloc(new_cap_min_with_null, str->allocator->ctx);
if (!c_str_new) {
guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, "in guf_str_try_reserve: Initial allocation failed.");
guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, GUF_ERR_MSG("in guf_str_try_reserve: Initial allocation failed."));
return NULL;
}
memcpy(c_str_new, str->data.shrt.c_str, len_with_null);
@ -367,7 +386,7 @@ GUF_STR_KWRDS guf_str *guf_str_try_reserve(guf_str *str, ptrdiff_t new_cap_min,
} else { // b) Was long string -> need re-allocation
char *c_str_new = str->allocator->realloc(str->data.lng.c_str, old_cap_with_null, new_cap_min_with_null, str->allocator->ctx);
if (!c_str_new) {
guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, "in guf_str_try_reserve: re-allocation failed.");
guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, GUF_ERR_MSG("in guf_str_try_reserve: re-allocation failed."));
return NULL;
}
str->data.lng.c_str = c_str_new;
@ -409,11 +428,12 @@ GUF_STR_KWRDS char *guf_str_try_get_cstr(guf_str *str, guf_err *err)
{
GUF_ASSERT(guf_str_is_valid(str));
if (guf_str_is_readonly(str)) {
guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, "in guf_str_try_get_cstr: cannot return non-const char pointer because str is readonly");
guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, GUF_ERR_MSG("in guf_str_try_get_cstr: cannot return non-const char pointer because str is readonly"));
return NULL;
}
char *c_str = guf_str_get_cstr_internal_(str);
GUF_ASSERT(c_str);
guf_err_set_if_not_null(err, GUF_ERR_NONE);
return c_str;
}
@ -438,8 +458,7 @@ static void guf_str_set_len_internal_(guf_str *str, size_t len)
// Return a guf_str in the uninitialised state: allocator == NULL and an empty short string (size 0, c_str[0] == '\0').
GUF_STR_KWRDS guf_str guf_str_new_uninitialised(void)
{
guf_str str = {.allocator = NULL, .data.shrt.size = 0, .data.shrt.c_str[0] = '\0'};
return str;
return GUF_STR_UNINITIALISED;
}
GUF_STR_KWRDS bool guf_str_is_uninit(const guf_str *str)
@ -462,15 +481,15 @@ GUF_STR_KWRDS guf_str *guf_str_init_empty(guf_str *str, guf_allocator *allocator
GUF_STR_KWRDS guf_str *guf_str_try_init(guf_str *str, guf_str_view str_view, guf_allocator *alloc, guf_err *err)
{
if (!str) {
guf_err_set_or_panic(err, GUF_ERR_NULL_PTR, "in guf_str_try_init: str is NULL");
guf_err_set_or_panic(err, GUF_ERR_NULL_PTR, GUF_ERR_MSG("in guf_str_try_init: str is NULL"));
return NULL;
} else if (!alloc || !alloc->alloc || !alloc->realloc || !alloc->free) {
guf_err_set_or_panic(err, GUF_ERR_NULL_PTR, "in guf_str_try_init: alloc (or allocs function pointers) is/are NULL");
guf_err_set_or_panic(err, GUF_ERR_NULL_PTR, GUF_ERR_MSG("in guf_str_try_init: alloc (or allocs function pointers) is/are NULL"));
return NULL;
}
if (!guf_str_view_is_valid(str_view)) {
guf_err_set_or_panic(err, GUF_ERR_NULL_PTR, "in guf_str_try_init: invalid str_view");
guf_err_set_or_panic(err, GUF_ERR_NULL_PTR, GUF_ERR_MSG("in guf_str_try_init: invalid str_view"));
return NULL;
}
@ -485,7 +504,7 @@ GUF_STR_KWRDS guf_str *guf_str_try_init(guf_str *str, guf_str_view str_view, guf
guf_str_try_reserve(str, str_view.len, err);
if (err && *err != GUF_ERR_NONE) {
guf_panic(*err, "in guf_str_try_init: Initial allocation failed");
guf_panic(*err, GUF_ERR_MSG("in guf_str_try_init: Initial allocation failed"));
return NULL;
}
GUF_ASSERT(guf_str_len_internal_(str) == 0);
@ -513,7 +532,7 @@ GUF_STR_KWRDS guf_str guf_str_try_new(guf_str_view str_view, guf_allocator *allo
guf_str str = guf_str_new_uninitialised();
guf_str_try_init(&str, str_view, alloc, err);
if (err && *err != GUF_ERR_NONE) {
guf_err_set_or_panic(err, *err, "in guf_str_try_new: failed init");
guf_err_set_or_panic(err, *err, GUF_ERR_MSG("in guf_str_try_new: failed init"));
return guf_str_new_uninitialised();
} else {
GUF_ASSERT(!guf_str_is_uninit(&str));
@ -521,6 +540,126 @@ GUF_STR_KWRDS guf_str guf_str_try_new(guf_str_view str_view, guf_allocator *allo
}
}
// Return a pointer to the char at index idx of str, or NULL on failure.
//
// Failure cases:
//   - idx < 0 or idx >= guf_str_len(str): reported as GUF_ERR_IDX_RANGE.
//   - guf_str_try_get_cstr() fails (it rejects readonly strings): the error set by that call is left in *err.
//
// On success, *err is set to GUF_ERR_NONE (if err is not NULL).
GUF_STR_KWRDS char *guf_str_try_at(guf_str *str, ptrdiff_t idx, guf_err *err)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));

    const ptrdiff_t len = guf_str_len(str);
    if (idx < 0) {
        guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_at: idx < 0"));
        return NULL;
    }
    if (idx >= len) {
        guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_at: idx out of range (idx >= len)"));
        return NULL;
    }

    char *c_str = guf_str_try_get_cstr(str, err);
    // Check the returned pointer as well as *err: when err == NULL there is no error code to
    // inspect, and computing (NULL + idx) must be avoided even if the panic handler returns.
    if (!c_str || (err && *err != GUF_ERR_NONE)) {
        return NULL;
    }

    guf_err_set_if_not_null(err, GUF_ERR_NONE);
    return c_str + idx;
}
// Same as guf_str_try_at(), but without an error out-parameter (err == NULL is passed).
GUF_STR_KWRDS char *guf_str_at(guf_str *str, ptrdiff_t idx)
{
    char *const ch_ptr = guf_str_try_at(str, idx, NULL);
    return ch_ptr;
}
// Return a pointer to the last char of str, or NULL on failure.
// An empty string is reported as GUF_ERR_IDX_RANGE; all other outcomes come from guf_str_try_at().
GUF_STR_KWRDS char *guf_str_try_back(guf_str *str, guf_err *err)
{
    GUF_ASSERT(guf_str_is_valid(str));

    const ptrdiff_t n_chars = guf_str_len(str);
    if (n_chars != 0) {
        return guf_str_try_at(str, n_chars - 1, err);
    }

    guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_back: len == 0"));
    return NULL;
}
// Same as guf_str_try_back(), but without an error out-parameter (err == NULL is passed).
GUF_STR_KWRDS char *guf_str_back(guf_str *str)
{
    char *const last_ch = guf_str_try_back(str, NULL);
    return last_ch;
}
// Return a pointer to the first char of str, or NULL on failure.
// An empty string is reported as GUF_ERR_IDX_RANGE; all other outcomes come from guf_str_try_at().
GUF_STR_KWRDS char *guf_str_try_front(guf_str *str, guf_err *err)
{
    GUF_ASSERT(guf_str_is_valid(str));

    const ptrdiff_t n_chars = guf_str_len(str);
    if (n_chars != 0) {
        return guf_str_try_at(str, 0, err);
    }

    guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_front: len == 0"));
    return NULL;
}
// Same as guf_str_try_front(), but without an error out-parameter (err == NULL is passed).
GUF_STR_KWRDS char *guf_str_front(guf_str *str)
{
    char *const first_ch = guf_str_try_front(str, NULL);
    return first_ch;
}
// Return a copy of the char at index idx of str (works on const strings, unlike guf_str_try_at()).
//
// On failure (idx < 0 or idx >= guf_str_len(str)), GUF_ERR_IDX_RANGE is reported and '\0' is returned.
// Since '\0' is returned on failure, callers that need to detect failure must inspect *err.
// On success, *err is set to GUF_ERR_NONE (if err is not NULL).
GUF_STR_KWRDS char guf_str_try_at_cpy(const guf_str *str, ptrdiff_t idx, guf_err *err)
{
    // Validate str before querying its length, mirroring guf_str_try_at().
    GUF_ASSERT_RELEASE(guf_str_is_valid(str));

    const ptrdiff_t len = guf_str_len(str);
    if (idx < 0) {
        guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_at_cpy: idx < 0"));
        return '\0';
    }
    if (idx >= len) {
        guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_at_cpy: idx out of range (idx >= len)"));
        return '\0';
    }

    const char *c_str = guf_str_const_cstr(str);
    GUF_ASSERT(c_str);
    guf_err_set_if_not_null(err, GUF_ERR_NONE);
    return c_str[idx];
}
// Same as guf_str_try_at_cpy(), but without an error out-parameter (err == NULL is passed).
GUF_STR_KWRDS char guf_str_at_cpy(const guf_str *str, ptrdiff_t idx)
{
    const char ch = guf_str_try_at_cpy(str, idx, NULL);
    return ch;
}
// Return a copy of the last char of str.
// An empty string is reported as GUF_ERR_IDX_RANGE and yields '\0'; otherwise defers to guf_str_try_at_cpy().
GUF_STR_KWRDS char guf_str_try_back_cpy(const guf_str *str, guf_err *err)
{
    GUF_ASSERT(guf_str_is_valid(str));

    const ptrdiff_t n_chars = guf_str_len(str);
    if (n_chars != 0) {
        return guf_str_try_at_cpy(str, n_chars - 1, err);
    }

    guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_back_cpy: len == 0"));
    return '\0';
}
// Same as guf_str_try_back_cpy(), but without an error out-parameter (err == NULL is passed).
GUF_STR_KWRDS char guf_str_back_cpy(const guf_str *str)
{
    const char last_ch = guf_str_try_back_cpy(str, NULL);
    return last_ch;
}
// Return a copy of the first char of str.
// An empty string is reported as GUF_ERR_IDX_RANGE and yields '\0'; otherwise defers to guf_str_try_at_cpy().
GUF_STR_KWRDS char guf_str_try_front_cpy(const guf_str *str, guf_err *err)
{
    GUF_ASSERT(guf_str_is_valid(str));

    const ptrdiff_t n_chars = guf_str_len(str);
    if (n_chars != 0) {
        return guf_str_try_at_cpy(str, 0, err);
    }

    guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_front_cpy: len == 0"));
    return '\0';
}
// Same as guf_str_try_front_cpy(), but without an error out-parameter (err == NULL is passed).
GUF_STR_KWRDS char guf_str_front_cpy(const guf_str *str)
{
    const char first_ch = guf_str_try_front_cpy(str, NULL);
    return first_ch;
}
GUF_STR_KWRDS void guf_str_free(guf_str *str, void *ctx)
{
(void)ctx;
@ -792,6 +931,50 @@ GUF_STR_KWRDS guf_str *guf_str_append_cstr(guf_str *str, const char *c_str)
return guf_str_try_append_cstr(str, c_str, NULL);
}
// Shrink str in-place to the substring that starts at pos and holds at most count chars
// (count is clamped so the substring never extends past the end of str).
//
// Returns str on success and NULL on failure.
// Failure cases:
//   - pos < 0 or pos >= guf_str_len(str): GUF_ERR_IDX_RANGE (note: this includes pos == 0 on an empty string).
//   - count < 0: GUF_ERR_IDX_RANGE.
//   - guf_str_try_get_cstr() fails (it rejects readonly strings): its error code is kept.
//
// On success, *err is set to GUF_ERR_NONE (if err is not NULL).
GUF_STR_KWRDS guf_str *guf_str_try_substr(guf_str *str, ptrdiff_t pos, ptrdiff_t count, guf_err *err)
{
    GUF_ASSERT(guf_str_is_valid(str));

    const ptrdiff_t len = guf_str_len(str);
    if (pos < 0 || pos >= len) {
        guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_substr: pos out of range"));
        return NULL;
    } else if (count < 0) {
        guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_substr: count < 0"));
        return NULL;
    }

    char *c_str = guf_str_try_get_cstr(str, err);
    // Check the returned pointer as well as *err: when err == NULL there is no error code to
    // inspect, and c_str must not be written through if it is NULL.
    if (!c_str || (err && *err != GUF_ERR_NONE)) {
        if (err && *err != GUF_ERR_NONE) { // Only read *err when err is not NULL.
            guf_err_set_or_panic(err, *err, GUF_ERR_MSG("in guf_str_try_substr: string is readonly (guf_str_try_get_cstr() failed)"));
        }
        return NULL;
    }

    // Saturate pos + count at PTRDIFF_MAX so a huge count cannot overflow; it is clamped to len below anyway.
    const ptrdiff_t pos_plus_count = guf_add_is_overflow_ptrdiff(pos, count) ? PTRDIFF_MAX : pos + count;
    const ptrdiff_t substr_len = pos_plus_count > len ? len - pos : count;
    GUF_ASSERT(substr_len >= 0 && substr_len <= len && substr_len <= guf_str_capacity(str));
    GUF_ASSERT((size_t)pos + (size_t)(substr_len) <= (size_t)len);

    if (pos > 0) {
        // Source and destination ranges may overlap, hence memmove rather than memcpy.
        memmove(c_str, c_str + pos, (size_t)substr_len);
    }
    c_str[substr_len] = '\0';
    guf_str_set_len_internal_(str, (size_t)substr_len);

    GUF_ASSERT(guf_str_is_valid(str));
    guf_err_set_if_not_null(err, GUF_ERR_NONE);
    return str;
}
// Same as guf_str_try_substr(), but without an error out-parameter (err == NULL is passed).
GUF_STR_KWRDS guf_str *guf_str_substr(guf_str *str, ptrdiff_t pos, ptrdiff_t count)
{
    guf_str *const result = guf_str_try_substr(str, pos, count, NULL);
    return result;
}
// guf_str_view:
@ -831,7 +1014,7 @@ GUF_STR_KWRDS guf_str_view guf_str_next_tok(guf_str_view *input, const guf_str_v
const int num_bytes = guf_utf8_char_num_bytes(&ch);
for (ptrdiff_t delim_len = GUF_MIN(max_delim_len, prev_input.len); delim_len > 0; --delim_len) {
guf_str_view delim_candidate = guf_substr_view(prev_input, 0, delim_len);
guf_str_view delim_candidate = guf_str_view_substr(prev_input, 0, delim_len);
for (ptrdiff_t delim_i = 0; delim_i < num_delims; ++delim_i) {
if (guf_str_view_equal(&delim_candidate, delims + delim_i)) { // Found delim.
bool preserved = false;
@ -906,17 +1089,18 @@ GUF_STR_KWRDS guf_str_view guf_str_view_trim_right_ascii(guf_str_view sv)
return sv;
}
GUF_STR_KWRDS guf_str_view guf_substr_view(guf_str_view str, ptrdiff_t pos, ptrdiff_t count)
GUF_STR_KWRDS guf_str_view guf_str_view_substr(guf_str_view str, ptrdiff_t pos, ptrdiff_t count)
{
GUF_ASSERT(str.str);
GUF_ASSERT(pos >= 0);
GUF_ASSERT(count >= 0);
GUF_ASSERT_RELEASE(str.str);
GUF_ASSERT_RELEASE(pos >= 0);
GUF_ASSERT_RELEASE(count >= 0);
if (str.len == 0 || count == 0 || pos >= str.len || str.str == NULL) {
if (str.len == 0 || count == 0 || pos >= str.len || pos < 0 || str.str == NULL) {
return (guf_str_view){.str = str.str, .len = 0};
}
const ptrdiff_t substr_len = pos + count > str.len ? str.len - pos : count;
const ptrdiff_t pos_plus_count = guf_add_is_overflow_ptrdiff(pos, count) ? PTRDIFF_MAX : pos + count;
const ptrdiff_t substr_len = pos_plus_count > str.len ? str.len - pos : count;
GUF_ASSERT(substr_len >= 0);
GUF_ASSERT(substr_len <= str.len);