From 461006746e6bf82331d073bf1874ea82303022ab Mon Sep 17 00:00:00 2001 From: jun <83899451+zeichensystem@users.noreply.github.com> Date: Fri, 28 Mar 2025 20:26:43 +0100 Subject: [PATCH] Add guf_str_substr --- src/guf_dbuf.h | 6 +- src/guf_str.h | 242 +++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 216 insertions(+), 32 deletions(-) diff --git a/src/guf_dbuf.h b/src/guf_dbuf.h index 2599934..5b56c91 100644 --- a/src/guf_dbuf.h +++ b/src/guf_dbuf.h @@ -498,7 +498,7 @@ GUF_DBUF_KWRDS void GUF_CAT(GUF_DBUF_NAME, _try_grow_if_full)(GUF_DBUF_NAME *dbu guf_err_set_if_not_null(err, GUF_ERR_NONE); } -static inline bool GUF_CAT(GUF_DBUF_NAME, _copy_opt_available)(guf_cpy_opt cpy_opt) +static inline bool GUF_CAT(GUF_DBUF_NAME, _copy_opt_available_)(guf_cpy_opt cpy_opt) { if (cpy_opt == GUF_CPY_DEEP) { #ifdef GUF_T_COPY @@ -541,8 +541,8 @@ GUF_DBUF_KWRDS GUF_T *GUF_CAT(GUF_DBUF_NAME, _try_insert)(GUF_DBUF_NAME *dbuf, G GUF_T *dst = dbuf->data + idx; - if (!GUF_CAT(GUF_DBUF_NAME, _copy_opt_available)(cpy_opt)) { - guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, GUF_ERR_MSG("in function " GUF_STRINGIFY(GUF_CAT(GUF_DBUF_NAME, _copy_opt_available)) ": cpy_opt unavailable")); + if (!GUF_CAT(GUF_DBUF_NAME, _copy_opt_available_)(cpy_opt)) { + guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, GUF_ERR_MSG("in function " GUF_STRINGIFY(GUF_CAT(GUF_DBUF_NAME, _copy_opt_available_)) ": cpy_opt unavailable")); return NULL; } else if (cpy_opt == GUF_CPY_DEEP) { #ifdef GUF_T_COPY diff --git a/src/guf_str.h b/src/guf_str.h index bcabcf2..8365b0a 100644 --- a/src/guf_str.h +++ b/src/guf_str.h @@ -55,16 +55,20 @@ typedef struct guf_str { #define GUF_CSTR_LIT_TO_VIEW(CSTR) ((guf_str_view){.str = (CSTR), .len = (ptrdiff_t)sizeof((CSTR)) - 1}) #define GUF_STR_TO_VIEW(GUF_STR_PTR) ((guf_str_view){.str = guf_str_const_cstr((GUF_STR_PTR)), .len = (ptrdiff_t)guf_str_len((GUF_STR_PTR))}) #define GUF_CSTR_TO_READONLY_STR(CSTR) ((guf_str){.allocator = NULL, .data.lng.c_str = (CSTR), .data.lng.size = strlen(CSTR) + 1, .data.lng.capacity = 0}) + +#define GUF_STR_UNINITIALISED (guf_str){.allocator = NULL, .data.shrt.size = 0, .data.shrt.c_str[0] = '\0'} + #ifdef __cplusplus // Standard C++ does not have compound literals like C99... #define GUF_CSTR_TO_VIEW_CPP(CSTR) guf_str_view{.str = (CSTR), .len = (ptrdiff_t)strlen(CSTR)} #define GUF_CSTR_LIT_TO_VIEW_CPP(CSTR) guf_str_view{.str = (CSTR), .len = (ptrdiff_t)sizeof(CSTR) - 1} + + #define GUF_STR_UNINITIALISED_CPP guf_str{.allocator = NULL, .data.shrt.size = 0, .data.shrt.c_str[0] = '\0'} #endif // guf_str_view: GUF_STR_KWRDS bool guf_str_view_is_valid(guf_str_view sv); -GUF_STR_KWRDS guf_str guf_str_substr_cpy(guf_str_view str, ptrdiff_t pos, size_t count); // not necessary -GUF_STR_KWRDS guf_str_view guf_substr_view(guf_str_view str, ptrdiff_t pos, ptrdiff_t count); +GUF_STR_KWRDS guf_str_view guf_str_view_substr(guf_str_view str, ptrdiff_t pos, ptrdiff_t count); GUF_STR_KWRDS guf_hash_size_t guf_str_view_hash(const guf_str_view *sv); GUF_STR_KWRDS uint64_t guf_str_view_hash64(const guf_str_view *sv); @@ -98,10 +102,29 @@ GUF_STR_KWRDS guf_str *guf_str_move(guf_str *dst, guf_str *src, void *ctx); GUF_STR_KWRDS bool guf_str_equal(const guf_str *a, const guf_str *b); GUF_STR_KWRDS int guf_str_cmp(const guf_str *a, const guf_str *b); -// TODO: +// DONE: +GUF_STR_KWRDS char *guf_str_try_at(guf_str *str, ptrdiff_t idx, guf_err *err); GUF_STR_KWRDS char *guf_str_at(guf_str *str, ptrdiff_t idx); -GUF_STR_KWRDS char *guf_str_back(guf_str *str); -GUF_STR_KWRDS char *guf_str_front(guf_str *str); +GUF_STR_KWRDS char *guf_str_try_back(guf_str *str, guf_err *err); +GUF_STR_KWRDS char *guf_str_back(guf_str *str); +GUF_STR_KWRDS char *guf_str_try_front(guf_str *str, guf_err *err); +GUF_STR_KWRDS char *guf_str_front(guf_str *str); + +GUF_STR_KWRDS char guf_str_try_at_cpy(const guf_str *str, ptrdiff_t idx, guf_err *err); +GUF_STR_KWRDS char guf_str_at_cpy(const guf_str *str, ptrdiff_t idx); +GUF_STR_KWRDS char guf_str_try_back_cpy(const guf_str *str, guf_err *err); +GUF_STR_KWRDS char guf_str_back_cpy(const guf_str *str); +GUF_STR_KWRDS char guf_str_try_front_cpy(const guf_str *str, guf_err *err); +GUF_STR_KWRDS char guf_str_front_cpy(const guf_str *str); + +// DONE: +// Make substring in-place (constant time if pos == 0, otherwise copying count chars to the beginning of the str, i.e. linear time) +GUF_STR_KWRDS guf_str *guf_str_try_substr(guf_str *str, ptrdiff_t pos, ptrdiff_t count, guf_err *err); +GUF_STR_KWRDS guf_str *guf_str_substr(guf_str *str, ptrdiff_t pos, ptrdiff_t count); + +// TODO: +GUF_STR_KWRDS char guf_str_pop_back(guf_str *str); +GUF_STR_KWRDS char guf_str_pop_front(guf_str *str); // DONE: GUF_STR_KWRDS guf_str *guf_str_try_append_char(guf_str *str, char c, ptrdiff_t times, guf_err *err); @@ -115,10 +138,6 @@ GUF_STR_KWRDS guf_str *guf_str_append(guf_str *str, guf_str_view sv); GUF_STR_KWRDS guf_str *guf_str_try_append_cstr(guf_str *str, const char *c_str, guf_err *err); GUF_STR_KWRDS guf_str *guf_str_append_cstr(guf_str *str, const char *c_str); -// TODO: -GUF_STR_KWRDS char guf_str_pop_back(guf_str *str); -GUF_STR_KWRDS char guf_str_pop_front(guf_str *str); - // DONE: GUF_STR_KWRDS guf_str *guf_str_try_reserve(guf_str *str, ptrdiff_t min_capacity, guf_err *err); GUF_STR_KWRDS guf_str *guf_str_reserve(guf_str *str, ptrdiff_t min_capacity); @@ -183,7 +202,7 @@ GUF_STR_KWRDS bool guf_str_is_uninit(const guf_str *str); str->data.shrt.size = (unsigned char)(size_with_null << 1); } #elif defined(GUF_PLATFORM_BIG_ENDIAN) - #define GUF_STR_IS_LONG_MASK ((unsigned char)0x80) /* binary 1000 0000 */ + #define GUF_STR_IS_LONG_MASK ((unsigned char)0x80) /* binary 1000 0000 */ #define GUF_STR_GET_CAP_MASK ((size_t)SIZE_MAX >> 1u) /* binary 0111.1111 (1111.1111)* 1111.1111 */ static inline void guf_str_set_lng_cap_(guf_str *str, size_t cap_with_null) @@ -328,7 +347,7 @@ GUF_STR_KWRDS guf_str *guf_str_try_reserve(guf_str *str, ptrdiff_t new_cap_min, } if (new_cap_min >= PTRDIFF_MAX - 1) { - guf_err_set_or_panic(err, GUF_ERR_INT_OVERFLOW, "in guf_str_try_reserve: new_cap_min >= PTRDIFF_MAX - 1"); + guf_err_set_or_panic(err, GUF_ERR_INT_OVERFLOW, GUF_ERR_MSG("in guf_str_try_reserve: new_cap_min >= PTRDIFF_MAX - 1")); return NULL; } @@ -358,7 +377,7 @@ GUF_STR_KWRDS guf_str *guf_str_try_reserve(guf_str *str, ptrdiff_t new_cap_min, if (guf_str_is_short_internal_(str)) { // a.) Was short string -> need initial allocation. char *c_str_new = str->allocator->alloc(new_cap_min_with_null, str->allocator->ctx); if (!c_str_new) { - guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, "in guf_str_try_reserve: Initial allocation failed."); + guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, GUF_ERR_MSG("in guf_str_try_reserve: Initial allocation failed.")); return NULL; } memcpy(c_str_new, str->data.shrt.c_str, len_with_null); @@ -367,7 +386,7 @@ GUF_STR_KWRDS guf_str *guf_str_try_reserve(guf_str *str, ptrdiff_t new_cap_min, } else { // b) Was long string -> need re-allocation char *c_str_new = str->allocator->realloc(str->data.lng.c_str, old_cap_with_null, new_cap_min_with_null, str->allocator->ctx); if (!c_str_new) { - guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, "in guf_str_try_reserve: re-allocation failed."); + guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, GUF_ERR_MSG("in guf_str_try_reserve: re-allocation failed.")); return NULL; } str->data.lng.c_str = c_str_new; @@ -409,11 +428,12 @@ GUF_STR_KWRDS char *guf_str_try_get_cstr(guf_str *str, guf_err *err) { GUF_ASSERT(guf_str_is_valid(str)); if (guf_str_is_readonly(str)) { - guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, "in guf_str_try_get_cstr: cannot return non-const char pointer because str is readonly"); + guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, GUF_ERR_MSG("in guf_str_try_get_cstr: cannot return non-const char pointer because str is readonly")); return NULL; } char *c_str = guf_str_get_cstr_internal_(str); GUF_ASSERT(c_str); + guf_err_set_if_not_null(err, GUF_ERR_NONE); return c_str; } @@ -438,8 +458,7 @@ static void guf_str_set_len_internal_(guf_str *str, size_t len) GUF_STR_KWRDS guf_str guf_str_new_uninitialised(void) { - guf_str str = {.allocator = NULL, .data.shrt.size = 0, .data.shrt.c_str[0] = '\0'}; - return str; + return GUF_STR_UNINITIALISED; } GUF_STR_KWRDS bool guf_str_is_uninit(const guf_str *str) @@ -462,15 +481,15 @@ GUF_STR_KWRDS guf_str *guf_str_init_empty(guf_str *str, guf_allocator *allocator GUF_STR_KWRDS guf_str *guf_str_try_init(guf_str *str, guf_str_view str_view, guf_allocator *alloc, guf_err *err) { if (!str) { - guf_err_set_or_panic(err, GUF_ERR_NULL_PTR, "in guf_str_try_init: str is NULL"); + guf_err_set_or_panic(err, GUF_ERR_NULL_PTR, GUF_ERR_MSG("in guf_str_try_init: str is NULL")); return NULL; } else if (!alloc || !alloc->alloc || !alloc->realloc || !alloc->free) { - guf_err_set_or_panic(err, GUF_ERR_NULL_PTR, "in guf_str_try_init: alloc (or allocs function pointers) is/are NULL"); + guf_err_set_or_panic(err, GUF_ERR_NULL_PTR, GUF_ERR_MSG("in guf_str_try_init: alloc (or allocs function pointers) is/are NULL")); return NULL; } if (!guf_str_view_is_valid(str_view)) { - guf_err_set_or_panic(err, GUF_ERR_NULL_PTR, "in guf_str_try_init: invalid str_view"); + guf_err_set_or_panic(err, GUF_ERR_NULL_PTR, GUF_ERR_MSG("in guf_str_try_init: invalid str_view")); return NULL; } @@ -485,7 +504,7 @@ GUF_STR_KWRDS guf_str *guf_str_try_init(guf_str *str, guf_str_view str_view, guf guf_str_try_reserve(str, str_view.len, err); if (err && *err != GUF_ERR_NONE) { - guf_panic(*err, "in guf_str_try_init: Initial allocation failed"); + guf_panic(*err, GUF_ERR_MSG("in guf_str_try_init: Initial allocation failed")); return NULL; } GUF_ASSERT(guf_str_len_internal_(str) == 0); @@ -513,7 +532,7 @@ GUF_STR_KWRDS guf_str guf_str_try_new(guf_str_view str_view, guf_allocator *allo guf_str str = guf_str_new_uninitialised(); guf_str_try_init(&str, str_view, alloc, err); if (err && *err != GUF_ERR_NONE) { - guf_err_set_or_panic(err, *err, "in guf_str_try_new: failed init"); + guf_err_set_or_panic(err, *err, GUF_ERR_MSG("in guf_str_try_new: failed init")); return guf_str_new_uninitialised(); } else { GUF_ASSERT(!guf_str_is_uninit(&str)); @@ -521,6 +540,126 @@ GUF_STR_KWRDS guf_str guf_str_try_new(guf_str_view str_view, guf_allocator *allo } } +GUF_STR_KWRDS char *guf_str_try_at(guf_str *str, ptrdiff_t idx, guf_err *err) +{ + GUF_ASSERT_RELEASE(guf_str_is_valid(str)); + + const ptrdiff_t len = guf_str_len(str); + + if (idx < 0) { + guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_at: idx < 0")); + return NULL; + } else if (idx >= len) { + guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_at: idx out of range (idx >= len)")); + return NULL; + } else { + char *c_str = guf_str_try_get_cstr(str, err); + if (err && *err != GUF_ERR_NONE) { + return NULL; + } + GUF_ASSERT(c_str); + guf_err_set_if_not_null(err, GUF_ERR_NONE); + return c_str + idx; + } +} + +GUF_STR_KWRDS char *guf_str_at(guf_str *str, ptrdiff_t idx) +{ + return guf_str_try_at(str, idx, NULL); +} + +GUF_STR_KWRDS char *guf_str_try_back(guf_str *str, guf_err *err) +{ + GUF_ASSERT(guf_str_is_valid(str)); + const ptrdiff_t len = guf_str_len(str); + if (len == 0) { + guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_back: len == 0")); + return NULL; + } else { + return guf_str_try_at(str, len - 1, err); + } +} + +GUF_STR_KWRDS char *guf_str_back(guf_str *str) +{ + return guf_str_try_back(str, NULL); +} + +GUF_STR_KWRDS char *guf_str_try_front(guf_str *str, guf_err *err) +{ + GUF_ASSERT(guf_str_is_valid(str)); + const ptrdiff_t len = guf_str_len(str); + if (len == 0) { + guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_front: len == 0")); + return NULL; + } else { + return guf_str_try_at(str, 0, err); + } +} + +GUF_STR_KWRDS char *guf_str_front(guf_str *str) +{ + return guf_str_try_front(str, NULL); +} + +GUF_STR_KWRDS char guf_str_try_at_cpy(const guf_str *str, ptrdiff_t idx, guf_err *err) +{ + const ptrdiff_t len = guf_str_len(str); + + if (idx < 0) { + guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_at_cpy: idx < 0")); + return '\0'; + } else if (idx >= len) { + guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_at_cpy: idx out of range (idx >= len)")); + return '\0'; + } else { + const char *c_str = guf_str_const_cstr(str); + GUF_ASSERT(c_str); + guf_err_set_if_not_null(err, GUF_ERR_NONE); + return c_str[idx]; + } +} + +GUF_STR_KWRDS char guf_str_at_cpy(const guf_str *str, ptrdiff_t idx) +{ + return guf_str_try_at_cpy(str, idx, NULL); +} + +GUF_STR_KWRDS char guf_str_try_back_cpy(const guf_str *str, guf_err *err) +{ + GUF_ASSERT(guf_str_is_valid(str)); + const ptrdiff_t len = guf_str_len(str); + if (len == 0) { + guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_back_cpy: len == 0")); + return '\0'; + } else { + return guf_str_try_at_cpy(str, len - 1, err); + } +} + +GUF_STR_KWRDS char guf_str_back_cpy(const guf_str *str) +{ + return guf_str_try_back_cpy(str, NULL); +} + +GUF_STR_KWRDS char guf_str_try_front_cpy(const guf_str *str, guf_err *err) +{ + GUF_ASSERT(guf_str_is_valid(str)); + const ptrdiff_t len = guf_str_len(str); + if (len == 0) { + guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_front_cpy: len == 0")); + return '\0'; + } else { + return guf_str_try_at_cpy(str, 0, err); + } +} + +GUF_STR_KWRDS char guf_str_front_cpy(const guf_str *str) +{ + return guf_str_try_front_cpy(str, NULL); +} + + GUF_STR_KWRDS void guf_str_free(guf_str *str, void *ctx) { (void)ctx; @@ -792,6 +931,50 @@ GUF_STR_KWRDS guf_str *guf_str_append_cstr(guf_str *str, const char *c_str) return guf_str_try_append_cstr(str, c_str, NULL); } +GUF_STR_KWRDS guf_str *guf_str_try_substr(guf_str *str, ptrdiff_t pos, ptrdiff_t count, guf_err *err) +{ + GUF_ASSERT(guf_str_is_valid(str)); + + const ptrdiff_t len = guf_str_len(str); + if (pos < 0 || pos >= len) { + guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_substr: pos out of range")); + return NULL; + } else if (count < 0) { + guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_substr: count < 0")); + return NULL; + } + + char *c_str = guf_str_try_get_cstr(str, err); + if (err && *err != GUF_ERR_NONE) { + guf_err_set_or_panic(err, *err, GUF_ERR_MSG("in guf_str_try_substr: string is readonly (guf_str_try_get_cstr() failed)")); + return NULL; + } + GUF_ASSERT(c_str); + + const ptrdiff_t pos_plus_count = guf_add_is_overflow_ptrdiff(pos, count) ? PTRDIFF_MAX : pos + count; + const ptrdiff_t substr_len = pos_plus_count > len ? len - pos : count; + GUF_ASSERT(substr_len >= 0 && substr_len <= len && substr_len <= guf_str_capacity(str)); + GUF_ASSERT((size_t)pos + (size_t)(substr_len) <= (size_t)len); // [*] + + if (pos > 0) { + for (ptrdiff_t i = 0; i < substr_len; ++i) { + // GUF_ASSERT(pos + i < len); // cf. [*] + c_str[i] = c_str[pos + i]; + } + } + c_str[substr_len] = '\0'; + guf_str_set_len_internal_(str, substr_len); + + GUF_ASSERT(guf_str_is_valid(str)); + guf_err_set_if_not_null(err, GUF_ERR_NONE); + return str; +} + +GUF_STR_KWRDS guf_str *guf_str_substr(guf_str *str, ptrdiff_t pos, ptrdiff_t count) +{ + return guf_str_try_substr(str, pos, count, NULL); +} + // guf_str_view: @@ -831,7 +1014,7 @@ GUF_STR_KWRDS guf_str_view guf_str_next_tok(guf_str_view *input, const guf_str_v const int num_bytes = guf_utf8_char_num_bytes(&ch); for (ptrdiff_t delim_len = GUF_MIN(max_delim_len, prev_input.len); delim_len > 0; --delim_len) { - guf_str_view delim_candidate = guf_substr_view(prev_input, 0, delim_len); + guf_str_view delim_candidate = guf_str_view_substr(prev_input, 0, delim_len); for (ptrdiff_t delim_i = 0; delim_i < num_delims; ++delim_i) { if (guf_str_view_equal(&delim_candidate, delims + delim_i)) { // Found delim. bool preserved = false; @@ -906,17 +1089,18 @@ GUF_STR_KWRDS guf_str_view guf_str_view_trim_right_ascii(guf_str_view sv) return sv; } -GUF_STR_KWRDS guf_str_view guf_substr_view(guf_str_view str, ptrdiff_t pos, ptrdiff_t count) +GUF_STR_KWRDS guf_str_view guf_str_view_substr(guf_str_view str, ptrdiff_t pos, ptrdiff_t count) { - GUF_ASSERT(str.str); - GUF_ASSERT(pos >= 0); - GUF_ASSERT(count >= 0); + GUF_ASSERT_RELEASE(str.str); + GUF_ASSERT_RELEASE(pos >= 0); + GUF_ASSERT_RELEASE(count >= 0); - if (str.len == 0 || count == 0 || pos >= str.len || str.str == NULL) { + if (str.len == 0 || count == 0 || pos >= str.len || pos < 0 || str.str == NULL) { return (guf_str_view){.str = str.str, .len = 0}; - } + } - const ptrdiff_t substr_len = pos + count > str.len ? str.len - pos : count; + const ptrdiff_t pos_plus_count = guf_add_is_overflow_ptrdiff(pos, count) ? PTRDIFF_MAX : pos + count; + const ptrdiff_t substr_len = pos_plus_count > str.len ? str.len - pos : count; GUF_ASSERT(substr_len >= 0); GUF_ASSERT(substr_len <= str.len);