/*
    guf_str: a growable string with short-string optimisation (SSO), plus helpers for guf_str_view.

    Single-header layout: the declarations live inside the GUF_STR_H include guard; the definitions are
    only compiled when GUF_STR_IMPL or GUF_STR_IMPL_STATIC is defined before including this file.
    GUF_STR_KWRDS expands to 'static' for GUF_STR_IMPL_STATIC and to nothing otherwise.
*/
#if defined(GUF_STR_IMPL_STATIC)
    #define GUF_STR_KWRDS static
#else
    #define GUF_STR_KWRDS
#endif

#ifndef GUF_STR_H
#define GUF_STR_H
#include "guf_common.h"
#include "guf_alloc.h"
#include "guf_str_view_type.h"
#include "guf_utf8.h"
#include "guf_hash.h"

// cf. libc++ short-string optimisation: https://joellaity.com/2020/01/31/string.html (last-retrieved 2025-03-10)

// Representation of a "long" string (or of a read-only string, in which case capacity is 0).
typedef struct guf_str_internal_long_ {
    size_t capacity; // If long string: capacity's least significant bit always set to 1 (or its most significant bit for big-endian platforms); the actual capacity (which includes the null-terminator) must be even
    size_t size;     // Number of chars including the null-terminator (i.e. length + 1).
    char *c_str;     // Pointer to the null-terminated character data.
} guf_str_internal_long_;

// Number of chars (including the null-terminator) which fit into the inline buffer of a short string.
#define GUF_STR_SSO_BUF_CAP (sizeof(guf_str_internal_long_) - sizeof(unsigned char)) /* 23 bytes on 64-bit platforms, 11 bytes on 32-bit platforms */

#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || (defined(__cplusplus) && __cplusplus >= 201103L)
    static_assert(GUF_STR_SSO_BUF_CAP > 0, "GUF_STR_SSO_BUF_CAP < 0 (this is very weird)"); // Basically cannot fail.
    static_assert(GUF_STR_SSO_BUF_CAP < 0x80, "GUF_STR_SSO_BUF_CAP >= 128 (no support for platforms with wordsize >= 512-bits)"); // Could fail on hypothetical platforms with 512-bit wordsize (and above).
#endif

// Representation of a "short" string: the chars are stored inline, so no allocation is needed.
typedef struct guf_str_internal_short_ {
    unsigned char size; // size overlaps with the first byte of guf_str_internal_long_.capacity [1] (the is-long flag bit of that byte is 0 for short strings)
    char c_str[GUF_STR_SSO_BUF_CAP]; // Inline buffer (null-terminator included).
} guf_str_internal_short_;
/*
    [1] The first byte of guf_str_internal_long_.capacity is its least-significant-byte on little-endian platforms,
        and its most-significant byte on big-endian platforms.
*/

typedef struct guf_str {
    union {
        guf_str_internal_long_ lng;
        guf_str_internal_short_ shrt;
    } data; // 24 bytes on 64-bit platforms, 12 bytes on 32-bit platforms.
    guf_allocator *allocator; // Wasteful (8 bytes on 64-bit platforms...), but keeping this pointer also allows us to have "read-only strings" (a string is read-only if allocator == NULL)
} guf_str; // Total: 32 bytes on 64-bit platforms, 16 bytes on 32-bit platforms.

// View of a null-terminated C string; the length is computed with strlen. CSTR is evaluated twice.
#define GUF_CSTR_TO_VIEW(CSTR) ((guf_str_view){.str = (CSTR), .len = (ptrdiff_t)strlen((CSTR))})
// View of a string literal (or char array); the length is sizeof - 1, so this must not be used with a plain char pointer.
#define GUF_CSTR_LIT_TO_VIEW(CSTR) ((guf_str_view){.str = (CSTR), .len = (ptrdiff_t)sizeof((CSTR)) - 1})
// View of a guf_str; GUF_STR_PTR is evaluated twice.
#define GUF_STR_TO_VIEW(GUF_STR_PTR) ((guf_str_view){.str = guf_str_const_cstr((GUF_STR_PTR)), .len = (ptrdiff_t)guf_str_len((GUF_STR_PTR))})

// Read-only guf_str referring to CSTR (no copy is made): allocator is NULL, capacity is 0, size is strlen + 1. CSTR is evaluated twice.
#define GUF_CSTR_TO_READONLY_STR(CSTR) ((guf_str){.allocator = NULL, .data.lng.c_str = (CSTR), .data.lng.size = strlen(CSTR) + 1, .data.lng.capacity = 0})

#ifdef __cplusplus
    // Standard C++ does not have compound literals like C99...
    #define GUF_CSTR_TO_VIEW_CPP(CSTR) guf_str_view{.str = (CSTR), .len = (ptrdiff_t)strlen(CSTR)}
    #define GUF_CSTR_LIT_TO_VIEW_CPP(CSTR) guf_str_view{.str = (CSTR), .len = (ptrdiff_t)sizeof(CSTR) - 1}
#endif

// guf_str_view:

// A view is valid if (str != NULL and len >= 0) or (str == NULL and len == 0).
GUF_STR_KWRDS bool guf_str_view_is_valid(guf_str_view sv);

// NOTE(review): no definition of guf_str_substr_cpy is visible in the implementation section of this file.
GUF_STR_KWRDS guf_str guf_str_substr_cpy(guf_str_view str, ptrdiff_t pos, size_t count); // not necessary
// Sub-view of at most count chars starting at pos (clamped to the end of str).
GUF_STR_KWRDS guf_str_view guf_substr_view(guf_str_view str, ptrdiff_t pos, ptrdiff_t count);

GUF_STR_KWRDS guf_str_view guf_str_view_trim_left(guf_str_view str);
GUF_STR_KWRDS guf_str_view guf_str_view_trim_right(guf_str_view str);

// Hash of the viewed bytes; views with a NULL str or len <= 0 hash to the respective *_INIT value.
GUF_STR_KWRDS guf_hash_size_t guf_str_view_hash(const guf_str_view *sv);
GUF_STR_KWRDS uint64_t guf_str_view_hash64(const guf_str_view *sv);
GUF_STR_KWRDS uint32_t guf_str_view_hash32(const guf_str_view *sv);

// Byte-wise equality of two views (equal lengths and equal contents).
GUF_STR_KWRDS bool guf_str_view_equal(const guf_str_view* a, const guf_str_view* b);
GUF_STR_KWRDS bool guf_str_view_equal_val_arg(guf_str_view a_val, guf_str_view b_val);
// NOTE(review): no definition of guf_str_view_cmp is visible in the implementation section of this file.
GUF_STR_KWRDS int guf_str_view_cmp(const void *str_view_a, const void *str_view_b); // For qsort etc.
// Returns the next token of *input and advances *input past it.
// Tokens are separated by any of the num_delims views in delims; a delimiter which is also contained
// in preserved_delims (may be NULL) is returned as a token of its own instead of being dropped.
GUF_STR_KWRDS guf_str_view guf_str_next_tok(guf_str_view *input, const guf_str_view *delims, ptrdiff_t num_delims, const guf_str_view *preserved_delims, ptrdiff_t num_preserved_delims);

// guf_str:
//
// Naming convention: the guf_str_try_* functions report failures via guf_err_set_or_panic(err, ...)
// (presumably: *err is set if err != NULL, otherwise the call panics -- confirm in guf_common.h).
// Where a non-try variant is defined in the implementation section, it forwards to its try_ counterpart with err == NULL.

// DONE:
// Initialise str with the contents of str_view; alloc (and its alloc/realloc/free callbacks) must not be NULL.
GUF_STR_KWRDS guf_str *guf_str_try_init(guf_str *str, guf_str_view str_view, guf_allocator *alloc, guf_err *err);
GUF_STR_KWRDS guf_str *guf_str_init(guf_str *str, guf_str_view str_view, guf_allocator *alloc);
GUF_STR_KWRDS guf_str *guf_str_try_init_from_cstr(guf_str *str, const char* c_str, guf_allocator *alloc, guf_err *err);
GUF_STR_KWRDS guf_str *guf_str_init_from_cstr(guf_str *str, const char* c_str, guf_allocator *alloc);
GUF_STR_KWRDS guf_str guf_str_try_new(guf_str_view str_view, guf_allocator *alloc, guf_err *err);
GUF_STR_KWRDS guf_str guf_str_new(guf_str_view str_view, guf_allocator *alloc);

// The ctx parameter of the following three functions is unused by their definitions in this file.
GUF_STR_KWRDS void guf_str_free(guf_str *str, void *ctx);                        // Releases a long string's buffer (if any) and resets *str to the uninitialised state.
GUF_STR_KWRDS guf_str *guf_str_copy(guf_str *dst, const guf_str *src, void *ctx); // Returns NULL if the allocation for dst fails.
GUF_STR_KWRDS guf_str *guf_str_move(guf_str *dst, guf_str *src, void *ctx);       // *dst = *src; *src is reset to the uninitialised state afterwards.

// TODO:
GUF_STR_KWRDS guf_str guf_str_try_new_substr(guf_str_view str_view, ptrdiff_t pos, ptrdiff_t len, guf_allocator *alloc, guf_err *err);
GUF_STR_KWRDS guf_str guf_str_new_substr(guf_str_view str_view, ptrdiff_t pos, ptrdiff_t len, guf_allocator *alloc);

// TODO:
GUF_STR_KWRDS bool guf_str_equal(const guf_str *a, const guf_str *b);
GUF_STR_KWRDS bool guf_str_equals_cstr(const guf_str *a, const char *c_str);
GUF_STR_KWRDS bool guf_str_equals_strview(const guf_str *a, guf_str_view b);

// DONE:
// Append the char c 'times' times (times < 0 is an error, times == 0 is a no-op).
GUF_STR_KWRDS guf_str *guf_str_try_append_char(guf_str *str, char c, ptrdiff_t times, guf_err *err);
GUF_STR_KWRDS guf_str *guf_str_append_char(guf_str *str, char c, ptrdiff_t times);
GUF_STR_KWRDS guf_str *guf_str_try_append_one_char(guf_str *str, char c, guf_err *err);
GUF_STR_KWRDS guf_str *guf_str_append_one_char(guf_str *str, char c);
// Append the chars of sv (appending to a read-only string is an error).
GUF_STR_KWRDS guf_str *guf_str_try_append(guf_str *str, guf_str_view sv, guf_err *err);
GUF_STR_KWRDS guf_str *guf_str_append(guf_str *str, guf_str_view sv);
GUF_STR_KWRDS guf_str *guf_str_try_append_cstr(guf_str *str, const char *c_str, guf_err *err);
GUF_STR_KWRDS guf_str *guf_str_append_cstr(guf_str *str, const char *c_str);
// NOTE(review): no definition of guf_str_substr is visible in the implementation section of this file.
GUF_STR_KWRDS guf_str *guf_str_substr(guf_str* str, size_t pos, size_t count);

// DONE
// Make sure str can hold at least min_capacity chars (not counting the null-terminator); never shrinks.
GUF_STR_KWRDS guf_str *guf_str_try_reserve(guf_str *str, ptrdiff_t min_capacity, guf_err *err);
GUF_STR_KWRDS guf_str *guf_str_reserve(guf_str *str, ptrdiff_t min_capacity);

// TODO:
GUF_STR_KWRDS guf_str *guf_str_try_shrink_to_fit(guf_str *str, guf_err *err);
GUF_STR_KWRDS guf_str *guf_str_shrink_to_fit(guf_str *str);

// TODO:
GUF_STR_KWRDS char guf_str_pop_back(guf_str *str);
GUF_STR_KWRDS char guf_str_pop_front(guf_str *str);

// TODO:
GUF_STR_KWRDS char *guf_str_at(guf_str *str, size_t idx);
GUF_STR_KWRDS char *guf_str_back(guf_str *str);
GUF_STR_KWRDS char *guf_str_front(guf_str *str);

// DONE:
GUF_STR_KWRDS const char *guf_str_const_cstr(const guf_str *str);
GUF_STR_KWRDS char *guf_str_try_get_cstr(guf_str *str, guf_err *err); // Error if str is readonly.
GUF_STR_KWRDS char *guf_str_cstr(guf_str *str); // Panics if str is readonly.

GUF_STR_KWRDS ptrdiff_t guf_str_len(const guf_str *str); // The length (in chars) without the final zero-terminator.
GUF_STR_KWRDS ptrdiff_t guf_str_capacity(const guf_str *str); // The capacity (in chars) without the final zero-terminator.
GUF_STR_KWRDS bool guf_str_is_short(const guf_str *str); GUF_STR_KWRDS bool guf_str_is_readonly(const guf_str *str); GUF_STR_KWRDS bool guf_str_is_valid(const guf_str *str); GUF_STR_KWRDS guf_str guf_str_new_uninitialised(void); GUF_STR_KWRDS bool guf_str_is_uninit(const guf_str *str); #endif // #define GUF_STR_IMPL_STATIC /* debug */ #if defined(GUF_STR_IMPL) || defined(GUF_STR_IMPL_STATIC) #ifdef __cplusplus #error "Must compile guf_str as C99 (or above) because type-punning with unions is undefined behaviour in C++" #endif #include "guf_common.h" #include "guf_math.h" #include #ifdef GUF_STR_IMPL #define GUF_UTF8_IMPL #else #define GUF_UTF8_IMPL_STATIC #endif #include "guf_utf8.h" // TODO: find_first_of // guf_str: #if defined(GUF_PLATFORM_LITTLE_ENDIAN) #define GUF_STR_IS_LONG_MASK ((unsigned char)1) /* binary 0000.0001 */ #define GUF_STR_GET_CAP_MASK (~(size_t)1) /* binary 1111.1111 (1111.1111)* 1111.1110 */ static inline void guf_str_set_lng_cap_(guf_str *str, size_t cap_with_null) { GUF_ASSERT(cap_with_null % 2 == 0); GUF_ASSERT(cap_with_null <= PTRDIFF_MAX); GUF_ASSERT(cap_with_null > GUF_STR_SSO_BUF_CAP); str->data.lng.capacity = cap_with_null | ((size_t)1); } static inline void guf_str_set_shrt_size_(guf_str *str, unsigned char size_with_null) { GUF_ASSERT(size_with_null < GUF_STR_SSO_BUF_CAP && size_with_null < 0x80); str->data.shrt.size = (unsigned char)(size_with_null << 1); } #elif defined(GUF_PLATFORM_BIG_ENDIAN) #define GUF_STR_IS_LONG_MASK ((unsigned char)0x80) /* binary 1000 0000 */ #define GUF_STR_GET_CAP_MASK ((size_t)SIZE_MAX >> 1u) /* binary 0111.1111 (1111.1111)* 1111.1111 */ static inline void guf_str_set_lng_cap_(guf_str *str, size_t cap_with_null) { GUF_ASSERT(cap_with_null % 2 == 0); GUF_ASSERT(cap_with_null <= PTRDIFF_MAX); GUF_ASSERT(cap_with_null > GUF_STR_SSO_BUF_CAP); str->data.lng.capacity = ~GUF_STR_GET_CAP_MASK | (cap_with_null >> 1); } static inline void guf_str_set_shrt_size_(guf_str *str, unsigned char size_with_null) { 
GUF_ASSERT(size_with_null < GUF_STR_SSO_BUF_CAP && size_with_null < 0x80); str->data.shrt.size = size_with_null; } #else #error "guf_str: neither GUF_PLATFORM_LITTLE_ENDIAN nor GUF_PLATFORM_BIG_ENDIAN is defined" #endif GUF_STR_KWRDS bool guf_str_is_readonly(const guf_str *str) { GUF_ASSERT(str); return !str->allocator; } static bool guf_str_is_short_internal_(const guf_str *str) { if (guf_str_is_readonly(str)) { return false; } const unsigned char first_byte = str->data.shrt.size; // union type-punning (only legal in C99 and above; undefined behaviour in C++ I think). return (first_byte & GUF_STR_IS_LONG_MASK) == 0; } // Returns the capacity without the final null-terminator static size_t guf_str_cap_internal_(const guf_str *str) { if (guf_str_is_short_internal_(str)) { return GUF_STR_SSO_BUF_CAP - 1; } else if (guf_str_is_readonly(str)) { return 0; } else { // Precondition: all capacities for data.lng must be even. #if defined(GUF_PLATFORM_LITTLE_ENDIAN) GUF_ASSERT(str->data.lng.capacity & ~GUF_STR_GET_CAP_MASK); // Assert the is_long bit is actually set. const size_t cap_with_null = str->data.lng.capacity & GUF_STR_GET_CAP_MASK; GUF_ASSERT(cap_with_null % 2 == 0); #elif defined(GUF_PLATFORM_BIG_ENDIAN) GUF_ASSERT(str->data.lng.capacity & ~GUF_STR_GET_CAP_MASK); // Assert the is_long bit is actually set. 
const size_t cap_with_null = (str->data.lng.capacity & GUF_STR_GET_CAP_MASK) << 1; GUF_ASSERT(cap_with_null % 2 == 0); #endif GUF_ASSERT(cap_with_null > 0 && cap_with_null > GUF_STR_SSO_BUF_CAP); GUF_ASSERT(cap_with_null <= PTRDIFF_MAX); return cap_with_null - 1; } } static size_t guf_str_size_internal_(const guf_str *str) { if (guf_str_is_short_internal_(str)) { GUF_ASSERT(str->data.shrt.size > 0); #if defined(GUF_PLATFORM_LITTLE_ENDIAN) const size_t size = (str->data.shrt.size >> 1); #elif defined(GUF_PLATFORM_BIG_ENDIAN) const size_t size = (str->data.shrt.size); #endif GUF_ASSERT(size > 0 && size <= GUF_STR_SSO_BUF_CAP); return size; } else { const size_t size = str->data.lng.size; GUF_ASSERT(size > 0 && size <= PTRDIFF_MAX); return size; } } static size_t guf_str_len_internal_(const guf_str *str) { const size_t size = guf_str_size_internal_(str); GUF_ASSERT(size > 0); if (size == 0) { return 0; } else { return size - 1; } } GUF_STR_KWRDS bool guf_str_is_short(const guf_str *str) { GUF_ASSERT(guf_str_is_valid(str)); return guf_str_is_short_internal_(str); } GUF_STR_KWRDS ptrdiff_t guf_str_capacity(const guf_str *str) { GUF_ASSERT(guf_str_is_valid(str)); return (ptrdiff_t)guf_str_cap_internal_(str); } GUF_STR_KWRDS ptrdiff_t guf_str_len(const guf_str *str) { GUF_ASSERT(guf_str_is_valid(str)); return (ptrdiff_t)guf_str_len_internal_(str); } GUF_STR_KWRDS bool guf_str_is_valid(const guf_str *str) { GUF_ASSERT(str); if (!str || guf_str_is_uninit(str)) { return false; } const bool is_readonly = !str->allocator; if (is_readonly) { bool valid_readonly = str->data.lng.c_str && str->data.lng.capacity == 0 && str->data.lng.size > 0; return valid_readonly; } const bool valid_allocator = str->allocator && str->allocator->alloc && str->allocator->free && str->allocator->realloc; if (!valid_allocator) { return false; } if (guf_str_is_short_internal_(str)) { const size_t size = guf_str_size_internal_(str); // len + 1 return size > 0 && size <= GUF_STR_SSO_BUF_CAP && 
str->data.shrt.c_str[size - 1] == '\0'; } else { const size_t cap_with_null = guf_str_cap_internal_(str) + 1; const bool valid_cap = cap_with_null > GUF_STR_SSO_BUF_CAP && cap_with_null <= PTRDIFF_MAX && (cap_with_null % 2 == 0); return valid_cap && str->data.lng.c_str && str->data.lng.size > 0 && str->data.lng.size <= cap_with_null; } } GUF_STR_KWRDS guf_str *guf_str_try_reserve(guf_str *str, ptrdiff_t new_cap_min, guf_err *err) { GUF_ASSERT(guf_str_is_valid(str)); GUF_ASSERT(!guf_str_is_readonly(str)); const size_t old_cap_with_null = guf_str_cap_internal_(str) + 1; const size_t len_with_null = guf_str_len_internal_(str) + 1; if (new_cap_min <= (ptrdiff_t)old_cap_with_null) { // No need to grow. guf_err_set_if_not_null(err, GUF_ERR_NONE); return str; } if (new_cap_min >= PTRDIFF_MAX - 1) { guf_err_set_or_panic(err, GUF_ERR_INT_OVERFLOW, "in guf_str_try_reserve: new_cap_min >= PTRDIFF_MAX - 1"); return NULL; } size_t new_cap_min_with_null = (size_t)new_cap_min + 1; if (new_cap_min_with_null % 2 != 0) { // Only an even lng.capacity is allowed. new_cap_min_with_null += 1; } // Try if we can reach at least new_cap_min_with_null by doubling the capacity. const size_t GUF_STR_GROWTH_FAC = 2; size_t times_two_cap = old_cap_with_null * GUF_STR_GROWTH_FAC; if (guf_mul_is_overflow_size_t(old_cap_with_null, GUF_STR_GROWTH_FAC) || times_two_cap >= PTRDIFF_MAX) { times_two_cap = (PTRDIFF_MAX % 2 == 0) ? PTRDIFF_MAX : PTRDIFF_MAX - 1; } if (times_two_cap > new_cap_min_with_null) { new_cap_min_with_null = times_two_cap; } GUF_ASSERT(new_cap_min_with_null >= len_with_null && new_cap_min_with_null <= PTRDIFF_MAX); const size_t space_remaining = (new_cap_min_with_null - len_with_null); if (new_cap_min_with_null < (PTRDIFF_MAX - 8) && space_remaining < 4) { new_cap_min_with_null += 4 - space_remaining; // Have some leeway. } GUF_ASSERT(new_cap_min_with_null % 2 == 0); if (guf_str_is_short_internal_(str)) { // a.) Was short string -> need initial allocation. 
char *c_str_new = str->allocator->alloc(new_cap_min_with_null, str->allocator->ctx); if (!c_str_new) { guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, "in guf_str_try_grow_if_necessary: Initial allocation failed."); return NULL; } memcpy(c_str_new, str->data.shrt.c_str, len_with_null); str->data.lng.c_str = c_str_new; guf_str_set_lng_cap_(str, new_cap_min_with_null); } else { // b) Was long string -> need re-allocation char *c_str_new = str->allocator->realloc(str->data.lng.c_str, old_cap_with_null, new_cap_min_with_null, str->allocator->ctx); if (!c_str_new) { guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, "in guf_str_try_grow_if_necessary: re-allocation failed."); return NULL; } str->data.lng.c_str = c_str_new; guf_str_set_lng_cap_(str, new_cap_min_with_null); } GUF_ASSERT(guf_str_is_valid(str)); guf_err_set_if_not_null(err, GUF_ERR_NONE); return str; } static char *guf_str_get_cstr_internal_(guf_str *str) { if (guf_str_is_short(str)) { return str->data.shrt.c_str; } else { return str->data.lng.c_str; } } static const char *guf_str_get_const_cstr_internal_(const guf_str *str) { if (guf_str_is_short(str)) { return str->data.shrt.c_str; } else { return str->data.lng.c_str; } } GUF_STR_KWRDS const char *guf_str_const_cstr(const guf_str *str) { GUF_ASSERT(guf_str_is_valid(str)); const char* c_str = guf_str_get_const_cstr_internal_(str); GUF_ASSERT(c_str); return c_str; } GUF_STR_KWRDS char *guf_str_try_get_cstr(guf_str *str, guf_err *err) { GUF_ASSERT(guf_str_is_valid(str)); if (guf_str_is_readonly(str)) { guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, "in guf_str_try_get_cstr: cannot return non-const char pointer because str is readonly"); return NULL; } char *c_str = guf_str_get_cstr_internal_(str); GUF_ASSERT(c_str); return c_str; } GUF_STR_KWRDS char *guf_str_cstr(guf_str *str) { return guf_str_try_get_cstr(str, NULL); } static void guf_str_set_len_internal_(guf_str *str, size_t len) { GUF_ASSERT(len <= guf_str_cap_internal_(str)); 
GUF_ASSERT(!guf_str_is_readonly(str)); const size_t len_with_null = len + 1; if (guf_str_is_short_internal_(str)) { GUF_ASSERT(len_with_null <= UCHAR_MAX) guf_str_set_shrt_size_(str, (unsigned char)len_with_null); } else { str->data.lng.size = len_with_null; } } GUF_STR_KWRDS guf_str guf_str_new_uninitialised(void) { guf_str str = {.allocator = NULL, .data.shrt.size = 0, .data.shrt.c_str[0] = '\0'}; return str; } GUF_STR_KWRDS bool guf_str_is_uninit(const guf_str *str) { GUF_ASSERT(str); return !str->allocator && !str->data.shrt.size && str->data.shrt.c_str[0] == '\0'; } GUF_STR_KWRDS guf_str *guf_str_init_empty(guf_str *str, guf_allocator *allocator) { GUF_ASSERT(str && allocator); str->allocator = allocator; guf_str_set_shrt_size_(str, 1); str->data.shrt.c_str[0] = '\0'; return str; } GUF_STR_KWRDS guf_str *guf_str_try_init(guf_str *str, guf_str_view str_view, guf_allocator *alloc, guf_err *err) { if (!str) { guf_err_set_or_panic(err, GUF_ERR_NULL_PTR, "in guf_str_try_init: str is NULL"); return NULL; } else if (!alloc || !alloc->alloc || !alloc->realloc || !alloc->free) { guf_err_set_or_panic(err, GUF_ERR_NULL_PTR, "in guf_str_try_init: alloc (or allocs function pointers) is/are NULL"); return NULL; } if (!guf_str_view_is_valid(str_view)) { guf_err_set_or_panic(err, GUF_ERR_NULL_PTR, "in guf_str_try_init: invalid str_view"); return NULL; } guf_str_init_empty(str, alloc); if (str_view.len == 0) { GUF_ASSERT(!guf_str_is_readonly(str)); GUF_ASSERT(guf_str_is_valid(str)); return str; } GUF_ASSERT(str_view.str && str_view.len > 0); guf_str_try_reserve(str, str_view.len, err); if (err && *err != GUF_ERR_NONE) { guf_panic(*err, "in guf_str_try_init: Initial allocation failed"); return NULL; } GUF_ASSERT(guf_str_len_internal_(str) == 0); GUF_ASSERT(guf_str_cap_internal_(str) >= (size_t)str_view.len); GUF_ASSERT(!guf_str_is_readonly(str)); char *c_str_dst = guf_str_get_cstr_internal_(str); GUF_ASSERT_RELEASE(c_str_dst); memcpy(c_str_dst, str_view.str, str_view.len); 
c_str_dst[str_view.len] = '\0'; GUF_ASSERT(!guf_str_is_readonly(str)); GUF_ASSERT(guf_str_is_valid(str)); guf_err_set_if_not_null(err, GUF_ERR_NONE); return str; } GUF_STR_KWRDS guf_str *guf_str_init(guf_str *str, guf_str_view str_view, guf_allocator *alloc) { return guf_str_try_init(str, str_view, alloc, NULL); } GUF_STR_KWRDS guf_str guf_str_try_new(guf_str_view str_view, guf_allocator *alloc, guf_err *err) { guf_str str = guf_str_new_uninitialised(); guf_str_try_init(&str, str_view, alloc, err); if (err && *err != GUF_ERR_NONE) { guf_err_set_or_panic(err, *err, "in guf_str_try_new: failed init"); return guf_str_new_uninitialised(); } else { GUF_ASSERT(!guf_str_is_uninit(&str)); return str; } } GUF_STR_KWRDS void guf_str_free(guf_str *str, void *ctx) { (void)ctx; if (!str || guf_str_is_uninit(str)) { return; } else if (guf_str_is_readonly(str)) { // Don't need to de-allocate anything for read-only strings. *str = guf_str_new_uninitialised(); return; } else if (!guf_str_is_short(str)) { // Need to de-allocate. 
GUF_ASSERT(guf_str_capacity(str) < PTRDIFF_MAX); const ptrdiff_t cap_with_null = guf_str_capacity(str) + 1; GUF_ASSERT((cap_with_null % 2) == 0); char *c_str = guf_str_cstr(str); GUF_ASSERT(str->allocator->free); if (str->allocator->free) { str->allocator->free(c_str, cap_with_null, str->allocator->ctx); } *str = guf_str_new_uninitialised(); return; } else { GUF_ASSERT(guf_str_is_short(str)); *str = guf_str_new_uninitialised(); } } GUF_STR_KWRDS guf_str *guf_str_copy(guf_str *dst, const guf_str *src, void *ctx) { (void)ctx; GUF_ASSERT_RELEASE(dst); GUF_ASSERT_RELEASE(guf_str_is_valid(src)); guf_str_init_empty(dst, src->allocator); GUF_ASSERT(guf_str_is_short_internal_(dst)); if (!guf_str_is_short_internal_(src)) { const size_t src_cap_with_null = guf_str_cap_internal_(src) + 1; char *dst_cstr = src->allocator->alloc(src_cap_with_null, src->allocator->ctx); if (!dst_cstr) { *dst = guf_str_new_uninitialised(); return NULL; } dst->data.lng.c_str = dst_cstr; dst->data.lng.capacity = src->data.lng.capacity; dst->data.lng.size = src->data.lng.size; } else { dst->data.shrt.size = src->data.shrt.size; } const size_t src_len_with_null = guf_str_len_internal_(src) + 1; GUF_ASSERT(src_len_with_null == (guf_str_len_internal_(dst) + 1)); GUF_ASSERT(guf_str_is_short(dst) == guf_str_is_short(src)); const char *src_cstr = guf_str_const_cstr(src); char *dst_cstr = guf_str_cstr(dst); GUF_ASSERT(src_cstr && dst_cstr); memcpy(dst_cstr, src_cstr, src_len_with_null); GUF_ASSERT(guf_str_is_valid(dst)); return dst; } GUF_STR_KWRDS guf_str *guf_str_move(guf_str *dst, guf_str *src, void *ctx) { (void)ctx; GUF_ASSERT_RELEASE(dst); GUF_ASSERT_RELEASE(guf_str_is_valid(src)); *dst = *src; *src = guf_str_new_uninitialised(); return dst; } GUF_STR_KWRDS guf_str *guf_str_try_append_char(guf_str *str, char c, ptrdiff_t times, guf_err *err) { GUF_ASSERT(guf_str_is_valid(str)); if (guf_str_is_readonly(str)) { guf_err_set_or_panic(err, GUF_ERR_NULL_PTR, "in guf_str_try_append_char: str is readonly"); 
return NULL; } if (times < 0) { guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, "in guf_str_try_append_char: repeats < 0"); return NULL; } else if (times == 0) { guf_err_set_if_not_null(err, GUF_ERR_NONE); return str; } GUF_ASSERT(guf_str_len_internal_(str) <= guf_str_cap_internal_(str)); const size_t old_cap = guf_str_cap_internal_(str); const size_t old_len = guf_str_len_internal_(str); const size_t new_len = old_len + (size_t)times; if (new_len <= old_len || new_len > (size_t)PTRDIFF_MAX) { // Handle overflow. guf_err_set_or_panic(err, GUF_ERR_INT_OVERFLOW, "in guf_str_try_append_char: new length would overflow ptrdiff_t"); return NULL; } else if (new_len > old_cap) { // Need to grow capacity. guf_str_try_reserve(str, new_len, err); if (err && *err != GUF_ERR_NONE) { guf_err_set_or_panic(err, *err, "in guf_str_try_append_char: failed to reserve capacity"); return NULL; } } const size_t new_cap = guf_str_cap_internal_(str); GUF_ASSERT_RELEASE(new_cap >= new_len && new_cap >= old_cap); GUF_ASSERT(guf_str_len_internal_(str) == old_len); GUF_ASSERT(((ptrdiff_t)new_cap - (ptrdiff_t)old_len) >= times); char *c_str = guf_str_get_cstr_internal_(str); for (size_t i = old_len; i < new_len; ++i) { c_str[i] = c; } guf_str_set_len_internal_(str, new_len); c_str[new_len] = '\0'; guf_err_set_if_not_null(err, GUF_ERR_NONE); return str; } GUF_STR_KWRDS guf_str *guf_str_append_char(guf_str *str, char c, ptrdiff_t times) { return guf_str_try_append_char(str, c, times, NULL); } GUF_STR_KWRDS guf_str *guf_str_try_append_one_char(guf_str *str, char c, guf_err *err) { return guf_str_try_append_char(str, c, 1, err); } GUF_STR_KWRDS guf_str *guf_str_append_one_char(guf_str *str, char c) { return guf_str_try_append_one_char(str, c, NULL); } GUF_STR_KWRDS guf_str *guf_str_try_append(guf_str *str, guf_str_view sv, guf_err *err) { GUF_ASSERT(guf_str_is_valid(str)); if (!guf_str_view_is_valid(sv)) { guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, "in guf_str_try_append_view: str_view is 
invalid"); return NULL; } else if (guf_str_is_readonly(str)) { guf_err_set_or_panic(err, GUF_ERR_NULL_PTR, "in in guf_str_try_append_view: str is readonly"); return NULL; } if (sv.len == 0) { guf_err_set_if_not_null(err, GUF_ERR_NONE); return str; } GUF_ASSERT(sv.str && sv.len > 0); const size_t old_cap = guf_str_cap_internal_(str); const size_t old_len = guf_str_len_internal_(str); const size_t new_len = old_len + (size_t)sv.len; if (new_len <= old_len || new_len > (size_t)PTRDIFF_MAX) { // Handle overflow. guf_err_set_or_panic(err, GUF_ERR_INT_OVERFLOW, "in guf_str_try_append_view: new length would overflow ptrdiff_t"); return NULL; } else if (new_len > old_cap) { // Growth necessary. guf_str_try_reserve(str, new_len, err); if (err && *err != GUF_ERR_NONE) { guf_err_set_or_panic(err, *err, "in guf_str_try_append_view: failed to reserve capacity"); return NULL; } } const size_t new_cap = guf_str_cap_internal_(str); GUF_ASSERT_RELEASE(new_cap >= old_cap && new_cap >= new_len); GUF_ASSERT(((ptrdiff_t)new_cap - (ptrdiff_t)old_len) >= sv.len); char *c_str_dst = guf_str_get_cstr_internal_(str); for (size_t dst_i = old_len, src_i = 0; dst_i < new_len; ++dst_i, ++src_i) { GUF_ASSERT(src_i < (size_t)sv.len); c_str_dst[dst_i] = sv.str[src_i]; } c_str_dst[new_len] = '\0'; guf_str_set_len_internal_(str, new_len); GUF_ASSERT(guf_str_is_valid(str)); guf_err_set_if_not_null(err, GUF_ERR_NONE); return str; } GUF_STR_KWRDS guf_str *guf_str_append(guf_str *str, guf_str_view sv) { return guf_str_try_append(str, sv, NULL); } GUF_STR_KWRDS guf_str *guf_str_try_append_cstr(guf_str *str, const char *c_str, guf_err *err) { GUF_ASSERT(guf_str_is_valid(str)); if (!c_str) { guf_err_set_or_panic(err, GUF_ERR_NULL_PTR, "in guf_str_try_append_cstr: c_str is NULL"); return NULL; } else if (guf_str_is_readonly(str)) { guf_err_set_or_panic(err, GUF_ERR_NULL_PTR, "in guf_str_try_append_cstr: str is readonly"); return NULL; } char *dst_cstr = guf_str_get_cstr_internal_(str); size_t i = 0; do { 
size_t cap = guf_str_cap_internal_(str); size_t len = guf_str_len_internal_(str); GUF_ASSERT(len <= cap); if (len == cap) { // Grow if necessary. guf_str_try_reserve(str, cap < PTRDIFF_MAX ? cap + 1 : PTRDIFF_MAX, err); if (err && *err != GUF_ERR_NONE) { guf_err_set_or_panic(err, GUF_ERR_INT_OVERFLOW, "in guf_str_try_append_cstr: failed to reserve"); return NULL; } cap = guf_str_cap_internal_(str); len = guf_str_len_internal_(str); } dst_cstr[len] = c_str[i]; guf_str_set_len_internal_(str, len + 1); } while (c_str[i++] != '\0'); GUF_ASSERT(guf_str_is_valid(str)); GUF_ASSERT(dst_cstr[guf_str_len_internal_(str)] == '\0'); guf_err_set_if_not_null(err, GUF_ERR_NONE); return str; } GUF_STR_KWRDS guf_str *guf_str_append_cstr(guf_str *str, const char *c_str) { return guf_str_try_append_cstr(str, c_str, NULL); } // guf_str_view: GUF_STR_KWRDS bool guf_str_view_is_valid(guf_str_view sv) { if (sv.str) { return sv.len >= 0; } else { return sv.len == 0; } } GUF_STR_KWRDS guf_str_view guf_str_next_tok(guf_str_view *input, const guf_str_view *delims, ptrdiff_t num_delims, const guf_str_view *preserved_delims, ptrdiff_t num_preserved_delims) { if (input->len <= 0 || input->str == NULL) { return (guf_str_view){.str = NULL, .len = 0}; } ptrdiff_t max_delim_len = -1; for (ptrdiff_t i = 0; i < num_delims; ++i) { if (delims[i].len > max_delim_len) { max_delim_len = delims[i].len; } } guf_str_view tok = {.str = input->str, .len = 0}; guf_str_view prev_input = *input; guf_utf8_char ch = {0}; for (guf_utf8_stat stat = guf_utf8_char_next(&ch, input); stat != GUF_UTF8_READ_DONE; stat = guf_utf8_char_next(&ch, input)) { if (stat != GUF_UTF8_READ_VALID) { prev_input = *input; continue; } const int num_bytes = guf_utf8_char_num_bytes(&ch); for (ptrdiff_t delim_len = GUF_MIN(max_delim_len, prev_input.len); delim_len > 0; --delim_len) { guf_str_view delim_candidate = guf_substr_view(prev_input, 0, delim_len); for (ptrdiff_t delim_i = 0; delim_i < num_delims; ++delim_i) { if 
(guf_str_view_equal(&delim_candidate, delims + delim_i)) { // Found delim. bool preserved = false; if (preserved_delims && num_preserved_delims > 0) { for (ptrdiff_t preserved_i = 0; preserved_i < num_preserved_delims; ++preserved_i) { if (guf_str_view_equal(&delim_candidate, preserved_delims + preserved_i)) { preserved = true; break; } } } if (!preserved) { input->len = prev_input.len - delim_len; input->str = prev_input.len > 0 ? prev_input.str + delim_len : NULL; GUF_ASSERT(input->len >= 0); } else { input->str -= num_bytes; input->len += num_bytes; } if (tok.len == 0) { if (preserved) { input->str += num_bytes; input->len -= num_bytes; return delim_candidate; } tok.str = input->str; goto end; } else { return tok; } } } } tok.len += num_bytes; end:; prev_input = *input; } return tok; } GUF_STR_KWRDS guf_str_view guf_str_view_trim_left_ascii(guf_str_view sv) { if (sv.len <= 0 || sv.str == NULL) { return sv; } for (; sv.len > 0 && guf_char_isspace_ascii(*sv.str); --sv.len, ++sv.str) ; GUF_ASSERT(sv.len >= 0); GUF_ASSERT(sv.len == 0 || !guf_char_isspace_ascii(*sv.str)); return sv; } GUF_STR_KWRDS guf_str_view guf_str_view_trim_right_ascii(guf_str_view sv) { if (sv.len <= 0 || sv.str == NULL) { return sv; } for (; sv.len > 0 && guf_char_isspace_ascii(sv.str[sv.len - 1]); --sv.len) ; GUF_ASSERT(sv.len >= 0); GUF_ASSERT(sv.len == 0 || !guf_char_isspace_ascii(sv.str[sv.len - 1])); return sv; } GUF_STR_KWRDS guf_str_view guf_str_view_trim_right(guf_str_view sv) { if (sv.len <= 0 || sv.str == NULL) { return sv; } char c = sv.str[sv.len - 1]; while (sv.len > 0 && sv.str && c != ' ' && c != '\n' && c != '\t' && c != '\v' && c != '\f' && c != '\r') { --sv.len; ++sv.str; c = sv.str[0]; } return sv; } GUF_STR_KWRDS guf_str_view guf_substr_view(guf_str_view str, ptrdiff_t pos, ptrdiff_t count) { GUF_ASSERT(str.str); GUF_ASSERT(pos >= 0); GUF_ASSERT(count >= 0); if (str.len == 0 || count == 0 || pos >= str.len || str.str == NULL) { return (guf_str_view){.str = str.str, .len = 
0}; } const ptrdiff_t substr_len = pos + count > str.len ? str.len - pos : count; GUF_ASSERT(substr_len >= 0); GUF_ASSERT(substr_len <= str.len); return (guf_str_view){.str = str.str + pos, .len = substr_len}; } GUF_STR_KWRDS guf_hash_size_t guf_str_view_hash(const guf_str_view *sv) { GUF_ASSERT(sv); if (!sv->str || sv->len <= 0) { return GUF_HASH_INIT; } return guf_hash(sv->str, sv->len, GUF_HASH_INIT); } GUF_STR_KWRDS uint64_t guf_str_view_hash64(const guf_str_view *sv) { GUF_ASSERT(sv); if (!sv->str || sv->len <= 0) { return GUF_HASH64_INIT; } return guf_hash64(sv->str, sv->len, GUF_HASH64_INIT); } GUF_STR_KWRDS uint32_t guf_str_view_hash32(const guf_str_view *sv) { GUF_ASSERT(sv); if (!sv->str || sv->len <= 0) { return GUF_HASH32_INIT; } return guf_hash32(sv->str, sv->len, GUF_HASH32_INIT); } GUF_STR_KWRDS bool guf_str_view_equal(const guf_str_view* a, const guf_str_view* b) { GUF_ASSERT(a && b); if (a->len != b->len) { return false; } if ((!a->str && b->str) || (!b->str && a->str)) { return false; } else if (!a->str && !b->str) { return a->len == b->len; } GUF_ASSERT(a->str && b->str); if (a->len <= 0) { return true; } return 0 == memcmp(a->str, b->str, a->len); } GUF_STR_KWRDS bool guf_str_view_equal_val_arg(guf_str_view a_val, guf_str_view b_val) { return guf_str_view_equal(&a_val, &b_val); } #undef GUF_STR_IMPL #undef GUF_STR_IMPL_STATIC #endif /* end impl */ #undef GUF_STR_KWRDS