1490 lines
55 KiB
C++
1490 lines
55 KiB
C++
/*
|
|
is parametrized: no
|
|
NOTE: automatically includes/implements guf_utf8.h
|
|
*/
|
|
|
|
#if defined(GUF_STR_IMPL_STATIC)
|
|
#define GUF_STR_KWRDS static
|
|
#else
|
|
#define GUF_STR_KWRDS
|
|
#endif
|
|
|
|
#ifndef GUF_STR_H
|
|
#define GUF_STR_H
|
|
#include "guf_common.h"
|
|
#include "guf_alloc.h"
|
|
#include "guf_str_view_type.h"
|
|
#include "guf_utf8.h"
|
|
#include "guf_hash.h"
|
|
|
|
// cf. libc++ short-string optimisation: https://joellaity.com/2020/01/31/string.html (last-retrieved 2025-03-10)
|
|
|
|
/*
    "Long" representation of a guf_str: the characters live in a separate buffer.
    Shares its storage with guf_str_internal_short_ through the union inside guf_str.
*/
typedef struct guf_str_internal_long_ {
    // Tagged capacity. For a long string the least-significant bit is always 1
    // (the most-significant bit on big-endian platforms); the actual capacity must be even.
    size_t capacity;
    // Number of stored chars including the null-terminator.
    size_t size;
    // Pointer to the character buffer.
    char *c_str;
} guf_str_internal_long_;
|
|
|
|
#define GUF_STR_SSO_BUF_CAP (sizeof(guf_str_internal_long_) - sizeof(unsigned char)) /* 23 bytes on 64-bit platforms, 11 bytes on 32-bit platforms */
|
|
|
|
#if defined(GUF_STDC_AT_LEAST_C11) || defined(GUF_STDCPP_AT_LEAST_CPP11)
|
|
static_assert(GUF_STR_SSO_BUF_CAP > 0, "GUF_STR_SSO_BUF_CAP < 0 (this is very weird)"); // Basically cannot fail.
|
|
static_assert(GUF_STR_SSO_BUF_CAP < 0x80, "GUF_STR_SSO_BUF_CAP >= 128 (no support for platforms with wordsize >= 512-bits)"); // Could fail on hypothetical platforms with 512-bit wordsize (and above).
|
|
#endif
|
|
|
|
typedef struct guf_str_internal_short_ {
|
|
unsigned char size; // size overlaps with the first byte of guf_str_internal_long_.capacity [1]
|
|
char c_str[GUF_STR_SSO_BUF_CAP];
|
|
} guf_str_internal_short_;
|
|
|
|
/*
|
|
[1] The first byte of guf_str_internal_long_.capacity is its least-significant-byte on little-endian
|
|
platforms, and its most-significant byte on big-endian platforms.
|
|
*/
|
|
|
|
typedef struct guf_str {
|
|
union {
|
|
guf_str_internal_long_ lng;
|
|
guf_str_internal_short_ shrt;
|
|
} data; // 24 bytes on 64-bit platforms, 12 bytes on 32-bit platforms.
|
|
guf_allocator *allocator; // Wasteful (8 bytes on 64-bit platforms...), but keeping this pointer also allows us to have "read-only strings" (a string is read-only if allocator == NULL)
|
|
} guf_str; // Total: 32 bytes on 64-bit platforms, 16 bytes on 32-bit platforms.
|
|
|
|
// Delimiter-matching option passed to guf_str_tok_state_new (its delim_match_opt parameter).
typedef enum guf_str_tok_delim_opt {
    GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST  = 0,
    GUF_STR_TOK_DELIM_OPT_MATCH_SHORTEST = 1,
    GUF_STR_TOK_DELIM_OPT_MATCH_IN_ORDER = 2,
} guf_str_tok_delim_opt;
|
|
|
|
typedef struct guf_str_tok_state {
|
|
guf_str_view input;
|
|
guf_str_view cur_tok, cur_delim;
|
|
const guf_str_view *delims;
|
|
const ptrdiff_t delim_count;
|
|
ptrdiff_t num_toks_read, num_delims_read;
|
|
bool done;
|
|
} guf_str_tok_state;
|
|
|
|
// View of a null-terminated C string; the length is computed with strlen (CSTR is evaluated twice).
#define GUF_CSTR_TO_VIEW(CSTR) ((guf_str_view){.str = (CSTR), .len = (ptrdiff_t)strlen((CSTR))})
// View of a string literal (or char array); the length is sizeof - 1, i.e. no strlen call.
#define GUF_CSTR_LIT_TO_VIEW(CSTR) ((guf_str_view){.str = (CSTR), .len = (ptrdiff_t)sizeof((CSTR)) - 1})
// View of the guf_str pointed to by GUF_STR_PTR (GUF_STR_PTR is evaluated twice).
#define GUF_STR_TO_VIEW(GUF_STR_PTR) ((guf_str_view){.str = guf_str_const_cstr((GUF_STR_PTR)), .len = (ptrdiff_t)guf_str_len((GUF_STR_PTR))})
// Read-only guf_str (allocator == NULL, capacity == 0) that points at CSTR without copying it; size includes the null-terminator.
#define GUF_CSTR_TO_READONLY_STR(CSTR) ((guf_str){.data.lng.capacity = 0, .data.lng.size = strlen((CSTR)) + 1, .data.lng.c_str = (CSTR), .allocator = NULL})

// Value representing an explicitly uninitialised guf_str (see guf_str_is_uninit).
#define GUF_STR_UNINITIALISED (guf_str){.data.shrt.size = 0, .data.shrt.c_str[0] = '\0', .allocator = NULL}
|
|
|
|
#ifdef __cplusplus
    // Standard C++ does not have compound literals like C99, so these variants use brace-initialisation instead.
    // NOTE(review): these rely on designated initialisers; confirm the nested/array designators below are accepted by the targeted C++ compilers.

    // View of a null-terminated C string (length via strlen).
    #define GUF_CSTR_TO_VIEW_CPP(CSTR) guf_str_view {.str = (CSTR), .len = (ptrdiff_t)strlen(CSTR)}
    // View of a string literal (length via sizeof - 1).
    #define GUF_CSTR_LIT_TO_VIEW_CPP(CSTR) guf_str_view {.str = (CSTR), .len = (ptrdiff_t)sizeof(CSTR) - 1}

    // C++ counterpart of GUF_STR_UNINITIALISED.
    #define GUF_STR_UNINITIALISED_CPP guf_str{.allocator = NULL, .data.shrt.size = 0, .data.shrt.c_str[0] = '\0'}
#endif
|
|
|
|
// 1.) guf_str_view:
|
|
|
|
// Return a new guf_str_view corresponding to the substring in range [pos, pos + count) of str
|
|
GUF_STR_KWRDS guf_str_view guf_str_view_substr(guf_str_view str, ptrdiff_t pos, ptrdiff_t count);
|
|
|
|
// Equality- and comparison-operators
|
|
GUF_STR_KWRDS bool guf_str_view_equal(const guf_str_view* a, const guf_str_view* b);
|
|
GUF_STR_KWRDS bool guf_str_view_equal_val_arg(guf_str_view a_val, guf_str_view b_val);
|
|
GUF_STR_KWRDS int guf_str_view_cmp(const void *str_view_a, const void *str_view_b); // For qsort etc.
|
|
|
|
// Hash functions.
|
|
GUF_STR_KWRDS guf_hash_size_t guf_str_view_hash(const guf_str_view *sv);
|
|
GUF_STR_KWRDS uint64_t guf_str_view_hash64(const guf_str_view *sv);
|
|
GUF_STR_KWRDS uint32_t guf_str_view_hash32(const guf_str_view *sv);
|
|
|
|
// Return a new guf_str_view corresponding to the substring with leading/trailing ascii-whitespace chars removed from the left/right
|
|
GUF_STR_KWRDS guf_str_view guf_str_view_trim_left_ascii(guf_str_view sv);
|
|
GUF_STR_KWRDS guf_str_view guf_str_view_trim_right_ascii(guf_str_view sv);
|
|
|
|
// Return true if sv does not violate any of its invariants (.len must be >= 0, .str must not be NULL unless len is 0)
|
|
GUF_STR_KWRDS bool guf_str_view_is_valid(guf_str_view sv);
|
|
|
|
/*
|
|
Return the substring up to the first delimiter "delim" and advance src to one past the delim (so the function can be called repeatedly)
|
|
cf. "str_pop_first_split":
|
|
- https://accu.org/conf-docs/PDFs_2021/luca_sass_modern_c_and_what_we_can_learn_from_it.pdf ("String handling in Modern C", page 128 of the pdf)
|
|
- https://youtu.be/QpAhX-gsHMs?si=lCvm6o60LrYHaAHc&t=3059 (last-retrieved 2025-04-30)
|
|
*/
|
|
GUF_STR_KWRDS guf_str_view guf_str_view_pop_split(guf_str_view *src, guf_str_view delim);
|
|
|
|
// Create a new tokeniser-state for guf_str_tok_next.
|
|
GUF_STR_KWRDS guf_str_tok_state guf_str_tok_state_new(guf_str_view str, guf_str_view *delims, ptrdiff_t delim_count, guf_str_tok_delim_opt delim_match_opt);
|
|
/*
|
|
Return true when the next token (or delimiter if preserve_delims == true) was encountered.
|
|
Put the current token into state->cur_tok.
|
|
If preserve_delims is true, every delimiter will be put into state->cur_delim.
|
|
If preserve_delims is false, delimiters will only be put into state->cur_delim if the current token is not empty
|
|
(otherwise, empty tokens are skipped for preserve_delims == false).
|
|
|
|
Example: delims = ["-", "+"]
|
|
- "-1+2": tok_next(preserve_delims=false) will set state->cur_tok = "1" and state->cur_delim = "+"
|
|
- "-1+2": tok_next(preserve_delims=true) will set state->cur_tok = "" and state->cur_delim = "-"
|
|
Set preserve_delims to false if you don't care about processing the delimiters
|
|
*/
|
|
GUF_STR_KWRDS bool guf_str_tok_next(guf_str_tok_state *state, bool preserve_delims);
|
|
|
|
|
|
// 2.) guf_str:
|
|
|
|
// Initialise the guf_str pointed to by str -> return the initialised str on success (or NULL on error)
|
|
GUF_STR_KWRDS guf_str *guf_str_try_init(guf_str *str, guf_str_view str_view, guf_allocator *alloc, guf_err *err);
|
|
GUF_STR_KWRDS guf_str *guf_str_init(guf_str *str, guf_str_view str_view, guf_allocator *alloc);
|
|
GUF_STR_KWRDS guf_str *guf_str_init_empty(guf_str *str, guf_allocator *alloc);
|
|
GUF_STR_KWRDS guf_str *guf_str_try_init_from_cstr(guf_str *str, const char* c_str, guf_allocator *alloc, guf_err *err);
|
|
GUF_STR_KWRDS guf_str *guf_str_init_from_cstr(guf_str *str, const char* c_str, guf_allocator *alloc);
|
|
|
|
// Return an initialised guf_str (or GUF_STR_UNINITIALISED on error)
|
|
GUF_STR_KWRDS guf_str guf_str_try_new(guf_str_view str_view, guf_allocator *alloc, guf_err *err);
|
|
GUF_STR_KWRDS guf_str guf_str_new(guf_str_view str_view, guf_allocator *alloc);
|
|
|
|
// Destructor, copy-constructor and move-constructor; equality- and comparison-operator (NOTE: ctx is ignored, just pass NULL)
|
|
GUF_STR_KWRDS void guf_str_free(guf_str *str, void *ctx);
|
|
GUF_STR_KWRDS guf_str *guf_str_copy(guf_str *dst, const guf_str *src, void *ctx);
|
|
GUF_STR_KWRDS guf_str *guf_str_move(guf_str *dst, guf_str *src, void *ctx);
|
|
GUF_STR_KWRDS bool guf_str_equal(const guf_str *a, const guf_str *b);
|
|
GUF_STR_KWRDS int guf_str_cmp(const guf_str *a, const guf_str *b);
|
|
|
|
// Hash-functions.
|
|
GUF_STR_KWRDS guf_hash_size_t guf_str_hash(const guf_str *str);
|
|
GUF_STR_KWRDS uint64_t guf_str_hash64(const guf_str *str);
|
|
GUF_STR_KWRDS uint32_t guf_str_hash32(const guf_str *str);
|
|
|
|
// Reserve at least min_capacity characters (excluding the null-terminator) (try to double the current capacity first; if that's not at least min_capacity, set the new capacity to min_capacity instead).
|
|
GUF_STR_KWRDS guf_str *guf_str_try_reserve(guf_str *str, ptrdiff_t min_capacity, guf_err *err);
|
|
GUF_STR_KWRDS guf_str *guf_str_reserve(guf_str *str, ptrdiff_t min_capacity);
|
|
// Shrink the capacity of the string so it does not waste space (short-string-optimisation will be applied if the new capacity <= GUF_STR_SSO_BUF_CAP)
|
|
GUF_STR_KWRDS guf_str *guf_str_try_shrink_to_fit(guf_str *str, guf_err *err);
|
|
GUF_STR_KWRDS guf_str *guf_str_shrink_to_fit(guf_str *str);
|
|
|
|
// Set the contents of str to the given string view (mutating str) -> return the mutated str
|
|
GUF_STR_KWRDS guf_str *guf_str_try_set(guf_str *str, guf_str_view str_view, guf_err *err);
|
|
GUF_STR_KWRDS guf_str *guf_str_set(guf_str *str, guf_str_view str_view);
|
|
|
|
// Return a view of the string.
|
|
GUF_STR_KWRDS guf_str_view guf_str_to_view(const guf_str *str);
|
|
|
|
// Return a non-const pointer to the character at the specified index of str (if possible)
|
|
GUF_STR_KWRDS char *guf_str_try_at(guf_str *str, ptrdiff_t idx, guf_err *err);
|
|
GUF_STR_KWRDS char *guf_str_at(guf_str *str, ptrdiff_t idx);
|
|
GUF_STR_KWRDS char *guf_str_try_back(guf_str *str, guf_err *err);
|
|
GUF_STR_KWRDS char *guf_str_back(guf_str *str);
|
|
GUF_STR_KWRDS char *guf_str_try_front(guf_str *str, guf_err *err);
|
|
GUF_STR_KWRDS char *guf_str_front(guf_str *str);
|
|
|
|
// Return a copy of the char at the specified index of str (if possible)
|
|
GUF_STR_KWRDS char guf_str_try_at_cpy(const guf_str *str, ptrdiff_t idx, guf_err *err);
|
|
GUF_STR_KWRDS char guf_str_at_cpy(const guf_str *str, ptrdiff_t idx);
|
|
GUF_STR_KWRDS char guf_str_try_back_cpy(const guf_str *str, guf_err *err);
|
|
GUF_STR_KWRDS char guf_str_back_cpy(const guf_str *str);
|
|
GUF_STR_KWRDS char guf_str_try_front_cpy(const guf_str *str, guf_err *err);
|
|
GUF_STR_KWRDS char guf_str_front_cpy(const guf_str *str);
|
|
|
|
/*
|
|
Turn str into the substring in range [pos, pos + count) (mutating str) -> return the mutated str
|
|
(Constant time if pos == 0, otherwise copying count chars to the beginning of the str, i.e. linear time.)
|
|
NOTE: To make a substring-copy (instead of mutating str), create a guf_str_view of str and use guf_str_view_substr
|
|
*/
|
|
GUF_STR_KWRDS guf_str *guf_str_try_substr(guf_str *str, ptrdiff_t pos, ptrdiff_t count, guf_err *err);
|
|
GUF_STR_KWRDS guf_str *guf_str_substr(guf_str *str, ptrdiff_t pos, ptrdiff_t count);
|
|
|
|
// Remove the last character from str if possible (mutating str) -> return the popped char
|
|
GUF_STR_KWRDS char guf_str_try_pop_back(guf_str *str, guf_err *err);
|
|
GUF_STR_KWRDS char guf_str_pop_back(guf_str *str);
|
|
|
|
// Append a char to str (n times; times must be >= 0) (mutating str) -> return the mutated str
|
|
GUF_STR_KWRDS guf_str *guf_str_try_append_char(guf_str *str, char c, ptrdiff_t times, guf_err *err);
|
|
GUF_STR_KWRDS guf_str *guf_str_append_char(guf_str *str, char c, ptrdiff_t times);
|
|
GUF_STR_KWRDS guf_str *guf_str_try_append_one_char(guf_str *str, char c, guf_err *err);
|
|
GUF_STR_KWRDS guf_str *guf_str_append_one_char(guf_str *str, char c);
|
|
|
|
// Append str_view to str (mutating str) -> return the mutated str
|
|
GUF_STR_KWRDS guf_str *guf_str_try_append(guf_str *str, guf_str_view sv, guf_err *err);
|
|
GUF_STR_KWRDS guf_str *guf_str_append(guf_str *str, guf_str_view sv);
|
|
|
|
// Return a pointer to the null-terminated char array representing the string (works like std::string::c_str in C++)
|
|
GUF_STR_KWRDS const char *guf_str_const_cstr(const guf_str *str);
|
|
GUF_STR_KWRDS char *guf_str_try_get_cstr(guf_str *str, guf_err *err); // Error if str is readonly.
|
|
GUF_STR_KWRDS char *guf_str_cstr(guf_str *str); // Panics if str is readonly.
|
|
|
|
// Return the length/capacity (in chars) *without* the final null-terminator.
|
|
GUF_STR_KWRDS ptrdiff_t guf_str_len(const guf_str *str);
|
|
GUF_STR_KWRDS ptrdiff_t guf_str_capacity(const guf_str *str);
|
|
|
|
// Return true if the char data of the string lives directly within the guf_str itself (short-string optimisation) instead of in a separate dynamic allocation
|
|
GUF_STR_KWRDS bool guf_str_is_short(const guf_str *str);
|
|
// Return true if the string is in readonly ("view") mode, i.e. can't be modified, copied etc. which is useful for guf_dict so we don't have to use guf_str_view but can use guf_str (by passing a read-only guf_str) for the lookup functions.
|
|
GUF_STR_KWRDS bool guf_str_is_readonly(const guf_str *str);
|
|
|
|
// Return a guf_str which is in an explicitly uninitialised state.
|
|
GUF_STR_KWRDS guf_str guf_str_new_uninitialised(void);
|
|
// Return true if str is explicitly uninitialised.
|
|
GUF_STR_KWRDS bool guf_str_is_uninit(const guf_str *str);
|
|
|
|
// Return true if the string's data does not violate its invariants (useful for debugging the library, should never be false after initialising a guf_str).
|
|
GUF_STR_KWRDS bool guf_str_is_valid(const guf_str *str);
|
|
|
|
#endif
|
|
|
|
// #define GUF_STR_IMPL_STATIC /* debug */
|
|
|
|
#if defined(GUF_STR_IMPL) || defined(GUF_STR_IMPL_STATIC)
|
|
|
|
#ifdef __cplusplus
|
|
#error "Must compile guf_str as C99 (or above) because type-punning with unions is undefined behaviour in C++"
|
|
#endif
|
|
|
|
#include "guf_common.h"
|
|
#include "guf_math.h"
|
|
#include <string.h>
|
|
|
|
#ifdef GUF_STR_IMPL
|
|
#define GUF_UTF8_IMPL
|
|
#else
|
|
#define GUF_UTF8_IMPL_STATIC
|
|
#endif
|
|
#include "guf_utf8.h"
|
|
|
|
// TODO: find_first_of
|
|
|
|
// guf_str:
|
|
#if defined(GUF_PLATFORM_LITTLE_ENDIAN)
|
|
#define GUF_STR_IS_LONG_MASK ((unsigned char)1) /* binary 0000.0001 */
|
|
#define GUF_STR_GET_CAP_MASK (~(size_t)1) /* binary 1111.1111 (1111.1111)* 1111.1110 */
|
|
|
|
/*
    Little-endian variant: store the capacity (including the null-terminator) of a long string
    and mark the string as long by setting the least-significant bit of lng.capacity.
    cap_with_null must be even, greater than GUF_STR_SSO_BUF_CAP and at most PTRDIFF_MAX.
*/
static inline void guf_str_set_lng_cap_(guf_str *str, size_t cap_with_null)
{
    GUF_ASSERT(cap_with_null % 2 == 0);
    GUF_ASSERT(cap_with_null <= PTRDIFF_MAX);
    GUF_ASSERT(cap_with_null > GUF_STR_SSO_BUF_CAP);

    const size_t is_long_bit = (size_t)1;
    str->data.lng.capacity = cap_with_null | is_long_bit; // The low bit is free because the capacity is even.
}
|
|
/*
    Little-endian variant: store the size (including the null-terminator) of a short string.
    The size is kept shifted left by one so that the least-significant bit (the "is long" tag) stays 0.
    size_with_null may be as large as GUF_STR_SSO_BUF_CAP (the whole in-place buffer is used).
*/
static inline void guf_str_set_shrt_size_(guf_str *str, unsigned char size_with_null)
{
    GUF_ASSERT(size_with_null <= GUF_STR_SSO_BUF_CAP && size_with_null < 0x80); // TODO: was < SSO_CAP, should be <= SSO_CAP?
    const unsigned shifted_size = (unsigned)size_with_null << 1; // < 0x100 because size_with_null < 0x80.
    str->data.shrt.size = (unsigned char)shifted_size;
}
|
|
#elif defined(GUF_PLATFORM_BIG_ENDIAN)
|
|
#define GUF_STR_IS_LONG_MASK ((unsigned char)0x80) /* binary 1000 0000 */
|
|
#define GUF_STR_GET_CAP_MASK ((size_t)SIZE_MAX >> 1u) /* binary 0111.1111 (1111.1111)* 1111.1111 */
|
|
|
|
/*
    Big-endian variant: store the capacity (including the null-terminator) of a long string.
    The (even) capacity is stored halved, and the most-significant bit of lng.capacity is set as the "is long" tag.
    cap_with_null must be even, greater than GUF_STR_SSO_BUF_CAP and at most PTRDIFF_MAX.
*/
static inline void guf_str_set_lng_cap_(guf_str *str, size_t cap_with_null)
{
    GUF_ASSERT(cap_with_null % 2 == 0);
    GUF_ASSERT(cap_with_null <= PTRDIFF_MAX);
    GUF_ASSERT(cap_with_null > GUF_STR_SSO_BUF_CAP);

    const size_t is_long_bit = ~GUF_STR_GET_CAP_MASK; // Only the most-significant bit set.
    const size_t halved_cap = cap_with_null >> 1;      // No information lost: the capacity is even.
    str->data.lng.capacity = is_long_bit | halved_cap;
}
|
|
/*
    Big-endian variant: store the size (including the null-terminator) of a short string.
    The size is stored as-is; it is < 0x80, so the most-significant bit (the "is long" tag) stays 0.
*/
static inline void guf_str_set_shrt_size_(guf_str *str, unsigned char size_with_null)
{
    GUF_ASSERT(size_with_null <= GUF_STR_SSO_BUF_CAP && size_with_null < 0x80); // TODO: was < SSO_CAP, should be <=
    unsigned char *const size_byte = &str->data.shrt.size;
    *size_byte = size_with_null;
}
|
|
#else
|
|
#error "guf_str: neither GUF_PLATFORM_LITTLE_ENDIAN nor GUF_PLATFORM_BIG_ENDIAN is defined"
|
|
#endif
|
|
|
|
GUF_STR_KWRDS bool guf_str_is_readonly(const guf_str *str)
|
|
{
|
|
GUF_ASSERT(str);
|
|
return !str->allocator;
|
|
}
|
|
|
|
static bool guf_str_is_short_internal_(const guf_str *str)
|
|
{
|
|
if (guf_str_is_readonly(str)) {
|
|
return false;
|
|
}
|
|
const unsigned char first_byte = str->data.shrt.size; // union type-punning (only legal in C99 and above; undefined behaviour in C++ I think).
|
|
return (first_byte & GUF_STR_IS_LONG_MASK) == 0;
|
|
}
|
|
|
|
// Returns the capacity without the final null-terminator
|
|
static size_t guf_str_cap_internal_(const guf_str *str)
|
|
{
|
|
if (guf_str_is_short_internal_(str)) {
|
|
return GUF_STR_SSO_BUF_CAP - 1;
|
|
} else if (guf_str_is_readonly(str)) {
|
|
return 0;
|
|
} else {
|
|
// Precondition: all capacities for data.lng must be even.
|
|
#if defined(GUF_PLATFORM_LITTLE_ENDIAN)
|
|
GUF_ASSERT(str->data.lng.capacity & ~GUF_STR_GET_CAP_MASK); // Assert the is_long bit is actually set.
|
|
const size_t cap_with_null = str->data.lng.capacity & GUF_STR_GET_CAP_MASK;
|
|
GUF_ASSERT(cap_with_null % 2 == 0);
|
|
#elif defined(GUF_PLATFORM_BIG_ENDIAN)
|
|
GUF_ASSERT(str->data.lng.capacity & ~GUF_STR_GET_CAP_MASK); // Assert the is_long bit is actually set.
|
|
const size_t cap_with_null = (str->data.lng.capacity & GUF_STR_GET_CAP_MASK) << 1;
|
|
GUF_ASSERT(cap_with_null % 2 == 0);
|
|
#endif
|
|
GUF_ASSERT(cap_with_null > 0 && cap_with_null > GUF_STR_SSO_BUF_CAP);
|
|
GUF_ASSERT(cap_with_null <= PTRDIFF_MAX);
|
|
return cap_with_null - 1;
|
|
}
|
|
}
|
|
|
|
static size_t guf_str_size_internal_(const guf_str *str)
|
|
{
|
|
if (guf_str_is_short_internal_(str)) {
|
|
GUF_ASSERT(str->data.shrt.size > 0);
|
|
#if defined(GUF_PLATFORM_LITTLE_ENDIAN)
|
|
const size_t size = (str->data.shrt.size >> 1);
|
|
#elif defined(GUF_PLATFORM_BIG_ENDIAN)
|
|
const size_t size = (str->data.shrt.size);
|
|
#endif
|
|
GUF_ASSERT(size > 0 && size <= GUF_STR_SSO_BUF_CAP);
|
|
return size;
|
|
} else {
|
|
const size_t size = str->data.lng.size;
|
|
GUF_ASSERT(size > 0 && size <= PTRDIFF_MAX);
|
|
return size;
|
|
}
|
|
}
|
|
|
|
// Return the length of str without the null-terminator (size - 1; 0 if the stored size is unexpectedly 0).
static size_t guf_str_len_internal_(const guf_str *str)
{
    const size_t size_with_null = guf_str_size_internal_(str);
    GUF_ASSERT(size_with_null > 0);
    // Guard against underflow in case assertions are compiled out.
    return (size_with_null == 0) ? 0 : size_with_null - 1;
}
|
|
|
|
// Public, validating wrapper around guf_str_is_short_internal_.
GUF_STR_KWRDS bool guf_str_is_short(const guf_str *str)
{
    GUF_ASSERT(guf_str_is_valid(str));
    const bool is_short = guf_str_is_short_internal_(str);
    return is_short;
}
|
|
|
|
// Return the capacity of str in chars, without the final null-terminator.
GUF_STR_KWRDS ptrdiff_t guf_str_capacity(const guf_str *str)
{
    GUF_ASSERT(guf_str_is_valid(str));
    const size_t cap = guf_str_cap_internal_(str);
    return (ptrdiff_t)cap;
}
|
|
|
|
// Return the length of str in chars, without the final null-terminator.
GUF_STR_KWRDS ptrdiff_t guf_str_len(const guf_str *str)
{
    GUF_ASSERT(guf_str_is_valid(str));
    const size_t len = guf_str_len_internal_(str);
    return (ptrdiff_t)len;
}
|
|
|
|
GUF_STR_KWRDS bool guf_str_is_valid(const guf_str *str)
|
|
{
|
|
GUF_ASSERT(str);
|
|
if (!str || guf_str_is_uninit(str)) {
|
|
return false;
|
|
}
|
|
const bool is_readonly = !str->allocator;
|
|
if (is_readonly) {
|
|
bool valid_readonly = str->data.lng.c_str && str->data.lng.capacity == 0 && str->data.lng.size > 0;
|
|
return valid_readonly;
|
|
}
|
|
const bool valid_allocator = str->allocator && str->allocator->alloc && str->allocator->free && str->allocator->realloc;
|
|
if (!valid_allocator) {
|
|
return false;
|
|
}
|
|
|
|
if (guf_str_is_short_internal_(str)) {
|
|
const size_t size = guf_str_size_internal_(str); // len + 1
|
|
return size > 0 && size <= GUF_STR_SSO_BUF_CAP && str->data.shrt.c_str[size - 1] == '\0';
|
|
} else {
|
|
const size_t cap_with_null = guf_str_cap_internal_(str) + 1;
|
|
const size_t size = guf_str_size_internal_(str); // len + 1
|
|
const bool valid_cap = cap_with_null > GUF_STR_SSO_BUF_CAP && cap_with_null <= PTRDIFF_MAX && (cap_with_null % 2 == 0);
|
|
return valid_cap && size >= 1 && str->data.lng.c_str && str->data.lng.size > 0 && str->data.lng.size <= cap_with_null && str->data.lng.c_str[size - 1] == '\0';
|
|
}
|
|
}
|
|
|
|
/*
    Make sure str can hold at least new_cap_min chars (excluding the null-terminator).

    - Returns str (and sets *err to GUF_ERR_NONE if err is non-NULL) on success, including the case
      where the current capacity already suffices.
    - Returns NULL after reporting through guf_err_set_or_panic if str is read-only (GUF_ERR_INVALID_ARG),
      if new_cap_min >= PTRDIFF_MAX - 1 (GUF_ERR_INT_OVERFLOW), or if the allocator fails (GUF_ERR_ALLOC_FAIL).
      On allocation failure str is left unchanged.

    Growth policy: the new capacity (including the null-terminator) is the larger of the requested one
    (rounded up to even) and twice the old one (clamped to PTRDIFF_MAX - 1).
*/
GUF_STR_KWRDS guf_str *guf_str_try_reserve(guf_str *str, ptrdiff_t new_cap_min, guf_err *err)
{
    GUF_ASSERT(guf_str_is_valid(str));

    if (guf_str_is_readonly(str)) {
        guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, GUF_ERR_MSG("in guf_str_try_reserve: guf_str is readonly"));
        return NULL;
    }

    const size_t old_cap_with_null = guf_str_cap_internal_(str) + 1;
    const size_t len_with_null = guf_str_len_internal_(str) + 1;

    // new_cap_min < old capacity + 1  <=>  new_cap_min <= old capacity: no need to grow.
    if (new_cap_min < (ptrdiff_t)old_cap_with_null) {
        guf_err_set_if_not_null(err, GUF_ERR_NONE);
        return str;
    }

    if (new_cap_min >= PTRDIFF_MAX - 1) {
        guf_err_set_or_panic(err, GUF_ERR_INT_OVERFLOW, GUF_ERR_MSG("in guf_str_try_reserve: new_cap_min >= PTRDIFF_MAX - 1"));
        return NULL;
    }

    size_t new_cap_min_with_null = (size_t)new_cap_min + 1;
    if (new_cap_min_with_null % 2 != 0) { // Only an even lng.capacity is allowed.
        new_cap_min_with_null += 1;
    }

    // Try if we can reach at least new_cap_min_with_null by doubling the capacity.
    const size_t GUF_STR_GROWTH_FAC = 2;
    size_t times_two_cap = old_cap_with_null * GUF_STR_GROWTH_FAC;
    if (guf_mul_is_overflow_size_t(old_cap_with_null, GUF_STR_GROWTH_FAC) || times_two_cap >= PTRDIFF_MAX) {
        times_two_cap = (PTRDIFF_MAX % 2 == 0) ? PTRDIFF_MAX : PTRDIFF_MAX - 1; // Largest even value <= PTRDIFF_MAX.
    }
    if (times_two_cap > new_cap_min_with_null) {
        new_cap_min_with_null = times_two_cap;
    }
    GUF_ASSERT(new_cap_min_with_null > len_with_null && new_cap_min_with_null <= PTRDIFF_MAX);

    const size_t space_remaining = (new_cap_min_with_null - len_with_null);
    if (new_cap_min_with_null < (PTRDIFF_MAX - 8) && space_remaining < 4) {
        // Have some leeway. The increment is rounded up to an even number so that the
        // capacity stays even (adding an odd amount would violate the lng.capacity invariant).
        size_t leeway = 4 - space_remaining;
        leeway += leeway % 2;
        new_cap_min_with_null += leeway;
    }

    GUF_ASSERT(new_cap_min_with_null % 2 == 0);

    if (guf_str_is_short_internal_(str)) { // a.) Was short string -> need initial allocation.
        char *c_str_new = str->allocator->alloc(new_cap_min_with_null, str->allocator->ctx);
        if (!c_str_new) {
            guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, GUF_ERR_MSG("in guf_str_try_reserve: Initial allocation failed."));
            return NULL;
        }
        // Copy out of the in-place buffer *before* the lng members overwrite it (they share storage).
        memcpy(c_str_new, str->data.shrt.c_str, len_with_null);
        str->data.lng.c_str = c_str_new;
        str->data.lng.size = len_with_null;
        guf_str_set_lng_cap_(str, new_cap_min_with_null);
    } else { // b) Was long string -> need re-allocation
        char *c_str_new = str->allocator->realloc(str->data.lng.c_str, old_cap_with_null, new_cap_min_with_null, str->allocator->ctx);
        if (!c_str_new) { // The old buffer pointer is only replaced on success.
            guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, GUF_ERR_MSG("in guf_str_try_reserve: re-allocation failed."));
            return NULL;
        }
        str->data.lng.c_str = c_str_new;
        guf_str_set_lng_cap_(str, new_cap_min_with_null);
    }

    GUF_ASSERT(guf_str_is_valid(str));
    guf_err_set_if_not_null(err, GUF_ERR_NONE);
    return str;
}
|
|
|
|
// Same as guf_str_try_reserve, but without an error out-parameter (err == NULL is passed on).
GUF_STR_KWRDS guf_str *guf_str_reserve(guf_str *str, ptrdiff_t new_cap_min)
{
    guf_err *const no_err = NULL;
    return guf_str_try_reserve(str, new_cap_min, no_err);
}
|
|
|
|
|
|
/*
    Shrink the capacity of str so that it wastes as little space as possible.

    - Short strings and strings whose capacity already equals their size are left untouched.
    - If the contents fit into the in-place buffer (size incl. null-terminator <= GUF_STR_SSO_BUF_CAP),
      the chars are copied into it and the heap buffer is freed.
    - Otherwise the heap buffer is re-allocated to the size including the null-terminator,
      rounded up to the next even number (lng.capacity must always be even).

    Returns str on success (setting *err to GUF_ERR_NONE if err is non-NULL).
    Returns NULL after reporting through guf_err_set_or_panic if str is read-only (GUF_ERR_INVALID_ARG)
    or if the re-allocation fails (GUF_ERR_ALLOC_FAIL; str is left unchanged in that case).
*/
GUF_STR_KWRDS guf_str *guf_str_try_shrink_to_fit(guf_str *str, guf_err *err)
{
    GUF_ASSERT(guf_str_is_valid(str));

    if (guf_str_is_readonly(str)) {
        guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, GUF_ERR_MSG("in guf_str_try_shrink_to_fit: guf_str is readonly"));
        return NULL;
    }

    const size_t old_cap_with_null = guf_str_cap_internal_(str) + 1;
    const size_t len_with_null = guf_str_len_internal_(str) + 1;
    GUF_ASSERT(len_with_null <= old_cap_with_null);

    if (old_cap_with_null == len_with_null || guf_str_is_short_internal_(str)) {
        guf_err_set_if_not_null(err, GUF_ERR_NONE);
        return str;
    }

    char *c_str_old = guf_str_cstr(str);
    GUF_ASSERT(c_str_old);

    if (len_with_null <= GUF_STR_SSO_BUF_CAP) { // a) Shrunk size fits into short.string.
        GUF_ASSERT(len_with_null <= UCHAR_MAX);
        // From here on the lng members are overwritten (shared storage); the heap pointer
        // and the old capacity are still available in c_str_old / old_cap_with_null.
        guf_str_set_shrt_size_(str, (unsigned char)len_with_null);
        memcpy(str->data.shrt.c_str, c_str_old, len_with_null);
        str->allocator->free(c_str_old, old_cap_with_null, str->allocator->ctx);
        GUF_ASSERT(guf_str_is_short(str));
        GUF_ASSERT(guf_str_is_valid(str));
        guf_err_set_if_not_null(err, GUF_ERR_NONE);
        return str;
    }

    // b) Shrunk size does not fit into short-string.
    // Only an even lng.capacity can be represented (one bit of it is the "is long" tag),
    // so round the target capacity up to the next even number.
    const size_t new_cap_with_null = len_with_null + (len_with_null % 2);
    GUF_ASSERT(new_cap_with_null <= old_cap_with_null); // old_cap_with_null is even and > len_with_null.
    if (new_cap_with_null == old_cap_with_null) { // Already as small as representable.
        guf_err_set_if_not_null(err, GUF_ERR_NONE);
        return str;
    }

    char *c_str_new = str->allocator->realloc(c_str_old, old_cap_with_null, new_cap_with_null, str->allocator->ctx);
    if (!c_str_new) {
        guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, GUF_ERR_MSG("in guf_str_try_shrink_to_fit: realloc failed"));
        return NULL;
    }

    str->data.lng.c_str = c_str_new;
    guf_str_set_lng_cap_(str, new_cap_with_null);
    GUF_ASSERT(!guf_str_is_short(str));
    GUF_ASSERT(guf_str_is_valid(str));
    guf_err_set_if_not_null(err, GUF_ERR_NONE);
    return str;
}
|
|
|
|
// Same as guf_str_try_shrink_to_fit, but without an error out-parameter (err == NULL is passed on).
GUF_STR_KWRDS guf_str *guf_str_shrink_to_fit(guf_str *str)
{
    guf_err *const no_err = NULL;
    return guf_str_try_shrink_to_fit(str, no_err);
}
|
|
|
|
|
|
// Return the mutable char buffer of str: the in-place buffer for short strings, lng.c_str otherwise (no validation).
static char *guf_str_get_cstr_internal_(guf_str *str)
{
    const bool is_short = guf_str_is_short_internal_(str);
    return is_short ? str->data.shrt.c_str : str->data.lng.c_str;
}
|
|
|
|
static const char *guf_str_get_const_cstr_internal_(const guf_str *str)
|
|
{
|
|
if (guf_str_is_short(str)) {
|
|
return str->data.shrt.c_str;
|
|
} else {
|
|
return str->data.lng.c_str;
|
|
}
|
|
}
|
|
|
|
// Return a read-only pointer to the null-terminated chars of str (also works for read-only strings).
GUF_STR_KWRDS const char *guf_str_const_cstr(const guf_str *str)
{
    GUF_ASSERT(guf_str_is_valid(str));
    const char *const chars = guf_str_get_const_cstr_internal_(str);
    GUF_ASSERT(chars);
    return chars;
}
|
|
|
|
/*
    Return a mutable pointer to the null-terminated chars of str.
    For read-only strings, GUF_ERR_INVALID_ARG is reported through guf_err_set_or_panic and NULL is returned.
    On success *err is set to GUF_ERR_NONE if err is non-NULL.
*/
GUF_STR_KWRDS char *guf_str_try_get_cstr(guf_str *str, guf_err *err)
{
    GUF_ASSERT(guf_str_is_valid(str));

    const bool readonly = guf_str_is_readonly(str);
    if (readonly) {
        guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, GUF_ERR_MSG("in guf_str_try_get_cstr: cannot return non-const char pointer because str is readonly"));
        return NULL;
    }

    char *const chars = guf_str_get_cstr_internal_(str);
    GUF_ASSERT(chars);
    guf_err_set_if_not_null(err, GUF_ERR_NONE);
    return chars;
}
|
|
|
|
// Same as guf_str_try_get_cstr, but without an error out-parameter (err == NULL is passed on).
GUF_STR_KWRDS char *guf_str_cstr(guf_str *str)
{
    guf_err *const no_err = NULL;
    return guf_str_try_get_cstr(str, no_err);
}
|
|
|
|
|
|
/*
    Record a new length (excluding the null-terminator) for str.
    Only the stored size is updated (len + 1); the char data is not touched.
    len must not exceed the current capacity and str must not be read-only.
*/
static void guf_str_set_len_internal_(guf_str *str, size_t len)
{
    GUF_ASSERT(len <= guf_str_cap_internal_(str));
    GUF_ASSERT(!guf_str_is_readonly(str));

    const size_t size_with_null = len + 1;
    if (!guf_str_is_short_internal_(str)) {
        str->data.lng.size = size_with_null;
        return;
    }
    GUF_ASSERT(size_with_null <= UCHAR_MAX);
    guf_str_set_shrt_size_(str, (unsigned char)size_with_null);
}
|
|
|
|
// Return a guf_str in the explicitly uninitialised state (GUF_STR_UNINITIALISED).
GUF_STR_KWRDS guf_str guf_str_new_uninitialised(void)
{
    const guf_str uninitialised = GUF_STR_UNINITIALISED;
    return uninitialised;
}
|
|
|
|
GUF_STR_KWRDS bool guf_str_is_uninit(const guf_str *str)
|
|
{
|
|
GUF_ASSERT(str);
|
|
return !str->allocator && !str->data.shrt.size && str->data.shrt.c_str[0] == '\0';
|
|
}
|
|
|
|
|
|
/*
    Initialise str as an empty short string that uses the given allocator and return str.
    str, allocator and the allocator's alloc/realloc/free callbacks must be non-NULL (checked with GUF_ASSERT_RELEASE).
*/
GUF_STR_KWRDS guf_str *guf_str_init_empty(guf_str *str, guf_allocator *allocator)
{
    GUF_ASSERT_RELEASE(str && allocator);
    GUF_ASSERT_RELEASE(allocator->alloc && allocator->realloc && allocator->free);

    str->allocator = allocator;
    // An empty string still stores its null-terminator, hence size 1.
    const unsigned char empty_size_with_null = 1;
    guf_str_set_shrt_size_(str, empty_size_with_null);
    str->data.shrt.c_str[0] = '\0';

    GUF_ASSERT(guf_str_is_valid(str));
    return str;
}
|
|
|
|
/*
    Initialise str with a copy of the chars of str_view, using alloc for any allocation.

    Returns str on success (setting *err to GUF_ERR_NONE if err is non-NULL).
    Returns NULL after reporting through guf_err_set_or_panic if
    - str is NULL, or alloc or one of its callbacks is NULL (GUF_ERR_NULL_PTR)
    - str_view is invalid (GUF_ERR_NULL_PTR)
    or if reserving space for the chars fails; in that case guf_str_try_reserve has already
    reported the error through err, and str is left as a valid empty string.
*/
GUF_STR_KWRDS guf_str *guf_str_try_init(guf_str *str, guf_str_view str_view, guf_allocator *alloc, guf_err *err)
{
    if (!str) {
        guf_err_set_or_panic(err, GUF_ERR_NULL_PTR, GUF_ERR_MSG("in guf_str_try_init: str is NULL"));
        return NULL;
    } else if (!alloc || !alloc->alloc || !alloc->realloc || !alloc->free) {
        guf_err_set_or_panic(err, GUF_ERR_NULL_PTR, GUF_ERR_MSG("in guf_str_try_init: alloc (or allocs function pointers) is/are NULL"));
        return NULL;
    }

    if (!guf_str_view_is_valid(str_view)) {
        guf_err_set_or_panic(err, GUF_ERR_NULL_PTR, GUF_ERR_MSG("in guf_str_try_init: invalid str_view"));
        return NULL;
    }

    guf_str_init_empty(str, alloc);

    if (str_view.len == 0) { // Nothing to copy: the empty string is the result.
        GUF_ASSERT(!guf_str_is_readonly(str));
        GUF_ASSERT(guf_str_is_valid(str));
        guf_err_set_if_not_null(err, GUF_ERR_NONE);
        return str;
    }
    GUF_ASSERT(str_view.str && str_view.len > 0);

    // Propagate a reserve failure to the caller instead of panicking: every failure path of
    // guf_str_try_reserve has already gone through guf_err_set_or_panic with the caller's err.
    if (!guf_str_try_reserve(str, str_view.len, err)) {
        GUF_ASSERT(guf_str_is_valid(str)); // Still the empty string set up above.
        return NULL;
    }
    GUF_ASSERT(guf_str_is_valid(str));
    GUF_ASSERT(guf_str_len_internal_(str) == 0);
    GUF_ASSERT(guf_str_cap_internal_(str) >= (size_t)str_view.len);
    GUF_ASSERT(!guf_str_is_readonly(str));

    char *c_str_dst = guf_str_get_cstr_internal_(str);
    GUF_ASSERT(c_str_dst);
    memcpy(c_str_dst, str_view.str, (size_t)str_view.len);
    c_str_dst[str_view.len] = '\0';
    guf_str_set_len_internal_(str, (size_t)str_view.len);

    GUF_ASSERT(!guf_str_is_readonly(str));
    GUF_ASSERT(guf_str_is_valid(str));
    guf_err_set_if_not_null(err, GUF_ERR_NONE);
    return str;
}
|
|
|
|
// Same as guf_str_try_init, but without an error out-parameter (err == NULL is passed on).
GUF_STR_KWRDS guf_str *guf_str_init(guf_str *str, guf_str_view str_view, guf_allocator *alloc)
{
    guf_err *const no_err = NULL;
    return guf_str_try_init(str, str_view, alloc, no_err);
}
|
|
|
|
/*
    Return a new guf_str initialised from str_view (by value).
    If guf_str_try_init reports an error through a non-NULL err, the error is re-reported
    and an uninitialised guf_str (see guf_str_new_uninitialised) is returned instead.
*/
GUF_STR_KWRDS guf_str guf_str_try_new(guf_str_view str_view, guf_allocator *alloc, guf_err *err)
{
    guf_str result = guf_str_new_uninitialised();
    guf_str_try_init(&result, str_view, alloc, err);

    const bool init_failed = err && *err != GUF_ERR_NONE;
    if (init_failed) {
        guf_err_set_or_panic(err, *err, GUF_ERR_MSG("in guf_str_try_new: failed init"));
        return guf_str_new_uninitialised();
    }

    GUF_ASSERT(!guf_str_is_uninit(&result));
    guf_err_set_if_not_null(err, GUF_ERR_NONE);
    return result;
}
|
|
|
|
// Same as guf_str_try_new, but without an error out-parameter (err == NULL is passed on).
GUF_STR_KWRDS guf_str guf_str_new(guf_str_view str_view, guf_allocator *alloc)
{
    guf_err *const no_err = NULL;
    return guf_str_try_new(str_view, alloc, no_err);
}
|
|
|
|
/*
    Initialise str with a copy of the null-terminated C string c_str, using alloc for any allocation.

    Returns str on success (setting *err to GUF_ERR_NONE if err is non-NULL).
    Returns NULL after reporting through guf_err_set_or_panic if
    - str, c_str, alloc or one of alloc's callbacks is NULL (GUF_ERR_NULL_PTR)
    - strlen(c_str) >= PTRDIFF_MAX (GUF_ERR_INT_OVERFLOW)
    - guf_str_try_init reported an error through a non-NULL err
*/
GUF_STR_KWRDS guf_str *guf_str_try_init_from_cstr(guf_str *str, const char* c_str, guf_allocator *alloc, guf_err *err)
{
    GUF_ASSERT(str);
    if (!str) {
        guf_err_set_or_panic(err, GUF_ERR_NULL_PTR, GUF_ERR_MSG("in guf_str_try_init_from_cstr: str is NULL"));
        return NULL;
    } else if (!c_str) {
        guf_err_set_or_panic(err, GUF_ERR_NULL_PTR, GUF_ERR_MSG("in guf_str_try_init_from_cstr: c_str is NULL"));
        return NULL;
    } else if (!alloc || !alloc->alloc || !alloc->realloc || !alloc->free) {
        guf_err_set_or_panic(err, GUF_ERR_NULL_PTR, GUF_ERR_MSG("in guf_str_try_init_from_cstr: alloc (or allocs function pointers) is/are NULL"));
        return NULL;
    }

    const size_t len = strlen(c_str);
    if (len >= PTRDIFF_MAX) { // The length must be representable as the ptrdiff_t len of a guf_str_view.
        guf_err_set_or_panic(err, GUF_ERR_INT_OVERFLOW, GUF_ERR_MSG("in guf_str_try_init_from_cstr: strlen(c_str) >= PTRDIFF_MAX"));
        return NULL;
    }

    const guf_str_view c_str_view = {.str = c_str, .len = (ptrdiff_t)len};
    guf_str_try_init(str, c_str_view, alloc, err);
    if (err && *err != GUF_ERR_NONE) {
        guf_err_set_or_panic(err, *err, GUF_ERR_MSG("in guf_str_try_init_from_cstr: guf_str_try_init failed"));
        return NULL;
    }

    guf_err_set_if_not_null(err, GUF_ERR_NONE);
    return str;
}
|
|
|
|
// Same as guf_str_try_init_from_cstr, but without an error out-parameter (err == NULL is passed on).
GUF_STR_KWRDS guf_str *guf_str_init_from_cstr(guf_str *str, const char* c_str, guf_allocator *alloc)
{
    guf_err *const no_err = NULL;
    return guf_str_try_init_from_cstr(str, c_str, alloc, no_err);
}
|
|
|
|
// Return a guf_str_view referring to the chars of str (no copy is made; the length excludes the null-terminator).
GUF_STR_KWRDS guf_str_view guf_str_to_view(const guf_str *str)
{
    GUF_ASSERT(guf_str_is_valid(str));

    const char *const chars = guf_str_const_cstr(str);
    const ptrdiff_t len = guf_str_len(str);
    const guf_str_view view = {.str = chars, .len = len};

    GUF_ASSERT(guf_str_view_is_valid(view));
    return view;
}
|
|
|
|
/*
    Return a mutable pointer to the char at index idx of str.
    Returns NULL after reporting through guf_err_set_or_panic if idx is outside [0, len)
    (GUF_ERR_IDX_RANGE) or if guf_str_try_get_cstr reported an error through a non-NULL err
    (str is read-only). On success *err is set to GUF_ERR_NONE if err is non-NULL.
*/
GUF_STR_KWRDS char *guf_str_try_at(guf_str *str, ptrdiff_t idx, guf_err *err)
{
    GUF_ASSERT(guf_str_is_valid(str));

    const ptrdiff_t len = guf_str_len(str);

    if (idx < 0) {
        guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_at: idx < 0"));
        return NULL;
    }
    if (idx >= len) {
        guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_at: idx out of range (idx >= len)"));
        return NULL;
    }

    char *const chars = guf_str_try_get_cstr(str, err);
    if (err && *err != GUF_ERR_NONE) {
        guf_err_set_or_panic(err, *err, GUF_ERR_MSG("in guf_str_try_at: guf_str_try_get_cstr failed (guf_str is readonly)"));
        return NULL;
    }
    GUF_ASSERT(chars);
    guf_err_set_if_not_null(err, GUF_ERR_NONE);
    return &chars[idx];
}
|
|
|
|
/*
    Same as guf_str_try_at, but without an error out-parameter: failures go
    through guf_str_try_at's guf_err_set_or_panic(NULL, ...) path.
*/
GUF_STR_KWRDS char *guf_str_at(guf_str *str, ptrdiff_t idx)
{
    guf_err *const no_err_out = NULL;
    return guf_str_try_at(str, idx, no_err_out);
}
|
|
|
|
/*
    Return a mutable pointer to the last byte of str.
    Reports GUF_ERR_IDX_RANGE (and returns NULL) if the string is empty;
    otherwise defers to guf_str_try_at(str, len - 1, err).
*/
GUF_STR_KWRDS char *guf_str_try_back(guf_str *str, guf_err *err)
{
    GUF_ASSERT(guf_str_is_valid(str));

    const ptrdiff_t n = guf_str_len(str);
    if (n == 0) {
        guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_back: len == 0"));
        return NULL;
    }

    // Clear any stale error before delegating.
    guf_err_set_if_not_null(err, GUF_ERR_NONE);
    const ptrdiff_t last_idx = n - 1;
    return guf_str_try_at(str, last_idx, err);
}
|
|
|
|
/*
    Same as guf_str_try_back, but without an error out-parameter: failures go
    through guf_str_try_back's guf_err_set_or_panic(NULL, ...) path.
*/
GUF_STR_KWRDS char *guf_str_back(guf_str *str)
{
    guf_err *const no_err_out = NULL;
    return guf_str_try_back(str, no_err_out);
}
|
|
|
|
/*
    Return a mutable pointer to the first byte of str.
    Reports GUF_ERR_IDX_RANGE (and returns NULL) if the string is empty;
    otherwise defers to guf_str_try_at(str, 0, err).
*/
GUF_STR_KWRDS char *guf_str_try_front(guf_str *str, guf_err *err)
{
    GUF_ASSERT(guf_str_is_valid(str));

    const ptrdiff_t n = guf_str_len(str);
    if (n == 0) {
        guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_front: len == 0"));
        return NULL;
    }

    // Clear any stale error before delegating.
    guf_err_set_if_not_null(err, GUF_ERR_NONE);
    const ptrdiff_t first_idx = 0;
    return guf_str_try_at(str, first_idx, err);
}
|
|
|
|
/*
    Same as guf_str_try_front, but without an error out-parameter: failures go
    through guf_str_try_front's guf_err_set_or_panic(NULL, ...) path.
*/
GUF_STR_KWRDS char *guf_str_front(guf_str *str)
{
    guf_err *const no_err_out = NULL;
    return guf_str_try_front(str, no_err_out);
}
|
|
|
|
/*
    Return a copy of the byte at index idx of str (works on const strings,
    since it reads through guf_str_const_cstr).

    Reports GUF_ERR_IDX_RANGE and returns '\0' if idx < 0 or idx >= guf_str_len(str).
    On success *err (if non-NULL) is set to GUF_ERR_NONE.
    NOTE: '\0' is also a possible regular result, so check *err to tell the cases apart.
*/
GUF_STR_KWRDS char guf_str_try_at_cpy(const guf_str *str, ptrdiff_t idx, guf_err *err)
{
    GUF_ASSERT(guf_str_is_valid(str));

    const ptrdiff_t n = guf_str_len(str);

    if (idx < 0) {
        guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_at_cpy: idx < 0"));
        return '\0';
    }
    if (idx >= n) {
        guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_at_cpy: idx out of range (idx >= len)"));
        return '\0';
    }

    const char *chars = guf_str_const_cstr(str);
    GUF_ASSERT(chars);

    guf_err_set_if_not_null(err, GUF_ERR_NONE);
    return *(chars + idx);
}
|
|
|
|
GUF_STR_KWRDS char guf_str_at_cpy(const guf_str *str, ptrdiff_t idx)
|
|
{
|
|
return guf_str_try_at_cpy(str, idx, NULL);
|
|
}
|
|
|
|
/*
    Return a copy of the last byte of str.
    Reports GUF_ERR_IDX_RANGE (and returns '\0') if the string is empty;
    otherwise defers to guf_str_try_at_cpy(str, len - 1, err).
*/
GUF_STR_KWRDS char guf_str_try_back_cpy(const guf_str *str, guf_err *err)
{
    GUF_ASSERT(guf_str_is_valid(str));

    const ptrdiff_t n = guf_str_len(str);
    if (n == 0) {
        guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_back_cpy: len == 0"));
        return '\0';
    }

    guf_err_set_if_not_null(err, GUF_ERR_NONE);
    const ptrdiff_t last_idx = n - 1;
    return guf_str_try_at_cpy(str, last_idx, err);
}
|
|
|
|
GUF_STR_KWRDS char guf_str_back_cpy(const guf_str *str)
|
|
{
|
|
return guf_str_try_back_cpy(str, NULL);
|
|
}
|
|
|
|
/*
    Return a copy of the first byte of str.
    Reports GUF_ERR_IDX_RANGE (and returns '\0') if the string is empty;
    otherwise defers to guf_str_try_at_cpy(str, 0, err).
*/
GUF_STR_KWRDS char guf_str_try_front_cpy(const guf_str *str, guf_err *err)
{
    GUF_ASSERT(guf_str_is_valid(str));

    const ptrdiff_t n = guf_str_len(str);
    if (n == 0) {
        guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_front_cpy: len == 0"));
        return '\0';
    }

    guf_err_set_if_not_null(err, GUF_ERR_NONE);
    const ptrdiff_t first_idx = 0;
    return guf_str_try_at_cpy(str, first_idx, err);
}
|
|
|
|
GUF_STR_KWRDS char guf_str_front_cpy(const guf_str *str)
|
|
{
|
|
return guf_str_try_front_cpy(str, NULL);
|
|
}
|
|
|
|
|
|
/*
    Release the resources held by str and reset it to the value returned by
    guf_str_new_uninitialised().

    - NULL or already-uninitialised strings are left untouched.
    - Read-only and short strings own no allocation; they are only reset.
    - Long strings hand their buffer (capacity + 1 bytes for the terminator)
      back to str->allocator->free before being reset.
    ctx is unused.
*/
GUF_STR_KWRDS void guf_str_free(guf_str *str, void *ctx)
{
    (void)ctx;

    if (str == NULL || guf_str_is_uninit(str)) {
        return;
    }

    if (!guf_str_is_readonly(str)) { // Read-only strings never need de-allocation.
        if (!guf_str_is_short(str)) { // Long string: need to de-allocate.
            GUF_ASSERT(guf_str_capacity(str) < PTRDIFF_MAX);
            const ptrdiff_t alloc_size = guf_str_capacity(str) + 1; // + 1 for the null-terminator.
            GUF_ASSERT((alloc_size % 2) == 0);

            char *heap_buf = guf_str_cstr(str);
            GUF_ASSERT(str->allocator->free);
            if (str->allocator->free) {
                str->allocator->free(heap_buf, alloc_size, str->allocator->ctx);
            }
        } else {
            GUF_ASSERT(guf_str_is_short(str));
        }
    }

    *str = guf_str_new_uninitialised();
}
|
|
|
|
/*
    Deep-copy src into dst (dst is overwritten without being freed first).

    dst is first initialised as an empty string using src's allocator. If src is a
    long string, a buffer of (capacity + 1) bytes is allocated with src's allocator
    and dst takes over src's capacity/size fields; otherwise only the short size
    byte is copied. Finally the characters, including the null-terminator, are copied.

    Returns dst on success. If the allocation fails, dst is reset to
    guf_str_new_uninitialised() and NULL is returned.
    Read-only sources are rejected via GUF_ASSERT_RELEASE. ctx is unused.
*/
GUF_STR_KWRDS guf_str *guf_str_copy(guf_str *dst, const guf_str *src, void *ctx)
{
    (void)ctx;
    GUF_ASSERT_RELEASE(dst);
    GUF_ASSERT_RELEASE(guf_str_is_valid(src));
    GUF_ASSERT_RELEASE(!guf_str_is_readonly(src)); // Deep-copying a read-only string is not supported.

    guf_str_init_empty(dst, src->allocator);
    GUF_ASSERT(guf_str_is_short_internal_(dst));

    if (guf_str_is_short_internal_(src)) {
        dst->data.shrt.size = src->data.shrt.size;
    } else {
        const size_t bytes_needed = guf_str_cap_internal_(src) + 1; // + 1 for the null-terminator.
        char *heap_buf = src->allocator->alloc(bytes_needed, src->allocator->ctx);
        if (heap_buf == NULL) {
            *dst = guf_str_new_uninitialised();
            return NULL;
        }
        dst->data.lng.c_str    = heap_buf;
        dst->data.lng.capacity = src->data.lng.capacity;
        dst->data.lng.size     = src->data.lng.size;
    }

    const size_t bytes_to_copy = guf_str_len_internal_(src) + 1; // Characters plus null-terminator.
    GUF_ASSERT(bytes_to_copy == (guf_str_len_internal_(dst) + 1));
    GUF_ASSERT(guf_str_is_short(dst) == guf_str_is_short(src));

    const char *from = guf_str_const_cstr(src);
    char *to = guf_str_cstr(dst);
    GUF_ASSERT(from && to);
    memcpy(to, from, bytes_to_copy);

    GUF_ASSERT(guf_str_is_valid(dst));
    return dst;
}
|
|
|
|
/*
    Move src into dst: dst receives a bitwise copy of *src (dst's previous value is
    overwritten, not freed) and src is reset to guf_str_new_uninitialised().
    Read-only sources are rejected via GUF_ASSERT_RELEASE. ctx is unused.
    Returns dst.
*/
GUF_STR_KWRDS guf_str *guf_str_move(guf_str *dst, guf_str *src, void *ctx)
{
    (void)ctx;
    GUF_ASSERT_RELEASE(dst);
    GUF_ASSERT_RELEASE(guf_str_is_valid(src));
    GUF_ASSERT_RELEASE(!guf_str_is_readonly(src)); // Moving out of a read-only string is not supported.

    guf_str *const moved_to = dst;
    *moved_to = *src;                    // Take over src's representation as-is...
    *src = guf_str_new_uninitialised();  // ...and leave src without ownership.
    return moved_to;
}
|
|
|
|
/*
    Return true iff a and b have the same length and identical bytes
    (compared with memcmp, so embedded '\0' bytes take part in the comparison).
*/
GUF_STR_KWRDS bool guf_str_equal(const guf_str *a, const guf_str *b)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(a) && guf_str_is_valid(b));

    // Strings of different length can never be equal.
    if (guf_str_len(a) != guf_str_len(b)) {
        return false;
    }

    const char *lhs = guf_str_const_cstr(a);
    const char *rhs = guf_str_const_cstr(b);
    GUF_ASSERT(lhs && rhs);

    const int byte_diff = memcmp(lhs, rhs, guf_str_len(a));
    return byte_diff == 0;
}
|
|
|
|
/*
    Three-way lexicographic comparison of a and b (bytes compared as unsigned char,
    as done by memcmp).

    Returns a value < 0 if a orders before b, > 0 if a orders after b, and 0 iff
    both strings have the same length and identical bytes.

    The common prefix is compared first; if it is identical, the shorter string
    orders first. (Comparing only the common prefix would report e.g. "abc" and
    "abcd" as equal, which would contradict guf_str_equal and break sorting.)
*/
GUF_STR_KWRDS int guf_str_cmp(const guf_str *a, const guf_str *b)
{
    GUF_ASSERT_RELEASE(guf_str_is_valid(a) && guf_str_is_valid(b));

    const ptrdiff_t a_len = guf_str_len(a);
    const ptrdiff_t b_len = guf_str_len(b);
    const ptrdiff_t shorter_len = guf_min_ptrdiff_t(a_len, b_len);

    const char *a_cstr = guf_str_const_cstr(a);
    const char *b_cstr = guf_str_const_cstr(b);
    GUF_ASSERT(a_cstr && b_cstr);

    const int prefix_cmp = memcmp(a_cstr, b_cstr, (size_t)shorter_len);
    if (prefix_cmp != 0) {
        return prefix_cmp;
    }

    // Identical common prefix: the length decides (-1, 0 or 1).
    return (a_len > b_len) - (a_len < b_len);
}
|
|
|
|
/*
    Replace the contents of str with a copy of the bytes viewed by sv.

    Errors (reported via guf_err_set_or_panic, NULL is returned):
      - GUF_ERR_INVALID_ARG if str is read-only or sv is not a valid view
      - the error left in *err by guf_str_try_reserve if reserving sv.len bytes fails
    On success *err (if non-NULL) is set to GUF_ERR_NONE and str is returned.

    The bytes are copied with memmove (not memcpy), so sv may overlap str's own
    buffer, e.g. a sub-view obtained from guf_str_to_view(str); memcpy would be
    undefined behaviour for overlapping ranges. (This relies on the reserve call
    not relocating the buffer when the requested size already fits.)
*/
GUF_STR_KWRDS guf_str *guf_str_try_set(guf_str *str, guf_str_view sv, guf_err *err)
{
    GUF_ASSERT(guf_str_is_valid(str));

    if (guf_str_is_readonly(str)) {
        guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, GUF_ERR_MSG("in guf_str_try_set: guf_str is readonly"));
        return NULL;
    } else if (!guf_str_view_is_valid(sv)) {
        guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, GUF_ERR_MSG("in guf_str_try_set: str_view is invalid"));
        return NULL;
    }

    // Make sure the destination can hold sv.len bytes (plus terminator).
    guf_str_try_reserve(str, sv.len, err);
    if (err && *err != GUF_ERR_NONE) {
        guf_err_set_or_panic(err, *err, GUF_ERR_MSG("in guf_str_try_set: guf_str_try_reserve failed"));
        return NULL;
    }

    char *c_str_dst = guf_str_cstr(str);
    GUF_ASSERT(c_str_dst);
    if (sv.len > 0) {
        GUF_ASSERT(sv.str);
        memmove(c_str_dst, sv.str, (size_t)sv.len); // Overlap-safe, cf. function comment.
    }
    c_str_dst[sv.len] = '\0';
    guf_str_set_len_internal_(str, sv.len);

    GUF_ASSERT(guf_str_is_valid(str));
    guf_err_set_if_not_null(err, GUF_ERR_NONE);
    return str;
}
|
|
|
|
/*
    Same as guf_str_try_set, but without an error out-parameter: failures go
    through guf_str_try_set's guf_err_set_or_panic(NULL, ...) path.
*/
GUF_STR_KWRDS guf_str *guf_str_set(guf_str *str, guf_str_view sv)
{
    guf_err *const no_err_out = NULL;
    return guf_str_try_set(str, sv, no_err_out);
}
|
|
|
|
|
|
|
|
/*
    Append the character c to str, repeated `times` times.

    Errors (reported via guf_err_set_or_panic, NULL is returned):
      - GUF_ERR_NULL_PTR      if str is read-only
      - GUF_ERR_INVALID_ARG   if times < 0
      - GUF_ERR_INT_OVERFLOW  if the new length would wrap around or reach PTRDIFF_MAX
      - the error left in *err by guf_str_try_reserve if growing the capacity fails
    times == 0 is a successful no-op.
    On success *err (if non-NULL) is set to GUF_ERR_NONE and str is returned.
*/
GUF_STR_KWRDS guf_str *guf_str_try_append_char(guf_str *str, char c, ptrdiff_t times, guf_err *err)
{
    GUF_ASSERT(guf_str_is_valid(str));

    if (guf_str_is_readonly(str)) {
        guf_err_set_or_panic(err, GUF_ERR_NULL_PTR, GUF_ERR_MSG("in guf_str_try_append_char: str is readonly"));
        return NULL;
    }

    if (times < 0) {
        guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, GUF_ERR_MSG("in guf_str_try_append_char: repeats < 0"));
        return NULL;
    }
    if (times == 0) { // Nothing to append.
        guf_err_set_if_not_null(err, GUF_ERR_NONE);
        return str;
    }

    GUF_ASSERT(guf_str_len_internal_(str) <= guf_str_cap_internal_(str));

    const size_t cap_before = guf_str_cap_internal_(str);
    const size_t len_before = guf_str_len_internal_(str);
    const size_t len_after  = len_before + (size_t)times;

    // times > 0 here, so len_after <= len_before can only mean the addition wrapped around.
    if (len_after <= len_before || len_after >= (size_t)PTRDIFF_MAX) {
        guf_err_set_or_panic(err, GUF_ERR_INT_OVERFLOW, GUF_ERR_MSG("in guf_str_try_append_char: new length would overflow ptrdiff_t"));
        return NULL;
    }

    if (len_after > cap_before) { // Need to grow capacity.
        guf_str_try_reserve(str, len_after, err);
        if (err != NULL && *err != GUF_ERR_NONE) {
            guf_err_set_or_panic(err, *err, GUF_ERR_MSG("in guf_str_try_append_char: failed to reserve capacity"));
            return NULL;
        }
    }

    const size_t cap_after = guf_str_cap_internal_(str);
    GUF_ASSERT_RELEASE(cap_after >= len_after && cap_after >= cap_before);
    GUF_ASSERT(guf_str_len_internal_(str) == len_before);
    GUF_ASSERT(((ptrdiff_t)cap_after - (ptrdiff_t)len_before) >= times);

    // Fill the new tail [len_before, len_after) with c, then re-terminate.
    char *buf = guf_str_get_cstr_internal_(str);
    memset(buf + len_before, c, len_after - len_before);
    guf_str_set_len_internal_(str, len_after);
    buf[len_after] = '\0';

    guf_err_set_if_not_null(err, GUF_ERR_NONE);
    return str;
}
|
|
|
|
/*
    Same as guf_str_try_append_char, but without an error out-parameter: failures
    go through guf_str_try_append_char's guf_err_set_or_panic(NULL, ...) path.
*/
GUF_STR_KWRDS guf_str *guf_str_append_char(guf_str *str, char c, ptrdiff_t times)
{
    guf_err *const no_err_out = NULL;
    return guf_str_try_append_char(str, c, times, no_err_out);
}
|
|
|
|
/*
    Append the single character c to str.
    Shorthand for guf_str_try_append_char(str, c, 1, err); see there for errors.
*/
GUF_STR_KWRDS guf_str *guf_str_try_append_one_char(guf_str *str, char c, guf_err *err)
{
    const ptrdiff_t exactly_once = 1;
    return guf_str_try_append_char(str, c, exactly_once, err);
}
|
|
|
|
/*
    Same as guf_str_try_append_one_char, but without an error out-parameter:
    failures go through the callee's guf_err_set_or_panic(NULL, ...) path.
*/
GUF_STR_KWRDS guf_str *guf_str_append_one_char(guf_str *str, char c)
{
    guf_err *const no_err_out = NULL;
    return guf_str_try_append_one_char(str, c, no_err_out);
}
|
|
|
|
|
|
/*
    Append a copy of the bytes viewed by sv to the end of str.

    Errors (reported via guf_err_set_or_panic, NULL is returned):
      - GUF_ERR_INVALID_ARG   if sv is not a valid view
      - GUF_ERR_NULL_PTR      if str is read-only
      - GUF_ERR_INT_OVERFLOW  if the new length would wrap around or reach PTRDIFF_MAX
      - the error left in *err by guf_str_try_reserve if growing the capacity fails
    An empty view is a successful no-op.
    On success *err (if non-NULL) is set to GUF_ERR_NONE and str is returned.

    Self-append is supported: if sv points into str's own character buffer
    (e.g. guf_str_to_view(str)), its offset is recorded before reserving, and the
    source is re-derived from the (possibly relocated) buffer afterwards. Without
    this, a reserve that moves or repurposes the buffer would leave sv dangling.
*/
GUF_STR_KWRDS guf_str *guf_str_try_append(guf_str *str, guf_str_view sv, guf_err *err)
{
    GUF_ASSERT(guf_str_is_valid(str));

    if (!guf_str_view_is_valid(sv)) {
        guf_err_set_or_panic(err, GUF_ERR_INVALID_ARG, GUF_ERR_MSG("in guf_str_try_append_view: str_view is invalid"));
        return NULL;
    } else if (guf_str_is_readonly(str)) {
        guf_err_set_or_panic(err, GUF_ERR_NULL_PTR, GUF_ERR_MSG("in guf_str_try_append_view: str is readonly"));
        return NULL;
    }

    if (sv.len == 0) { // Nothing to append.
        guf_err_set_if_not_null(err, GUF_ERR_NONE);
        return str;
    }
    GUF_ASSERT(sv.str && sv.len > 0);

    const size_t old_cap = guf_str_cap_internal_(str);
    const size_t old_len = guf_str_len_internal_(str);
    const size_t new_len = old_len + (size_t)sv.len;
    // sv.len > 0 here, so new_len <= old_len can only mean the addition wrapped around.
    if (new_len <= old_len || new_len >= (size_t)PTRDIFF_MAX) { // Handle overflow.
        guf_err_set_or_panic(err, GUF_ERR_INT_OVERFLOW, GUF_ERR_MSG("in guf_str_try_append_view: new length would overflow ptrdiff_t"));
        return NULL;
    }

    // Detect whether sv views str's own buffer *before* the buffer may change.
    // (Addresses are compared as integers to avoid relational comparison of unrelated pointers.)
    const uintptr_t old_buf_addr = (uintptr_t)guf_str_get_cstr_internal_(str);
    const uintptr_t sv_addr      = (uintptr_t)sv.str;
    const bool sv_aliases_str    = sv_addr >= old_buf_addr && (sv_addr - old_buf_addr) <= old_len;
    const size_t alias_offset    = sv_aliases_str ? (size_t)(sv_addr - old_buf_addr) : 0;

    if (new_len > old_cap) { // Growth necessary.
        guf_str_try_reserve(str, new_len, err);
        if (err && *err != GUF_ERR_NONE) {
            guf_err_set_or_panic(err, *err, GUF_ERR_MSG("in guf_str_try_append_view: failed to reserve capacity"));
            return NULL;
        }
    }

    const size_t new_cap = guf_str_cap_internal_(str);
    GUF_ASSERT_RELEASE(new_cap >= old_cap && new_cap >= new_len);
    GUF_ASSERT(((ptrdiff_t)new_cap - (ptrdiff_t)old_len) >= sv.len);

    char *c_str_dst = guf_str_get_cstr_internal_(str);
    // For a self-aliasing view, read from the current buffer rather than the stale sv.str.
    const char *src_bytes = sv_aliases_str ? (const char*)(c_str_dst + alias_offset) : sv.str;
    memmove(c_str_dst + old_len, src_bytes, (size_t)sv.len); // Overlap-safe.
    c_str_dst[new_len] = '\0';
    guf_str_set_len_internal_(str, new_len);

    GUF_ASSERT(guf_str_is_valid(str));
    guf_err_set_if_not_null(err, GUF_ERR_NONE);
    return str;
}
|
|
|
|
/*
    Same as guf_str_try_append, but without an error out-parameter: failures go
    through guf_str_try_append's guf_err_set_or_panic(NULL, ...) path.
*/
GUF_STR_KWRDS guf_str *guf_str_append(guf_str *str, guf_str_view sv)
{
    guf_err *const no_err_out = NULL;
    return guf_str_try_append(str, sv, no_err_out);
}
|
|
|
|
/*
    Shrink str in-place to its sub-string starting at pos with at most count bytes
    (count is clamped so that the sub-string ends at the end of str).

    Errors (reported via guf_err_set_or_panic, NULL is returned):
      - GUF_ERR_IDX_RANGE if pos < 0, pos >= guf_str_len(str), or count < 0
      - the error left in *err by guf_str_try_get_cstr if that call fails
    On success *err (if non-NULL) is set to GUF_ERR_NONE and str is returned.
*/
GUF_STR_KWRDS guf_str *guf_str_try_substr(guf_str *str, ptrdiff_t pos, ptrdiff_t count, guf_err *err)
{
    GUF_ASSERT(guf_str_is_valid(str));

    const ptrdiff_t full_len = guf_str_len(str);
    if (pos < 0 || pos >= full_len) {
        guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_substr: pos out of range"));
        return NULL;
    }
    if (count < 0) {
        guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_substr: count < 0"));
        return NULL;
    }

    char *buf = guf_str_try_get_cstr(str, err);
    if (err != NULL && *err != GUF_ERR_NONE) {
        guf_err_set_or_panic(err, *err, GUF_ERR_MSG("in guf_str_try_substr: string is readonly (guf_str_try_get_cstr() failed)"));
        return NULL;
    }
    GUF_ASSERT(buf);

    // Saturate pos + count at PTRDIFF_MAX instead of overflowing, then clamp to the end of str.
    const ptrdiff_t end_unclamped = guf_add_is_overflow_ptrdiff(pos, count) ? PTRDIFF_MAX : pos + count;
    const ptrdiff_t kept_len = end_unclamped > full_len ? full_len - pos : count;
    GUF_ASSERT(kept_len >= 0 && kept_len <= full_len && kept_len <= guf_str_capacity(str));
    GUF_ASSERT((size_t)pos + (size_t)(kept_len) <= (size_t)full_len);

    if (pos > 0) {
        // Shift the kept bytes to the front; the ranges may overlap, hence memmove.
        memmove(buf, buf + pos, (size_t)kept_len);
    }
    buf[kept_len] = '\0';
    guf_str_set_len_internal_(str, kept_len);

    GUF_ASSERT(guf_str_is_valid(str));
    guf_err_set_if_not_null(err, GUF_ERR_NONE);
    return str;
}
|
|
|
|
/*
    Same as guf_str_try_substr, but without an error out-parameter: failures go
    through guf_str_try_substr's guf_err_set_or_panic(NULL, ...) path.
*/
GUF_STR_KWRDS guf_str *guf_str_substr(guf_str *str, ptrdiff_t pos, ptrdiff_t count)
{
    guf_err *const no_err_out = NULL;
    return guf_str_try_substr(str, pos, count, no_err_out);
}
|
|
|
|
/*
    Remove the last byte of str and return it.

    Errors (reported via guf_err_set_or_panic, '\0' is returned):
      - GUF_ERR_IDX_RANGE if str is empty
      - the error left in *err by guf_str_try_substr if shrinking fails
    On success *err (if non-NULL) is set to GUF_ERR_NONE.
    NOTE: '\0' may also be a regular result, so check *err to tell the cases apart.
*/
GUF_STR_KWRDS char guf_str_try_pop_back(guf_str *str, guf_err *err)
{
    GUF_ASSERT(guf_str_is_valid(str));

    const ptrdiff_t n = guf_str_len(str);
    if (n <= 0) {
        guf_err_set_or_panic(err, GUF_ERR_IDX_RANGE, GUF_ERR_MSG("in guf_str_try_pop_back: len <= 0"));
        return '\0';
    }

    const ptrdiff_t shrunk_len = n - 1;
    GUF_ASSERT(shrunk_len >= 0);

    // Read the byte first; afterwards cut the string down to its first n - 1 bytes.
    const char popped = guf_str_at_cpy(str, shrunk_len);
    guf_str_try_substr(str, 0, shrunk_len, err);
    if (err != NULL && *err != GUF_ERR_NONE) {
        guf_err_set_or_panic(err, *err, GUF_ERR_MSG("in guf_str_try_pop_back: guf_str_try_substr failed"));
        return '\0';
    }

    GUF_ASSERT(guf_str_is_valid(str));
    GUF_ASSERT(guf_str_len(str) == shrunk_len);
    guf_err_set_if_not_null(err, GUF_ERR_NONE);
    return popped;
}
|
|
|
|
/*
    Same as guf_str_try_pop_back, but without an error out-parameter: failures go
    through guf_str_try_pop_back's guf_err_set_or_panic(NULL, ...) path.
*/
GUF_STR_KWRDS char guf_str_pop_back(guf_str *str)
{
    guf_err *const no_err_out = NULL;
    return guf_str_try_pop_back(str, no_err_out);
}
|
|
|
|
/*
    Hash the contents of str by hashing a view over its bytes with guf_str_view_hash.
*/
GUF_STR_KWRDS guf_hash_size_t guf_str_hash(const guf_str *str)
{
    GUF_ASSERT(guf_str_is_valid(str));

    const guf_str_view contents = {.str = guf_str_const_cstr(str), .len = guf_str_len(str)};
    return guf_str_view_hash(&contents);
}
|
|
|
|
/*
    64-bit hash of the contents of str, computed by guf_str_view_hash64 over a view of its bytes.
*/
GUF_STR_KWRDS uint64_t guf_str_hash64(const guf_str *str)
{
    GUF_ASSERT(guf_str_is_valid(str));

    const guf_str_view contents = {.str = guf_str_const_cstr(str), .len = guf_str_len(str)};
    return guf_str_view_hash64(&contents);
}
|
|
|
|
/*
    32-bit hash of the contents of str, computed by guf_str_view_hash32 over a view of its bytes.
*/
GUF_STR_KWRDS uint32_t guf_str_hash32(const guf_str *str)
{
    GUF_ASSERT(guf_str_is_valid(str));

    const guf_str_view contents = {.str = guf_str_const_cstr(str), .len = guf_str_len(str)};
    return guf_str_view_hash32(&contents);
}
|
|
|
|
|
|
// guf_str_view:
|
|
|
|
/*
    A view is valid if it has a non-NULL pointer and a non-negative length,
    or a NULL pointer and a length of exactly zero.
*/
GUF_STR_KWRDS bool guf_str_view_is_valid(guf_str_view sv)
{
    const bool has_data = (sv.str != NULL);
    return has_data ? (sv.len >= 0) : (sv.len == 0);
}
|
|
|
|
/*
|
|
cf. "str_pop_first_split":
|
|
- https://accu.org/conf-docs/PDFs_2021/luca_sass_modern_c_and_what_we_can_learn_from_it.pdf ("String handling in Modern C", page 128 of the pdf)
|
|
- https://youtu.be/QpAhX-gsHMs?si=lCvm6o60LrYHaAHc&t=3059 (last-retrieved 2025-04-30)
|
|
*/
|
|
|
|
/*
    Pop the part of *src in front of the first occurrence of delim and return it.

    - If delim occurs in *src: the returned view covers everything before the
      delimiter, and *src is advanced past the delimiter (src->str becomes NULL
      if nothing remains).
    - If delim is empty or does not occur: the whole of *src is returned and *src
      becomes the empty view {NULL, 0}.
*/
GUF_STR_KWRDS guf_str_view guf_str_view_pop_split(guf_str_view *src, guf_str_view delim)
{
    GUF_ASSERT(src);
    GUF_ASSERT_RELEASE(guf_str_view_is_valid(*src));
    GUF_ASSERT_RELEASE(guf_str_view_is_valid(delim));

    if (delim.len > 0) {
        for (ptrdiff_t start = 0; start < src->len; ++start) {
            // The delimiter can only match here if it fits into the remaining input.
            const bool delim_fits = (start + delim.len <= src->len);

            ptrdiff_t matched = 0;
            while (delim_fits && matched < delim.len && delim.str[matched] == src->str[start + matched]) {
                ++matched;
            }
            if (matched != delim.len) {
                continue;
            }

            // Delimiter found in interval [start, start + delim.len)
            const guf_str_view head = guf_str_view_substr(*src, 0, start);
            const ptrdiff_t consumed = head.len + delim.len;
            GUF_ASSERT(consumed > 0 && consumed >= delim.len);

            src->len -= consumed;
            GUF_ASSERT(src->len >= 0);
            src->str = (src->len > 0) ? (src->str + consumed) : NULL;
            return head;
        }
    }

    // Delimiter not found (or empty): hand out everything that is left.
    const guf_str_view rest = *src;
    src->str = NULL;
    src->len = 0;
    return rest;
}
|
|
|
|
static inline int guf_str_view_cmp_asc_by_len_(const void *a, const void *b)
|
|
{
|
|
const guf_str_view *asv = (const guf_str_view*)a;
|
|
const guf_str_view *bsv = (const guf_str_view*)b;
|
|
if (asv->len < bsv->len) {
|
|
return -1;
|
|
} else if (asv->len > bsv->len) {
|
|
return 1;
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
/*
    qsort-style comparator ordering guf_str_view elements by descending length
    (the ascending comparator with its arguments swapped).
*/
static inline int guf_str_view_cmp_desc_by_len_(const void *a, const void *b)
{
    return guf_str_view_cmp_asc_by_len_(b, a);
}
|
|
|
|
/*
    Create the initial tokeniser state for splitting str at any of the given delimiters.

    - delims/delim_count: the delimiter views. If there are none, or all of them
      are empty, the state is created without delimiters (delims = NULL, count = 0).
    - delim_match_opt: MATCH_LONGEST / MATCH_SHORTEST sort the caller's delims array
      IN-PLACE (descending / ascending by length, via qsort) so that the tokeniser
      tries them in that order; MATCH_IN_ORDER leaves the array untouched.

    The state only stores the delims pointer; the array is not copied.
*/
GUF_STR_KWRDS guf_str_tok_state guf_str_tok_state_new(guf_str_view str, guf_str_view *delims, ptrdiff_t delim_count, guf_str_tok_delim_opt delim_match_opt)
{
    GUF_ASSERT_RELEASE(guf_str_view_is_valid(str));
    GUF_ASSERT_RELEASE(delim_count <= 0 || delims != NULL);

    // Validate all delimiters and find the longest one.
    ptrdiff_t longest_delim = 0;
    if (delims != NULL && delim_count > 0) {
        for (ptrdiff_t i = 0; i < delim_count; ++i) {
            GUF_ASSERT_RELEASE(guf_str_view_is_valid(delims[i]));
            if (delims[i].len > longest_delim) {
                longest_delim = delims[i].len;
            }
        }
    }

    const bool have_usable_delims = (longest_delim > 0) && (delim_count > 0) && (delims != NULL);
    if (!have_usable_delims) {
        delims = NULL;
        delim_count = 0;
    } else if (delim_match_opt == GUF_STR_TOK_DELIM_OPT_MATCH_LONGEST) {
        qsort(delims, delim_count, sizeof(delims[0]), guf_str_view_cmp_desc_by_len_);
    } else if (delim_match_opt == GUF_STR_TOK_DELIM_OPT_MATCH_SHORTEST) {
        qsort(delims, delim_count, sizeof(delims[0]), guf_str_view_cmp_asc_by_len_);
    } else if (delim_match_opt != GUF_STR_TOK_DELIM_OPT_MATCH_IN_ORDER) {
        GUF_ASSERT(false); // Unknown option.
    }

    const guf_str_tok_state fresh_state = {
        .input = str,
        .delims = delims,
        .delim_count = delim_count,
        .num_toks_read = 0,
        .num_delims_read = 0,
        .cur_tok = (guf_str_view){.len = 0, .str = NULL},
        .cur_delim = (guf_str_view){.len = 0, .str = NULL},
        .done = false
    };
    return fresh_state;
}
|
|
|
|
/*
    Advance the tokeniser by one step.

    On return, state->cur_tok is the token read in this step (possibly empty) and
    state->cur_delim the delimiter that ended it (empty if the input ended first).
    state->input is advanced past both; state->done is set once the input is used up.

    - preserve_delims == false: delimiters that are not preceded by any token bytes
      are skipped (they are still counted in num_delims_read).
    - preserve_delims == true: such delimiters are reported with an empty cur_tok.

    Returns true if a token and/or delimiter was produced, false otherwise.
    Without delimiters, the whole remaining input is returned as one token
    (state->input itself is left unchanged in that case, only done is set).
*/
GUF_STR_KWRDS bool guf_str_tok_next(guf_str_tok_state *state, bool preserve_delims)
{
    GUF_ASSERT(state);
    GUF_ASSERT(guf_str_view_is_valid(state->input));
    GUF_ASSERT(state->num_toks_read >= 0 && state->num_delims_read >= 0 && state->delim_count >= 0);
    GUF_ASSERT(state->delim_count > 0 ? state->delims != NULL : true);

    const guf_str_view no_view = {.str = NULL, .len = 0};

    // Nothing (left) to tokenise.
    if (state->done || state->input.len <= 0 || state->input.str == NULL) {
        state->done = true;
        state->cur_tok = no_view;
        state->cur_delim = no_view;
        return false;
    }

    // No delimiters: the remaining input is a single token.
    if (state->delim_count <= 0 || state->delims == NULL) {
        state->done = true;
        state->cur_tok = state->input;
        state->cur_delim = no_view;
        return state->cur_tok.len > 0;
    }

    // The token starts (empty) at the current front of the input and grows byte by byte.
    state->cur_tok = state->cur_delim = (guf_str_view){.str = state->input.str, .len = 0};

    while (state->input.len > 0) {
        GUF_ASSERT(state->input.str != NULL);

        // Find the first delimiter (in array order) matching the front of the input.
        // If state->delims is sorted descending/ascending by length, this is the longest/shortest possible delim.
        ptrdiff_t hit = -1;
        for (ptrdiff_t i = 0; i < state->delim_count; ++i) {
            const guf_str_view candidate = state->delims[i];
            GUF_ASSERT(guf_str_view_is_valid(candidate));
            if (candidate.len <= 0 || candidate.len > state->input.len) { // Cannot possibly match.
                continue;
            }
            const guf_str_view input_front = guf_str_view_substr(state->input, 0, candidate.len);
            if (guf_str_view_equal(&input_front, &candidate)) {
                hit = i;
                break;
            }
        }

        if (hit < 0) {
            // b) Could not match any of the delims: the current byte belongs to the token.
            state->cur_tok.len += 1;
            state->input.len -= 1;
            state->input.str = (state->input.len > 0) ? (state->input.str + 1) : NULL;
            continue;
        }

        // a) Matched a delim: consume it.
        const guf_str_view delim = state->delims[hit];
        GUF_ASSERT(state->input.len >= delim.len);
        GUF_ASSERT(state->cur_tok.len >= 0);
        state->cur_delim = delim;
        state->num_delims_read += 1;

        state->input.len -= delim.len;
        state->input.str = (state->input.len > 0) ? (state->input.str + delim.len) : NULL;

        if (!preserve_delims && state->cur_tok.len == 0) {
            // Delimiter without a preceding token: skip it and start over at the new input front.
            state->cur_tok = state->cur_delim = (guf_str_view){.str = state->input.str, .len = 0};
            continue;
        }

        if (state->cur_tok.len > 0) {
            state->num_toks_read += 1;
        }
        state->done = (state->input.len <= 0);
        GUF_ASSERT(state->cur_tok.len > 0 || state->cur_delim.len > 0);
        return true;
    }

    // Input exhausted without a terminating delimiter.
    state->done = true;
    GUF_ASSERT(state->cur_tok.len >= 0);
    state->cur_delim = no_view;

    const bool got_token = (state->cur_tok.len > 0);
    if (got_token) {
        state->num_toks_read += 1;
    }
    return got_token;
}
|
|
|
|
/*
    Return sv with all leading bytes for which guf_char_isspace_ascii is true removed.
    Empty or NULL views are returned unchanged. No bytes are copied or modified.
*/
GUF_STR_KWRDS guf_str_view guf_str_view_trim_left_ascii(guf_str_view sv)
{
    if (sv.str == NULL || sv.len <= 0) {
        return sv;
    }

    // Step the front of the view forward over whitespace.
    while (sv.len > 0 && guf_char_isspace_ascii(*sv.str)) {
        ++sv.str;
        --sv.len;
    }

    GUF_ASSERT(sv.len >= 0);
    GUF_ASSERT(sv.len == 0 || !guf_char_isspace_ascii(*sv.str));

    return sv;
}
|
|
|
|
/*
    Return sv with all trailing bytes for which guf_char_isspace_ascii is true removed.
    Empty or NULL views are returned unchanged. No bytes are copied or modified.
*/
GUF_STR_KWRDS guf_str_view guf_str_view_trim_right_ascii(guf_str_view sv)
{
    if (sv.str == NULL || sv.len <= 0) {
        return sv;
    }

    // Shorten the view while its last byte is whitespace.
    while (sv.len > 0 && guf_char_isspace_ascii(sv.str[sv.len - 1])) {
        --sv.len;
    }

    GUF_ASSERT(sv.len >= 0);
    GUF_ASSERT(sv.len == 0 || !guf_char_isspace_ascii(sv.str[sv.len - 1]));

    return sv;
}
|
|
|
|
/*
    Return the sub-view of str starting at pos with at most count bytes
    (count is clamped so that the sub-view ends at the end of str).

    str.str must be non-NULL, and pos and count must be >= 0 (GUF_ASSERT_RELEASE).
    If str is empty, count is 0, or pos is at/after the end, an empty view
    {str.str, 0} is returned.
*/
GUF_STR_KWRDS guf_str_view guf_str_view_substr(guf_str_view str, ptrdiff_t pos, ptrdiff_t count)
{
    GUF_ASSERT_RELEASE(str.str);
    GUF_ASSERT_RELEASE(pos >= 0);
    GUF_ASSERT_RELEASE(count >= 0);

    const bool nothing_to_take = (str.len == 0 || count == 0 || pos >= str.len || pos < 0 || str.str == NULL);
    if (nothing_to_take) {
        return (guf_str_view){.str = str.str, .len = 0};
    }

    // Saturate pos + count at PTRDIFF_MAX instead of overflowing, then clamp to the end of str.
    const ptrdiff_t end_unclamped = guf_add_is_overflow_ptrdiff(pos, count) ? PTRDIFF_MAX : pos + count;
    const ptrdiff_t taken_len = end_unclamped > str.len ? str.len - pos : count;
    GUF_ASSERT(taken_len >= 0);
    GUF_ASSERT(taken_len <= str.len);

    const guf_str_view result = {.str = str.str + pos, .len = taken_len};
    GUF_ASSERT(guf_str_view_is_valid(result));
    return result;
}
|
|
|
|
/*
    Hash the bytes viewed by *sv with guf_hash, seeded with GUF_HASH_INIT.
    Empty or NULL views hash to GUF_HASH_INIT itself.
*/
GUF_STR_KWRDS guf_hash_size_t guf_str_view_hash(const guf_str_view *sv)
{
    GUF_ASSERT(sv);

    if (sv->str != NULL && sv->len > 0) {
        return guf_hash(sv->str, sv->len, GUF_HASH_INIT);
    }
    return GUF_HASH_INIT;
}
|
|
|
|
/*
    64-bit hash of the bytes viewed by *sv, computed with guf_hash64 seeded with GUF_HASH64_INIT.
    Empty or NULL views hash to GUF_HASH64_INIT itself.
*/
GUF_STR_KWRDS uint64_t guf_str_view_hash64(const guf_str_view *sv)
{
    GUF_ASSERT(sv);

    if (sv->str != NULL && sv->len > 0) {
        return guf_hash64(sv->str, sv->len, GUF_HASH64_INIT);
    }
    return GUF_HASH64_INIT;
}
|
|
|
|
/*
    32-bit hash of the bytes viewed by *sv, computed with guf_hash32 seeded with GUF_HASH32_INIT.
    Empty or NULL views hash to GUF_HASH32_INIT itself.
*/
GUF_STR_KWRDS uint32_t guf_str_view_hash32(const guf_str_view *sv)
{
    GUF_ASSERT(sv);

    if (sv->str != NULL && sv->len > 0) {
        return guf_hash32(sv->str, sv->len, GUF_HASH32_INIT);
    }
    return GUF_HASH32_INIT;
}
|
|
|
|
GUF_STR_KWRDS bool guf_str_view_equal(const guf_str_view* a, const guf_str_view* b)
|
|
{
|
|
GUF_ASSERT(a && b);
|
|
if (a->len != b->len) {
|
|
return false;
|
|
}
|
|
|
|
if ((!a->str && b->str) || (!b->str && a->str)) {
|
|
return false;
|
|
} else if (!a->str && !b->str) {
|
|
return a->len == b->len;
|
|
}
|
|
GUF_ASSERT(a->str && b->str);
|
|
|
|
if (a->len <= 0) {
|
|
return true;
|
|
}
|
|
|
|
return 0 == memcmp(a->str, b->str, a->len);
|
|
}
|
|
|
|
/*
    By-value variant of guf_str_view_equal: compares the two views passed as values.
*/
GUF_STR_KWRDS bool guf_str_view_equal_val_arg(guf_str_view a_val, guf_str_view b_val)
{
    const guf_str_view *lhs = &a_val;
    const guf_str_view *rhs = &b_val;
    return guf_str_view_equal(lhs, rhs);
}
|
|
|
|
#undef GUF_STR_IMPL
|
|
#undef GUF_STR_IMPL_STATIC
|
|
#endif /* end impl */
|
|
|
|
#undef GUF_STR_KWRDS
|