235 lines
8.4 KiB
C
235 lines
8.4 KiB
C
/*
 * GUF_STR_KWRDS is the prefix put in front of every function declaration and
 * definition in this file. It expands to `static` when GUF_STR_IMPL_STATIC is
 * defined, and to nothing otherwise.
 */
#ifdef GUF_STR_IMPL_STATIC
    #define GUF_STR_KWRDS static
#else
    #define GUF_STR_KWRDS
#endif
|
|
|
|
#ifndef GUF_STR_H
|
|
#define GUF_STR_H
|
|
#include "guf_common.h"
|
|
#include "guf_alloc.h"
|
|
#include "guf_str_view_type.h"
|
|
#include "guf_utf8.h"
|
|
#include "guf_hash.h"
|
|
|
|
/*
 * State tag stored in guf_str::state.
 * The non-zero values are distinct powers of two (1, 2, 4).
 * NOTE(review): presumably so they can be combined as flags -- confirm against the implementation.
 */
typedef enum guf_str_state {
    GUF_STR_STATE_INIT      = 0,      /* Zero value. */
    GUF_STR_STATE_SHORT     = 1 << 0, /* Presumably: short-string (inline buffer) storage in use -- confirm. */
    GUF_STR_STATE_VIEW      = 1 << 1, /* State assigned by GUF_CSTR_TO_READONLY_STR. */
    GUF_STR_STATE_ALLOC_ERR = 1 << 2  /* Presumably: an allocation failed -- confirm. */
} guf_str_state;
|
|
|
|
typedef struct guf_str {
|
|
union {
|
|
struct heap {
|
|
char *c_str;
|
|
size_t len, capacity; // len and capacity do not include the null-terminator.
|
|
} heap;
|
|
struct stack { // Short-string optimisation.
|
|
#define GUF_STR_SSO_BUFSIZE (sizeof(struct heap) - sizeof(unsigned char))
|
|
#define GUF_STR_SSO_BUFCAP (GUF_STR_SSO_BUFSIZE - 1)
|
|
char c_str[GUF_STR_SSO_BUFSIZE];
|
|
unsigned char len;
|
|
} stack;
|
|
} data;
|
|
guf_allocator *allocator;
|
|
guf_str_state state;
|
|
} guf_str;
|
|
|
|
|
|
/*
 * Conversion helpers. Each expands to a compound literal.
 *
 * NOTE: every macro below evaluates its argument twice, so do not pass an
 * expression with side effects (e.g. `p++` or a function call that allocates).
 * The CSTR variants call strlen(), so CSTR must be a non-NULL, null-terminated
 * string and <string.h> must be visible where the macro is used.
 */

/* guf_str_view over the C string CSTR (no copy is made). */
#define GUF_CSTR_TO_VIEW(CSTR) ((guf_str_view){.str = (CSTR), .len = strlen((CSTR))})

/* guf_str_view over the characters of the guf_str pointed to by GUF_STR_PTR (no copy is made). */
#define GUF_STR_TO_VIEW(GUF_STR_PTR) ((guf_str_view){.str = guf_str_const_cstr((GUF_STR_PTR)), .len = guf_str_len((GUF_STR_PTR))})

/*
 * guf_str in GUF_STR_STATE_VIEW state that points directly at CSTR:
 * no allocator, capacity 0, length strlen(CSTR). No copy is made.
 */
#define GUF_CSTR_TO_READONLY_STR(CSTR) ((guf_str){.state = GUF_STR_STATE_VIEW, .allocator = NULL, .data.heap.c_str = (CSTR), .data.heap.len = strlen((CSTR)), .data.heap.capacity = 0})
|
|
|
|
// Creation:
|
|
GUF_STR_KWRDS guf_str *guf_str_init(guf_str *str, guf_str_view str_view);
|
|
GUF_STR_KWRDS guf_str *guf_str_init_from_cstr(guf_str *str, const char* c_str);
|
|
GUF_STR_KWRDS guf_str *guf_str_init_empty_with_capacity(guf_str *str, size_t capacity);
|
|
// guf_str_new functions return GUF_DICT_UNINITIALISED or GUF_STR_UNINITIALISED_FAILED_ALLOC on failure (can be checked with guf_str_alloc_success)
|
|
GUF_STR_KWRDS guf_str guf_str_new(guf_str_view str_view);
|
|
GUF_STR_KWRDS guf_str guf_str_new_substr(guf_str_view str_view, ptrdiff_t pos, ptrdiff_t len);
|
|
|
|
GUF_STR_KWRDS guf_str guf_str_new_from_cstr(const char *c_str);
|
|
GUF_STR_KWRDS guf_str guf_str_new_empty_with_capacity(size_t capacity);
|
|
|
|
// Destruction:
|
|
GUF_STR_KWRDS void guf_str_free(guf_str *str);
|
|
|
|
// Modification:
|
|
GUF_STR_KWRDS guf_str *guf_str_append(guf_str *str, guf_str_view to_append);
|
|
GUF_STR_KWRDS guf_str *guf_str_append_cstr(guf_str *str, const char *cstr_to_append); // Not necessary
|
|
GUF_STR_KWRDS guf_str *guf_str_substr(guf_str* str, size_t pos, size_t count);
|
|
|
|
GUF_STR_KWRDS guf_str *guf_str_reserve(guf_str *str, size_t bufsize);
|
|
GUF_STR_KWRDS guf_str *guf_str_shrink_capacity(guf_str *str, size_t shrink_trigger_fac, bool shrink_exact);
|
|
|
|
GUF_STR_KWRDS char guf_str_pop_back(guf_str *str);
|
|
GUF_STR_KWRDS char guf_str_pop_front(guf_str *str);
|
|
|
|
// Copying and viewing:
|
|
GUF_STR_KWRDS guf_str guf_str_substr_cpy(guf_str_view str, size_t pos, size_t count); // not necessary
|
|
GUF_STR_KWRDS guf_str_view guf_substr_view(guf_str_view str, ptrdiff_t pos, ptrdiff_t count);
|
|
|
|
// Tokenising/Iterating.
|
|
GUF_STR_KWRDS guf_str_view guf_str_next_tok(guf_str_view *input, const guf_str_view *delims, ptrdiff_t num_delims, const guf_str_view *preserved_delims, ptrdiff_t num_preserved_delims);
|
|
|
|
// Indexing:
|
|
GUF_STR_KWRDS char *guf_str_at(guf_str *str, size_t idx);
|
|
GUF_STR_KWRDS char *guf_str_back(guf_str *str);
|
|
GUF_STR_KWRDS char *guf_str_front(guf_str *str);
|
|
GUF_STR_KWRDS const char *guf_str_const_cstr(const guf_str *str);
|
|
|
|
// Metadata retrieval:
|
|
GUF_STR_KWRDS size_t guf_str_len(const guf_str *str); // The size (in chars) without the final zero-terminator (size - 1).
|
|
GUF_STR_KWRDS size_t guf_str_capacity(const guf_str *str);
|
|
GUF_STR_KWRDS bool guf_str_is_stack_allocated(const guf_str *str);
|
|
GUF_STR_KWRDS bool guf_str_is_valid(const guf_str *str);
|
|
GUF_STR_KWRDS bool guf_str_alloc_success(const guf_str *str);
|
|
|
|
GUF_STR_KWRDS guf_hash_size_t guf_str_view_hash(const guf_str_view *sv);
|
|
|
|
// Comparison:
|
|
GUF_STR_KWRDS bool guf_str_view_equal(const guf_str_view* a, const guf_str_view* b);
|
|
GUF_STR_KWRDS bool guf_str_equal(const guf_str *a, const guf_str *b);
|
|
GUF_STR_KWRDS bool guf_str_equals_cstr(const guf_str *a, const char *c_str);
|
|
GUF_STR_KWRDS bool guf_str_equals_strview(const guf_str *a, guf_str_view b);
|
|
GUF_STR_KWRDS int guf_str_view_cmp(const void *str_view_a, const void *str_view_b); // For qsort etc.
|
|
|
|
#endif
|
|
|
|
#if defined(GUF_STR_IMPL) || defined(GUF_STR_IMPL_STATIC)
|
|
|
|
#include <string.h>
|
|
|
|
#ifdef GUF_STR_IMPL
|
|
#define GUF_UTF8_IMPL
|
|
#else
|
|
#define GUF_UTF8_IMPL_STATIC
|
|
#endif
|
|
#include "guf_utf8.h"
|
|
|
|
/*
 * Returns the next token at the front of `*input` and advances `*input` past it.
 *
 *   input:            in/out view of the text that is still to be tokenised.
 *   delims:           array of `num_delims` delimiter strings. At every position the
 *                     longest candidate is tried first, so the longest matching
 *                     delimiter wins.
 *   preserved_delims: array of `num_preserved_delims` delimiters (may be NULL/0). A matched
 *                     delimiter that also appears here is returned as a token of its own
 *                     instead of being dropped.
 *
 * Behaviour:
 *   - Returns {.str = NULL, .len = 0} if `*input` is empty or its .str is NULL.
 *   - Non-preserved delimiters in front of a token are skipped.
 *   - A non-preserved delimiter after a token ends the token and is consumed.
 *   - A preserved delimiter after a token ends the token but is left in `*input`,
 *     so the next call returns it.
 *   - A preserved delimiter at the very front is returned and consumed in full.
 *   - When the input runs out, whatever has been collected is returned (this can be
 *     a zero-length view if only non-preserved delimiters were left).
 *
 * Input is decoded with guf_utf8_char_next(); reads that are not GUF_UTF8_READ_VALID are
 * skipped without being added to the token length.
 * NOTE(review): this means a token's length undercounts if invalid bytes occur inside it -- confirm intended.
 */
GUF_STR_KWRDS guf_str_view guf_str_next_tok(guf_str_view *input, const guf_str_view *delims, ptrdiff_t num_delims, const guf_str_view *preserved_delims, ptrdiff_t num_preserved_delims)
{
    GUF_ASSERT(input);
    if (input->len <= 0 || input->str == NULL) {
        return (guf_str_view){.str = NULL, .len = 0};
    }

    // Longest delimiter length; stays -1 (=> no delimiter matching at all) if there are no delimiters.
    ptrdiff_t max_delim_len = -1;
    for (ptrdiff_t i = 0; i < num_delims; ++i) {
        if (delims[i].len > max_delim_len) {
            max_delim_len = delims[i].len;
        }
    }

    guf_str_view tok = {.str = input->str, .len = 0};
    guf_str_view prev_input = *input; // The input as it was before the current character was read.
    guf_utf8_char ch = {0};

    for (guf_utf8_stat stat = guf_utf8_char_next(&ch, input); stat != GUF_UTF8_READ_DONE; stat = guf_utf8_char_next(&ch, input)) {
        if (stat != GUF_UTF8_READ_VALID) {
            prev_input = *input;
            continue;
        }

        const int num_bytes = guf_utf8_char_num_bytes(&ch);

        // Does a delimiter start at the current character? Try the longest candidate first.
        for (ptrdiff_t delim_len = GUF_MIN(max_delim_len, prev_input.len); delim_len > 0; --delim_len) {
            guf_str_view delim_candidate = guf_substr_view(prev_input, 0, delim_len);
            for (ptrdiff_t delim_i = 0; delim_i < num_delims; ++delim_i) {
                if (!guf_str_view_equal(&delim_candidate, delims + delim_i)) {
                    continue;
                }

                // Found a delimiter; check whether it has to be preserved.
                bool preserved = false;
                if (preserved_delims && num_preserved_delims > 0) {
                    for (ptrdiff_t preserved_i = 0; preserved_i < num_preserved_delims; ++preserved_i) {
                        if (guf_str_view_equal(&delim_candidate, preserved_delims + preserved_i)) {
                            preserved = true;
                            break;
                        }
                    }
                }

                if (preserved) {
                    if (tok.len == 0) {
                        // The preserved delimiter is the token itself: consume ALL of its bytes.
                        // (Advancing by num_bytes of the first character only would leave the
                        // tail of a multi-character delimiter in the input.)
                        input->str = prev_input.str + delim_len;
                        input->len = prev_input.len - delim_len;
                        return delim_candidate;
                    }
                    // A token precedes the delimiter: undo reading the current character so
                    // the next call starts at the delimiter and returns it.
                    input->str -= num_bytes;
                    input->len += num_bytes;
                    return tok;
                }

                // Non-preserved delimiter: drop it from the input.
                input->len = prev_input.len - delim_len;
                input->str = prev_input.len > 0 ? prev_input.str + delim_len : NULL;
                GUF_ASSERT(input->len >= 0);

                if (tok.len > 0) {
                    return tok;
                }
                // Leading delimiter: the token now starts right after it.
                tok.str = input->str;
                goto end;
            }
        }

        tok.len += num_bytes; // Ordinary character: it belongs to the token.

        end:;
        prev_input = *input;
    }

    return tok;
}
|
|
|
|
// TODO: find_first_of and tokenise -> for parsing, see aoclib.
|
|
|
|
/*
 * Returns a view (no copy) into `str` starting at offset `pos` with at most `count`
 * elements. The length is clamped so that the view never extends past the end of `str`.
 *
 *   str:   source view; str.str must not be NULL (asserted).
 *   pos:   start offset, must be >= 0 (asserted).
 *   count: maximum length, must be >= 0 (asserted); may be arbitrarily large
 *          (e.g. PTRDIFF_MAX to mean "up to the end").
 *
 * Returns {.str = str.str, .len = 0} if `str` is empty, `count` is 0 or `pos` is at/after
 * the end. The same zero-length view is returned for NULL/negative arguments in builds
 * where GUF_ASSERT does not stop the program.
 */
GUF_STR_KWRDS guf_str_view guf_substr_view(guf_str_view str, ptrdiff_t pos, ptrdiff_t count)
{
    GUF_ASSERT(str.str);
    GUF_ASSERT(pos >= 0);
    GUF_ASSERT(count >= 0);

    if (str.str == NULL || str.len <= 0 || count <= 0 || pos < 0 || pos >= str.len) {
        return (guf_str_view){.str = str.str, .len = 0};
    }

    // 0 <= pos < str.len holds here, so `remaining` is positive and cannot overflow.
    // Comparing against it (instead of computing pos + count) avoids signed overflow
    // for very large counts.
    const ptrdiff_t remaining = str.len - pos;
    const ptrdiff_t substr_len = count > remaining ? remaining : count;
    GUF_ASSERT(substr_len >= 0);
    GUF_ASSERT(substr_len <= str.len);

    return (guf_str_view){.str = str.str + pos, .len = substr_len};
}
|
|
|
|
/*
 * Hashes the bytes referenced by `sv` with guf_hash(), seeded with GUF_HASH_INIT.
 * A view without data (NULL .str or .len <= 0) hashes to GUF_HASH_INIT itself.
 * `sv` must not be NULL (asserted).
 */
GUF_STR_KWRDS guf_hash_size_t guf_str_view_hash(const guf_str_view *sv)
{
    GUF_ASSERT(sv);

    if (sv->str && sv->len > 0) {
        return guf_hash(sv->str, sv->len, GUF_HASH_INIT);
    }
    return GUF_HASH_INIT; // Nothing to hash.
}
|
|
|
|
// Comparison:
|
|
GUF_STR_KWRDS bool guf_str_view_equal(const guf_str_view* a, const guf_str_view* b)
|
|
{
|
|
GUF_ASSERT_RELEASE(a && b);
|
|
if (a->len != b->len) {
|
|
return false;
|
|
}
|
|
|
|
if ((!a->str && b->str) || (!b->str && a->str)) {
|
|
return false;
|
|
} else if (!a->str && !b->str) {
|
|
return a->len == b->len;
|
|
}
|
|
|
|
GUF_ASSERT_RELEASE(a->len >= 0);
|
|
|
|
return 0 == memcmp(a->str, b->str, a->len);
|
|
}
|
|
|
|
|
|
#undef GUF_STR_IMPL
|
|
#undef GUF_STR_IMPL_STATIC
|
|
#endif /* end impl */
|
|
|
|
#undef GUF_STR_KWRDS
|