diff --git a/src/guf_utf8.h b/src/guf_utf8.h index 9d10f0d..eb6253b 100644 --- a/src/guf_utf8.h +++ b/src/guf_utf8.h @@ -16,7 +16,7 @@ // Corresponds to one unicode codepoint (NOTE: one guf_utf8_char does not necessarily correspond to one printable character, e.g. combining characters). typedef struct guf_utf8_char { - char bytes[5]; + char bytes[4]; } guf_utf8_char; typedef enum guf_utf8_stat { @@ -68,7 +68,7 @@ const char* const GUF_UTF8_COMMON_PUNCT[32] = ".", ",", ";", ":", "(", ")", "[", "]", "!", "?", "¿", "¡", "&", "+", "-", "/", "*", "\"", "'", "„", "“", "´", "»", "«", "`", "\\", "%", "‒", "–", "—", "—", "_" }; -const guf_utf8_char GUF_UTF8_REPLACEMENT_CHAR = {.bytes = {'\xEF','\xBF','\xBD', '\0', '\0'}}; +const guf_utf8_char GUF_UTF8_REPLACEMENT_CHAR = {.bytes = {'\xEF','\xBF','\xBD', '\0'}}; #ifndef GUF_FN_KEYWORDS #define GUF_FN_KEYWORDS diff --git a/src/test/test_utf8.cpp b/src/test/test_utf8.cpp index ff80d0d..6a80789 100644 --- a/src/test/test_utf8.cpp +++ b/src/test/test_utf8.cpp @@ -317,35 +317,31 @@ void UTF8Test::encode_decode() TEST_CHECK(guf_utf8_encode(&utf8, 0x1F308)); // "🌈" (Rainbow) TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 4); TEST_CHECK(utf8.bytes[0] == '\xF0' && utf8.bytes[1] == '\x9F' && utf8.bytes[2] == '\x8C' && utf8.bytes[3] == '\x88'); - TEST_CHECK(utf8.bytes[4] == '\0'); TEST_CHECK(guf_utf8_decode(&utf8) == 0x1F308); TEST_CHECK(guf_utf8_encode(&utf8, 0x130B8)); // "𓂸" (Egyptian Hieroglyph D052) TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 4); TEST_CHECK(utf8.bytes[0] == '\xF0' && utf8.bytes[1] == '\x93' && utf8.bytes[2] == '\x82' && utf8.bytes[3] == '\xB8'); - TEST_CHECK(utf8.bytes[4] == '\0'); TEST_CHECK(guf_utf8_decode(&utf8) == 0x130B8); TEST_CHECK(guf_utf8_encode(&utf8, 0x1F97A)); // "🥺" (Face with Pleading Eyes) TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 4); TEST_CHECK(utf8.bytes[0] == '\xF0' && utf8.bytes[1] == '\x9F' && utf8.bytes[2] == '\xA5' && utf8.bytes[3] == '\xBA'); - TEST_CHECK(utf8.bytes[4] == '\0'); TEST_CHECK(guf_utf8_decode(&utf8) == 0x1F97A); TEST_CHECK(guf_utf8_encode(&utf8, 0x1F980)); // "🦀" (Crab) TEST_CHECK(guf_utf8_char_num_bytes(&utf8) == 4); TEST_CHECK(utf8.bytes[0] == '\xF0' && utf8.bytes[1] == '\x9F' && utf8.bytes[2] == '\xA6' && utf8.bytes[3] == '\x80'); - TEST_CHECK(utf8.bytes[4] == '\0'); TEST_CHECK(guf_utf8_decode(&utf8) == 0x1F980); // Invalid characters: - utf8 = {.bytes = {'\xC0', '\x80', 0, 0, 0}}; + utf8 = {.bytes = {'\xC0', '\x80', 0, 0}}; TEST_CHECK(guf_utf8_decode(&utf8) < 0); - utf8 = {.bytes = {'\xC0', 0, 0, 0, 0}}; + utf8 = {.bytes = {'\xC0', 0, 0, 0}}; TEST_CHECK(guf_utf8_decode(&utf8) < 0); - utf8 = {.bytes = {'\x80', 0, 0, 0, 0}}; + utf8 = {.bytes = {'\x80', 0, 0, 0}}; TEST_CHECK(guf_utf8_decode(&utf8) < 0); // "The definition of UTF-8 prohibits encoding character numbers between U+D800 and U+DFFF" (surrogate pairs). diff --git a/todo.txt b/todo.txt index 8d7a5a7..8070cb8 100644 --- a/todo.txt +++ b/todo.txt @@ -1,7 +1,5 @@ - fix readonly str/uninit ? -- make guf_utf8_char 4 bytes (non-null terminated) - - guf_stack, guf_queue, guf_dqueue, guf_prio_queue (using a heap), guf_ringbuf - sort: add cpp #ifdef to remove restrict from declaration