Fix allocation overflow for guf_dbuf and guf_dict

(Implement GUF_ALLOC_MAX_BYTES and GUF_ALLOC_MAX_CAPACITY).
2025-03-18 01:56:45 +01:00 · 2025-03-18 01:56:45 +01:00 · e535d39e3d
commit e535d39e3d
parent cc0413116d
6 changed files with 170 additions and 63 deletions
--- a/src/guf_alloc.h
+++ b/src/guf_alloc.h
@ -17,28 +17,11 @@ typedef enum guf_alloc_fn_type {
    GUF_ALLOC_FN_TYPE_FREE,
 } guf_alloc_fn_type;

-static inline bool guf_size_t_mul_is_overflow(size_t a, size_t b) 
-{
-    const size_t c = a * b; 
-    return a != 0 && ((c / a) != b);    
-}
-
-static inline bool guf_size_calc_safe(ptrdiff_t count, ptrdiff_t sizeof_elem, ptrdiff_t *result)
-{
-    if (count < 0 || sizeof_elem <= 0) {
-        return false;
-    }
-    if (guf_size_t_mul_is_overflow((size_t)count, (size_t)sizeof_elem)) {
-        return false; 
-    } 
-    const size_t size = (size_t)count * (size_t)sizeof_elem;
-    GUF_ASSERT(size != 0);
-
-    const bool is_safe = size <= PTRDIFF_MAX;
-    if (result) {
-        *result = is_safe ? (ptrdiff_t)size : -1;
-    }
-    return is_safe;
-}
+/*
+    GUF_ALLOC_MAX_BYTES(TYPE):    Largest number of bytes an allocated buffer of elements of TYPE can have: PTRDIFF_MAX rounded down to a multiple of sizeof(TYPE).
+    GUF_ALLOC_MAX_CAPACITY(TYPE): Largest number of elements an allocated buffer of elements of TYPE can have: GUF_ALLOC_MAX_BYTES(TYPE) divided by sizeof(TYPE) (divides without remainder). NOTE(review): both expressions involve sizeof, so they presumably have an unsigned type (size_t) rather than ptrdiff_t; confirm before using them in signed comparisons.
+*/
+#define GUF_ALLOC_MAX_BYTES(TYPE)    ( PTRDIFF_MAX - ( PTRDIFF_MAX % sizeof(TYPE) ) )
+#define GUF_ALLOC_MAX_CAPACITY(TYPE) ( GUF_ALLOC_MAX_BYTES(TYPE) / sizeof(TYPE) )

 #endif
--- a/src/guf_dbuf.h
+++ b/src/guf_dbuf.h
@ -92,6 +92,8 @@ typedef struct GUF_CAT(GUF_DBUF_NAME, _iter) {

 GUF_DBUF_KWRDS bool GUF_CAT(GUF_DBUF_NAME, _valid)(const GUF_DBUF_NAME* dbuf);

+GUF_DBUF_KWRDS ptrdiff_t GUF_CAT(GUF_DBUF_NAME, _max_capacity)(void);
+
 GUF_DBUF_KWRDS void GUF_CAT(GUF_DBUF_NAME, _try_reserve)(GUF_DBUF_NAME *dbuf, ptrdiff_t min_capacity, guf_err *err);
 GUF_DBUF_KWRDS void GUF_CAT(GUF_DBUF_NAME, _reserve)(GUF_DBUF_NAME *dbuf, ptrdiff_t min_capacity);

@ -169,6 +171,8 @@ GUF_DBUF_KWRDS GUF_CAT(GUF_DBUF_NAME, _iter) GUF_CAT(GUF_DBUF_NAME, _find_if)(GU

 #if defined(GUF_DBUF_IMPL) || defined(GUF_DBUF_IMPL_STATIC)

+#include "guf_math.h"
+
 GUF_DBUF_KWRDS bool GUF_CAT(GUF_DBUF_NAME, _valid)(const GUF_DBUF_NAME* dbuf) 
 {
    if (!dbuf) {
@ -179,6 +183,18 @@ GUF_DBUF_KWRDS bool GUF_CAT(GUF_DBUF_NAME, _valid)(const GUF_DBUF_NAME* dbuf)
    return valid_data_ptr && valid_allocator && dbuf->capacity >= 0 && dbuf->size >= 0 && dbuf->size <= dbuf->capacity;
 }

+GUF_DBUF_KWRDS ptrdiff_t GUF_CAT(GUF_DBUF_NAME, _max_capacity)(void) // Returns GUF_ALLOC_MAX_CAPACITY(GUF_T), i.e. the largest element count a buffer of GUF_T may have, as a ptrdiff_t.
+{
+    ptrdiff_t DBUF_MAX_BYTES = GUF_ALLOC_MAX_BYTES(GUF_T);
+    GUF_ASSERT((DBUF_MAX_BYTES % sizeof(GUF_T)) == 0); // Sanity check: the byte limit is a whole number of elements.
+    (void)DBUF_MAX_BYTES; // Marks the variable as used; presumably needed for builds where GUF_ASSERT expands to nothing.
+
+    const ptrdiff_t DBUF_MAX_CAP = GUF_ALLOC_MAX_CAPACITY(GUF_T);
+    GUF_ASSERT(DBUF_MAX_CAP <= PTRDIFF_MAX); // Trivially true for a ptrdiff_t value.
+    return DBUF_MAX_CAP;
+}
+
+
 GUF_DBUF_KWRDS void GUF_CAT(GUF_DBUF_NAME, _try_reserve)(GUF_DBUF_NAME *dbuf, ptrdiff_t min_capacity, guf_err *err) 
 {
    GUF_ASSERT_RELEASE(GUF_CAT(GUF_DBUF_NAME, _valid)(dbuf));
@ -190,9 +206,18 @@ GUF_DBUF_KWRDS void GUF_CAT(GUF_DBUF_NAME, _try_reserve)(GUF_DBUF_NAME *dbuf, pt
    }
    GUF_ASSERT(min_capacity > 0);

+    const ptrdiff_t DBUF_MAX_CAP = GUF_CAT(GUF_DBUF_NAME, _max_capacity)();
+    min_capacity = GUF_MIN(min_capacity, DBUF_MAX_CAP);
+
+    if (min_capacity <= dbuf->capacity) {
+        guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, GUF_ERR_MSG("in function dbuf_try_reserve: already at max capacity"));
+        return;
+    }
+
    ptrdiff_t new_alloc_bytes = -1;
    if (!guf_size_calc_safe(min_capacity, sizeof(GUF_T), &new_alloc_bytes)) { 
-        guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, GUF_ERR_MSG("in function dbuf_try_reserve: overflow of ptrdiff_t"));
+        GUF_ASSERT_RELEASE(false);
+        guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, GUF_ERR_MSG("in function dbuf_try_reserve: overflow in size calculation (BUG: this should never happen...)"));
        return;
    } 
    GUF_ASSERT(new_alloc_bytes > 0);
@ -415,19 +440,32 @@ GUF_DBUF_KWRDS ptrdiff_t GUF_CAT(GUF_DBUF_NAME, _try_get_next_capacity)(ptrdiff_
    if (old_cap == 0) {
        new_cap = GUF_DBUF_INITIAL_CAP;
    } else if (old_cap < 8) {
-        new_cap = (size_t)old_cap * 2ull;
+        new_cap = (size_t)old_cap * 2u;
    } else {
        #ifdef GUF_DBUF_USE_GROWTH_FAC_ONE_POINT_FIVE
-            new_cap = (size_t)old_cap * 3ull / 2ull;
+            if (guf_mul_is_overflow_size_t(old_cap, 3)) { 
+                if (guf_mul_is_overflow_size_t((size_t)old_cap / 2u, 3)) { // Try (old_cap / 2) * 3
+                    new_cap = PTRDIFF_MAX;
+                } else {
+                    new_cap = ((size_t)old_cap / 2u) * 3u;
+                }
+            } else {
+                new_cap = (size_t)old_cap * 3u / 2u;
+            }
        #else
-            new_cap = (size_t)old_cap * 2ull;
+            if (guf_mul_is_overflow_size_t(old_cap, 2)) {
+                new_cap = PTRDIFF_MAX;
+            } else {
+                new_cap = (size_t)old_cap * 2u;
+            }
        #endif
    }

-    new_cap = GUF_MIN(new_cap, PTRDIFF_MAX);
+    const size_t DBUF_MAX_CAP = (size_t)GUF_CAT(GUF_DBUF_NAME, _max_capacity)();
+    new_cap = GUF_MIN(new_cap, DBUF_MAX_CAP);
    
    if (new_cap <= (size_t)old_cap) { // Detect overflow.
-        guf_err_set_or_panic(err, GUF_ERR_INT_OVERFLOW, GUF_ERR_MSG("in function dbuf_try_get_next_capacity: next capacity would overflow ptrdiff_t"));
+        guf_err_set_or_panic(err, GUF_ERR_INT_OVERFLOW, GUF_ERR_MSG("in function dbuf_try_get_next_capacity: reached max capacity"));
        return -1; 
    } else {
        guf_err_set_if_not_null(err, GUF_ERR_NONE);
--- a/src/guf_dict.h
+++ b/src/guf_dict.h
@ -164,11 +164,15 @@ GUF_DICT_KWRDS ptrdiff_t GUF_CAT(GUF_DICT_NAME, _iter_to_idx)(const GUF_DICT_NAM

 GUF_DICT_KWRDS bool GUF_CAT(GUF_DICT_NAME, _debug_valid_size)(const GUF_DICT_NAME *ht);

+GUF_DICT_KWRDS bool GUF_CAT(GUF_DICT_NAME, _valid)(const GUF_DICT_NAME *ht);
+GUF_DICT_KWRDS bool GUF_CAT(GUF_DICT_NAME, _max_capacity)(void); // NOTE(review): declared bool, but the definition returns the minimum of two ptrdiff_t capacities, which is converted to bool (true/1). The return type was presumably meant to be ptrdiff_t; change prototype and definition together.
+
 // #define GUF_DICT_IMPL /* DEBUGGGGGGGGG */

 #if defined(GUF_DICT_IMPL) || defined(GUF_DICT_IMPL_STATIC)

 #include "guf_assert.h"
+#include "guf_math.h"

 GUF_DICT_KWRDS bool GUF_CAT(GUF_DICT_NAME, _debug_valid_size)(const GUF_DICT_NAME *ht)
 {
@ -248,6 +252,14 @@ GUF_DICT_KWRDS bool GUF_CAT(GUF_DICT_NAME, _valid)(const GUF_DICT_NAME *ht)
    return kv_dbuf_valid && kv_meta_buf_valid && count_valid;  
 }

+GUF_DICT_KWRDS bool GUF_CAT(GUF_DICT_NAME, _max_capacity)(void) // NOTE(review): the return type is bool, so the ptrdiff_t minimum computed below is converted to bool (any non-zero capacity becomes true/1). ptrdiff_t was presumably intended, as in the dbuf _max_capacity; change together with the prototype.
+{
+    const ptrdiff_t max_cap_kv_elems = GUF_ALLOC_MAX_CAPACITY(GUF_DICT_KV_NAME); // Element limit for a buffer of GUF_DICT_KV_NAME.
+    const ptrdiff_t max_cap_kv_indices = GUF_ALLOC_MAX_CAPACITY(GUF_DICT_KV_META_T); // Element limit for a buffer of GUF_DICT_KV_META_T.
+    return GUF_MIN(max_cap_kv_elems, max_cap_kv_indices); // The smaller of the two limits.
+}
+
+
 GUF_DICT_KWRDS void GUF_CAT(GUF_DICT_NAME, _free)(GUF_DICT_NAME *ht, void *ctx)
 {
    (void)ctx;
@ -369,22 +381,36 @@ static void GUF_CAT(GUF_DICT_NAME, _try_grow_if_necessary)(GUF_DICT_NAME *ht, gu
        }
    } else if (GUF_CAT(GUF_DICT_NAME, _load_factor)(ht) > MAX_LOAD_FAC) { // 1.b) Grow kv-index-buffer if necessary.
        GUF_ASSERT(ht->kv_indices);
-        const ptrdiff_t old_size = ht->kv_indices_cap * sizeof(GUF_DICT_KV_META_T);
-        ptrdiff_t new_size = 0;
-        if (!guf_size_calc_safe(old_size, KV_META_GROWTH_FAC, &new_size)) {
-            guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, GUF_ERR_MSG("in function dict_try_insert: New capacity would overflow)"));
+        const ptrdiff_t old_size_bytes = ht->kv_indices_cap * sizeof(GUF_DICT_KV_META_T); 
+        ptrdiff_t new_size_bytes = 0; 
+
+        const size_t MAX_SIZE_BYTES = (size_t)GUF_ALLOC_MAX_BYTES(GUF_DICT_KV_META_T); // TODO: check
+        GUF_ASSERT(MAX_SIZE_BYTES % sizeof(GUF_DICT_KV_META_T) == 0 && MAX_SIZE_BYTES <= PTRDIFF_MAX);
+
+        if (guf_mul_is_overflow_size_t(old_size_bytes, KV_META_GROWTH_FAC)) {
+            new_size_bytes = MAX_SIZE_BYTES;
+        } else {
+            const size_t mul = (size_t)old_size_bytes * (size_t)KV_META_GROWTH_FAC;
+            new_size_bytes = GUF_MIN(mul, MAX_SIZE_BYTES);
+        }
+        GUF_ASSERT(new_size_bytes % sizeof(GUF_DICT_KV_META_T) == 0);
+
+        if (new_size_bytes <= old_size_bytes) { // Handle overflow.
+            guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, GUF_ERR_MSG("in function dict_try_insert: New kv_indices_capacity would overflow)"));
            return;
        }
       
        // TODO: Not sure if alloc and free is better here than realloc (since we do not copy ht->kv_indices anyway.)
-        GUF_DICT_KV_META_T *new_kv_indices = allocator->alloc(new_size, allocator->ctx);
+        GUF_DICT_KV_META_T *new_kv_indices = allocator->alloc(new_size_bytes, allocator->ctx);
        if (new_kv_indices == NULL) {
            guf_err_set_or_panic(err, GUF_ERR_ALLOC_FAIL, GUF_ERR_MSG("in function dict_try_insert: re-allocation failed"));
            return;
        }
-        allocator->free(ht->kv_indices, old_size, allocator->ctx);
+        allocator->free(ht->kv_indices, old_size_bytes, allocator->ctx);
        ht->kv_indices = new_kv_indices;
-        ht->kv_indices_cap = ht->kv_indices_cap * KV_META_GROWTH_FAC;
+        const ptrdiff_t new_indices_cap = new_size_bytes / sizeof(GUF_DICT_KV_META_T); // TODO: check
+        GUF_ASSERT(ht->kv_indices_cap < new_indices_cap);
+        ht->kv_indices_cap = new_indices_cap;
        ht->num_tombstones = 0;
        // ht->max_probelen = 0;
        
@ -402,7 +428,7 @@ static void GUF_CAT(GUF_DICT_NAME, _try_grow_if_necessary)(GUF_DICT_NAME *ht, gu
            GUF_ASSERT(!key_exists);
            GUF_ASSERT(new_idx < SIZE_T_MAX && new_idx <= PTRDIFF_MAX);
            ht->kv_indices[new_idx].kv_idx = (GUF_DICT_KV_IDX_T)kv_idx;
-            ht->kv_indices[new_idx].key_hash = GUF_DICT_KEY_HASH(&kv->key);
+            ht->kv_indices[new_idx].key_hash = GUF_DICT_KEY_HASH(&kv->key); // TODO: might be expensive...
        }
    }

--- a/src/guf_id_pool.h
+++ b/src/guf_id_pool.h
@ -23,10 +23,10 @@
    cf. https://github.com/erincatto/box2d/blob/main/src/id_pool.c (last-retrieved 2025-03-17)
 */

-// // test beg
-#define GUF_ID_POOL_IMPL_STATIC
-#define GUF_ID_POOL_i32
-// // test end
+// // debug beg
+// #define GUF_ID_POOL_IMPL_STATIC
+// #define GUF_ID_POOL_i32
+// // debug end

 #if defined(GUF_ID_POOL_i32)
    #undef GUF_ID_POOL_i32
@ -77,7 +77,7 @@
        #define GUF_ID_POOL_NAME guf_idpool_u8
    #endif
 #else
-    #error "No GUF_ID_POOL_i defined"
+    #error "Must define GUF_ID_POOL_i32, GUF_ID_POOL_i16, GUF_ID_POOL_i8, GUF_ID_POOL_u32, GUF_ID_POOL_u16, or GUF_ID_POOL_u8"
 #endif

 #define GUF_ID_POOL_DBUF GUF_CAT(GUF_ID_POOL_NAME, _id_dbuf)
--- a/src/guf_math.h
+++ b/src/guf_math.h
@ -1,11 +1,12 @@
-#if defined(GUF_MATH_IMPL_STATIC)
-    #define GUF_MATH_KWRDS static
-#else
-    #define GUF_MATH_KWRDS
-#endif
+// #if defined(GUF_MATH_IMPL_STATIC)
+//     #define GUF_MATH_KWRDS static
+// #else
+//     #define GUF_MATH_KWRDS
+// #endif

 #ifndef GUF_MATH_H
 #define GUF_MATH_H
+#include "guf_common.h"
 #include "guf_assert.h"
 #include <math.h>
 #include <float.h>
@ -51,17 +52,6 @@ GUF_DEFINE_MIN_MAX_CLAMP(double, f64)

 #undef GUF_DEFINE_MIN_MAX_CLAMP

-static inline bool guf_add_is_overflow_size_t(size_t a, size_t b)
-{
-    return (a + b) > a;
-}
-
-static inline bool guf_mul_is_overflow_size_t(size_t a, size_t b) 
-{
-    const size_t c = a * b; 
-    return a != 0 && ((c / a) != b);
-}
-
 static inline int       guf_abs_int(int x)             {if (x >= 0) {return x;} GUF_ASSERT_RELEASE(x > INT_MIN);     return -x;} // I would not drink that...
 static inline int8_t    guf_abs_i8 (int8_t x)          {if (x >= 0) {return x;} GUF_ASSERT_RELEASE(x > INT8_MIN);    return -x;}
 static inline int16_t   guf_abs_i16(int16_t x)         {if (x >= 0) {return x;} GUF_ASSERT_RELEASE(x > INT16_MIN);   return -x;}
@ -83,8 +73,77 @@ static inline uint16_t  guf_absdiff_i16(int16_t a, int16_t b) {return a > b ? (u
 static inline uint32_t  guf_absdiff_i32(int32_t a, int32_t b) {return a > b ? (uint32_t)a - (uint32_t)b : (uint32_t)b - (uint32_t)a;}
 static inline uint64_t  guf_absdiff_i64(int64_t a, int64_t b) {return a > b ? (uint64_t)a - (uint64_t)b : (uint64_t)b - (uint64_t)a;}

-static bool guf_nearly_zero_f32(float x, float eps) {return fabsf(x) <= eps;}
-static bool guf_nearly_one_f32(float x, float eps)  {return fabsf(x - 1) <= eps;}
+static inline bool guf_add_is_overflow_size_t(size_t a, size_t b) // Returns true if a + b wraps around in size_t.
+{
+    return (a + b) < a; // Unsigned addition wraps modulo 2^N, so a wrapped sum is smaller than a.
+}
+static inline bool guf_sub_is_overflow_size_t(size_t a, size_t b) // Returns true if a - b wraps around in size_t, i.e. if b > a.
+{
+    return (a - b) > a; // Unsigned subtraction wraps modulo 2^N, so a wrapped difference is larger than a.
+}
+static inline bool guf_mul_is_overflow_size_t(size_t a, size_t b) // Returns true if a * b wraps around in size_t.
+{
+    const size_t c = a * b; // Possibly wrapped product (well-defined for unsigned types).
+    return a != 0 && ((c / a) != b); // a != 0 guards the division; dividing the product back only recovers b if nothing wrapped.
+}
+
+static inline bool guf_add_is_overflow_u32(uint32_t a, uint32_t b) // Returns true if a + b wraps around in uint32_t.
+{
+    return (a + b) < a; // NOTE(review): assumes the sum is computed in 32 bits, i.e. uint32_t is not promoted to a wider int; confirm for exotic targets.
+}
+static inline bool guf_sub_is_overflow_u32(uint32_t a, uint32_t b) // Returns true if a - b wraps around in uint32_t, i.e. if b > a.
+{
+    return (a - b) > a; // NOTE(review): assumes the difference is computed in 32 bits, i.e. uint32_t is not promoted to a wider int; confirm for exotic targets.
+}
+static inline bool guf_mul_is_overflow_u32(uint32_t a, uint32_t b) // Returns true if a * b wraps around in uint32_t.
+{
+    const uint32_t c = a * b; // Product truncated to 32 bits.
+    return a != 0 && ((c / a) != b); // a != 0 guards the division; dividing the product back only recovers b if nothing wrapped.
+}
+
+// cf. https://stackoverflow.com/questions/199333/how-do-i-detect-unsigned-integer-overflow (last-retrieved 2025-03-17)
+static inline bool guf_add_is_overflow_ptrdiff(ptrdiff_t a, ptrdiff_t b) // Returns true if a + b is not representable in ptrdiff_t; the check never performs the addition itself.
+{
+    return (b > 0 && a > PTRDIFF_MAX - b) || // a + b overflow
+           (b < 0 && a < PTRDIFF_MIN - b);   // a + b underflow
+}
+static inline bool guf_sub_is_overflow_ptrdiff(ptrdiff_t a, ptrdiff_t b) // Returns true if a - b is not representable in ptrdiff_t; the check never performs the subtraction itself.
+{
+    return (b < 0 && a > PTRDIFF_MAX + b) || // a - b overflow
+           (b > 0 && a < PTRDIFF_MIN + b);   // a - b underflow
+}
+
+static inline bool guf_add_is_overflow_i32(int32_t a, int32_t b) // Returns true if a + b is not representable in int32_t; the check never performs the addition itself.
+{
+    return (b > 0 && a > INT32_MAX - b) || // a + b overflow
+           (b < 0 && a < INT32_MIN - b);   // a + b underflow
+}
+static inline bool guf_sub_is_overflow_i32(int32_t a, int32_t b) // Returns true if a - b is not representable in int32_t; the check never performs the subtraction itself.
+{
+    return (b < 0 && a > INT32_MAX + b) || // a - b overflow
+           (b > 0 && a < INT32_MIN + b);   // a - b underflow
+}
+
+static inline bool guf_size_calc_safe(ptrdiff_t count, ptrdiff_t sizeof_elem, ptrdiff_t *result) // Computes count * sizeof_elem; returns true only if the product fits in ptrdiff_t. result may be NULL.
+{
+    if (count < 0 || sizeof_elem <= 0) { // Invalid arguments: returns false and leaves *result untouched.
+        return false;
+    }
+    if (guf_mul_is_overflow_size_t((size_t)count, (size_t)sizeof_elem)) { // Product wraps in size_t: returns false and leaves *result untouched.
+        return false; 
+    } 
+    const size_t size = (size_t)count * (size_t)sizeof_elem;
+
+    const bool is_safe = size <= PTRDIFF_MAX;
+    if (result) {
+        *result = is_safe ? (ptrdiff_t)size : -1; // -1 is stored when the product exceeds PTRDIFF_MAX.
+    }
+    return is_safe;
+}
+
+
+static bool guf_nearly_zero_f32(float x, float eps)   {return fabsf(x) <= eps;} // True if |x| <= eps.
+static bool guf_nearly_one_f32(float x, float eps)    {return fabsf(x - 1) <= eps;} // True if |x - 1| <= eps.

 static bool guf_nearly_zero_f64(double x, double eps) {return fabs(x) <= eps;}
 static bool guf_nearly_one_f64(double x, double eps)  {return fabs(x - 1) <= eps;}
@ -156,6 +215,7 @@ static inline bool guf_isclose_abstol_f64(double a, double b, double abs_tol)
    return guf_isclose_f64(a, b, 0, abs_tol);
 }

+
 // An alternative lerp would be a + alpha * (b - a) (advantage: would be weakly monotonic, disadvantage: would not guarantee a for alpha = 0 and b for alpha = 1)
 static inline float guf_lerp_f32(float a, float b, float alpha)     {return (1 - alpha) * a + alpha * b;}
 static inline double guf_lerp_f64(double a, double b, double alpha) {return (1 - alpha) * a + alpha * b;}
--- a/todo.txt
+++ b/todo.txt
@ -2,6 +2,8 @@

 - guf_stack, guf_queue, guf_dqueue, guf_prio_queue (using a heap), guf_ringbuf

+- guf_dict: maybe put key_hash into kv_elem; maybe change order of key and val in kv_elem depending on size of key and val.
+
 - track allocs for test (implement alloc tracker):
   - each thread needs its own alloc and alloc_ctx; don't track granular, give each allocator it's unique id maybe?

@ -30,7 +32,5 @@

 - no guf_init.h

- linalg: float precision question += elem * -val / pivot_val vs elem * (-val / pivot_val) 
-
 - unicode normalisation
 - handle right-to-left text properly