diff --git a/src/guf_math.h b/src/guf_math.h index 49dbfb3..46e1029 100644 --- a/src/guf_math.h +++ b/src/guf_math.h @@ -11,6 +11,10 @@ static inline uint64_t guf_rotl_u64(uint64_t x, int k) return (x << k) | (x >> (64 - k)); } +static inline uint32_t guf_rotl_u32(uint32_t x, int k) { + return (x << k) | (x >> (32 - k)); +} + static inline float guf_clamp_f32(float x, float min, float max) { if (x < min) return min; diff --git a/src/guf_rand.h b/src/guf_rand.h index ed2e54a..39cb20b 100644 --- a/src/guf_rand.h +++ b/src/guf_rand.h @@ -87,12 +87,16 @@ GUF_FN_KEYWORDS uint64_t guf_rand_u64(guf_randstate *state) GUF_ASSERT(state->s[0] || state->s[1] || state->s[2] || state->s[3]); const uint64_t result = guf_rotl_u64(state->s[1] * 5, 7) * 9; + const uint64_t t = state->s[1] << 17; + state->s[2] ^= state->s[0]; state->s[3] ^= state->s[1]; state->s[1] ^= state->s[2]; state->s[0] ^= state->s[3]; + state->s[2] ^= t; + state->s[3] = guf_rotl_u64(state->s[3], 45); return result; diff --git a/src/guf_rand32.h b/src/guf_rand32.h new file mode 100644 index 0000000..3b1108a --- /dev/null +++ b/src/guf_rand32.h @@ -0,0 +1,239 @@ +#ifndef GUF_RAND32_H +#define GUF_RAND32_H +#include "guf_common.h" +#include "guf_assert.h" +#include "guf_math.h" +#include +#include + +#define GUF_RAND32_MAX UINT32_MAX + +typedef struct guf_randstate32 { // State for xoshiro256** 1.0 + uint32_t s[4]; +} guf_randstate32; + +#ifdef GUF_IMPL_STATIC + #define GUF_FN_KEYWORDS static +#else + #define GUF_FN_KEYWORDS +#endif + +GUF_FN_KEYWORDS uint64_t guf_rand32_splitmix64(uint64_t *state); + +GUF_FN_KEYWORDS void guf_randstate32_init(guf_randstate32 *state, uint64_t seed); +void guf_randstate32_jump(guf_randstate32 *state); // Advance the state; equivalent to 2^128 calls to guf_rand32_u32(state) + +// uniform distributions using xoshiro128** 1.1 +GUF_FN_KEYWORDS uint32_t guf_rand32_u32(guf_randstate32 *state); // [0, GUF_RAND_MAX] +GUF_FN_KEYWORDS float guf_rand32_f32(guf_randstate32 *state); // [0.f, 1.f) + +// return true with a probability of p, false with a probability of (1 - p) +GUF_FN_KEYWORDS bool guf_rand32_bernoulli_trial(guf_randstate32 *state, float p); +GUF_FN_KEYWORDS bool guf_rand32_flip(guf_randstate32 *state); // Fair coin flip (bernoulli trial with p == 0.5) + +GUF_FN_KEYWORDS float guf_rand32range_f32(guf_randstate32 *state, float min, float end); // [min, end) +GUF_FN_KEYWORDS int32_t guf_rand32range_i32(guf_randstate32 *state, int32_t min, int32_t max); // [min, max] + +// normal distributions +GUF_FN_KEYWORDS void guf_rand32_normal_sample_f32(guf_randstate32 *state, float mean, float std_dev, float *result, ptrdiff_t n); +GUF_FN_KEYWORDS float guf_rand32_normal_sample_one_f32(guf_randstate32 *state, float mean, float std_dev); + +#if defined(GUF_IMPL) || defined(GUF_IMPL_STATIC) + +/* + splitmix64 (public domain) written in 2015 by Sebastiano Vigna (vigna@acm.org) + cf. https://prng.di.unimi.it/splitmix64.c (last-retrieved 2025-02-11) +*/ +GUF_FN_KEYWORDS uint64_t guf_rand32_splitmix64(uint64_t *state) +{ + GUF_ASSERT(state); + uint64_t z = ((*state) += 0x9e3779b97f4a7c15); + z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; + z = (z ^ (z >> 27)) * 0x94d049bb133111eb; + return z ^ (z >> 31); +} + +GUF_FN_KEYWORDS void guf_randstate32_init(guf_randstate32 *state, uint64_t seed) +{ + GUF_ASSERT_RELEASE(state); + + uint64_t split = guf_rand32_splitmix64(&seed); + state->s[0] = (uint32_t)split; // lower 32-bits + state->s[1] = (uint32_t)(split >> 32); // upper 32-bits + split = guf_rand32_splitmix64(&seed); + state->s[2] = (uint32_t)split; // lower 32-bits + state->s[3] = (uint32_t)(split >> 32); // upper 32-bits + + if (!state->s[0] && !state->s[1] && !state->s[2] && !state->s[3]) { // State must not be only zeroes: + state->s[0] = 0x9e3779b9; // arbitrary constant != 0 + + seed = 0x9e3779b97f4a7c15; + split = guf_rand32_splitmix64(&seed); + state->s[0] = (uint32_t)split; // lower 32-bits + state->s[1] = (uint32_t)(split >> 32); // upper 32-bits + split = guf_rand32_splitmix64(&seed); + state->s[2] = (uint32_t)split; // lower 32-bits + state->s[3] = (uint32_t)(split >> 32); // upper 32-bits + } +} + +/* + xoshiro128** 1.1 (public domain) written in 2018 by David Blackman and Sebastiano Vigna (vigna@acm.org) + cf. https://prng.di.unimi.it/xoshiro128starstar.c (last-retrieved 2025-02-11) +*/ + +GUF_FN_KEYWORDS uint32_t guf_rand32_u32(guf_randstate32 *state) +{ + GUF_ASSERT(state); + GUF_ASSERT(state->s[0] || state->s[1] || state->s[2] || state->s[3]); + + const uint32_t result = guf_rotl_u32(state->s[1] * 5, 7) * 9; + + const uint32_t t = state->s[1] << 9; + + state->s[2] ^= state->s[0]; + state->s[3] ^= state->s[1]; + state->s[1] ^= state->s[2]; + state->s[0] ^= state->s[3]; + + state->s[2] ^= t; + + state->s[3] = guf_rotl_u32(state->s[3], 11); + + return result; +} + +/* + Equivalent to 2^128 calls to guf_rand_u64(); it can be used to generate 2^128 + non-overlapping subsequences for parallel computations. +*/ +void guf_randstate32_jump(guf_randstate32 *state) +{ + GUF_ASSERT(state); + static const uint32_t JUMP[] = { 0x8764000b, 0xf542d2d3, 0x6fa035c3, 0x77f2db5b }; + uint32_t s0 = 0; + uint32_t s1 = 0; + uint32_t s2 = 0; + uint32_t s3 = 0; + for(size_t i = 0; i < sizeof JUMP / sizeof *JUMP; i++) { + for(int b = 0; b < 32; b++) { + if (JUMP[i] & UINT32_C(1) << b) { + s0 ^= state->s[0]; + s1 ^= state->s[1]; + s2 ^= state->s[2]; + s3 ^= state->s[3]; + } + guf_rand32_u32(state); + } + } + state->s[0] = s0; + state->s[1] = s1; + state->s[2] = s2; + state->s[3] = s3; +} + +// Generate float in the unit interval [0, 1) +GUF_FN_KEYWORDS float guf_rand32_f32(guf_randstate32 *state) +{ + return (guf_rand32_u32(state) >> 8) * 0x1.0p-24f; // 8 == 32 - 24; (float has a 24-bit mantissa/significand) +} + +GUF_FN_KEYWORDS bool guf_rand32_bernoulli_trial(guf_randstate32 *state, float p) +{ + p = guf_clamp_f32(p, 0, 1); + return guf_rand32_f32(state) < p; // never true for p = 0, always true for p = 1 since guf_rand32_f32 is in range [0, 1) +} + +GUF_FN_KEYWORDS bool guf_rand32_flip(guf_randstate32 *state) +{ + return guf_rand32_bernoulli_trial(state, 0.5f); +} + +// returns uniformly-distributed random float in range [min, end) (or min if min == end) +GUF_FN_KEYWORDS float guf_rand32range_f32(guf_randstate32 *state, float min, float end) +{ + if (end == INFINITY) { + end = FLT_MAX; + } + if (min == -INFINITY) { + min = -FLT_MAX; + } + GUF_ASSERT_RELEASE(end >= min); + return guf_rand32_f32(state) * (end - min) + min; +} + +// returns uniformly-distributed random int32_t in range [min, max] (max is inclusive as opposed to the f32/f64 versions) +GUF_FN_KEYWORDS int32_t guf_rand32range_i32(guf_randstate32 *state, int32_t min, int32_t max) +{ + GUF_ASSERT_RELEASE(max >= min); + if (min == max) { + return min; + } + + // rand_max is 2^32 - 1 for rand_max_shift == 1 + const unsigned rand_max_shift = 1; + const uint32_t rand_max = GUF_RAND32_MAX >> rand_max_shift; // 2^32 - 1 + + const uint32_t delta = max - min; + if (delta > rand_max) { + guf_panic(GUF_ERR_INT_OVERFLOW, GUF_ERR_MSG("in function guf_randrange32_i32: interval [min, max] larger than INT32_MAX")); + return -1; + } + /* + cf. https://c-faq.com/lib/randrange.html (last-retrieved 2025-02-11) + https://stackoverflow.com/a/6852396 (last-retrieved 2025-02-11) + */ + const uint32_t num_rand_vals = rand_max + 1u; // 2^31 + const uint32_t num_bins = (delta + 1u); + + const uint32_t bin_size = num_rand_vals / num_bins; // bin_size = floor(num_rand_vals / num_bins) + const uint32_t limit = num_rand_vals - (num_rand_vals % num_bins); // limit == bin_size * num_bins + GUF_ASSERT(limit == bin_size * num_bins); + uint32_t step; + do { + step = guf_rand32_u32(state) >> rand_max_shift; + } while (step >= limit); + step = step / bin_size; + + const int32_t rnd = min + step; + GUF_ASSERT(rnd >= min && rnd <= max); + return rnd; +} + +// Box-Müller-transform transcribed from wikipedia, cf. https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform (last-retrieved 2025-02-12) + +GUF_FN_KEYWORDS void guf_rand32_normal_sample_f32(guf_randstate32 *state, float mean, float std_dev, float *result, ptrdiff_t n) +{ + GUF_ASSERT_RELEASE(result); + GUF_ASSERT_RELEASE(n >= 0); + const float TAU = 2.f * (float)GUF_PI; + + ptrdiff_t i = 0; + while (i < n) { + float u1, u2; + do { + u1 = guf_rand32_f32(state); + } while (u1 == 0); + u2 = guf_rand32_f32(state); + + const float mag = std_dev * sqrtf(-2.f * logf(u1)); + result[i++] = mag * cosf(TAU * u2) + mean; + if (i < n) { + result[i++] = mag * sinf(TAU * u2) + mean; + } + } +} + +GUF_FN_KEYWORDS float guf_rand32_normal_sample_one_f32(guf_randstate32 *state, float mean, float std_dev) +{ + float result; + guf_rand32_normal_sample_f32(state, mean, std_dev, &result, 1); + return result; +} + +#undef GUF_IMPL +#undef GUF_IMPL_STATIC +#endif + +#undef GUF_FN_KEYWORDS +#endif diff --git a/src/guf_test.c b/src/guf_test.c index 09651dd..b867425 100644 --- a/src/guf_test.c +++ b/src/guf_test.c @@ -46,6 +46,9 @@ #define GUF_IMPL_STATIC #include "guf_rand.h" +#define GUF_IMPL_STATIC +#include "guf_rand32.h" + int main(void) { printf("libguf test: compiled with C %ld\n", __STDC_VERSION__); @@ -165,13 +168,15 @@ int main(void) guf_randstate rng; guf_randstate_init(&rng, time(NULL)); - guf_randstate_jump(&rng); + + guf_randstate32 rng32; + guf_randstate32_init(&rng32, time(NULL)); printf("\n"); int heads = 0, tails = 0; int throws = 10; for (i = 0; i < throws; ++i) { - bool is_head = guf_rand_flip(&rng); + bool is_head = guf_rand32_flip(&rng32); if (is_head) { puts("head"); ++heads; @@ -184,14 +189,12 @@ int main(void) int result[256]; memset(result, 0, sizeof result); - for (int n = 0; n < 24000; ++n) { double r = round(guf_rand_normal_sample_one_f64(&rng, 100, 10)); if (r >= 0 && r < GUF_STATIC_BUF_SIZE(result)) { result[(int)r] += 1; } } - for (size_t n = 50; n <= 150; ++n) { printf("%zu:\t", n); for (int j = 0; j < result[n] / 8; ++j) {