/** * @file SFMT.c * @brief SIMD oriented Fast Mersenne Twister(SFMT) * * @author Mutsuo Saito (Hiroshima University) * @author Makoto Matsumoto (Hiroshima University) * * Copyright (C) 2006,2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima * University. All rights reserved. * * The new BSD License is applied to this software, see LICENSE.txt */ import std.c.time: time, time_t; extern(C) { void * memset ( void * ptr, int value, size_t num ); } /** * @file SFMT.h * * @brief SIMD oriented Fast Mersenne Twister(SFMT) pseudorandom * number generator * * @author Mutsuo Saito (Hiroshima University) * @author Makoto Matsumoto (Hiroshima University) * * Copyright (C) 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima * University. All rights reserved. * * The new BSD License is applied to this software. * see LICENSE.txt * * @note We assume that your system has inttypes.h. If your system * doesn't have inttypes.h, you have to typedef uint32_t and uint64_t, * and you have to define PRIu64 and PRIx64 in this file as follows: * @verbatim typedef unsigned int uint32_t typedef unsigned long long uint64_t #define PRIu64 "llu" #define PRIx64 "llx" @endverbatim * uint32_t must be exactly 32-bit unsigned integer type (no more, no * less), and uint64_t must be exactly 64-bit unsigned integer type. * PRIu64 and PRIx64 are used for printf function to print 64-bit * unsigned int and 64-bit unsigned int in hexadecimal format. */ alias uint uint32_t; alias ulong uint64_t; /* These real versions are due to Isaku Wada */ /** generates a random number on [0,1]-real-interval */ static double to_real1(uint32_t v) { return v * (1.0/4294967295.0); /* divided by 2^32-1 */ } /** generates a random number on [0,1]-real-interval */ static double genrand_real1() { return to_real1(gen_rand32()); } /** generates a random number on [0,1)-real-interval */ static double to_real2(uint32_t v) { return v * (1.0/4294967296.0); /* divided by 2^32 */ } /** generates a random number on [0,1)-real-interval */ static double genrand_real2() { return to_real2(gen_rand32()); } /** generates a random number on (0,1)-real-interval */ static double to_real3(uint32_t v) { return ((cast(double)v) + 0.5)*(1.0/4294967296.0); /* divided by 2^32 */ } /** generates a random number on (0,1)-real-interval */ static double genrand_real3() { return to_real3(gen_rand32()); } /** These real versions are due to Isaku Wada */ /** generates a random number on [0,1) with 53-bit resolution*/ static double to_res53(uint64_t v) { return v * (1.0/18446744073709551616.0L); } /** generates a random number on [0,1) with 53-bit resolution from two * 32 bit integers */ static double to_res53_mix(uint32_t x, uint32_t y) { return to_res53(x | (cast(uint64_t)y << 32)); } /** generates a random number on [0,1) with 53-bit resolution */ static double genrand_res53() { return to_res53(gen_rand64()); } /** generates a random number on [0,1) with 53-bit resolution using 32bit integer. */ static double genrand_res53_mix() { uint32_t x, y; x = gen_rand32(); y = gen_rand32(); return to_res53_mix(x, y); } //// start of SSMT-params.h const int MEXP = 19937; /*----------------- BASIC DEFINITIONS -----------------*/ /** Mersenne Exponent. The period of the sequence * is a multiple of 2^MEXP-1. * #define MEXP 19937 */ /** SFMT generator has an internal state array of 128-bit integers, * and N is its size. */ auto int N; static this() { N = (MEXP / 128 + 1); } /** N32 is the size of internal state array when regarded as an array * of 32-bit integers.*/ auto int N32; static this() { N32 = (N * 4); } /** N64 is the size of internal state array when regarded as an array * of 64-bit integers.*/ auto int N64; static this() { N64 = (N * 2); } /*---------------------- the parameters of SFMT following definitions are in paramsXXXX.h file. ----------------------*/ /** the pick up position of the array. #define POS1 122 */ /** the parameter of shift left as four 32-bit registers. #define SL1 18 */ /** the parameter of shift left as one 128-bit register. * The 128-bit integer is shifted by (SL2 * 8) bits. #define SL2 1 */ /** the parameter of shift right as four 32-bit registers. #define SR1 11 */ /** the parameter of shift right as one 128-bit register. * The 128-bit integer is shifted by (SL2 * 8) bits. #define SR2 1 */ /** A bitmask, used in the recursion. These parameters are introduced * to break symmetry of SIMD. #define MSK1 0xdfffffefU #define MSK2 0xddfecb7fU #define MSK3 0xbffaffffU #define MSK4 0xbffffff6U */ /** These definitions are part of a 128-bit period certification vector. #define PARITY1 0x00000001U #define PARITY2 0x00000000U #define PARITY3 0x00000000U #define PARITY4 0xc98e126aU */ const uint POS1 = 122; const uint SL1 = 18; const uint SL2 = 1; const uint SR1 = 11; const uint SR2 = 1; const uint MSK1 = 0xdfffffefU; const uint MSK2 = 0xddfecb7fU; const uint MSK3 = 0xbffaffffU; const uint MSK4 = 0xbffffff6U; const uint PARITY1 = 0x00000001U; const uint PARITY2 = 0x00000000U; const uint PARITY3 = 0x00000000U; const uint PARITY4 = 0x13c9e684U; /+ /* PARAMETERS FOR ALTIVEC */ #if defined(__APPLE__) /* For OSX */ #else #define ALTI_SL1 {SL1, SL1, SL1, SL1} #define ALTI_SR1 {SR1, SR1, SR1, SR1} #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} #define ALTI_SL2_PERM {1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8} #define ALTI_SL2_PERM64 {1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0} #define ALTI_SR2_PERM {7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14} #define ALTI_SR2_PERM64 {15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14} #endif /* For OSX */ #define IDSTR "SFMT-19937:122-18-1-11-1:dfffffef-ddfecb7f-bffaffff-bffffff6" +/ //// start of sfmt.c /*------------------------------------------------------ 128-bit SIMD data type for Altivec, SSE2 or standard C ------------------------------------------------------*/ /** 128-bit data structure */ struct W128_T { uint32_t u[4]; }; /** 128-bit data type */ alias W128_T w128_t; /*-------------------------------------- FILE GLOBAL VARIABLES internal state, index counter and flag --------------------------------------*/ /** the 128-bit internal state array */ w128_t sfmt[(MEXP / 128 + 1)]; /** the 32bit integer pointer to the 128-bit internal state array */ static uint32_t* psfmt32; static this() { psfmt32 = cast(uint32_t *)&sfmt[0].u[0]; } /** the 64bit integer pointer to the 128-bit internal state array */ static uint64_t *psfmt64; static this() { psfmt64 = cast(uint64_t *)&sfmt[0].u[0]; } /** index counter to the 32-bit internal state array */ static int idx; /** a flag: it is 0 if and only if the internal state is not yet * initialized. */ static int initialized = 0; /** a parity check vector which certificate the period of 2^{MEXP} */ static uint32_t parity[4] = [0x00000001U, 0x00000000U, 0x00000000U, 0x13c9e684U]; /*---------------- STATIC FUNCTIONS ----------------*/ /** * This function simulate a 64-bit index of LITTLE ENDIAN * in BIG ENDIAN machine. */ static int idxof(int i) { return i; } /** * This function simulates SIMD 128-bit right shift by the standard C. * The 128-bit integer given in in is shifted by (shift * 8) bits. * This function simulates the LITTLE ENDIAN SIMD. * @param output the output of this function * @param in the 128-bit data to be shifted * @param shift the shift value */ static void rshift128(out w128_t output, w128_t input, int shift) { uint64_t th, tl, oh, ol; th = (cast(uint64_t)input.u[3] << 32) | (cast(uint64_t)input.u[2]); tl = (cast(uint64_t)input.u[1] << 32) | (cast(uint64_t)input.u[0]); oh = th >> (shift * 8); ol = tl >> (shift * 8); ol |= th << (64 - shift * 8); output.u[1] = cast(uint32_t)(ol >> 32); output.u[0] = cast(uint32_t)ol; output.u[3] = cast(uint32_t)(oh >> 32); output.u[2] = cast(uint32_t)oh; } /** * This function simulates SIMD 128-bit left shift by the standard C. * The 128-bit integer given in in is shifted by (shift * 8) bits. * This function simulates the LITTLE ENDIAN SIMD. * @param output the output of this function * @param in the 128-bit data to be shifted * @param shift the shift value */ static void lshift128(out w128_t output, in w128_t input, int shift) { uint64_t th, tl, oh, ol; th = (cast(uint64_t)input.u[3] << 32) | (cast(uint64_t)input.u[2]); tl = (cast(uint64_t)input.u[1] << 32) | (cast(uint64_t)input.u[0]); oh = th << (shift * 8); ol = tl << (shift * 8); oh |= tl >> (64 - shift * 8); output.u[1] = cast(uint32_t)(ol >> 32); output.u[0] = cast(uint32_t)ol; output.u[3] = cast(uint32_t)(oh >> 32); output.u[2] = cast(uint32_t)oh; } /** * This function represents the recursion formula. * @param r output * @param a a 128-bit part of the internal state array * @param b a 128-bit part of the internal state array * @param c a 128-bit part of the internal state array * @param d a 128-bit part of the internal state array */ static void do_recursion(out w128_t r, w128_t a, w128_t b, w128_t c, w128_t d) { w128_t x; w128_t y; lshift128(x, a, 1); rshift128(y, c, 1); r.u[0] = a.u[0] ^ x.u[0] ^ ((b.u[0] >> 11) & 0xdfffffefU) ^ y.u[0] ^ (d.u[0] << 18); r.u[1] = a.u[1] ^ x.u[1] ^ ((b.u[1] >> 11) & 0xddfecb7fU) ^ y.u[1] ^ (d.u[1] << 18); r.u[2] = a.u[2] ^ x.u[2] ^ ((b.u[2] >> 11) & 0xbffaffffU) ^ y.u[2] ^ (d.u[2] << 18); r.u[3] = a.u[3] ^ x.u[3] ^ ((b.u[3] >> 11) & 0xbffffff6U) ^ y.u[3] ^ (d.u[3] << 18); } /** * This function fills the internal state array with pseudorandom * integers. */ static void gen_rand_all() { int i; w128_t r1, r2; r1 = sfmt[(MEXP / 128 + 1) - 2]; r2 = sfmt[(MEXP / 128 + 1) - 1]; for (i = 0; i < (MEXP / 128 + 1) - 122; i++) { do_recursion(sfmt[i], sfmt[i], sfmt[i + 122], r1, r2); r1 = r2; r2 = sfmt[i]; } for (; i < (MEXP / 128 + 1); i++) { do_recursion(sfmt[i], sfmt[i], sfmt[i + 122 - (MEXP / 128 + 1)], r1, r2); r1 = r2; r2 = sfmt[i]; } } /** * This function fills the user-specified array with pseudorandom * integers. * * @param array an 128-bit array to be filled by pseudorandom numbers. * @param size number of 128-bit pseudorandom numbers to be generated. */ static void gen_rand_array(w128_t *array, int size) { int i, j; w128_t r1, r2; r1 = sfmt[(MEXP / 128 + 1) - 2]; r2 = sfmt[(MEXP / 128 + 1) - 1]; for (i = 0; i < (MEXP / 128 + 1) - 122; i++) { do_recursion(array[i], sfmt[i], sfmt[i + 122], r1, r2); r1 = r2; r2 = array[i]; } for (; i < (MEXP / 128 + 1); i++) { do_recursion(array[i], sfmt[i], array[i + 122 - (MEXP / 128 + 1)], r1, r2); r1 = r2; r2 = array[i]; } for (; i < size - (MEXP / 128 + 1); i++) { do_recursion(array[i], array[i - (MEXP / 128 + 1)], array[i + 122 - (MEXP / 128 + 1)], r1, r2); r1 = r2; r2 = array[i]; } for (j = 0; j < 2 * (MEXP / 128 + 1) - size; j++) { sfmt[j] = array[j + size - (MEXP / 128 + 1)]; } for (; i < size; i++, j++) { do_recursion(array[i], array[i - (MEXP / 128 + 1)], array[i + 122 - (MEXP / 128 + 1)], r1, r2); r1 = r2; r2 = array[i]; sfmt[j] = array[i]; } } /** * This function represents a function used in the initialization * by init_by_array * @param x 32-bit integer * @return 32-bit integer */ static uint32_t func1(uint32_t x) { return (x ^ (x >> 27)) * cast(uint32_t)1664525UL; } /** * This function represents a function used in the initialization * by init_by_array * @param x 32-bit integer * @return 32-bit integer */ static uint32_t func2(uint32_t x) { return (x ^ (x >> 27)) * cast(uint32_t)1566083941UL; } /** * This function certificate the period of 2^{MEXP} */ static void period_certification() { int inner = 0; int i, j; uint32_t work; for (i = 0; i < 4; i++) inner ^= psfmt32[idxof(i)] & parity[i]; for (i = 16; i > 0; i >>= 1) inner ^= inner >> i; inner &= 1; /* check OK */ if (inner == 1) { return; } /* check NG, and modification */ for (i = 0; i < 4; i++) { work = 1; for (j = 0; j < 32; j++) { if ((work & parity[i]) != 0) { psfmt32[idxof(i)] ^= work; return; } work = work << 1; } } } /*---------------- PUBLIC FUNCTIONS ----------------*/ /** * This function returns the identification string. * The string shows the word size, the Mersenne exponent, * and all parameters of this generator. */ const char[] get_idstring() { return "SFMT-19937:122-18-1-11-1:dfffffef-ddfecb7f-bffaffff-bffffff6".dup; } /** * This function returns the minimum size of array used for \b * fill_array32() function. * @return minimum size of array used for fill_array32() function. */ int get_min_array_size32() { return ((MEXP / 128 + 1) * 4); } /** * This function returns the minimum size of array used for \b * fill_array64() function. * @return minimum size of array used for fill_array64() function. */ int get_min_array_size64() { return ((MEXP / 128 + 1) * 2); } /** * This function generates and returns 32-bit pseudorandom number. * init_gen_rand or init_by_array must be called before this function. * @return 32-bit pseudorandom number */ uint32_t gen_rand32() { uint32_t r; assert(initialized); if (idx >= ((MEXP / 128 + 1) * 4)) { gen_rand_all(); idx = 0; } r = psfmt32[idx++]; return r; } /** * This function generates and returns 64-bit pseudorandom number. * init_gen_rand or init_by_array must be called before this function. * The function gen_rand64 should not be called after gen_rand32, * unless an initialization is again executed. * @return 64-bit pseudorandom number */ uint64_t gen_rand64() { uint64_t r; assert(initialized); assert(idx % 2 == 0); if (idx >= ((MEXP / 128 + 1) * 4)) { gen_rand_all(); idx = 0; } r = psfmt64[idx / 2]; idx += 2; return r; } /** * This function generates pseudorandom 32-bit integers in the * specified array[] by one call. The number of pseudorandom integers * is specified by the argument size, which must be at least 624 and a * multiple of four. The generation by this function is much faster * than the following gen_rand function. * * For initialization, init_gen_rand or init_by_array must be called * before the first call of this function. This function can not be * used after calling gen_rand function, without initialization. * * @param array an array where pseudorandom 32-bit integers are filled * by this function. The pointer to the array must be \b "aligned" * (namely, must be a multiple of 16) in the SIMD version, since it * refers to the address of a 128-bit integer. In the standard C * version, the pointer is arbitrary. * * @param size the number of 32-bit pseudorandom integers to be * generated. size must be a multiple of 4, and greater than or equal * to (MEXP / 128 + 1) * 4. * * @note \b memalign or \b posix_memalign is available to get aligned * memory. Mac OSX doesn't have these functions, but \b malloc of OSX * returns the pointer to the aligned memory block. */ void fill_array32(uint32_t *array, int size) { assert(initialized); assert(idx == ((MEXP / 128 + 1) * 4)); assert(size % 4 == 0); assert(size >= ((MEXP / 128 + 1) * 4)); gen_rand_array(cast(w128_t*)array, size / 4); idx = ((MEXP / 128 + 1) * 4); } /** * This function generates pseudorandom 64-bit integers in the * specified array[] by one call. The number of pseudorandom integers * is specified by the argument size, which must be at least 312 and a * multiple of two. The generation by this function is much faster * than the following gen_rand function. * * For initialization, init_gen_rand or init_by_array must be called * before the first call of this function. This function can not be * used after calling gen_rand function, without initialization. * * @param array an array where pseudorandom 64-bit integers are filled * by this function. The pointer to the array must be "aligned" * (namely, must be a multiple of 16) in the SIMD version, since it * refers to the address of a 128-bit integer. In the standard C * version, the pointer is arbitrary. * * @param size the number of 64-bit pseudorandom integers to be * generated. size must be a multiple of 2, and greater than or equal * to (MEXP / 128 + 1) * 2 * * @note \b memalign or \b posix_memalign is available to get aligned * memory. Mac OSX doesn't have these functions, but \b malloc of OSX * returns the pointer to the aligned memory block. */ void fill_array64(uint64_t* array, int size) { assert(initialized); assert(idx == ((MEXP / 128 + 1) * 4)); assert(size % 2 == 0); assert(size >= ((MEXP / 128 + 1) * 2)); gen_rand_array(cast(w128_t *)array, size / 2); idx = ((MEXP / 128 + 1) * 4); } /** * This function initializes the internal state array with a 32-bit * integer seed. * * @param seed a 32-bit integer used as the seed. */ void init_gen_rand(uint32_t seed) { int i; psfmt32[idxof(0)] = seed; for (i = 1; i < ((MEXP / 128 + 1) * 4); i++) { psfmt32[idxof(i)] = 1812433253UL * (psfmt32[idxof(i - 1)] ^ (psfmt32[idxof(i - 1)] >> 30)) + i; } idx = ((MEXP / 128 + 1) * 4); period_certification(); initialized = 1; } static this() { init_gen_rand(jsw_time_seed()); } /** * This function initializes the internal state array, * with an array of 32-bit integers used as the seeds * @param init_key the array of 32-bit integers, used as a seed. * @param key_length the length of init_key. */ void init_by_array(uint32_t[] init_key, int key_length) { int i, j, count; uint32_t r; int lag; int mid; int size = (MEXP / 128 + 1) * 4; if (size >= 623) { lag = 11; } else if (size >= 68) { lag = 7; } else if (size >= 39) { lag = 5; } else { lag = 3; } mid = (size - lag) / 2; memset(cast(void*)sfmt, 0x8b, sfmt.sizeof); //sfmt[0 .. sfmt.sizeof] = cast(int)0x8b; if (key_length + 1 > ((MEXP / 128 + 1) * 4)) { count = key_length + 1; } else { count = ((MEXP / 128 + 1) * 4); } r = func1(psfmt32[idxof(0)] ^ psfmt32[idxof(mid)] ^ psfmt32[idxof(((MEXP / 128 + 1) * 4) - 1)]); psfmt32[idxof(mid)] += r; r += key_length; psfmt32[idxof(mid + lag)] += r; psfmt32[idxof(0)] = r; count--; for (i = 1, j = 0; (j < count) && (j < key_length); j++) { r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % ((MEXP / 128 + 1) * 4))] ^ psfmt32[idxof((i + ((MEXP / 128 + 1) * 4) - 1) % ((MEXP / 128 + 1) * 4))]); psfmt32[idxof((i + mid) % ((MEXP / 128 + 1) * 4))] += r; r += init_key[j] + i; psfmt32[idxof((i + mid + lag) % ((MEXP / 128 + 1) * 4))] += r; psfmt32[idxof(i)] = r; i = (i + 1) % ((MEXP / 128 + 1) * 4); } for (; j < count; j++) { r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % ((MEXP / 128 + 1) * 4))] ^ psfmt32[idxof((i + ((MEXP / 128 + 1) * 4) - 1) % ((MEXP / 128 + 1) * 4))]); psfmt32[idxof((i + mid) % ((MEXP / 128 + 1) * 4))] += r; r += i; psfmt32[idxof((i + mid + lag) % ((MEXP / 128 + 1) * 4))] += r; psfmt32[idxof(i)] = r; i = (i + 1) % ((MEXP / 128 + 1) * 4); } for (j = 0; j < ((MEXP / 128 + 1) * 4); j++) { r = func2(psfmt32[idxof(i)] + psfmt32[idxof((i + mid) % ((MEXP / 128 + 1) * 4))] + psfmt32[idxof((i + ((MEXP / 128 + 1) * 4) - 1) % ((MEXP / 128 + 1) * 4))]); psfmt32[idxof((i + mid) % ((MEXP / 128 + 1) * 4))] ^= r; r -= i; psfmt32[idxof((i + mid + lag) % ((MEXP / 128 + 1) * 4))] ^= r; psfmt32[idxof(i)] = r; i = (i + 1) % ((MEXP / 128 + 1) * 4); } idx = ((MEXP / 128 + 1) * 4); period_certification(); initialized = 1; } /* Portable time seed */ uint32_t jsw_time_seed() { time_t now = time ( null ); ubyte* p = cast(ubyte*)&now; uint32_t seed = 0; size_t i; for ( i = 0; i < now.sizeof; i++ ) seed = seed * ( ubyte.max + 2U ) + p[i]; return seed; }