/* -*- mode: c++ -*- */ // Linear Congruential Generator uint stepLCG(uint *z, uint A, uint C){ return (*z) = (A * (*z) + C); } __kernel void randomLCG(const int randomNumbers, __global float* randomsSeed, __global float* randomGPU){ int id = get_global_id(0); int maxID = get_global_size(0); uint rng = randomsSeed[id]; for(int i=0; i < randomNumbers; ++i){ randomGPU[id + i * maxID] = (float)stepLCG(&rng, 1664525, 1013904223UL) / 0xffffffff; } } uint stepLFG(uint *z, __global uint *znmk, uint A, uint C){ return (*znmk) = (*z) = (A * (*z) + C) + (*znmk); } // Lagged Fibonacci Generator __kernel void randomLFG(const int randomNumbers, __global float* randomsSeed, const int randomStateSize, __global uint* randomState, __global float* randomGPU){ int id = get_global_id(0); int maxID = get_global_size(0); // bootstrap uint rng = randomsSeed[id]; for(int i=0; i < randomStateSize; ++i){ randomState[id + i * maxID] = stepLCG(&rng, 1664525, 1013904223UL); } // Lagged Fibonacci Generator int nmkIndex = 0; for(int i=0; i < randomNumbers; ++i){ randomGPU[id + i * maxID] = (float)stepLFG(&rng, &randomState[nmkIndex], 1664525, 1013904223UL) / 0xffffffff; nmkIndex = (nmkIndex + 1) % randomStateSize; } } // Combined Tausworthe Generator uint stepCTG(uint *z, uint S1, uint S2, uint S3, uint M){ uint b=((((*z)<>S2); return (*z) = ((((*z)&M)<= r); (*value) += hh + h - 1.0; } return (*value); } void seedHalton(ulong i, int base, float* inv_base, float* value){ float f = (*inv_base) = 1.0/base; (*value) = 0.0; while( i > 0){ (*value) += f * (float)(i % base); i /= base; f *= (*inv_base); } } __kernel void haltonSequence(const int randomNumbers, const int base, __global float* randomGPU){ int id = get_global_id(0); int maxID = get_global_size(0); float inv_base = 0.0; float rng = 0.0; seedHalton(id * randomNumbers, base, &inv_base, &rng); for(int i=0; i < randomNumbers; ++i){ randomGPU[id + i * maxID] = stepHalton(&rng, inv_base); } } // 1D uniformity test // TODO // Generate a quantized histogram // randomNums = number of randoms per thread // randoms = array of random numbers // bucketNum = number of histogram buckets // buckets = array of histogram buckets __kernel void testUniform1D(const int randomNums, __global float* randoms, const int bucketNum, __global int* buckets){ int id = get_global_id(0); // ID int threadCount = get_global_size(0); // threadCount float bucketWidth = 1.0 / bucketNum; int target = 0; for (size_t i = id; i < randomNums; i += threadCount) { if (i < randomNums) { // random numbers are between 0 and 1 target = (int)(randoms[i] / bucketWidth); atomic_add(&buckets[target], 1); } } } // 1D Monte-Carlo integral // TODO // Implement a Monte Carlo integrator: sin(x) ; x := [0:PI/2] // sampleNumber = number of samples per thread // seed = float4 seed array for the random number generator // integral = partial integral #define M_PIP2 1.57796327f __kernel void mcInt1D(const int sampleNumber, __global float4* seed, __global float* integral){ }