From 26d9534bf3ce9001b0397a9cba38e5e673e24768 Mon Sep 17 00:00:00 2001 From: gyulaid Date: Wed, 30 Mar 2022 23:32:30 +0200 Subject: [PATCH] MonteCarlo --- MonteCarlo/CTG.cpp | 42 ++++++++++++++++++++++++++++++++---- MonteCarlo/Halton.cpp | 40 +++++++++++++++++++++++++++++++--- MonteCarlo/Hybrid.cpp | 40 +++++++++++++++++++++++++++++++--- MonteCarlo/LCG.cpp | 40 +++++++++++++++++++++++++++++++--- MonteCarlo/LFG.cpp | 40 +++++++++++++++++++++++++++++++--- MonteCarlo/MonteCarlo.cpp | 18 +++++++++------- MonteCarlo/MonteCarloTests.h | 25 ++++++++++++++++----- kernels/montecarlo.cl | 15 ++++++++++++- 8 files changed, 230 insertions(+), 30 deletions(-) diff --git a/MonteCarlo/CTG.cpp b/MonteCarlo/CTG.cpp index 7e329d7..696ab80 100644 --- a/MonteCarlo/CTG.cpp +++ b/MonteCarlo/CTG.cpp @@ -2,13 +2,16 @@ #include "MonteCarloTests.h" #include "Common.h" +#include -CTG::CTG(size_t _randomNumbers, size_t _threadCount) +CTG::CTG(size_t _randomNumbers, size_t _threadCount, size_t _bucketNum) { randomNumbers = _randomNumbers; threadCount = _threadCount; + bucketNum = _bucketNum; randomBuffer = std::vector(threadCount * randomNumbers); seedBuffer = std::vector(threadCount); + histoBuffer = std::vector(bucketNum); kernel_name = "randomCTG"; for (int i = 0; i < threadCount; ++i) @@ -18,7 +21,8 @@ CTG::CTG(size_t _randomNumbers, size_t _threadCount) } void CTG::collect_results(cl::CommandQueue* queue) { - + queue->enqueueReadBuffer(clResultBuffer, true, 0, sizeof(float) * threadCount * randomNumbers, randomBuffer.data()); + queue->enqueueReadBuffer(clHistoBuffer, true, 0, sizeof(int) * bucketNum, histoBuffer.data()); } void CTG::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event) { @@ -43,6 +47,22 @@ void CTG::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program cl::NullRange, NULL, Event); + Event->wait(); + + cl::Kernel kernel_histo = cl::Kernel(*program, "testUniform1D", &err); + CheckCLError(err); + clHistoBuffer = cl::Buffer(*context, CL_MEM_WRITE_ONLY, sizeof(int) * bucketNum, NULL, &err); + + kernel_histo.setArg(0, threadCount * randomNumbers); + kernel_histo.setArg(1, clResultBuffer); + kernel_histo.setArg(2, bucketNum); + kernel_histo.setArg(3, clHistoBuffer); + queue->enqueueNDRangeKernel(kernel_histo, + cl::NullRange, + cl::NDRange(threadCount, 1), + cl::NullRange, + NULL, + Event); } void CTG::cpu_compute() @@ -51,13 +71,27 @@ void CTG::cpu_compute() bool CTG::validate_results() { - std::ofstream ofs("randoms_" + description() + ".txt", std::ofstream::out); + float min = 1000.0; + float max = -1000.0; + std::ofstream ofs(description() + "_randoms.txt", std::ofstream::out); for (int i = 0; i < threadCount; ++i) { for (int j = 0; j < randomNumbers; ++j) { - ofs << j << " " << randomBuffer[i + j * threadCount] << std::endl; + ofs << j << "\t" << randomBuffer[i + j * threadCount] << std::endl; + if (randomBuffer[i + j * threadCount] > max) { + max = randomBuffer[i + j * threadCount]; + } + if (randomBuffer[i + j * threadCount] < min) { + min = randomBuffer[i + j * threadCount]; + } } } ofs.close(); + std::ofstream ofsh(description() + "_histo.txt", std::ofstream::out); + for (int i = 0; i < bucketNum; ++i) { + ofsh << i << "\t" << histoBuffer[i] << std::endl; + } + ofsh.close(); + std::cout << "CTG: Min: " << min << ", max: " << max << std::endl; return true; } diff --git a/MonteCarlo/Halton.cpp b/MonteCarlo/Halton.cpp index c7b53bb..b2a5bc6 100644 --- a/MonteCarlo/Halton.cpp +++ b/MonteCarlo/Halton.cpp @@ -2,18 +2,22 @@ #include "MonteCarloTests.h" #include "Common.h" +#include -Halton::Halton(size_t _randomNumbers, size_t _threadCount, size_t _base) { +Halton::Halton(size_t _randomNumbers, size_t _threadCount, size_t _base, size_t _bucketNum) { randomNumbers = _randomNumbers; threadCount = _threadCount; base = _base; + bucketNum = _bucketNum; randomBuffer = std::vector(threadCount * randomNumbers); + histoBuffer = std::vector(bucketNum); kernel_name = "haltonSequence"; } void Halton::collect_results(cl::CommandQueue* queue) { queue->enqueueReadBuffer(clResultBuffer, true, 0, sizeof(float) * threadCount * randomNumbers, randomBuffer.data()); + queue->enqueueReadBuffer(clHistoBuffer, true, 0, sizeof(int) * bucketNum, histoBuffer.data()); } void Halton::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event) @@ -36,6 +40,22 @@ void Halton::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Prog cl::NullRange, NULL, Event); + Event->wait(); + + cl::Kernel kernel_histo = cl::Kernel(*program, "testUniform1D", &err); + CheckCLError(err); + clHistoBuffer = cl::Buffer(*context, CL_MEM_WRITE_ONLY, sizeof(int) * bucketNum, NULL, &err); + + kernel_histo.setArg(0, threadCount * randomNumbers); + kernel_histo.setArg(1, clResultBuffer); + kernel_histo.setArg(2, bucketNum); + kernel_histo.setArg(3, clHistoBuffer); + queue->enqueueNDRangeKernel(kernel_histo, + cl::NullRange, + cl::NDRange(threadCount, 1), + cl::NullRange, + NULL, + Event); } void Halton::cpu_compute() @@ -44,13 +64,27 @@ void Halton::cpu_compute() bool Halton::validate_results() { - std::ofstream ofs("randoms_" + description() + ".txt", std::ofstream::out); + float min = 1000.0; + float max = -1000.0; + std::ofstream ofs(description() + "_randoms.txt", std::ofstream::out); for (int i = 0; i < threadCount; ++i) { for (int j = 0; j < randomNumbers; ++j) { - ofs << j << " " << randomBuffer[i + j * threadCount] << std::endl; + ofs << j << "\t" << randomBuffer[i + j * threadCount] << std::endl; + if (randomBuffer[i + j * threadCount] > max) { + max = randomBuffer[i + j * threadCount]; + } + if (randomBuffer[i + j * threadCount] < min) { + min = randomBuffer[i + j * threadCount]; + } } } ofs.close(); + std::ofstream ofsh(description() + "_histo.txt", std::ofstream::out); + for (int i = 0; i < bucketNum; ++i) { + ofsh << i << "\t" << histoBuffer[i] << std::endl; + } + ofsh.close(); + std::cout << "Halton: Min: " << min << ", max: " << max << std::endl; return true; } diff --git a/MonteCarlo/Hybrid.cpp b/MonteCarlo/Hybrid.cpp index 6729718..a39277e 100644 --- a/MonteCarlo/Hybrid.cpp +++ b/MonteCarlo/Hybrid.cpp @@ -2,13 +2,16 @@ #include "MonteCarloTests.h" #include "Common.h" +#include -Hybrid::Hybrid(size_t _randomNumbers, size_t _threadCount) +Hybrid::Hybrid(size_t _randomNumbers, size_t _threadCount, size_t _bucketNum) { randomNumbers = _randomNumbers; threadCount = _threadCount; + bucketNum = _bucketNum; randomBuffer = std::vector(threadCount * randomNumbers); seedBuffer = std::vector(threadCount); + histoBuffer = std::vector(bucketNum); kernel_name = "hybridRNG"; for (int i = 0; i < threadCount; ++i) @@ -20,6 +23,7 @@ Hybrid::Hybrid(size_t _randomNumbers, size_t _threadCount) void Hybrid::collect_results(cl::CommandQueue* queue) { queue->enqueueReadBuffer(clResultBuffer, true, 0, sizeof(float) * threadCount * randomNumbers, randomBuffer.data()); + queue->enqueueReadBuffer(clHistoBuffer, true, 0, sizeof(int) * bucketNum, histoBuffer.data()); } void Hybrid::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event) @@ -45,6 +49,22 @@ void Hybrid::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Prog cl::NullRange, NULL, Event); + Event->wait(); + + cl::Kernel kernel_histo = cl::Kernel(*program, "testUniform1D", &err); + CheckCLError(err); + clHistoBuffer = cl::Buffer(*context, CL_MEM_WRITE_ONLY, sizeof(int) * bucketNum, NULL, &err); + + kernel_histo.setArg(0, threadCount * randomNumbers); + kernel_histo.setArg(1, clResultBuffer); + kernel_histo.setArg(2, bucketNum); + kernel_histo.setArg(3, clHistoBuffer); + queue->enqueueNDRangeKernel(kernel_histo, + cl::NullRange, + cl::NDRange(threadCount, 1), + cl::NullRange, + NULL, + Event); } void Hybrid::cpu_compute() @@ -53,13 +73,27 @@ void Hybrid::cpu_compute() bool Hybrid::validate_results() { - std::ofstream ofs("randoms_" + description() + ".txt", std::ofstream::out); + float min = 1000.0; + float max = -1000.0; + std::ofstream ofs(description() + "_randoms.txt", std::ofstream::out); for (int i = 0; i < threadCount; ++i) { for (int j = 0; j < randomNumbers; ++j) { - ofs << j << " " << randomBuffer[i + j * threadCount] << std::endl; + ofs << j << "\t" << randomBuffer[i + j * threadCount] << std::endl; + if (randomBuffer[i + j * threadCount] > max) { + max = randomBuffer[i + j * threadCount]; + } + if (randomBuffer[i + j * threadCount] < min) { + min = randomBuffer[i + j * threadCount]; + } } } ofs.close(); + std::ofstream ofsh(description() + "_histo.txt", std::ofstream::out); + for (int i = 0; i < bucketNum; ++i) { + ofsh << i << "\t" << histoBuffer[i] << std::endl; + } + ofsh.close(); + std::cout << "Hybrid: Min: " << min << ", max: " << max << std::endl; return true; } diff --git a/MonteCarlo/LCG.cpp b/MonteCarlo/LCG.cpp index dcec479..f26083b 100644 --- a/MonteCarlo/LCG.cpp +++ b/MonteCarlo/LCG.cpp @@ -2,13 +2,16 @@ #include "MonteCarloTests.h" #include "Common.h" +#include -LCG::LCG(size_t _randomNumbers, size_t _threadCount) +LCG::LCG(size_t _randomNumbers, size_t _threadCount, size_t _bucketNum) { randomNumbers = _randomNumbers; threadCount = _threadCount; + bucketNum = _bucketNum; randomBuffer = std::vector(threadCount * randomNumbers); seedBuffer = std::vector(threadCount); + histoBuffer = std::vector(bucketNum); kernel_name = "randomLCG"; for (int i = 0; i < threadCount; ++i) @@ -20,6 +23,7 @@ LCG::LCG(size_t _randomNumbers, size_t _threadCount) void LCG::collect_results(cl::CommandQueue* queue) { queue->enqueueReadBuffer(clResultBuffer, true, 0, sizeof(float) * threadCount * randomNumbers, randomBuffer.data()); + queue->enqueueReadBuffer(clHistoBuffer, true, 0, sizeof(int) * bucketNum, histoBuffer.data()); } void LCG::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event) @@ -45,6 +49,22 @@ void LCG::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program cl::NullRange, NULL, Event); + Event->wait(); + + cl::Kernel kernel_histo = cl::Kernel(*program, "testUniform1D", &err); + CheckCLError(err); + clHistoBuffer = cl::Buffer(*context, CL_MEM_WRITE_ONLY, sizeof(int) * bucketNum, NULL, &err); + + kernel_histo.setArg(0, threadCount * randomNumbers); + kernel_histo.setArg(1, clResultBuffer); + kernel_histo.setArg(2, bucketNum); + kernel_histo.setArg(3, clHistoBuffer); + queue->enqueueNDRangeKernel(kernel_histo, + cl::NullRange, + cl::NDRange(threadCount, 1), + cl::NullRange, + NULL, + Event); } void LCG::cpu_compute() @@ -53,13 +73,27 @@ void LCG::cpu_compute() bool LCG::validate_results() { - std::ofstream ofs("randoms_" + description() + ".txt", std::ofstream::out); + float min = 1000.0; + float max = -1000.0; + std::ofstream ofs(description() + "_randoms.txt", std::ofstream::out); for (int i = 0; i < threadCount; ++i) { for (int j = 0; j < randomNumbers; ++j) { - ofs << j << " " << randomBuffer[i + j * threadCount] << std::endl; + ofs << j << "\t" << randomBuffer[i + j * threadCount] << std::endl; + if (randomBuffer[i + j * threadCount] > max) { + max = randomBuffer[i + j * threadCount]; + } + if (randomBuffer[i + j * threadCount] < min) { + min = randomBuffer[i + j * threadCount]; + } } } ofs.close(); + std::ofstream ofsh(description() + "_histo.txt", std::ofstream::out); + for (int i = 0; i < bucketNum; ++i) { + ofsh << i << "\t" << histoBuffer[i] << std::endl; + } + ofsh.close(); + std::cout << "LCG: Min: " << min << ", max: " << max << std::endl; return true; } diff --git a/MonteCarlo/LFG.cpp b/MonteCarlo/LFG.cpp index 7e28f13..c86e8fc 100644 --- a/MonteCarlo/LFG.cpp +++ b/MonteCarlo/LFG.cpp @@ -2,13 +2,16 @@ #include "MonteCarloTests.h" #include "Common.h" +#include -LFG::LFG(size_t _randomNumbers, size_t _threadCount, size_t _randomStateSize) +LFG::LFG(size_t _randomNumbers, size_t _threadCount, size_t _randomStateSize, size_t _bucketNum) { randomNumbers = _randomNumbers; threadCount = _threadCount; + bucketNum = _bucketNum; randomBuffer = std::vector(threadCount * randomNumbers); seedBuffer = std::vector(threadCount); + histoBuffer = std::vector(bucketNum); kernel_name = "randomLFG"; randomStateSize = _randomStateSize; @@ -21,6 +24,7 @@ LFG::LFG(size_t _randomNumbers, size_t _threadCount, size_t _randomStateSize) void LFG::collect_results(cl::CommandQueue* queue) { queue->enqueueReadBuffer(clResultBuffer, true, 0, sizeof(float) * threadCount * randomNumbers, randomBuffer.data()); + queue->enqueueReadBuffer(clHistoBuffer, true, 0, sizeof(int) * bucketNum, histoBuffer.data()); } void LFG::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event) @@ -49,6 +53,22 @@ void LFG::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program cl::NullRange, NULL, Event); + Event->wait(); + + cl::Kernel kernel_histo = cl::Kernel(*program, "testUniform1D", &err); + CheckCLError(err); + clHistoBuffer = cl::Buffer(*context, CL_MEM_WRITE_ONLY, sizeof(int) * bucketNum, NULL, &err); + + kernel_histo.setArg(0, threadCount * randomNumbers); + kernel_histo.setArg(1, clResultBuffer); + kernel_histo.setArg(2, bucketNum); + kernel_histo.setArg(3, clHistoBuffer); + queue->enqueueNDRangeKernel(kernel_histo, + cl::NullRange, + cl::NDRange(threadCount, 1), + cl::NullRange, + NULL, + Event); } void LFG::cpu_compute() @@ -57,13 +77,27 @@ void LFG::cpu_compute() bool LFG::validate_results() { - std::ofstream ofs("randoms_" + description() + ".txt", std::ofstream::out); + float min = 1000.0; + float max = -1000.0; + std::ofstream ofs(description() + "_randoms.txt", std::ofstream::out); for (int i = 0; i < threadCount; ++i) { for (int j = 0; j < randomNumbers; ++j) { - ofs << j << " " << randomBuffer[i + j * threadCount] << std::endl; + ofs << j << "\t" << randomBuffer[i + j * threadCount] << std::endl; + if (randomBuffer[i + j * threadCount] > max) { + max = randomBuffer[i + j * threadCount]; + } + if (randomBuffer[i + j * threadCount] < min) { + min = randomBuffer[i + j * threadCount]; + } } } ofs.close(); + std::ofstream ofsh(description() + "_histo.txt", std::ofstream::out); + for (int i = 0; i < bucketNum; ++i) { + ofsh << i << "\t" << histoBuffer[i] << std::endl; + } + ofsh.close(); + std::cout << "LFG: Min: " << min << ", max: " << max << std::endl; return true; } diff --git a/MonteCarlo/MonteCarlo.cpp b/MonteCarlo/MonteCarlo.cpp index efd6b45..78a4908 100644 --- a/MonteCarlo/MonteCarlo.cpp +++ b/MonteCarlo/MonteCarlo.cpp @@ -14,9 +14,9 @@ #include #include "MonteCarloTests.h" -const bool writeOutRandoms = true; -const size_t randomNumbers = 1024; -const size_t threadCount = 512; +const bool writeOutRandoms = false; +const size_t randomNumbers = 512; +const size_t threadCount = 512; // was 512 void cppapi() { @@ -116,11 +116,13 @@ int main() //cppapi(); OpenCLHandler handler("../kernels/montecarlo.cl"); - //handler.run_test(new LCG(randomNumbers, threadCount)); - //handler.run_test(new LFG(randomNumbers, threadCount, 256)); - //handler.run_test(new CTG(randomNumbers, threadCount)); - //handler.run_test(new Hybrid(randomNumbers, threadCount)); - handler.run_test(new Halton(randomNumbers, threadCount, 2)); + size_t bucketNum = 50; + + //handler.run_test(new LCG(randomNumbers, threadCount, bucketNum)); + //handler.run_test(new LFG(randomNumbers, threadCount, 256, bucketNum)); + handler.run_test(new CTG(randomNumbers, threadCount, bucketNum)); + //handler.run_test(new Hybrid(randomNumbers, threadCount, bucketNum)); + //handler.run_test(new Halton(randomNumbers, threadCount, 2, bucketNum)); return 0; } diff --git a/MonteCarlo/MonteCarloTests.h b/MonteCarlo/MonteCarloTests.h index 3f8383b..0846857 100644 --- a/MonteCarlo/MonteCarloTests.h +++ b/MonteCarlo/MonteCarloTests.h @@ -9,8 +9,11 @@ private: std::vector randomBuffer; std::vector seedBuffer; std::string kernel_name; + cl::Buffer clHistoBuffer; + std::vector histoBuffer; + size_t bucketNum; public: - LCG(size_t _randomNumbers, size_t _threadCount); + LCG(size_t _randomNumbers, size_t _threadCount, size_t _bucketNum); void collect_results(cl::CommandQueue* queue); void gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event); void cpu_compute(); @@ -26,8 +29,11 @@ private: std::vector seedBuffer; std::string kernel_name; size_t randomStateSize; + cl::Buffer clHistoBuffer; + size_t bucketNum; + std::vector histoBuffer; public: - LFG(size_t _randomNumbers, size_t _threadCount, size_t _randomStateSize); + LFG(size_t _randomNumbers, size_t _threadCount, size_t _randomStateSize, size_t _bucketNum); void collect_results(cl::CommandQueue* queue); void gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event); void cpu_compute(); @@ -42,8 +48,11 @@ private: std::vector randomBuffer; std::vector seedBuffer; std::string kernel_name; + cl::Buffer clHistoBuffer; + size_t bucketNum; + std::vector histoBuffer; public: - CTG(size_t _randomNumbers, size_t _threadCount); + CTG(size_t _randomNumbers, size_t _threadCount, size_t _bucketNum); void collect_results(cl::CommandQueue* queue); void gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event); void cpu_compute(); @@ -58,8 +67,11 @@ private: std::vector randomBuffer; std::vector seedBuffer; std::string kernel_name; + cl::Buffer clHistoBuffer; + size_t bucketNum; + std::vector histoBuffer; public: - Hybrid(size_t _randomNumbers, size_t _threadCount); + Hybrid(size_t _randomNumbers, size_t _threadCount, size_t _bucketNum); void collect_results(cl::CommandQueue* queue); void gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event); void cpu_compute(); @@ -74,8 +86,11 @@ private: size_t base; std::vector randomBuffer; std::string kernel_name; + cl::Buffer clHistoBuffer; + size_t bucketNum; + std::vector histoBuffer; public: - Halton(size_t _randomNumbers, size_t _threadCount, size_t _base); + Halton(size_t _randomNumbers, size_t _threadCount, size_t _base, size_t _bucketNum); void collect_results(cl::CommandQueue* queue); void gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event); void cpu_compute(); diff --git a/kernels/montecarlo.cl b/kernels/montecarlo.cl index 194fc33..fec503b 100644 --- a/kernels/montecarlo.cl +++ b/kernels/montecarlo.cl @@ -134,7 +134,20 @@ void haltonSequence(const int randomNumbers, const int base, __global float* ran // buckets = array of histogram buckets __kernel void testUniform1D(const int randomNums, __global float* randoms, const int bucketNum, __global int* buckets){ - + int id = get_global_id(0); // ID + int threadCount = get_global_size(0); // threadCount + + float bucketWidth = 1.0 / bucketNum; + + int target = 0; + + for (size_t i = id; i < randomNums; i += threadCount) { + if (i < randomNums) { + // random numbers are between 0 and 1 + target = (int)(randoms[i] / bucketWidth); + atomic_add(&buckets[target], 1); + } + } } // 1D Monte-Carlo integral