diff --git a/Primitives/Primitives.cpp b/Primitives/Primitives.cpp index 10abf0e..e03dd26 100644 --- a/Primitives/Primitives.cpp +++ b/Primitives/Primitives.cpp @@ -52,6 +52,10 @@ void capi() } } +void add_tests(std::vector* tests) { + +} + int main() { @@ -60,17 +64,27 @@ int main() std::vector tests; Square s; - Histogram h(false, 32, 4096); + Histogram h1(false, 32, 4096); + Histogram h2(true, 32, 4096); ReduceAdd r1(handler.get_max_size()); - ExclusiveScan e(8); + ExclusiveScan e(512); + Compact c(1024); - //handler.run_test(&s); - //handler.run_test(&h); - if (handler.run_test(&e)) { - std::cout << "Success" << std::endl; - } - else { - std::cout << "Failure" << std::endl; + tests.push_back(&s); + tests.push_back(&h1); + tests.push_back(&h2); + tests.push_back(&r1); + tests.push_back(&e); + tests.push_back(&c); + + + for (size_t i = 0; i < tests.size(); i++) { + if (handler.run_test(tests[i])) { + std::cout << tests[i]->description() << " - Success" << std::endl; + } + else { + std::cout << tests[i]->description() << " - Failure" << std::endl; + } } return 0; } diff --git a/Primitives/Primitives.vcxproj b/Primitives/Primitives.vcxproj index 66a0919..75d50fb 100644 --- a/Primitives/Primitives.vcxproj +++ b/Primitives/Primitives.vcxproj @@ -80,6 +80,7 @@ + diff --git a/Primitives/Primitives.vcxproj.filters b/Primitives/Primitives.vcxproj.filters index 0d7c192..c74aaae 100644 --- a/Primitives/Primitives.vcxproj.filters +++ b/Primitives/Primitives.vcxproj.filters @@ -53,6 +53,9 @@ Source Files + + Source Files + diff --git a/Primitives/primitives/Compact.cpp b/Primitives/primitives/Compact.cpp new file mode 100644 index 0000000..1a22893 --- /dev/null +++ b/Primitives/primitives/Compact.cpp @@ -0,0 +1,107 @@ +#include +#include "../Common.h" +#include "Tests.h" +#include + +Compact::Compact(size_t _data_size) +{ + data_size = _data_size; + limit = 50; + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution distr(0, 100); + for (size_t index = 0; index < data_size; ++index) { + int val = distr(gen); + sourceData.push_back(val); + //gpuResult.push_back(val); + } + gpuResult.resize(data_size, 0); +} + +void Compact::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event) +{ + cl_int err = CL_SUCCESS; + cl::Event inner_event; + cl::Kernel kernel_predicate = cl::Kernel(*program, "compact_predicate", &err); + CheckCLError(err); + cl::Kernel kernel_exscan = cl::Kernel(*program, "compact_exscan", &err); + CheckCLError(err); + cl::Kernel kernel_compact = cl::Kernel(*program, "compact_compact", &err); + CheckCLError(err); + + clInputBuffer = cl::Buffer(*context, CL_MEM_READ_ONLY, sizeof(int) * data_size, NULL, &err); + clResultBuffer = cl::Buffer(*context, CL_MEM_READ_ONLY, sizeof(int) * data_size, NULL, &err); + + cl::Buffer pred(*context, CL_MEM_READ_ONLY, sizeof(int) * data_size, NULL, &err); + cl::Buffer prefSum(*context, CL_MEM_READ_ONLY, sizeof(int) * data_size, NULL, &err); + + queue->enqueueWriteBuffer(clInputBuffer, true, 0, sizeof(int) * data_size, sourceData.data()); + + kernel_predicate.setArg(0, clInputBuffer); + kernel_predicate.setArg(1, pred); + + queue->enqueueNDRangeKernel(kernel_predicate, + cl::NullRange, // Indexek nem eloffszetelve + cl::NDRange(data_size, 1), // Minden elemet egy szál + cl::NullRange, // Workgroup méret? - ez az auto, ha nem indul, 1024-re, onnan csökkent, amig elindul + NULL, // + &inner_event); + inner_event.wait(); + + kernel_exscan.setArg(0, pred); + kernel_exscan.setArg(1, prefSum); + queue->enqueueNDRangeKernel(kernel_exscan, + cl::NullRange, // Indexek nem eloffszetelve + cl::NDRange(data_size, 1), // Minden elemet egy szál + cl::NullRange, // Workgroup méret? - ez az auto, ha nem indul, 1024-re, onnan csökkent, amig elindul + NULL, // + &inner_event); + inner_event.wait(); + + queue->enqueueReadBuffer(prefSum, true, 0, sizeof(int), &result_size); + + kernel_compact.setArg(0, clInputBuffer); + kernel_compact.setArg(1, pred); + kernel_compact.setArg(2, prefSum); + queue->enqueueNDRangeKernel(kernel_compact, + cl::NullRange, // Indexek nem eloffszetelve + cl::NDRange(data_size, 1), // Minden elemet egy szál + cl::NullRange, // Workgroup méret? - ez az auto, ha nem indul, 1024-re, onnan csökkent, amig elindul + NULL, // + Event); + + gpuResult.resize(result_size, 0); + +} + +void Compact::cpu_compute() +{ + for (size_t i = 0; i < data_size; i++) { + if (sourceData[i] < limit) { + cpuResult.push_back(sourceData[i]); + } + } +} + +void Compact::collect_results(cl::CommandQueue* queue) +{ + queue->enqueueReadBuffer(clInputBuffer, true, 0, sizeof(int) * result_size, gpuResult.data()); +} + +bool Compact::validate_results() +{ + bool success = true; + for (size_t index = 0; index < result_size; index++) { + if (cpuResult[index] != gpuResult[index]) { + std::cout << "Wrong result at [" << index << "]: " << gpuResult[index] << "!=" << cpuResult[index] << std::endl; + success = false; + } + } + return success; +} + +std::string Compact::description() +{ + return std::string("Compact (data_size=" + std::to_string(data_size) + ")"); +} diff --git a/Primitives/primitives/ExclusiveScan.cpp b/Primitives/primitives/ExclusiveScan.cpp index 121d617..f54e6d1 100644 --- a/Primitives/primitives/ExclusiveScan.cpp +++ b/Primitives/primitives/ExclusiveScan.cpp @@ -5,9 +5,9 @@ ExclusiveScan::ExclusiveScan(size_t max_size) { + // Must be power of 2, breaks above 512 data_size = max_size; - std::random_device rd; std::mt19937 gen(rd()); std::uniform_int_distribution distr(0, 100); @@ -64,4 +64,9 @@ bool ExclusiveScan::validate_results() } } return success; -} \ No newline at end of file +} + +std::string ExclusiveScan::description() +{ + return std::string("ExclusiveScan (data_size=" + std::to_string(data_size) + ")"); +} diff --git a/Primitives/primitives/Histogram.cpp b/Primitives/primitives/Histogram.cpp index e79fa7a..5ae9553 100644 --- a/Primitives/primitives/Histogram.cpp +++ b/Primitives/primitives/Histogram.cpp @@ -80,11 +80,17 @@ bool Histogram::validate_results() return false; } } + return true; +} + +std::string Histogram::description() +{ + std::string type; if (global) { - std::cout << "Test \"Histogram - global\" completed, set: " << valueSet << ", size: " << data_size << std::endl; - } + type = "gobal"; + } else { - std::cout << "Test \"Histogram - local\" completed, set: " << valueSet << ", size: " << data_size << std::endl; + type = "local"; } - return true; -} \ No newline at end of file + return std::string("Histogram (type=" + type +",data_size=" + std::to_string(data_size) + ",valueSet=" + std::to_string(valueSet) + ")"); +} diff --git a/Primitives/primitives/ReduceAdd.cpp b/Primitives/primitives/ReduceAdd.cpp index 30ea13b..50e35f1 100644 --- a/Primitives/primitives/ReduceAdd.cpp +++ b/Primitives/primitives/ReduceAdd.cpp @@ -47,7 +47,6 @@ void ReduceAdd::cpu_compute() for (size_t index = 0; index < data_size; ++index) { cpuResult += sourceData[index]; } - std::cout << "CPU result is " << std::setprecision(12) << cpuResult << std::endl; } void ReduceAdd::collect_results(cl::CommandQueue* queue) @@ -57,5 +56,17 @@ void ReduceAdd::collect_results(cl::CommandQueue* queue) bool ReduceAdd::validate_results() { - return abs(cpuResult - gpuResult[0]) < 0.3f; -} \ No newline at end of file + float diff = abs(cpuResult - gpuResult[0]); + if (diff < 0.3f) { + return true; + } + else + { + std::cout << "Wrong result: " << cpuResult << "!=" << gpuResult[0] << ", diff is " < gpuResult; + std::vector sourceData; + std::vector cpuResult; + int limit; + int result_size; +public: + Compact(size_t _data_size); + void gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event); + void cpu_compute(); + void collect_results(cl::CommandQueue* queue); + bool validate_results(); + std::string description(); }; \ No newline at end of file diff --git a/kernels/programs.cl b/kernels/programs.cl index aa0987e..d2a2385 100644 --- a/kernels/programs.cl +++ b/kernels/programs.cl @@ -128,7 +128,13 @@ void exscan_global(__global int* data) __kernel void compact_predicate(__global int* data, __global int* pred) { - + int id = get_global_id(0); + if (data[id] < 50) { + pred[id] = 1; + } + else { + pred[id] = 0; + } } // TODO @@ -137,7 +143,29 @@ void compact_predicate(__global int* data, __global int* pred) __kernel void compact_exscan(__global int* pred, __global int* prefSum) { + int id = get_global_id(0); + int size = get_global_size(0); + int val; + if (id > 0) { + val = pred[id - 1]; + } + else { + val = 0; + } + barrier(CLK_LOCAL_MEM_FENCE); + prefSum[id] = val; + + + for (int s = 1; s < size; s *= 2) { + int tmp = prefSum[id]; + if (id + s < size) { + prefSum[id + s] += prefSum[id]; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + // First element of array will store length + if (id == 0) prefSum[0] = prefSum[size-1]; } // TODO @@ -150,5 +178,11 @@ void compact_exscan(__global int* pred, __global int* prefSum) __kernel void compact_compact(__global int* data, __global int* pred, __global int* prefSum) { - + int id = get_global_id(0); + int tmp = data[id]; + barrier(CLK_LOCAL_MEM_FENCE); + if (pred[id] == 1) { + // IF id == 0, fill data[0], see line #168 + data[!!id * prefSum[id]] = tmp; + } } \ No newline at end of file