diff --git a/Primitives/Primitives.cpp b/Primitives/Primitives.cpp index 6879d95..10abf0e 100644 --- a/Primitives/Primitives.cpp +++ b/Primitives/Primitives.cpp @@ -62,10 +62,11 @@ int main() Square s; Histogram h(false, 32, 4096); ReduceAdd r1(handler.get_max_size()); + ExclusiveScan e(8); //handler.run_test(&s); //handler.run_test(&h); - if (handler.run_test(&r1)) { + if (handler.run_test(&e)) { std::cout << "Success" << std::endl; } else { diff --git a/Primitives/Primitives.vcxproj b/Primitives/Primitives.vcxproj index 5bf5df6..66a0919 100644 --- a/Primitives/Primitives.vcxproj +++ b/Primitives/Primitives.vcxproj @@ -80,6 +80,7 @@ + diff --git a/Primitives/Primitives.vcxproj.filters b/Primitives/Primitives.vcxproj.filters index 27fc81f..0d7c192 100644 --- a/Primitives/Primitives.vcxproj.filters +++ b/Primitives/Primitives.vcxproj.filters @@ -50,6 +50,9 @@ Source Files + + Source Files + diff --git a/Primitives/primitives/ExclusiveScan.cpp b/Primitives/primitives/ExclusiveScan.cpp new file mode 100644 index 0000000..121d617 --- /dev/null +++ b/Primitives/primitives/ExclusiveScan.cpp @@ -0,0 +1,67 @@ +#include +#include "../Common.h" +#include "Tests.h" +#include + +ExclusiveScan::ExclusiveScan(size_t max_size) +{ + data_size = max_size; + + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution distr(0, 100); + for (size_t index = 0; index < data_size; ++index) { + int val = distr(gen); + sourceData.push_back(val); + gpuResult.push_back(val); + } +} + +void ExclusiveScan::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event) +{ + cl_int err = CL_SUCCESS; + cl::Kernel kernel = cl::Kernel(*program, "exscan_global", &err); + CheckCLError(err); + + clInputBuffer = cl::Buffer(*context, CL_MEM_READ_ONLY, sizeof(int) * data_size, NULL, &err); + CheckCLError(err); + queue->enqueueWriteBuffer(clInputBuffer, true, 0, sizeof(int) * data_size, gpuResult.data()); + + kernel.setArg(0, clInputBuffer); + + queue->enqueueNDRangeKernel(kernel, + cl::NullRange, // Indexek nem eloffszetelve + cl::NDRange(data_size, 1), // Minden elemet egy szál + cl::NDRange(data_size, 1), // Workgroup méret? - ez az auto, ha nem indul, 1024-re, onnan csökkent, amig elindul + NULL, // + Event); +} + +void ExclusiveScan::cpu_compute() +{ + for (size_t i = 0; i < data_size; i++) { + int tmp = 0; + for (size_t j = 0; j < i; j++) { + tmp += sourceData[j]; + } + cpuResult.push_back(tmp); + } +} + +void ExclusiveScan::collect_results(cl::CommandQueue* queue) +{ + queue->enqueueReadBuffer(clInputBuffer, true, 0, sizeof(int) * data_size, gpuResult.data()); +} + +bool ExclusiveScan::validate_results() +{ + bool success = true; + for (size_t index = 0; index < data_size; index++) { + if (cpuResult[index] != gpuResult[index]) { + std::cout << "Wrong result at [" << index << "]: " << gpuResult[index] << "!=" << cpuResult[index] << std::endl; + success = false; + } + } + return success; +} \ No newline at end of file diff --git a/Primitives/primitives/ReduceAdd.cpp b/Primitives/primitives/ReduceAdd.cpp index b5805cf..30ea13b 100644 --- a/Primitives/primitives/ReduceAdd.cpp +++ b/Primitives/primitives/ReduceAdd.cpp @@ -53,7 +53,6 @@ void ReduceAdd::cpu_compute() void ReduceAdd::collect_results(cl::CommandQueue* queue) { queue->enqueueReadBuffer(clInputBuffer, true, 0, sizeof(int) * data_size, gpuResult.data()); - std::cout << "Results grabbed" << std::endl; } bool ReduceAdd::validate_results() diff --git a/Primitives/primitives/Tests.h b/Primitives/primitives/Tests.h index d3898ad..55c7e1f 100644 --- a/Primitives/primitives/Tests.h +++ b/Primitives/primitives/Tests.h @@ -58,4 +58,17 @@ public: void gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event); void cpu_compute(); bool validate_results(); +}; + +class ExclusiveScan : public TestCase { +private: + std::vector gpuResult; + std::vector sourceData; + std::vector cpuResult; +public: + ExclusiveScan(size_t max_size); + void collect_results(cl::CommandQueue* queue); + void gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event); + void cpu_compute(); + bool validate_results(); }; \ No newline at end of file diff --git a/kernels/programs.cl b/kernels/programs.cl index 16a8267..aa0987e 100644 --- a/kernels/programs.cl +++ b/kernels/programs.cl @@ -96,14 +96,27 @@ __kernel void exscan_global(__global int* data) { int id = get_global_id(0); + int size = get_global_size(0); + + int val; if (id > 0) { - data[id] = data[ID - 1]; + val = data[id - 1]; } else { - data[id] = 0; + val = 0; } - barrier(CLK_GLOBAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); + data[id] = val; + + for (int s = 1; s < size; s *= 2) { + int tmp = data[id]; + if (id + s < size) { + data[id + s] += data[id]; + } + barrier(CLK_LOCAL_MEM_FENCE); + } + if (id == 0) data[id] = 0; } // TODO