diff --git a/Primitives/Primitives.cpp b/Primitives/Primitives.cpp
index 6879d95..10abf0e 100644
--- a/Primitives/Primitives.cpp
+++ b/Primitives/Primitives.cpp
@@ -62,10 +62,11 @@ int main()
Square s;
Histogram h(false, 32, 4096);
ReduceAdd r1(handler.get_max_size());
+ ExclusiveScan e(8);
//handler.run_test(&s);
//handler.run_test(&h);
- if (handler.run_test(&r1)) {
+ if (handler.run_test(&e)) {
std::cout << "Success" << std::endl;
}
else {
diff --git a/Primitives/Primitives.vcxproj b/Primitives/Primitives.vcxproj
index 5bf5df6..66a0919 100644
--- a/Primitives/Primitives.vcxproj
+++ b/Primitives/Primitives.vcxproj
@@ -80,6 +80,7 @@
+
diff --git a/Primitives/Primitives.vcxproj.filters b/Primitives/Primitives.vcxproj.filters
index 27fc81f..0d7c192 100644
--- a/Primitives/Primitives.vcxproj.filters
+++ b/Primitives/Primitives.vcxproj.filters
@@ -50,6 +50,9 @@
Source Files
+
+ Source Files
+
diff --git a/Primitives/primitives/ExclusiveScan.cpp b/Primitives/primitives/ExclusiveScan.cpp
new file mode 100644
index 0000000..121d617
--- /dev/null
+++ b/Primitives/primitives/ExclusiveScan.cpp
@@ -0,0 +1,67 @@
+#include
+#include "../Common.h"
+#include "Tests.h"
+#include
+
+ExclusiveScan::ExclusiveScan(size_t max_size)
+{
+ data_size = max_size;
+
+
+ std::random_device rd;
+ std::mt19937 gen(rd());
+ std::uniform_int_distribution distr(0, 100);
+ for (size_t index = 0; index < data_size; ++index) {
+ int val = distr(gen);
+ sourceData.push_back(val);
+ gpuResult.push_back(val);
+ }
+}
+
+void ExclusiveScan::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event)
+{
+ cl_int err = CL_SUCCESS;
+ cl::Kernel kernel = cl::Kernel(*program, "exscan_global", &err);
+ CheckCLError(err);
+
+ clInputBuffer = cl::Buffer(*context, CL_MEM_READ_ONLY, sizeof(int) * data_size, NULL, &err);
+ CheckCLError(err);
+ queue->enqueueWriteBuffer(clInputBuffer, true, 0, sizeof(int) * data_size, gpuResult.data());
+
+ kernel.setArg(0, clInputBuffer);
+
+ queue->enqueueNDRangeKernel(kernel,
+ cl::NullRange, // Indexek nem eloffszetelve
+ cl::NDRange(data_size, 1), // Minden elemet egy szál
+ cl::NDRange(data_size, 1), // Workgroup méret? - ez az auto, ha nem indul, 1024-re, onnan csökkent, amig elindul
+ NULL, //
+ Event);
+}
+
+void ExclusiveScan::cpu_compute()
+{
+ for (size_t i = 0; i < data_size; i++) {
+ int tmp = 0;
+ for (size_t j = 0; j < i; j++) {
+ tmp += sourceData[j];
+ }
+ cpuResult.push_back(tmp);
+ }
+}
+
+void ExclusiveScan::collect_results(cl::CommandQueue* queue)
+{
+ queue->enqueueReadBuffer(clInputBuffer, true, 0, sizeof(int) * data_size, gpuResult.data());
+}
+
+bool ExclusiveScan::validate_results()
+{
+ bool success = true;
+ for (size_t index = 0; index < data_size; index++) {
+ if (cpuResult[index] != gpuResult[index]) {
+ std::cout << "Wrong result at [" << index << "]: " << gpuResult[index] << "!=" << cpuResult[index] << std::endl;
+ success = false;
+ }
+ }
+ return success;
+}
\ No newline at end of file
diff --git a/Primitives/primitives/ReduceAdd.cpp b/Primitives/primitives/ReduceAdd.cpp
index b5805cf..30ea13b 100644
--- a/Primitives/primitives/ReduceAdd.cpp
+++ b/Primitives/primitives/ReduceAdd.cpp
@@ -53,7 +53,6 @@ void ReduceAdd::cpu_compute()
void ReduceAdd::collect_results(cl::CommandQueue* queue)
{
queue->enqueueReadBuffer(clInputBuffer, true, 0, sizeof(int) * data_size, gpuResult.data());
- std::cout << "Results grabbed" << std::endl;
}
bool ReduceAdd::validate_results()
diff --git a/Primitives/primitives/Tests.h b/Primitives/primitives/Tests.h
index d3898ad..55c7e1f 100644
--- a/Primitives/primitives/Tests.h
+++ b/Primitives/primitives/Tests.h
@@ -58,4 +58,17 @@ public:
void gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event);
void cpu_compute();
bool validate_results();
+};
+
+class ExclusiveScan : public TestCase {
+private:
+ std::vector gpuResult;
+ std::vector sourceData;
+ std::vector cpuResult;
+public:
+ ExclusiveScan(size_t max_size);
+ void collect_results(cl::CommandQueue* queue);
+ void gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event);
+ void cpu_compute();
+ bool validate_results();
};
\ No newline at end of file
diff --git a/kernels/programs.cl b/kernels/programs.cl
index 16a8267..aa0987e 100644
--- a/kernels/programs.cl
+++ b/kernels/programs.cl
@@ -96,14 +96,27 @@ __kernel
void exscan_global(__global int* data)
{
int id = get_global_id(0);
+ int size = get_global_size(0);
+
+ int val;
if (id > 0) {
- data[id] = data[ID - 1];
+ val = data[id - 1];
}
else {
- data[id] = 0;
+ val = 0;
}
- barrier(CLK_GLOBAL_MEM_FENCE);
+ barrier(CLK_LOCAL_MEM_FENCE);
+ data[id] = val;
+
+ for (int s = 1; s < size; s *= 2) {
+ int tmp = data[id];
+ if (id + s < size) {
+ data[id + s] += data[id];
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+ }
+ if (id == 0) data[id] = 0;
}
// TODO