10 changed files with 229 additions and 23 deletions
@ -0,0 +1,107 @@ |
|||
#include <iostream> |
|||
#include "../Common.h" |
|||
#include "Tests.h" |
|||
#include <random> |
|||
|
|||
/// Builds a compaction test case over `_data_size` random integers.
///
/// The input values are drawn uniformly from the closed range [0, 100] using a
/// Mersenne Twister seeded once from std::random_device, so every run sees
/// different data. `limit` is fixed at 50; cpu_compute() keeps the elements
/// that are strictly below it.
///
/// @param _data_size Number of input elements to generate.
Compact::Compact(size_t _data_size)
{
    limit = 50;
    data_size = _data_size;

    std::random_device seed_source;
    std::mt19937 engine(seed_source());
    std::uniform_int_distribution<int> value_range(0, 100);

    // Append exactly data_size samples, one draw per element.
    for (size_t remaining = data_size; remaining != 0; --remaining) {
        sourceData.push_back(value_range(engine));
    }

    // The GPU output starts out zero-filled and as large as the input.
    gpuResult.resize(data_size, 0);
}
|||
|
|||
/// Runs the device side of the compaction: predicate -> scan -> compact.
///
/// Three kernels are looked up by name in `program` ("compact_predicate",
/// "compact_exscan", "compact_compact") and launched in that order with one
/// work-item per input element. The first two launches are waited for inside
/// this function; the last one is only enqueued and reports completion through
/// `Event`, so the caller is responsible for waiting on it.
///
/// Every OpenCL status code is passed to CheckCLError.
///
/// @param context Context in which the device buffers are allocated.
/// @param queue   Command queue receiving the transfers and kernel launches.
/// @param program Built program that contains the three kernels named above.
/// @param Event   Event attached to the final ("compact_compact") launch.
void Compact::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event)
{
    cl_int err = CL_SUCCESS;
    cl::Event inner_event;

    cl::Kernel kernel_predicate = cl::Kernel(*program, "compact_predicate", &err);
    CheckCLError(err);
    cl::Kernel kernel_exscan = cl::Kernel(*program, "compact_exscan", &err);
    CheckCLError(err);
    cl::Kernel kernel_compact = cl::Kernel(*program, "compact_compact", &err);
    CheckCLError(err);

    const size_t buffer_bytes = sizeof(int) * data_size;

    // CL_MEM_READ_ONLY forbids kernel writes, so buffers that a kernel is
    // expected to fill must be READ_WRITE.
    // NOTE(review): clInputBuffer is what collect_results() reads back, so the
    // compact kernel presumably writes its output there - confirm against the
    // kernel source.
    clInputBuffer = cl::Buffer(*context, CL_MEM_READ_WRITE, buffer_bytes, nullptr, &err);
    CheckCLError(err);
    // Not bound to any kernel in this function.
    clResultBuffer = cl::Buffer(*context, CL_MEM_READ_WRITE, buffer_bytes, nullptr, &err);
    CheckCLError(err);

    cl::Buffer pred(*context, CL_MEM_READ_WRITE, buffer_bytes, nullptr, &err);
    CheckCLError(err);
    cl::Buffer prefSum(*context, CL_MEM_READ_WRITE, buffer_bytes, nullptr, &err);
    CheckCLError(err);

    // Blocking upload of the host input.
    err = queue->enqueueWriteBuffer(clInputBuffer, CL_TRUE, 0, buffer_bytes, sourceData.data());
    CheckCLError(err);

    // Shared launch geometry: no index offset, one work-item per element, and
    // the work-group size left to the runtime (cl::NullRange). If a launch is
    // rejected, try an explicit size starting at 1024 and decrease it until the
    // launch succeeds.
    const cl::NDRange global_range(data_size, 1);

    // Stage 1: predicate.
    err = kernel_predicate.setArg(0, clInputBuffer);
    CheckCLError(err);
    err = kernel_predicate.setArg(1, pred);
    CheckCLError(err);
    err = queue->enqueueNDRangeKernel(kernel_predicate, cl::NullRange, global_range, cl::NullRange, nullptr, &inner_event);
    CheckCLError(err);
    err = inner_event.wait();
    CheckCLError(err);

    // Stage 2: scan of the predicate buffer.
    err = kernel_exscan.setArg(0, pred);
    CheckCLError(err);
    err = kernel_exscan.setArg(1, prefSum);
    CheckCLError(err);
    err = queue->enqueueNDRangeKernel(kernel_exscan, cl::NullRange, global_range, cl::NullRange, nullptr, &inner_event);
    CheckCLError(err);
    err = inner_event.wait();
    CheckCLError(err);

    // Read the element count into a correctly sized temporary first, so that
    // result_size is fully overwritten whatever its declared width is.
    // NOTE(review): this takes the first element of prefSum as the count;
    // confirm that compact_exscan stores the number of kept elements there.
    cl_int kept_count = 0;
    err = queue->enqueueReadBuffer(prefSum, CL_TRUE, 0, sizeof(cl_int), &kept_count);
    CheckCLError(err);
    result_size = static_cast<decltype(result_size)>(kept_count);

    // Stage 3: compaction. Completion is signalled through the caller's event.
    err = kernel_compact.setArg(0, clInputBuffer);
    CheckCLError(err);
    err = kernel_compact.setArg(1, pred);
    CheckCLError(err);
    err = kernel_compact.setArg(2, prefSum);
    CheckCLError(err);
    err = queue->enqueueNDRangeKernel(kernel_compact, cl::NullRange, global_range, cl::NullRange, nullptr, Event);
    CheckCLError(err);

    // Size the host-side result to match what collect_results() will read.
    gpuResult.resize(result_size, 0);
}
|||
|
|||
void Compact::cpu_compute() |
|||
{ |
|||
for (size_t i = 0; i < data_size; i++) { |
|||
if (sourceData[i] < limit) { |
|||
cpuResult.push_back(sourceData[i]); |
|||
} |
|||
} |
|||
} |
|||
|
|||
/// Copies the first result_size integers of the device buffer into gpuResult.
///
/// The read is blocking. gpuResult is resized to result_size beforehand so the
/// destination is always large enough, and nothing is enqueued when there are
/// no elements to fetch (a zero-byte read may be rejected by the runtime).
///
/// NOTE(review): the data is read from clInputBuffer, not clResultBuffer -
/// confirm that this is where the compact kernel leaves its output.
///
/// @param queue Command queue used for the read-back.
void Compact::collect_results(cl::CommandQueue* queue)
{
    const size_t element_count = static_cast<size_t>(result_size);
    gpuResult.resize(element_count);
    if (element_count == 0) {
        return;
    }

    cl_int err = queue->enqueueReadBuffer(clInputBuffer, CL_TRUE, 0, sizeof(int) * element_count, gpuResult.data());
    CheckCLError(err);
}
|||
|
|||
bool Compact::validate_results() |
|||
{ |
|||
bool success = true; |
|||
for (size_t index = 0; index < result_size; index++) { |
|||
if (cpuResult[index] != gpuResult[index]) { |
|||
std::cout << "Wrong result at [" << index << "]: " << gpuResult[index] << "!=" << cpuResult[index] << std::endl; |
|||
success = false; |
|||
} |
|||
} |
|||
return success; |
|||
} |
|||
|
|||
std::string Compact::description() |
|||
{ |
|||
return std::string("Compact (data_size=" + std::to_string(data_size) + ")"); |
|||
} |
Loading…
Reference in new issue