10 changed files with 299 additions and 62 deletions
@ -0,0 +1,90 @@ |
|||||
|
#include <iostream> |
||||
|
#include "../Common.h" |
||||
|
#include "Tests.h" |
||||
|
#include <random> |
||||
|
|
||||
|
// Builds a histogram test case.
//
// Stores the configuration, fills sourceData with `_data_size` uniformly
// distributed random integers in [0, _valueSet - 1] and sizes both result
// histograms (cpuResult, gpuResult) to `_valueSet` zero-initialized bins.
//
// _global     stored in `global`; selects which variant of the test is run.
// _valueSet   number of histogram bins (and of distinct sample values).
// _data_size  number of random samples to generate.
//
// A non-positive bin count or a negative sample count cannot describe a
// histogram. Such a configuration is reported on std::cerr and degraded to an
// empty data set instead of being used as-is.
Histogram::Histogram(bool _global, int _valueSet, int _data_size)
{
    global = _global;

    // Normalize the sizes before anything depends on them: the distribution
    // below requires min <= max (so at least one bin), and the loops in this
    // class compare these values against size_t counters, where a negative
    // int would turn into an enormous unsigned bound.
    const int binCount = _valueSet > 0 ? _valueSet : 0;
    const int sampleCount = (binCount > 0 && _data_size > 0) ? _data_size : 0;
    if (binCount != _valueSet || sampleCount != _data_size) {
        std::cerr << "Histogram: invalid configuration (set: " << _valueSet
                  << ", size: " << _data_size << "), using set: " << binCount
                  << ", size: " << sampleCount << std::endl;
    }
    valueSet = binCount;
    data_size = sampleCount;

    if (sampleCount > 0) {
        // Non-deterministic seed: every run tests a different data set.
        std::random_device rd;
        std::mt19937 gen(rd());
        std::uniform_int_distribution<int> distr(0, binCount - 1);

        // One allocation up front instead of repeated growth.
        sourceData.reserve(static_cast<size_t>(sampleCount));
        for (int index = 0; index < sampleCount; ++index) {
            sourceData.push_back(distr(gen));
        }
    }

    cpuResult.resize(static_cast<size_t>(binCount), 0);
    gpuResult.resize(static_cast<size_t>(binCount), 0);
}
||||
|
|
||||
|
// Copies the histogram computed on the device back into gpuResult.
//
// queue  command queue used to enqueue the read.
//
// The read is blocking, so gpuResult holds the device data when this returns.
// The status code of the read is passed to CheckCLError instead of being
// dropped, so a failed transfer is not silently validated against stale data.
void Histogram::collect_results(cl::CommandQueue* queue)
{
    cl_int err = queue->enqueueReadBuffer(clResultBuffer,
                                          CL_TRUE, // Blocking!
                                          0, sizeof(int) * valueSet, gpuResult.data());
    CheckCLError(err);
}
||||
|
|
||||
|
// Uploads the input samples and launches the histogram kernel.
//
// context  OpenCL context the device buffers are created in.
// queue    command queue used for the upload and the kernel launch.
// program  built program containing "histogram_global" and "histogram_local".
// Event    receives the event of the kernel launch.
//
// Every OpenCL status code is passed to CheckCLError right after the call that
// produced it, so a failure is attributed to the correct step.
void Histogram::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event)
{
    cl_int err = CL_SUCCESS;

    // Get the kernel handle
    const char* kernelName = global ? "histogram_global" : "histogram_local";
    cl::Kernel kernel(*program, kernelName, &err);
    CheckCLError(err);

    const size_t inputBytes = sizeof(int) * data_size;
    const size_t resultBytes = sizeof(int) * valueSet;

    // Allocate and upload the input data
    clInputBuffer = cl::Buffer(*context, CL_MEM_READ_ONLY, inputBytes, nullptr, &err);
    CheckCLError(err); // check the allocation before the buffer is used
    err = queue->enqueueWriteBuffer(clInputBuffer,
                                    CL_TRUE, // Blocking!
                                    0, inputBytes, sourceData.data());
    CheckCLError(err);

    // Allocate the output data
    // NOTE(review): this buffer is write-only and is not initialized from the
    // host here. If the kernels accumulate into it (e.g. atomic increments) it
    // probably needs CL_MEM_READ_WRITE and explicit zeroing - confirm against
    // the kernel source.
    clResultBuffer = cl::Buffer(*context, CL_MEM_WRITE_ONLY, resultBytes, nullptr, &err);
    CheckCLError(err);

    // Set the kernel parameters, in the order of the kernel function's arguments
    err = kernel.setArg(0, clInputBuffer);
    CheckCLError(err);
    err = kernel.setArg(1, clResultBuffer);
    CheckCLError(err);
    if (!global) {
        // Size-only argument with a null pointer: one int per bin.
        err = kernel.setArg(2, resultBytes, nullptr);
        CheckCLError(err);
        err = kernel.setArg(3, valueSet);
        CheckCLError(err);
    }

    // Enqueue the kernel
    err = queue->enqueueNDRangeKernel(kernel,
                                      cl::NullRange,             // Indices are not offset
                                      cl::NDRange(data_size, 1), // One thread per element
                                      cl::NullRange,             // Workgroup size: automatic. If the launch fails, set it to 1024 and decrease until it starts
                                      nullptr,                   // No events to wait for
                                      Event);
    CheckCLError(err);
}
||||
|
|
||||
|
void Histogram::cpu_compute() |
||||
|
{ |
||||
|
|
||||
|
for (size_t index = 0; index < data_size; ++index) { |
||||
|
cpuResult[sourceData[index]] = cpuResult[sourceData[index]] + 1; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
bool Histogram::validate_results() |
||||
|
{ |
||||
|
for (size_t index = 0; index < valueSet; index++) { |
||||
|
if (cpuResult[index] != gpuResult[index]) { |
||||
|
std::cout << "Wrong result at [" << index << "]: " << gpuResult[index] << "!=" << cpuResult[index] << std::endl; |
||||
|
return false; |
||||
|
} |
||||
|
} |
||||
|
if (global) { |
||||
|
std::cout << "Test \"Histogram - global\" completed, set: " << valueSet << ", size: " << data_size << std::endl; |
||||
|
} |
||||
|
else { |
||||
|
std::cout << "Test \"Histogram - local\" completed, set: " << valueSet << ", size: " << data_size << std::endl; |
||||
|
} |
||||
|
return true; |
||||
|
} |
@ -0,0 +1,62 @@ |
|||||
|
#include <iostream> |
||||
|
#include <iomanip> |
||||
|
#include "../Common.h" |
||||
|
#include "Tests.h" |
||||
|
#include <random> |
||||
|
#include <stdlib.h> |
||||
|
|
||||
|
// Builds a sum-reduction test case of `max_size` elements.
//
// Resets the CPU sum to zero and generates `max_size` random floats drawn
// uniformly between 0 and 1000. Each value is appended to both sourceData
// and gpuResult, so the two containers start with identical contents.
ReduceAdd::ReduceAdd(size_t max_size)
{
    cpuResult = 0.0f;
    data_size = max_size;

    // Randomly seeded generator: the data set differs from run to run.
    std::random_device seedSource;
    std::mt19937 engine(seedSource());
    const float upperBound = 1000.0f;
    std::uniform_real_distribution<float> sampler(0.0f, upperBound);

    size_t produced = 0;
    while (produced < data_size)
    {
        const float sample = sampler(engine);
        sourceData.push_back(sample);
        gpuResult.push_back(sample);
        ++produced;
    }
}
||||
|
|
||||
|
// Uploads the data and launches the "reduce_global" kernel on it.
//
// context  OpenCL context the device buffer is created in.
// queue    command queue used for the upload and the kernel launch.
// program  built program containing the "reduce_global" kernel.
// Event    receives the event of the kernel launch.
//
// The buffer is the kernel's only argument and collect_results() reads the
// result back from it, so the kernel has to write into it. It is therefore
// created read-write: writing to a CL_MEM_READ_ONLY buffer from a kernel is
// undefined. Every OpenCL status code is passed to CheckCLError.
void ReduceAdd::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event)
{
    cl_int err = CL_SUCCESS;
    cl::Kernel kernel(*program, "reduce_global", &err);
    CheckCLError(err);

    // The data are floats; size the buffer by the element type actually stored.
    const size_t dataBytes = sizeof(float) * data_size;

    clInputBuffer = cl::Buffer(*context, CL_MEM_READ_WRITE, dataBytes, nullptr, &err);
    CheckCLError(err);
    err = queue->enqueueWriteBuffer(clInputBuffer,
                                    CL_TRUE, // Blocking!
                                    0, dataBytes, gpuResult.data());
    CheckCLError(err);

    err = kernel.setArg(0, clInputBuffer);
    CheckCLError(err);

    // NOTE(review): the work-group size is fixed at 1024. The launch is
    // expected to be rejected when data_size is not a multiple of 1024 or the
    // device supports smaller groups only - confirm against the callers and
    // the target device; the status check below reports such a failure.
    err = queue->enqueueNDRangeKernel(kernel,
                                      cl::NullRange,             // Indices are not offset
                                      cl::NDRange(data_size, 1), // One thread per element
                                      cl::NDRange(1024, 1),      // Workgroup size: fixed at 1024 (decrease it if the kernel does not start)
                                      nullptr,                   // No events to wait for
                                      Event);
    CheckCLError(err);
}
||||
|
|
||||
|
void ReduceAdd::cpu_compute() |
||||
|
{ |
||||
|
for (size_t index = 0; index < data_size; ++index) { |
||||
|
cpuResult += sourceData[index]; |
||||
|
} |
||||
|
std::cout << "CPU result is " << std::setprecision(12) << cpuResult << std::endl; |
||||
|
} |
||||
|
|
||||
|
// Copies the device buffer back into gpuResult and reports completion.
//
// queue  command queue used to enqueue the read.
//
// The read is blocking, so gpuResult holds the device data when this returns.
// The transfer is sized in floats (the element type of the data), and its
// status code is passed to CheckCLError instead of being dropped.
void ReduceAdd::collect_results(cl::CommandQueue* queue)
{
    cl_int err = queue->enqueueReadBuffer(clInputBuffer,
                                          CL_TRUE, // Blocking!
                                          0, sizeof(float) * data_size, gpuResult.data());
    CheckCLError(err);
    std::cout << "Results grabbed" << std::endl;
}
||||
|
|
||||
|
bool ReduceAdd::validate_results() |
||||
|
{ |
||||
|
return abs(cpuResult - gpuResult[0]) < 0.3f; |
||||
|
} |
Loading…
Reference in new issue