#include #include #include "Common.h" #include "PrimitiveTests.h" #include #include ReduceAdd::ReduceAdd(size_t max_size) { data_size = max_size; cpuResult = 0.0f; const float vmax = 1000.0f; std::random_device rd; std::mt19937 gen(rd()); std::uniform_real_distribution distr(0.0f, vmax); for (size_t index = 0; index < data_size; ++index) { float val = distr(gen); gpuResult.push_back(val); sourceData.push_back(val); } } void ReduceAdd::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event) { cl_int err = CL_SUCCESS; cl::Kernel kernel = cl::Kernel(*program, "reduce_global", &err); CheckCLError(err); clInputBuffer = cl::Buffer(*context, CL_MEM_READ_ONLY, sizeof(int) * data_size, NULL, &err); CheckCLError(err); queue->enqueueWriteBuffer(clInputBuffer, true, 0, sizeof(int) * data_size, gpuResult.data()); kernel.setArg(0, clInputBuffer); queue->enqueueNDRangeKernel(kernel, cl::NullRange, // Indexek nem eloffszetelve cl::NDRange(data_size, 1), // Minden elemet egy szál cl::NDRange(1024, 1), // Workgroup méret? - ez az auto, ha nem indul, 1024-re, onnan csökkent, amig elindul NULL, // Event); } void ReduceAdd::cpu_compute() { cpuResult = 0; for (size_t index = 0; index < data_size; ++index) { cpuResult += sourceData[index]; } } void ReduceAdd::collect_results(cl::CommandQueue* queue) { queue->enqueueReadBuffer(clInputBuffer, true, 0, sizeof(int) * data_size, gpuResult.data()); } bool ReduceAdd::validate_results() { float diff = abs(cpuResult - gpuResult[0]); if (diff < 0.3f) { return true; } else { std::cout << "Wrong result: " << cpuResult << "!=" << gpuResult[0] << ", diff is " <