diff --git a/Primitives/Common.cpp b/Primitives/Common.cpp index dba97d2..97bc884 100644 --- a/Primitives/Common.cpp +++ b/Primitives/Common.cpp @@ -201,4 +201,30 @@ bool CheckCLError(cl_int err) } return true; -} \ No newline at end of file +} + +void Timer::start() +{ + t_start = std::chrono::high_resolution_clock::now(); +} + +void Timer::end(unsigned int nRuns) +{ + auto t_end = std::chrono::high_resolution_clock::now(); + std::cout << "CPU [time] " << + std::chrono::duration_cast(t_end - t_start).count() / 1e+06 + / nRuns << " ms" << std::endl; +} + +void Timer::measure(const std::function& program, unsigned int nRuns) +{ + start(); + for (unsigned int i = 0; i < nRuns; ++i) + { + program(); + } + end(nRuns); + +} + +std::chrono::time_point Timer::t_start; diff --git a/Primitives/Common.h b/Primitives/Common.h index d90f608..b99f766 100644 --- a/Primitives/Common.h +++ b/Primitives/Common.h @@ -1,8 +1,19 @@ #pragma once +#include +#include #include "cl.hpp" void printTimeStats(cl_event event); void WriteTGA_RGB(const char* filename, unsigned char* data, unsigned int width, unsigned int height); std::string FileToString(const std::string& path); const char* getErrorString(cl_int error); -bool CheckCLError(cl_int err); \ No newline at end of file +bool CheckCLError(cl_int err); + +class Timer { +private: + static std::chrono::time_point t_start; +public: + static void start(); + static void end(unsigned int nRuns = 1); + static void measure(const std::function& program, unsigned int nRuns = 10000); +}; \ No newline at end of file diff --git a/Primitives/OpenCLHandler.cpp b/Primitives/OpenCLHandler.cpp index dbeb35e..a796e79 100644 --- a/Primitives/OpenCLHandler.cpp +++ b/Primitives/OpenCLHandler.cpp @@ -2,8 +2,32 @@ #include "OpenCLHandler.h" #include +void OpenCLHandler::printTimeStats(cl::Event& event) +{ + cl_int err = CL_SUCCESS; + event.wait(); + cl_ulong execStart, execEnd; + execStart = event.getProfilingInfo(&err); + if (err != CL_SUCCESS) + { + std::cerr << "Error during profile query: CL_PROFILING_COMMAND_START [" + << err << "]." << std::endl; + } + execEnd = event.getProfilingInfo(&err); + if (err != CL_SUCCESS) + { + std::cerr << "Error during profile query: CL_PROFILING_COMMAND_END [" + << err << "]." << std::endl; + } + //std::cout << "[start] " << execStart << " [end] " << execEnd + // << " [time] " << (execEnd - execStart) / 1e+06 << "ms." << std::endl; + std::cout << "GPU [time] " << (execEnd - execStart) / 1e+06 << " ms" << + std::endl; +} + OpenCLHandler::OpenCLHandler() { + cl_int err = CL_SUCCESS; // Get a platform ID @@ -15,7 +39,7 @@ OpenCLHandler::OpenCLHandler() exit(-1); } - std::cout << platforms[0].getInfo() << std::endl; + std::cout << "Running on: " << platforms[0].getInfo() << std::endl; // Create a context cl_context_properties properties[] = @@ -24,13 +48,14 @@ OpenCLHandler::OpenCLHandler() // Enumerate the devices std::vector devices = context.getInfo(); - std::cout << devices[0].getInfo() << std::endl; + std::cout << "Global memory: " << devices[0].getInfo() << std::endl; max_workgroup_size = devices[0].getInfo(); + std::cout << "Max workgroup: " << max_workgroup_size << std::endl << std::endl; // Create the command queue cl::Event event; - queue = cl::CommandQueue(context, devices[0], 0, &err); + queue = cl::CommandQueue(context, devices[0], CL_QUEUE_PROFILING_ENABLE, &err); // Create the OpenCL program std::string programSource = FileToString("../kernels/programs.cl"); program = cl::Program(context, programSource); @@ -39,13 +64,15 @@ OpenCLHandler::OpenCLHandler() bool OpenCLHandler::run_test(TestCase* test) { - cl::Event event; + cl::Event gpuEvent; - test->gpu_compute(&context, &queue, &program, &event); + test->gpu_compute(&context, &queue, &program, &gpuEvent); - test->cpu_compute(); + Timer::measure([&]() { + test->cpu_compute(); + }, 5); - event.wait(); + printTimeStats(gpuEvent); test->collect_results(&queue); diff --git a/Primitives/OpenCLHandler.h b/Primitives/OpenCLHandler.h index 4094b25..55e4d09 100644 --- a/Primitives/OpenCLHandler.h +++ b/Primitives/OpenCLHandler.h @@ -8,6 +8,7 @@ private: cl::Context context; cl::CommandQueue queue; size_t max_workgroup_size; + void printTimeStats(cl::Event& event); public: OpenCLHandler(); bool run_test(TestCase* test); diff --git a/Primitives/Primitives.cpp b/Primitives/Primitives.cpp index e03dd26..80c38d1 100644 --- a/Primitives/Primitives.cpp +++ b/Primitives/Primitives.cpp @@ -60,6 +60,7 @@ void add_tests(std::vector* tests) { int main() { capi(); + OpenCLHandler handler; std::vector tests; @@ -67,23 +68,24 @@ int main() Histogram h1(false, 32, 4096); Histogram h2(true, 32, 4096); ReduceAdd r1(handler.get_max_size()); - ExclusiveScan e(512); Compact c(1024); + ExclusiveScan e(handler.get_max_size()); tests.push_back(&s); tests.push_back(&h1); tests.push_back(&h2); tests.push_back(&r1); - tests.push_back(&e); tests.push_back(&c); + tests.push_back(&e); for (size_t i = 0; i < tests.size(); i++) { + std::cout << tests[i]->description() << std::endl; if (handler.run_test(tests[i])) { - std::cout << tests[i]->description() << " - Success" << std::endl; + std::cout << " Success" << std::endl << std::endl; } else { - std::cout << tests[i]->description() << " - Failure" << std::endl; + std::cout << " Failure" << std::endl << std::endl; } } return 0; diff --git a/Primitives/primitives/ExclusiveScan.cpp b/Primitives/primitives/ExclusiveScan.cpp index f54e6d1..0636fdc 100644 --- a/Primitives/primitives/ExclusiveScan.cpp +++ b/Primitives/primitives/ExclusiveScan.cpp @@ -59,7 +59,8 @@ bool ExclusiveScan::validate_results() bool success = true; for (size_t index = 0; index < data_size; index++) { if (cpuResult[index] != gpuResult[index]) { - std::cout << "Wrong result at [" << index << "]: " << gpuResult[index] << "!=" << cpuResult[index] << std::endl; + int diff = abs(cpuResult[index] - gpuResult[index]); + std::cout << "Wrong result at [" << index << "]: " << gpuResult[index] << "!=" << cpuResult[index] << ", diff: " << diff << std::endl; success = false; } } diff --git a/Primitives/primitives/Histogram.cpp b/Primitives/primitives/Histogram.cpp index 5ae9553..c1674da 100644 --- a/Primitives/primitives/Histogram.cpp +++ b/Primitives/primitives/Histogram.cpp @@ -16,7 +16,6 @@ Histogram::Histogram(bool _global, int _valueSet, int _data_size) for (size_t index = 0; index < data_size; ++index) { sourceData.push_back(distr(gen)); } - cpuResult.resize(valueSet, 0); gpuResult.resize(valueSet, 0); } @@ -66,7 +65,8 @@ void Histogram::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::P void Histogram::cpu_compute() { - + cpuResult.resize(0, 0); + cpuResult.resize(valueSet, 0); for (size_t index = 0; index < data_size; ++index) { cpuResult[sourceData[index]] = cpuResult[sourceData[index]] + 1; } diff --git a/Primitives/primitives/ReduceAdd.cpp b/Primitives/primitives/ReduceAdd.cpp index 50e35f1..90d0c36 100644 --- a/Primitives/primitives/ReduceAdd.cpp +++ b/Primitives/primitives/ReduceAdd.cpp @@ -44,6 +44,7 @@ void ReduceAdd::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::P void ReduceAdd::cpu_compute() { + cpuResult = 0; for (size_t index = 0; index < data_size; ++index) { cpuResult += sourceData[index]; } @@ -63,6 +64,7 @@ bool ReduceAdd::validate_results() else { std::cout << "Wrong result: " << cpuResult << "!=" << gpuResult[0] << ", diff is " <