#include <iostream>
#include "Common.h"
#include "PrimitiveTests.h"
#include <random>

/**
 * Sets up a compaction test over `_data_size` random integers.
 *
 * The input values are drawn uniformly from [0, 100] with a Mersenne Twister
 * engine seeded from std::random_device. gpuResult is pre-sized to the input
 * length and zero-filled.
 *
 * @param _data_size  Number of input elements to generate.
 */
Compact::Compact(size_t _data_size)
{
	data_size = _data_size;
	limit = 50;       // threshold compared against each element in cpu_compute
	result_size = 0;  // no compacted output yet

	std::random_device seed_source;
	std::mt19937 engine(seed_source());
	std::uniform_int_distribution<int> value_range(0, 100);

	// Append one freshly drawn value per requested element.
	for (size_t remaining = data_size; remaining > 0; --remaining) {
		sourceData.push_back(value_range(engine));
	}

	gpuResult.resize(data_size, 0);
}

/**
 * Runs the three-stage stream compaction on the GPU: predicate evaluation,
 * scan of the predicate flags, then the compaction kernel itself.
 *
 * sourceData is uploaded into clInputBuffer, the element count read back
 * after the scan is stored in result_size, and gpuResult is resized to that
 * count. The final kernel is only enqueued; it is not waited on here.
 *
 * @param context  OpenCL context the device buffers are created in.
 * @param queue    Command queue all transfers and kernels are enqueued on.
 * @param program  Program providing the "compact_predicate",
 *                 "compact_exscan" and "compact_compact" kernels.
 * @param Event    Receives the event associated with the final kernel launch.
 */
void Compact::gpu_compute(cl::Context* context, cl::CommandQueue* queue, cl::Program* program, cl::Event* Event)
{
	cl_int err = CL_SUCCESS;
	cl::Event inner_event;

	cl::Kernel kernel_predicate = cl::Kernel(*program, "compact_predicate", &err);
	CheckCLError(err);
	cl::Kernel kernel_exscan = cl::Kernel(*program, "compact_exscan", &err);
	CheckCLError(err);
	cl::Kernel kernel_compact = cl::Kernel(*program, "compact_compact", &err);
	CheckCLError(err);

	const size_t buffer_bytes = sizeof(int) * data_size;

	// Every buffer is READ_WRITE: pred and prefSum are bound as kernel outputs,
	// and the compacted data is later read back from clInputBuffer. Writing
	// from a kernel into a CL_MEM_READ_ONLY buffer is undefined behaviour.
	clInputBuffer = cl::Buffer(*context, CL_MEM_READ_WRITE, buffer_bytes, nullptr, &err);
	CheckCLError(err);
	// NOTE(review): clResultBuffer is allocated but never bound to a kernel in
	// this function; collect_results reads from clInputBuffer instead. Confirm
	// whether the compact kernel is meant to write in place.
	clResultBuffer = cl::Buffer(*context, CL_MEM_READ_WRITE, buffer_bytes, nullptr, &err);
	CheckCLError(err);

	cl::Buffer pred(*context, CL_MEM_READ_WRITE, buffer_bytes, nullptr, &err);
	CheckCLError(err);
	cl::Buffer prefSum(*context, CL_MEM_READ_WRITE, buffer_bytes, nullptr, &err);
	CheckCLError(err);

	// Blocking upload of the input data.
	err = queue->enqueueWriteBuffer(clInputBuffer, true, 0, buffer_bytes, sourceData.data());
	CheckCLError(err);

	// One work-item per input element; the same global range is used for all
	// three kernels.
	const cl::NDRange global_range(data_size, 1);

	// Stage 1: predicate flags.
	err = kernel_predicate.setArg(0, clInputBuffer);
	CheckCLError(err);
	err = kernel_predicate.setArg(1, pred);
	CheckCLError(err);
	err = queue->enqueueNDRangeKernel(kernel_predicate,
		cl::NullRange,		// no global index offset
		global_range,		// one work-item per element
		cl::NullRange,		// work-group size left to the runtime
		nullptr,			// no wait list
		&inner_event);
	CheckCLError(err);
	inner_event.wait();

	// Stage 2: scan over the predicate flags.
	err = kernel_exscan.setArg(0, pred);
	CheckCLError(err);
	err = kernel_exscan.setArg(1, prefSum);
	CheckCLError(err);
	err = queue->enqueueNDRangeKernel(kernel_exscan,
		cl::NullRange,		// no global index offset
		global_range,		// one work-item per element
		cl::NullRange,		// work-group size left to the runtime
		nullptr,			// no wait list
		&inner_event);
	CheckCLError(err);
	inner_event.wait();

	// Read the element count into a local int so exactly sizeof(int) bytes
	// land in an int-sized object, whatever the declared type of result_size.
	// NOTE(review): the count is taken from element 0 of the scan output;
	// confirm that compact_exscan stores the total there.
	int compacted_count = 0;
	err = queue->enqueueReadBuffer(prefSum, true, 0, sizeof(int), &compacted_count);
	CheckCLError(err);
	if (compacted_count < 0) {
		compacted_count = 0;	// never size the result vector from a negative count
	}
	result_size = compacted_count;

	// Stage 3: compaction. Completion is reported through the caller's Event.
	err = kernel_compact.setArg(0, clInputBuffer);
	CheckCLError(err);
	err = kernel_compact.setArg(1, pred);
	CheckCLError(err);
	err = kernel_compact.setArg(2, prefSum);
	CheckCLError(err);
	err = queue->enqueueNDRangeKernel(kernel_compact,
		cl::NullRange,		// no global index offset
		global_range,		// one work-item per element
		cl::NullRange,		// work-group size left to the runtime
		nullptr,			// no wait list
		Event);
	CheckCLError(err);

	gpuResult.resize(result_size, 0);
}

/**
 * CPU reference for the compaction: stores in cpuResult, in input order,
 * every element of sourceData that is strictly below `limit`.
 */
void Compact::cpu_compute()
{
	// Start from an empty result so a repeated call does not append a second
	// copy of the filtered data.
	cpuResult.clear();

	for (size_t i = 0; i < data_size; ++i) {
		const auto& value = sourceData[i];
		if (value < limit) {
			cpuResult.push_back(value);
		}
	}
}

/**
 * Copies the GPU output back to the host with a blocking read of the first
 * result_size ints of clInputBuffer into gpuResult.
 *
 * @param queue  Command queue the read is enqueued on.
 */
void Compact::collect_results(cl::CommandQueue* queue)
{
	// Nothing to fetch for an empty result; a zero-byte read (possibly with a
	// null host pointer from an empty vector) may be rejected by the runtime.
	if (result_size == 0) {
		return;
	}

	cl_int err = queue->enqueueReadBuffer(clInputBuffer, true, 0, sizeof(int) * result_size, gpuResult.data());
	CheckCLError(err);
}

/**
 * Compares the GPU output against the CPU reference.
 *
 * The element counts must match, and every element in the common prefix must
 * be equal. Each mismatch is reported on std::cout.
 *
 * @return true when counts and all compared elements agree, false otherwise.
 */
bool Compact::validate_results()
{
	bool success = true;

	const size_t gpu_count = static_cast<size_t>(result_size);
	const size_t cpu_count = cpuResult.size();

	// A shorter (or empty) GPU result must not pass just because its prefix
	// happens to match the reference.
	if (gpu_count != cpu_count) {
		std::cout << "Wrong result count: " << gpu_count << "!=" << cpu_count << std::endl;
		success = false;
	}

	// Only compare indices that are valid in both containers.
	size_t comparable = (gpu_count < cpu_count) ? gpu_count : cpu_count;
	if (gpuResult.size() < comparable) {
		comparable = gpuResult.size();
	}

	for (size_t index = 0; index < comparable; index++) {
		if (cpuResult[index] != gpuResult[index]) {
			std::cout << "Wrong result at [" << index << "]: " << gpuResult[index] << "!=" << cpuResult[index] << std::endl;
			success = false;
		}
	}
	return success;
}

/**
 * Human-readable label for this test, including the input element count.
 *
 * @return A string of the form "Compact (data_size=N)".
 */
std::string Compact::description()
{
	std::string label = "Compact (data_size=";
	label += std::to_string(data_size);
	label += ")";
	return label;
}