Browse Source

Add project files.

master
trinitas 3 years ago
parent
commit
6bdcc65404
  1. 20
      Primitives.sln
  2. 203
      Primitives/Common.h
  3. 187
      Primitives/Primitives.cpp
  4. 86
      Primitives/Primitives.vcxproj
  5. 38
      Primitives/Primitives.vcxproj.filters
  6. 12936
      Primitives/cl.hpp
  7. 109
      kernels/programs.cl

20
Primitives.sln

@ -0,0 +1,20 @@

Microsoft Visual Studio Solution File, Format Version 11.00
# Visual Studio 2010
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Primitives", "Primitives\Primitives.vcxproj", "{54072875-46AD-4C09-AE1A-2E7E70B30414}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
Release|Win32 = Release|Win32
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{54072875-46AD-4C09-AE1A-2E7E70B30414}.Debug|Win32.ActiveCfg = Debug|Win32
{54072875-46AD-4C09-AE1A-2E7E70B30414}.Debug|Win32.Build.0 = Debug|Win32
{54072875-46AD-4C09-AE1A-2E7E70B30414}.Release|Win32.ActiveCfg = Release|Win32
{54072875-46AD-4C09-AE1A-2E7E70B30414}.Release|Win32.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

203
Primitives/Common.h

@ -0,0 +1,203 @@
#pragma once
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <fstream>
#include <sstream>
#include "cl.hpp"
#pragma warning( disable : 4996 )
void printTimeStats(cl_event event)
{
cl_int err = CL_SUCCESS;
if(event == NULL)
{
std::cerr << "No event object returned!" << std::endl;
}
else
{
clWaitForEvents(1, &event);
}
cl_ulong execStart, execEnd;
err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START,
sizeof(cl_ulong), &execStart, NULL);
if(err != CL_SUCCESS)
{
std::cerr << "Error during profile query: CL_PROFILING_COMMAND_START [" << err << "]." << std::endl;
}
err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
sizeof(cl_ulong), &execEnd, NULL);
if(err != CL_SUCCESS)
{
std::cerr << "Error during profile query: CL_PROFILING_COMMAND_END [" << err << "]." << std::endl;
}
std::cout << "[start] " << execStart << " [end] " << execEnd << " [time] " << (execEnd - execStart) / 1e+06 << "ms." << std::endl;
}
void WriteTGA_RGB(const char* filename, unsigned char* data, unsigned int width, unsigned int height)
{
FILE *f = fopen(filename, "wb");
if (!f) {
fprintf(stderr, "Unable to create output TGA image `%s'\n", filename);
exit(EXIT_FAILURE);
}
fputc(0x00, f); /* ID Length, 0 => No ID */
fputc(0x00, f); /* Color Map Type, 0 => No color map included */
fputc(0x02, f); /* Image Type, 2 => Uncompressed, True-color Image */
fputc(0x00, f); /* Next five bytes are about the color map entries */
fputc(0x00, f); /* 2 bytes Index, 2 bytes length, 1 byte size */
fputc(0x00, f);
fputc(0x00, f);
fputc(0x00, f);
fputc(0x00, f); /* X-origin of Image */
fputc(0x00, f);
fputc(0x00, f); /* Y-origin of Image */
fputc(0x00, f);
fputc(width & 0xff, f); /* Image Width */
fputc((width >> 8) & 0xff, f);
fputc(height & 0xff, f); /* Image Height */
fputc((height >> 8) & 0xff, f);
fputc(0x18, f); /* Pixel Depth, 0x18 => 24 Bits */
fputc(0x20, f); /* Image Descriptor */
for (int y = height - 1; y >= 0; y--) {
for (size_t x = 0; x < width; x++) {
const size_t i = (y * width + x) * 3;
fputc(data[i + 2], f); /* write blue */
fputc(data[i + 1], f); /* write green */
fputc(data[i], f); /* write red */
}
}
}
std::string FileToString(const std::string& path) {
std::ifstream file(path, std::ios::in | std::ios::binary);
if (file)
{
std::ostringstream contents;
contents << file.rdbuf();
file.close();
return(contents.str());
}
return std::string();
}
const char *getErrorString(cl_int error)
{
switch (error) {
// run-time and JIT compiler errors
case 0: return "CL_SUCCESS";
case -1: return "CL_DEVICE_NOT_FOUND";
case -2: return "CL_DEVICE_NOT_AVAILABLE";
case -3: return "CL_COMPILER_NOT_AVAILABLE";
case -4: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
case -5: return "CL_OUT_OF_RESOURCES";
case -6: return "CL_OUT_OF_HOST_MEMORY";
case -7: return "CL_PROFILING_INFO_NOT_AVAILABLE";
case -8: return "CL_MEM_COPY_OVERLAP";
case -9: return "CL_IMAGE_FORMAT_MISMATCH";
case -10: return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
case -11: return "CL_BUILD_PROGRAM_FAILURE";
case -12: return "CL_MAP_FAILURE";
case -13: return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
case -14: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
case -15: return "CL_COMPILE_PROGRAM_FAILURE";
case -16: return "CL_LINKER_NOT_AVAILABLE";
case -17: return "CL_LINK_PROGRAM_FAILURE";
case -18: return "CL_DEVICE_PARTITION_FAILED";
case -19: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
// compile-time errors
case -30: return "CL_INVALID_VALUE";
case -31: return "CL_INVALID_DEVICE_TYPE";
case -32: return "CL_INVALID_PLATFORM";
case -33: return "CL_INVALID_DEVICE";
case -34: return "CL_INVALID_CONTEXT";
case -35: return "CL_INVALID_QUEUE_PROPERTIES";
case -36: return "CL_INVALID_COMMAND_QUEUE";
case -37: return "CL_INVALID_HOST_PTR";
case -38: return "CL_INVALID_MEM_OBJECT";
case -39: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
case -40: return "CL_INVALID_IMAGE_SIZE";
case -41: return "CL_INVALID_SAMPLER";
case -42: return "CL_INVALID_BINARY";
case -43: return "CL_INVALID_BUILD_OPTIONS";
case -44: return "CL_INVALID_PROGRAM";
case -45: return "CL_INVALID_PROGRAM_EXECUTABLE";
case -46: return "CL_INVALID_KERNEL_NAME";
case -47: return "CL_INVALID_KERNEL_DEFINITION";
case -48: return "CL_INVALID_KERNEL";
case -49: return "CL_INVALID_ARG_INDEX";
case -50: return "CL_INVALID_ARG_VALUE";
case -51: return "CL_INVALID_ARG_SIZE";
case -52: return "CL_INVALID_KERNEL_ARGS";
case -53: return "CL_INVALID_WORK_DIMENSION";
case -54: return "CL_INVALID_WORK_GROUP_SIZE";
case -55: return "CL_INVALID_WORK_ITEM_SIZE";
case -56: return "CL_INVALID_GLOBAL_OFFSET";
case -57: return "CL_INVALID_EVENT_WAIT_LIST";
case -58: return "CL_INVALID_EVENT";
case -59: return "CL_INVALID_OPERATION";
case -60: return "CL_INVALID_GL_OBJECT";
case -61: return "CL_INVALID_BUFFER_SIZE";
case -62: return "CL_INVALID_MIP_LEVEL";
case -63: return "CL_INVALID_GLOBAL_WORK_SIZE";
case -64: return "CL_INVALID_PROPERTY";
case -65: return "CL_INVALID_IMAGE_DESCRIPTOR";
case -66: return "CL_INVALID_COMPILER_OPTIONS";
case -67: return "CL_INVALID_LINKER_OPTIONS";
case -68: return "CL_INVALID_DEVICE_PARTITION_COUNT";
// extension errors
case -1000: return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR";
case -1001: return "CL_PLATFORM_NOT_FOUND_KHR";
case -1002: return "CL_INVALID_D3D10_DEVICE_KHR";
case -1003: return "CL_INVALID_D3D10_RESOURCE_KHR";
case -1004: return "CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR";
case -1005: return "CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR";
default: return "Unknown OpenCL error";
}
}
bool CheckCLError(cl_int err)
{
if(err != CL_SUCCESS)
{
std::cout << "OpenCL error: " << getErrorString(err) << std::endl;
return false;
}
return true;
}

187
Primitives/Primitives.cpp

@ -0,0 +1,187 @@
// Primitives.cpp : Defines the entry point for the console application.
#include <string>
#include <vector>
#include "Common.h"
// OpenCL C API
#include <CL/opencl.h>
// OpenCL C++ API
#include "cl.hpp"
const size_t dataSize = 4096;
void capi()
{
// Get a platform ID
cl_platform_id platformID;
clGetPlatformIDs(1, &platformID, NULL);
// Get a device ID
cl_device_id deviceID;
clGetDeviceIDs(platformID, CL_DEVICE_TYPE_GPU, 1, &deviceID, NULL);
// Create a context
cl_context context;
cl_context_properties contextProperties[] =
{ CL_CONTEXT_PLATFORM, (cl_context_properties)platformID, 0 };
context = clCreateContext(contextProperties, 1, &deviceID, NULL, NULL, NULL);
// Create a command queue
cl_command_queue queue;
queue = clCreateCommandQueue(context, deviceID, CL_QUEUE_PROFILING_ENABLE, NULL);
// Create an OpenCL program
std::string source = FileToString("../kernels/programs.cl");
const char* csource = source.c_str();
cl_program program = clCreateProgramWithSource(context, 1, &csource, NULL, NULL);
cl_int err = clBuildProgram(program, 1, &deviceID, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
cl_uint logLength;
clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &logLength);
char* log = new char[logLength];
clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, logLength, log, 0);
std::cout << log << std::endl;
delete[] log;
exit(-1);
}
// Get the kernel handle
cl_kernel kernel = clCreateKernel(program, "square", &err);
if(!CheckCLError(err)) exit(-1);
// Allocate and upload the input data
std::vector<float> hostBuffer;
for (size_t index = 0; index < dataSize; ++index)
{
hostBuffer.push_back(static_cast<float>(index));
}
cl_mem inputBuffer;
inputBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * dataSize, NULL, &err);
if (!CheckCLError(err)) exit(-1);
clEnqueueWriteBuffer(queue, inputBuffer, CL_TRUE, 0, sizeof(float) * dataSize, hostBuffer.data(), 0, NULL, NULL);
// Alocate output data
cl_mem outputBuffer;
outputBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * dataSize, NULL, &err);
if (!CheckCLError(err)) exit(-1);
// Set the kernel paramateres
clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputBuffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputBuffer);
// Enqueue the kernel
clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &dataSize, NULL, 0, NULL, NULL);
// Copy the result back to the host
clEnqueueReadBuffer(queue, outputBuffer, CL_TRUE, 0, sizeof(float) * dataSize, hostBuffer.data(), 0, NULL, NULL);
// Validate the output
for (size_t index = 0; index < dataSize; ++index)
{
if (hostBuffer[index] != index*index)
{
std::cout << "Wrong result at [" << index << "]: " << hostBuffer[index] << "!=" << index*index << std::endl;
break;
}
}
std::cout << "Finished" << std::endl;
}
void cppapi()
{
cl_int err = CL_SUCCESS;
// Get a platform ID
std::vector<cl::Platform> platforms;
cl::Platform::get(&platforms);
if (platforms.size() == 0)
{
std::cout << "Unable to find suitable platform." << std::endl;
exit(-1);
}
std::cout << platforms[0].getInfo<CL_PLATFORM_NAME>() << std::endl;
// Create a context
cl_context_properties properties[] =
{ CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0 };
cl::Context context(CL_DEVICE_TYPE_GPU, properties);
// Enumerate the devices
std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
std::cout << devices[0].getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>() << std::endl;
// Create the command queue
cl::Event event;
cl::CommandQueue queue(context, devices[0], 0, &err);
// Create the OpenCL program
std::string programSource = FileToString("../kernels/programs.cl");
cl::Program program = cl::Program(context, programSource);
program.build(devices);
// Get the kernel handle
cl::Kernel kernel(program, "histogram_global", &err);
CheckCLError(err);
// Allocate and upload the input data
std::vector<float> hostBuffer;
for (size_t index = 0; index < dataSize; ++index)
{
hostBuffer.push_back(static_cast<float>(index % 32));
}
cl::Buffer clInputBuffer = cl::Buffer(context, CL_MEM_READ_ONLY, sizeof(float) * dataSize, NULL, &err);
queue.enqueueWriteBuffer(clInputBuffer,
true, // Blocking!
0, sizeof(float) * dataSize, hostBuffer.data());
// Allocate the output data
cl::Buffer clResultBuffer = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * 32, NULL, &err);
// Set the kernel parameters
kernel.setArg(0, clInputBuffer); // kernel FV paraméterei sorrendben
kernel.setArg(1, clResultBuffer);
// Enqueue the kernel
queue.enqueueNDRangeKernel(kernel,
cl::NullRange, // Indexek nem eloffszetelve
cl::NDRange(dataSize, 1), // Minden elemet egy szál
cl::NullRange, // Workgroup méret? - ez az auto, ha nem indul, 1024-re, onnan csökkent, amig elindul
NULL, //
&event); // Õ jlezi hogy vége, lsd lent
// Create reference values
for (size_t index = 0; index < dataSize; ++index) {
}
event.wait();
// Copy result back to host
queue.enqueueReadBuffer(clResultBuffer, true, 0, sizeof(float) * 32, hostBuffer.data());
// Validate the result
for (size_t index = 0; index < 32; ++index)
{
if (hostBuffer[index] != index*index)
{
std::cout << "Wrong result at [" << index << "]: " << hostBuffer[index] << "!=" << index*index << std::endl;
break;
}
}
for (size_t index = 0; index < 32; ++index) {
std::cout << index << ": " << hostBuffer[index] << std::endl;
}
std::cout << "Finished" << std::endl;
}
int main()
{
//capi();
cppapi();
return 0;
}

86
Primitives/Primitives.vcxproj

@ -0,0 +1,86 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{54072875-46AD-4C09-AE1A-2E7E70B30414}</ProjectGuid>
<RootNamespace>Primitives</RootNamespace>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v142</PlatformToolset>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
<PlatformToolset>v142</PlatformToolset>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<OutDir>$(SolutionDir)bin\</OutDir>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>$(CUDA_PATH)\include</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalLibraryDirectories>$(CUDA_PATH)\lib\Win32\</AdditionalLibraryDirectories>
<AdditionalDependencies>OpenCL.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<SubSystem>Console</SubSystem>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<AdditionalIncludeDirectories>$(CUDA_PATH)\include</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalLibraryDirectories>$(CUDA_PATH)\lib\Win32\</AdditionalLibraryDirectories>
<AdditionalDependencies>OpenCL.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="cl.hpp" />
<ClInclude Include="Common.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="Primitives.cpp" />
</ItemGroup>
<ItemGroup>
<None Include="..\kernels\programs.cl" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

38
Primitives/Primitives.vcxproj.filters

@ -0,0 +1,38 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
</Filter>
<Filter Include="Resource Files">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
<Filter Include="Kernels">
<UniqueIdentifier>{fd13ccc5-a98b-4e30-9eba-12f62c7dd566}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="cl.hpp">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="Common.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="Primitives.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="..\kernels\programs.cl">
<Filter>Kernels</Filter>
</None>
</ItemGroup>
</Project>

12936
Primitives/cl.hpp

File diff suppressed because it is too large

109
kernels/programs.cl

@ -0,0 +1,109 @@
// map operator with f(x) = x*x
__kernel void square(__global float* inputData,
__global float* outputData)
{
int id = get_global_id(0); //get_local_id a workgroup-on belul
outputData[id] = inputData[id] * inputData[id];
}
// TODO
//
// histogram[data[id]] := histogram[data[id]] + 1
//
// SYNCHRONIZATION!
__kernel
void histogram_global(__global int* data, __global int* histogram)
{
int id = get_global_id(0);
atomic_add(&histogram[data[id]], 1.0f);
}
// TODO
//
// ID := get_global_id(0)
// LID := get_local_id(0)
//
// IF LID < histogramSize DO:
// lhistogram[LID] := 0
// BARRIER
//
// Add data to local histogram
//
// BARRIER
//
// IF LID < histogramSize DO:
// histogram[LID] = lhistogram[LID]
__kernel
void histogram_local(__global int* data, __global int* histogram, __local int* lhistogram, const int histogramSize)
{
}
// TODO
//
// ID := get_global_id(0)
//
// FOR s = get_global_size(0) / 2 ; s > 0 ; s >>= 1 DO:
// IF (ID < s)
// data[ID] = max(data[ID], data[ID + s])
// BARRIER
//
__kernel
void reduce_global(__global float* data)
{
}
// TODO
//
// ID := get_global_id(0)
// IF ID > 0 THEN data[ID] = data[ID - 1]
// ELSE data[ID] = 0
// BARRIER
//
// FOR s = 1; s < get_global_size(0); s *= 2 DO:
// tmp := data[ID]
// IF ( ID + s < get_global_size(0) THEN
// data[ID + s] += tmp;
// BARRIER
//
// IF(ID = 0) THEN data[ID] = 0;
__kernel
void exscan_global(__global int* data)
{
}
// TODO
// ID := get_global_id(0)
// IF data[id] < 50 THEN
// predicate = 1
// ELSE
// predicate = 0
__kernel
void compact_predicate(__global int* data, __global int* pred)
{
}
// TODO
//
// exclusive scan pred to prefSum
__kernel
void compact_exscan(__global int* pred, __global int* prefSum)
{
}
// TODO
//
// ID := get_global_id(0)
// VALUE := data[ID]
// BARRIER
// IF pred[ID] == 1 THEN
// data[prefSum[ID]] = VALUE
__kernel
void compact_compact(__global int* data, __global int* pred, __global int* prefSum)
{
}
Loading…
Cancel
Save