8 changed files with 13638 additions and 0 deletions
@ -0,0 +1,149 @@ |
|||
#pragma once |
|||
|
|||
#include <cstdio> |
|||
#include <cstdlib> |
|||
#include <iostream> |
|||
#include <fstream> |
|||
#include <sstream> |
|||
|
|||
#include "cl.hpp" |
|||
|
|||
#pragma warning( disable : 4996 ) |
|||
|
|||
void WriteTGA_RGB(const char* filename, unsigned char* data, unsigned int width, unsigned int height) |
|||
{ |
|||
FILE *f = fopen(filename, "wb"); |
|||
if (!f) { |
|||
fprintf(stderr, "Unable to create output TGA image `%s'\n", filename); |
|||
exit(EXIT_FAILURE); |
|||
} |
|||
|
|||
fputc(0x00, f); /* ID Length, 0 => No ID */ |
|||
fputc(0x00, f); /* Color Map Type, 0 => No color map included */ |
|||
fputc(0x02, f); /* Image Type, 2 => Uncompressed, True-color Image */ |
|||
fputc(0x00, f); /* Next five bytes are about the color map entries */ |
|||
fputc(0x00, f); /* 2 bytes Index, 2 bytes length, 1 byte size */ |
|||
fputc(0x00, f); |
|||
fputc(0x00, f); |
|||
fputc(0x00, f); |
|||
fputc(0x00, f); /* X-origin of Image */ |
|||
fputc(0x00, f); |
|||
fputc(0x00, f); /* Y-origin of Image */ |
|||
fputc(0x00, f); |
|||
fputc(width & 0xff, f); /* Image Width */ |
|||
fputc((width >> 8) & 0xff, f); |
|||
fputc(height & 0xff, f); /* Image Height */ |
|||
fputc((height >> 8) & 0xff, f); |
|||
fputc(0x18, f); /* Pixel Depth, 0x18 => 24 Bits */ |
|||
fputc(0x20, f); /* Image Descriptor */ |
|||
|
|||
for (int y = height - 1; y >= 0; y--) { |
|||
for (size_t x = 0; x < width; x++) { |
|||
const size_t i = (y * width + x) * 3; |
|||
fputc(data[i + 2], f); /* write blue */ |
|||
fputc(data[i + 1], f); /* write green */ |
|||
fputc(data[i], f); /* write red */ |
|||
} |
|||
} |
|||
} |
|||
|
|||
std::string FileToString(const std::string& path) { |
|||
std::ifstream file(path, std::ios::in | std::ios::binary); |
|||
if (file) |
|||
{ |
|||
std::ostringstream contents; |
|||
contents << file.rdbuf(); |
|||
file.close(); |
|||
return(contents.str()); |
|||
} |
|||
return std::string(); |
|||
|
|||
} |
|||
|
|||
const char *getErrorString(cl_int error) |
|||
{ |
|||
switch (error) { |
|||
// run-time and JIT compiler errors
|
|||
case 0: return "CL_SUCCESS"; |
|||
case -1: return "CL_DEVICE_NOT_FOUND"; |
|||
case -2: return "CL_DEVICE_NOT_AVAILABLE"; |
|||
case -3: return "CL_COMPILER_NOT_AVAILABLE"; |
|||
case -4: return "CL_MEM_OBJECT_ALLOCATION_FAILURE"; |
|||
case -5: return "CL_OUT_OF_RESOURCES"; |
|||
case -6: return "CL_OUT_OF_HOST_MEMORY"; |
|||
case -7: return "CL_PROFILING_INFO_NOT_AVAILABLE"; |
|||
case -8: return "CL_MEM_COPY_OVERLAP"; |
|||
case -9: return "CL_IMAGE_FORMAT_MISMATCH"; |
|||
case -10: return "CL_IMAGE_FORMAT_NOT_SUPPORTED"; |
|||
case -11: return "CL_BUILD_PROGRAM_FAILURE"; |
|||
case -12: return "CL_MAP_FAILURE"; |
|||
case -13: return "CL_MISALIGNED_SUB_BUFFER_OFFSET"; |
|||
case -14: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"; |
|||
case -15: return "CL_COMPILE_PROGRAM_FAILURE"; |
|||
case -16: return "CL_LINKER_NOT_AVAILABLE"; |
|||
case -17: return "CL_LINK_PROGRAM_FAILURE"; |
|||
case -18: return "CL_DEVICE_PARTITION_FAILED"; |
|||
case -19: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE"; |
|||
|
|||
// compile-time errors
|
|||
case -30: return "CL_INVALID_VALUE"; |
|||
case -31: return "CL_INVALID_DEVICE_TYPE"; |
|||
case -32: return "CL_INVALID_PLATFORM"; |
|||
case -33: return "CL_INVALID_DEVICE"; |
|||
case -34: return "CL_INVALID_CONTEXT"; |
|||
case -35: return "CL_INVALID_QUEUE_PROPERTIES"; |
|||
case -36: return "CL_INVALID_COMMAND_QUEUE"; |
|||
case -37: return "CL_INVALID_HOST_PTR"; |
|||
case -38: return "CL_INVALID_MEM_OBJECT"; |
|||
case -39: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"; |
|||
case -40: return "CL_INVALID_IMAGE_SIZE"; |
|||
case -41: return "CL_INVALID_SAMPLER"; |
|||
case -42: return "CL_INVALID_BINARY"; |
|||
case -43: return "CL_INVALID_BUILD_OPTIONS"; |
|||
case -44: return "CL_INVALID_PROGRAM"; |
|||
case -45: return "CL_INVALID_PROGRAM_EXECUTABLE"; |
|||
case -46: return "CL_INVALID_KERNEL_NAME"; |
|||
case -47: return "CL_INVALID_KERNEL_DEFINITION"; |
|||
case -48: return "CL_INVALID_KERNEL"; |
|||
case -49: return "CL_INVALID_ARG_INDEX"; |
|||
case -50: return "CL_INVALID_ARG_VALUE"; |
|||
case -51: return "CL_INVALID_ARG_SIZE"; |
|||
case -52: return "CL_INVALID_KERNEL_ARGS"; |
|||
case -53: return "CL_INVALID_WORK_DIMENSION"; |
|||
case -54: return "CL_INVALID_WORK_GROUP_SIZE"; |
|||
case -55: return "CL_INVALID_WORK_ITEM_SIZE"; |
|||
case -56: return "CL_INVALID_GLOBAL_OFFSET"; |
|||
case -57: return "CL_INVALID_EVENT_WAIT_LIST"; |
|||
case -58: return "CL_INVALID_EVENT"; |
|||
case -59: return "CL_INVALID_OPERATION"; |
|||
case -60: return "CL_INVALID_GL_OBJECT"; |
|||
case -61: return "CL_INVALID_BUFFER_SIZE"; |
|||
case -62: return "CL_INVALID_MIP_LEVEL"; |
|||
case -63: return "CL_INVALID_GLOBAL_WORK_SIZE"; |
|||
case -64: return "CL_INVALID_PROPERTY"; |
|||
case -65: return "CL_INVALID_IMAGE_DESCRIPTOR"; |
|||
case -66: return "CL_INVALID_COMPILER_OPTIONS"; |
|||
case -67: return "CL_INVALID_LINKER_OPTIONS"; |
|||
case -68: return "CL_INVALID_DEVICE_PARTITION_COUNT"; |
|||
|
|||
// extension errors
|
|||
case -1000: return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR"; |
|||
case -1001: return "CL_PLATFORM_NOT_FOUND_KHR"; |
|||
case -1002: return "CL_INVALID_D3D10_DEVICE_KHR"; |
|||
case -1003: return "CL_INVALID_D3D10_RESOURCE_KHR"; |
|||
case -1004: return "CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR"; |
|||
case -1005: return "CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR"; |
|||
default: return "Unknown OpenCL error"; |
|||
} |
|||
} |
|||
|
|||
bool CheckCLError(cl_int err) |
|||
{ |
|||
if(err != CL_SUCCESS) |
|||
{ |
|||
std::cout << "OpenCL error: " << getErrorString(err) << std::endl; |
|||
return false; |
|||
} |
|||
|
|||
return true; |
|||
} |
@ -0,0 +1,200 @@ |
|||
// MonteCarlo.cpp : Defines the entry point for the console application.
|
|||
|
|||
#include <string> |
|||
#include <vector> |
|||
#include "Common.h" |
|||
|
|||
// OpenCL C API
|
|||
#include <CL/opencl.h> |
|||
|
|||
// OpenCL C++ API
|
|||
#include "cl.hpp" |
|||
|
|||
const bool writeOutRandoms = true; |
|||
const size_t randomNumbers = 1024; |
|||
const size_t threadCount = 512; |
|||
|
|||
void capi() |
|||
{ |
|||
// Get a platform ID
|
|||
cl_platform_id platformID; |
|||
clGetPlatformIDs(1, &platformID, NULL); |
|||
|
|||
// Get a device ID
|
|||
cl_device_id deviceID; |
|||
clGetDeviceIDs(platformID, CL_DEVICE_TYPE_GPU, 1, &deviceID, NULL); |
|||
|
|||
// Create a context
|
|||
cl_context context; |
|||
cl_context_properties contextProperties[] = |
|||
{ CL_CONTEXT_PLATFORM, (cl_context_properties)platformID, 0 }; |
|||
context = clCreateContext(contextProperties, 1, &deviceID, NULL, NULL, NULL); |
|||
|
|||
// Create a command queue
|
|||
cl_command_queue queue; |
|||
queue = clCreateCommandQueue(context, deviceID, CL_QUEUE_PROFILING_ENABLE, NULL); |
|||
|
|||
// Create an OpenCL program
|
|||
std::string source = FileToString("../kernels/programs.cl"); |
|||
const char* csource = source.c_str(); |
|||
cl_program program = clCreateProgramWithSource(context, 1, &csource, NULL, NULL); |
|||
cl_int err = clBuildProgram(program, 1, &deviceID, NULL, NULL, NULL); |
|||
if (err != CL_SUCCESS) |
|||
{ |
|||
cl_uint logLength; |
|||
clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &logLength); |
|||
char* log = new char[logLength]; |
|||
clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, logLength, log, 0); |
|||
std::cout << log << std::endl; |
|||
delete[] log; |
|||
exit(-1); |
|||
} |
|||
|
|||
// Get the kernel handle
|
|||
cl_kernel kernel = clCreateKernel(program, "randomLCG", &err); |
|||
if(!CheckCLError(err)) exit(-1); |
|||
|
|||
// Allocate memory for random numbers and random seeds
|
|||
// Every thread receives a private seed, stored in seedBuffer/clSeedBuffer
|
|||
// Every thread is supposed to generate randomNumbers random numbers
|
|||
std::vector<float> randomBuffer(threadCount * randomNumbers); |
|||
std::vector<float> seedBuffer(threadCount); |
|||
for (int i = 0; i < threadCount; ++i) |
|||
{ |
|||
seedBuffer[i] = rand(); |
|||
} |
|||
|
|||
cl_mem randomBufferDev; |
|||
randomBufferDev = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float)* threadCount * randomNumbers, NULL, &err); |
|||
if (!CheckCLError(err)) exit(-1); |
|||
cl_mem seedBufferDev; |
|||
seedBufferDev = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float) * threadCount, NULL, &err); |
|||
if (!CheckCLError(err)) exit(-1); |
|||
|
|||
clEnqueueWriteBuffer(queue, seedBufferDev, CL_TRUE, 0, sizeof(float) * threadCount, seedBuffer.data(), 0, NULL, NULL); |
|||
|
|||
// Set the kernel paramateres
|
|||
clSetKernelArg(kernel, 0, sizeof(int), &randomNumbers); |
|||
clSetKernelArg(kernel, 1, sizeof(cl_mem), &seedBufferDev); |
|||
clSetKernelArg(kernel, 2, sizeof(cl_mem), &randomBufferDev); |
|||
// Enqueue the kernel: threadCount threads in total, each generating randomNumbers random numbers in [0,1]
|
|||
clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &threadCount, NULL, 0, NULL, NULL); |
|||
// Copy the result back to the host
|
|||
clEnqueueReadBuffer(queue, randomBufferDev, CL_TRUE, 0, sizeof(float) * threadCount * randomNumbers, randomBuffer.data(), 0, NULL, NULL); |
|||
|
|||
// Write out the output
|
|||
if(writeOutRandoms == true) |
|||
{ |
|||
std::ofstream ofs("randoms.txt", std::ofstream::out); |
|||
for (int i = 0; i < threadCount; ++i) { |
|||
for (int j = 0; j < randomNumbers; ++j) { |
|||
ofs << j << " " << randomBuffer[i + j * threadCount] << std::endl; |
|||
} |
|||
ofs << std::endl; |
|||
} |
|||
ofs.close(); |
|||
} |
|||
|
|||
std::cout << "Finished" << std::endl; |
|||
} |
|||
|
|||
void cppapi() |
|||
{ |
|||
cl_int err = CL_SUCCESS; |
|||
|
|||
// Get a platform ID
|
|||
std::vector<cl::Platform> platforms; |
|||
cl::Platform::get(&platforms); |
|||
if (platforms.size() == 0) |
|||
{ |
|||
std::cout << "Unable to find suitable platform." << std::endl; |
|||
exit(-1); |
|||
} |
|||
|
|||
// Create a context
|
|||
cl_context_properties properties[] = |
|||
{ CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0 }; |
|||
cl::Context context(CL_DEVICE_TYPE_GPU, properties); |
|||
|
|||
// Enumerate the devices
|
|||
std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>(); |
|||
|
|||
// Create the command queue
|
|||
cl::Event event; |
|||
cl::CommandQueue queue(context, devices[0], 0, &err); |
|||
|
|||
// Create the OpenCL program
|
|||
std::string programSource = FileToString("../kernels/programs.cl"); |
|||
cl::Program program = cl::Program(context, programSource); |
|||
err = program.build(devices); |
|||
if (!CheckCLError(err)) |
|||
{ |
|||
for (size_t devID = 0; devID < devices.size(); ++devID) |
|||
{ |
|||
std::cout << "Device: " << devID << std::endl; |
|||
std::cout << "Build Status: " << program.getBuildInfo<CL_PROGRAM_BUILD_STATUS>(devices[devID]) << std::endl; |
|||
std::cout << "Build Options:\t" << program.getBuildInfo<CL_PROGRAM_BUILD_OPTIONS>(devices[devID]) << std::endl; |
|||
std::cout << "Build Log:\t " << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[devID]) << std::endl; |
|||
std::cout << "--------------------------------------------------" << std::endl; |
|||
} |
|||
} |
|||
|
|||
|
|||
// Get the kernel handle
|
|||
cl::Kernel kernel(program, "randomLCG", &err); |
|||
CheckCLError(err); |
|||
|
|||
// Allocate memory for random numbers and random seeds
|
|||
// Every thread receives a private seed, stored in seedBuffer/clSeedBuffer
|
|||
// Every thread is supposed to generate randomNumbers random numbers
|
|||
std::vector<float> randomBuffer(threadCount * randomNumbers); |
|||
std::vector<float> seedBuffer(threadCount); |
|||
for (int i = 0; i < threadCount; ++i) |
|||
{ |
|||
seedBuffer[i] = rand(); |
|||
} |
|||
|
|||
cl::Buffer clSeedBuffer = cl::Buffer(context, CL_MEM_READ_ONLY, sizeof(float) * threadCount, NULL, &err); |
|||
queue.enqueueWriteBuffer(clSeedBuffer, true, 0, sizeof(float) * threadCount, seedBuffer.data()); |
|||
|
|||
cl::Buffer clRandomBuffer = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * threadCount * randomNumbers, NULL, &err); |
|||
|
|||
// Set the kernel parameters
|
|||
kernel.setArg(0, randomNumbers); |
|||
kernel.setArg(1, clSeedBuffer); |
|||
kernel.setArg(2, clRandomBuffer); |
|||
|
|||
// Enqueue the kernel: threadCount threads in total, each generating random numbers in [0,1] randomNumbers times
|
|||
queue.enqueueNDRangeKernel(kernel, |
|||
cl::NullRange, |
|||
cl::NDRange(threadCount, 1), |
|||
cl::NullRange, |
|||
NULL, |
|||
&event); |
|||
event.wait(); |
|||
|
|||
// Copy result back to host
|
|||
queue.enqueueReadBuffer(clRandomBuffer, true, 0, sizeof(float) * threadCount * randomNumbers, randomBuffer.data()); |
|||
|
|||
// Write out the output to file as "index value" rows
|
|||
if(writeOutRandoms == true) |
|||
{ |
|||
std::ofstream ofs("randoms2.txt", std::ofstream::out); |
|||
for (int i = 0; i < threadCount; ++i) { |
|||
for (int j = 0; j < randomNumbers; ++j) { |
|||
ofs << j << " " << randomBuffer[i + j * threadCount] << std::endl; |
|||
} |
|||
} |
|||
ofs.close(); |
|||
} |
|||
|
|||
std::cout << "Finished" << std::endl; |
|||
} |
|||
|
|||
int main() |
|||
{ |
|||
capi(); |
|||
cppapi(); |
|||
return 0; |
|||
} |
|||
|
@ -0,0 +1,87 @@ |
|||
<?xml version="1.0" encoding="utf-8"?> |
|||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> |
|||
<ItemGroup Label="ProjectConfigurations"> |
|||
<ProjectConfiguration Include="Debug|Win32"> |
|||
<Configuration>Debug</Configuration> |
|||
<Platform>Win32</Platform> |
|||
</ProjectConfiguration> |
|||
<ProjectConfiguration Include="Release|Win32"> |
|||
<Configuration>Release</Configuration> |
|||
<Platform>Win32</Platform> |
|||
</ProjectConfiguration> |
|||
</ItemGroup> |
|||
<PropertyGroup Label="Globals"> |
|||
<ProjectGuid>{4B53AC9F-C842-4E17-93EB-1362CFD08104}</ProjectGuid> |
|||
<RootNamespace>MonteCarlo</RootNamespace> |
|||
<WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion> |
|||
</PropertyGroup> |
|||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> |
|||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> |
|||
<ConfigurationType>Application</ConfigurationType> |
|||
<UseDebugLibraries>true</UseDebugLibraries> |
|||
<CharacterSet>MultiByte</CharacterSet> |
|||
<PlatformToolset>v143</PlatformToolset> |
|||
</PropertyGroup> |
|||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> |
|||
<ConfigurationType>Application</ConfigurationType> |
|||
<UseDebugLibraries>false</UseDebugLibraries> |
|||
<WholeProgramOptimization>true</WholeProgramOptimization> |
|||
<CharacterSet>MultiByte</CharacterSet> |
|||
<PlatformToolset>v100</PlatformToolset> |
|||
</PropertyGroup> |
|||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> |
|||
<ImportGroup Label="ExtensionSettings"> |
|||
</ImportGroup> |
|||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> |
|||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
|||
</ImportGroup> |
|||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> |
|||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> |
|||
</ImportGroup> |
|||
<PropertyGroup Label="UserMacros" /> |
|||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> |
|||
<OutDir>$(SolutionDir)bin\</OutDir> |
|||
</PropertyGroup> |
|||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> |
|||
<ClCompile> |
|||
<WarningLevel>Level3</WarningLevel> |
|||
<Optimization>Disabled</Optimization> |
|||
<AdditionalIncludeDirectories>$(CUDA_PATH)\include</AdditionalIncludeDirectories> |
|||
</ClCompile> |
|||
<Link> |
|||
<GenerateDebugInformation>true</GenerateDebugInformation> |
|||
<AdditionalLibraryDirectories>$(CUDA_PATH)\lib\Win32\</AdditionalLibraryDirectories> |
|||
<AdditionalDependencies>OpenCL.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies> |
|||
</Link> |
|||
</ItemDefinitionGroup> |
|||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> |
|||
<ClCompile> |
|||
<WarningLevel>Level3</WarningLevel> |
|||
<Optimization>MaxSpeed</Optimization> |
|||
<FunctionLevelLinking>true</FunctionLevelLinking> |
|||
<IntrinsicFunctions>true</IntrinsicFunctions> |
|||
<AdditionalIncludeDirectories>$(CUDA_PATH)\include</AdditionalIncludeDirectories> |
|||
</ClCompile> |
|||
<Link> |
|||
<GenerateDebugInformation>true</GenerateDebugInformation> |
|||
<EnableCOMDATFolding>true</EnableCOMDATFolding> |
|||
<OptimizeReferences>true</OptimizeReferences> |
|||
<AdditionalLibraryDirectories>$(CUDA_PATH)\lib\Win32\</AdditionalLibraryDirectories> |
|||
<AdditionalDependencies>OpenCL.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies> |
|||
</Link> |
|||
</ItemDefinitionGroup> |
|||
<ItemGroup> |
|||
<ClInclude Include="cl.hpp" /> |
|||
<ClInclude Include="Common.h" /> |
|||
</ItemGroup> |
|||
<ItemGroup> |
|||
<ClCompile Include="MonteCarlo.cpp" /> |
|||
</ItemGroup> |
|||
<ItemGroup> |
|||
<None Include="..\kernels\mersenneTwister.cl" /> |
|||
<None Include="..\kernels\montecarlo.cl" /> |
|||
</ItemGroup> |
|||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> |
|||
<ImportGroup Label="ExtensionTargets"> |
|||
</ImportGroup> |
|||
</Project> |
@ -0,0 +1,41 @@ |
|||
<?xml version="1.0" encoding="utf-8"?> |
|||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> |
|||
<ItemGroup> |
|||
<Filter Include="Source Files"> |
|||
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier> |
|||
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions> |
|||
</Filter> |
|||
<Filter Include="Header Files"> |
|||
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier> |
|||
<Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions> |
|||
</Filter> |
|||
<Filter Include="Resource Files"> |
|||
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier> |
|||
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions> |
|||
</Filter> |
|||
<Filter Include="Kernels"> |
|||
<UniqueIdentifier>{fd13ccc5-a98b-4e30-9eba-12f62c7dd566}</UniqueIdentifier> |
|||
</Filter> |
|||
</ItemGroup> |
|||
<ItemGroup> |
|||
<ClInclude Include="cl.hpp"> |
|||
<Filter>Header Files</Filter> |
|||
</ClInclude> |
|||
<ClInclude Include="Common.h"> |
|||
<Filter>Header Files</Filter> |
|||
</ClInclude> |
|||
</ItemGroup> |
|||
<ItemGroup> |
|||
<ClCompile Include="MonteCarlo.cpp"> |
|||
<Filter>Source Files</Filter> |
|||
</ClCompile> |
|||
</ItemGroup> |
|||
<ItemGroup> |
|||
<None Include="..\kernels\montecarlo.cl"> |
|||
<Filter>Kernels</Filter> |
|||
</None> |
|||
<None Include="..\kernels\mersenneTwister.cl"> |
|||
<Filter>Kernels</Filter> |
|||
</None> |
|||
</ItemGroup> |
|||
</Project> |
File diff suppressed because it is too large
@ -0,0 +1,75 @@ |
|||
/* -*- mode: c++ -*- */ |
|||
|
|||
/* |
|||
|
|||
Some parts are derived from NVIDIA's MersenneTwister SDK example. |
|||
|
|||
*/ |
|||
|
|||
typedef struct{ |
|||
unsigned int matrix_a; |
|||
unsigned int mask_b; |
|||
unsigned int mask_c; |
|||
unsigned int seed; |
|||
} mt_struct_stripped; |
|||
|
|||
#define MT_RNG_COUNT 4096 |
|||
#define MT_MM 9 |
|||
#define MT_NN 19 |
|||
#define MT_WMASK 0xFFFFFFFFU |
|||
#define MT_UMASK 0xFFFFFFFEU |
|||
#define MT_LMASK 0x1U |
|||
#define MT_SHIFT0 12 |
|||
#define MT_SHIFTB 7 |
|||
#define MT_SHIFTC 15 |
|||
#define MT_SHIFT1 18 |
|||
#define PI 3.14159265358979f |
|||
|
|||
__kernel void MersenneTwister(__global float* d_Rand, |
|||
__global mt_struct_stripped* d_MT, |
|||
int nPerRng) |
|||
{ |
|||
int globalID = get_global_id(0); |
|||
|
|||
int iState, iState1, iStateM, iOut; |
|||
unsigned int mti, mti1, mtiM, x; |
|||
unsigned int mt[MT_NN], matrix_a, mask_b, mask_c; |
|||
|
|||
//Load bit-vector Mersenne Twister parameters |
|||
matrix_a = d_MT[globalID].matrix_a; |
|||
mask_b = d_MT[globalID].mask_b; |
|||
mask_c = d_MT[globalID].mask_c; |
|||
|
|||
//Initialize current state |
|||
mt[0] = d_MT[globalID].seed; |
|||
for (iState = 1; iState < MT_NN; iState++) |
|||
mt[iState] = (1812433253U * (mt[iState - 1] ^ (mt[iState - 1] >> 30)) + iState) & MT_WMASK; |
|||
|
|||
iState = 0; |
|||
mti1 = mt[0]; |
|||
for (iOut = 0; iOut < nPerRng; iOut++) { |
|||
iState1 = iState + 1; |
|||
iStateM = iState + MT_MM; |
|||
if(iState1 >= MT_NN) iState1 -= MT_NN; |
|||
if(iStateM >= MT_NN) iStateM -= MT_NN; |
|||
mti = mti1; |
|||
mti1 = mt[iState1]; |
|||
mtiM = mt[iStateM]; |
|||
|
|||
// MT recurrence |
|||
x = (mti & MT_UMASK) | (mti1 & MT_LMASK); |
|||
x = mtiM ^ (x >> 1) ^ ((x & 1) ? matrix_a : 0); |
|||
|
|||
mt[iState] = x; |
|||
iState = iState1; |
|||
|
|||
//Tempering transformation |
|||
x ^= (x >> MT_SHIFT0); |
|||
x ^= (x << MT_SHIFTB) & mask_b; |
|||
x ^= (x << MT_SHIFTC) & mask_c; |
|||
x ^= (x >> MT_SHIFT1); |
|||
|
|||
//Convert to (0, 1] float and write to global memory |
|||
d_Rand[globalID + iOut * MT_RNG_COUNT] = ((float)x + 1.0f) / 4294967296.0f; |
|||
} |
|||
} |
Binary file not shown.
@ -0,0 +1,150 @@ |
|||
/* -*- mode: c++ -*- */ |
|||
|
|||
// Linear Congruential Generator |
|||
uint stepLCG(uint *z, uint A, uint C){ |
|||
return (*z) = (A * (*z) + C); |
|||
} |
|||
|
|||
__kernel |
|||
void randomLCG(const int randomNumbers, __global float* randomsSeed, __global float* randomGPU){ |
|||
int id = get_global_id(0); |
|||
int maxID = get_global_size(0); |
|||
|
|||
uint rng = randomsSeed[id]; |
|||
for(int i=0; i < randomNumbers; ++i){ |
|||
randomGPU[id + i * maxID] = (float)stepLCG(&rng, 1664525, 1013904223UL) / 0xffffffff; |
|||
} |
|||
} |
|||
|
|||
uint stepLFG(uint *z, __global uint *znmk, uint A, uint C){ |
|||
return (*znmk) = (*z) = (A * (*z) + C) + (*znmk); |
|||
} |
|||
|
|||
// Lagged Fibonacci Generator |
|||
__kernel |
|||
void randomLFG(const int randomNumbers, __global float* randomsSeed, const int randomStateSize, __global uint* randomState, __global float* randomGPU){ |
|||
int id = get_global_id(0); |
|||
int maxID = get_global_size(0); |
|||
|
|||
// bootstrap |
|||
uint rng = randomsSeed[id]; |
|||
for(int i=0; i < randomStateSize; ++i){ |
|||
randomState[id + i * maxID] = stepLCG(&rng, 1664525, 1013904223UL); |
|||
} |
|||
|
|||
// Lagged Fibonacci Generator |
|||
int nmkIndex = 0; |
|||
for(int i=0; i < randomNumbers; ++i){ |
|||
randomGPU[id + i * maxID] = (float)stepLFG(&rng, &randomState[nmkIndex], 1664525, 1013904223UL) / 0xffffffff; |
|||
nmkIndex = (nmkIndex + 1) % randomStateSize; |
|||
} |
|||
|
|||
} |
|||
|
|||
// Combined Tausworthe Generator |
|||
uint stepCTG(uint *z, uint S1, uint S2, uint S3, uint M){ |
|||
uint b=((((*z)<<S1)^(*z))>>S2); |
|||
return (*z) = ((((*z)&M)<<S3)^b); |
|||
} |
|||
|
|||
__kernel |
|||
void randomCTG(const int randomNumbers, __global float2* randomsSeed, __global float* randomGPU){ |
|||
int id = get_global_id(0); |
|||
int maxID = get_global_size(0); |
|||
|
|||
uint rng1 = randomsSeed[id].x; |
|||
uint rng2 = randomsSeed[id].y; |
|||
for(int i=0; i < randomNumbers; ++i){ |
|||
randomGPU[id + i * maxID] = (float)(stepCTG(&rng1, 13, 19, 12, 4294967294UL) ^ |
|||
stepCTG(&rng2, 2, 25, 4, 4294967288UL)) / 0xffffffff; |
|||
} |
|||
} |
|||
|
|||
// Hybrid RNG |
|||
float stepHybrid(uint* rng1, uint* rng2, uint* rng3, uint* rng4){ |
|||
return 2.3283064365387e-10 * ( |
|||
stepCTG(rng1, 13, 19, 12, 4294967294UL) ^ |
|||
stepCTG(rng2, 2, 25, 4, 4294967288UL) ^ |
|||
stepCTG(rng3, 3, 11, 17, 4294967280UL) ^ |
|||
stepLCG(rng4,1664525,1013904223UL) |
|||
); |
|||
} |
|||
|
|||
__kernel |
|||
void hybridRNG(const int randomNumbers, __global float* randomsSeed, __global float* randomGPU){ |
|||
int id = get_global_id(0); |
|||
int maxID = get_global_size(0); |
|||
|
|||
uint rng1 = randomsSeed[id * 4 + 0]; |
|||
uint rng2 = randomsSeed[id * 4 + 1]; |
|||
uint rng3 = randomsSeed[id * 4 + 2]; |
|||
uint rng4 = randomsSeed[id * 4 + 3]; |
|||
|
|||
for(int i = 0; i < randomNumbers; ++i){ |
|||
randomGPU[id + i * maxID] = (float)stepHybrid(&rng1, &rng2, &rng3, &rng4); |
|||
} |
|||
} |
|||
|
|||
// Halton sequence |
|||
float stepHalton(float *value, float inv_base){ |
|||
float r = 1.0 - (*value) - 0.0000000001; |
|||
if(inv_base < r) { |
|||
(*value) += inv_base; |
|||
} else { |
|||
float h = inv_base, hh; |
|||
do{ |
|||
hh = h; |
|||
h *= inv_base; |
|||
} while (h >= r); |
|||
(*value) += hh + h - 1.0; |
|||
} |
|||
return (*value); |
|||
} |
|||
|
|||
void seedHalton(ulong i, int base, float* inv_base, float* value){ |
|||
float f = (*inv_base) = 1.0/base; |
|||
(*value) = 0.0; |
|||
while( i > 0){ |
|||
(*value) += f * (float)(i % base); |
|||
i /= base; |
|||
f *= (*inv_base); |
|||
} |
|||
} |
|||
|
|||
__kernel |
|||
void haltonSequence(const int randomNumbers, const int base, __global float* randomGPU){ |
|||
int id = get_global_id(0); |
|||
int maxID = get_global_size(0); |
|||
|
|||
float inv_base = 0.0; |
|||
float rng = 0.0; |
|||
seedHalton(id * randomNumbers, base, &inv_base, &rng); |
|||
|
|||
for(int i=0; i < randomNumbers; ++i){ |
|||
randomGPU[id + i * maxID] = stepHalton(&rng, inv_base); |
|||
} |
|||
} |
|||
|
|||
// 1D uniformity test |
|||
// TODO |
|||
// Generate a quantized histogram |
|||
// randomNums = number of randoms per thread |
|||
// randoms = array of random numbers |
|||
// bucketNum = number of histogram buckets |
|||
// buckets = array of histogram buckets |
|||
__kernel |
|||
void testUniform1D(const int randomNums, __global float* randoms, const int bucketNum, __global int* buckets){ |
|||
|
|||
} |
|||
|
|||
// 1D Monte-Carlo integral |
|||
// TODO |
|||
// Implement a Monte Carlo integrator: sin(x) ; x := [0:PI/2] |
|||
// sampleNumber = number of samples per thread |
|||
// seed = float4 seed array for the random number generator |
|||
// integral = partial integral |
|||
#define M_PIP2 1.57796327f |
|||
__kernel |
|||
void mcInt1D(const int sampleNumber, __global float4* seed, __global float* integral){ |
|||
|
|||
} |
Loading…
Reference in new issue