gpgpu labor viii

23
GPGPU labor VIII. OpenCL bevezetés

Upload: gasha

Post on 20-Jan-2016

42 views

Category:

Documents


0 download

DESCRIPTION

GPGPU labor VIII. OpenCL bevezetés. Kezdeti teendők. Tantárgy honlapja, OpenCL bevezetés II. A labor kiindulási alapjának letöltése (lab8_base.zip), kitömörítés a D:\GPGPU\ könyvtárba. D:\GPGPU\labs\lab8\lab8_opencl\lab8_opencl.sln indítása - PowerPoint PPT Presentation

TRANSCRIPT

Page 1: GPGPU labor  VIII

GPGPU labor VIII.

OpenCL bevezetés

Page 2: GPGPU labor  VIII

Kezdeti teendők

• Tantárgy honlapja, OpenCL bevezetés II.

• A labor kiindulási alapjának letöltése (lab8_base.zip), kitömörítés a D:\GPGPU\ könyvtárba

• D:\GPGPU\labs\lab8\lab8_opencl\lab8_opencl.sln indítása

• Project tulajdonságai – Configuration Properties – Debugging – Working Directory = $(ProjectDir)\..\..\bin

Page 3: GPGPU labor  VIII

Platform

// OpenCL platform
cl_platform_id platform;

/// Fetches one attribute of an OpenCL platform as a raw byte buffer.
///
/// @param platform   platform to query
/// @param paramName  which attribute to read (e.g. CL_PLATFORM_VERSION)
/// @return buffer obtained from malloc() holding the attribute value;
///         the caller owns it and must free() it.
char* getPlatformInfo(cl_platform_id platform, cl_platform_info paramName)
{
    // First query: no destination buffer, only ask how many bytes are needed.
    size_t byteCount = 0;
    CL_SAFE_CALL( clGetPlatformInfo(platform, paramName, 0, NULL, &byteCount) );

    // Second query: fetch the value into a buffer of exactly that size.
    char* result = static_cast<char*>(malloc(byteCount));
    CL_SAFE_CALL( clGetPlatformInfo(platform, paramName, byteCount, result, NULL) );

    return result;
}

/// Picks the first OpenCL platform reported by the runtime and prints its
/// version string to stdout.
///
/// @return the selected platform id
cl_platform_id createPlatform()
{
    cl_platform_id platform;
    CL_SAFE_CALL( clGetPlatformIDs(1, &platform, NULL) );

    // getPlatformInfo() returns a malloc'ed buffer; it has to be released
    // here, otherwise every call leaks the version string.
    char* version = getPlatformInfo(platform, CL_PLATFORM_VERSION);
    std::cout << version << std::endl;
    free(version);

    return platform;
}

Page 4: GPGPU labor  VIII

OpenCL eszközök

// OpenCL devices of the platform
cl_device_id device_id;

/// Fetches one attribute of an OpenCL device as an untyped byte buffer.
///
/// @param device_id  device to query
/// @param paramName  which attribute to read (e.g. CL_DEVICE_MAX_COMPUTE_UNITS)
/// @return buffer obtained from malloc() holding the attribute value; the
///         caller casts it to the type matching paramName and must free() it.
void* getDeviceInfo(cl_device_id device_id, cl_device_info paramName)
{
    // Size-only query first, then the real read into a fitting buffer.
    size_t byteCount = 0;
    CL_SAFE_CALL( clGetDeviceInfo(device_id, paramName, 0, NULL, &byteCount) );

    char* result = static_cast<char*>(malloc(byteCount));
    CL_SAFE_CALL( clGetDeviceInfo(device_id, paramName, byteCount, result, NULL) );

    return result;
}

/// Picks the first device of the requested type on the given platform and
/// prints its number of compute units to stdout.
///
/// @param platform  platform whose devices are enumerated
/// @param type      device class to look for (e.g. CL_DEVICE_TYPE_GPU)
/// @return the selected device id
cl_device_id createDevice(cl_platform_id platform, cl_device_type type)
{
    // Local on purpose: this shadows the file-level device_id; the caller
    // assigns the returned value to the global.
    cl_device_id device_id;
    CL_SAFE_CALL( clGetDeviceIDs(platform, type, 1, &device_id, NULL) );

    // getDeviceInfo() returns a malloc'ed buffer; free it after printing so
    // the query does not leak.
    cl_uint* max_compute_units = (cl_uint*)getDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS);
    std::cout << "Max compute units: " << *max_compute_units << std::endl;
    free(max_compute_units);

    return device_id;
}

Page 5: GPGPU labor  VIII

Kontextus// OpenCL contextcl_context context;cl_context createContext(cl_device_id device_id){ cl_context context = 0; context = clCreateContext(0, 1, &device_id, NULL,

NULL, NULL); if(!context){ std::cerr << "Context creation failed!\n"; exit(EXIT_FAILURE); }

return context;}

Page 6: GPGPU labor  VIII

Parancs sor// OpenCL command queuecl_command_queue commands;cl_command_queue createCommandQueue(cl_context

context, cl_device_id device){ cl_command_queue command_queue = 0; command_queue = clCreateCommandQueue(context,

device_id, 0, NULL); if(!command_queue){ std::cerr << "Command queue creation failed!\

n"; }

return command_queue;}

Page 7: GPGPU labor  VIII

OpenCL program

// OpenCL program
cl_program program;

/// Reads a whole file into a freshly allocated, zero-terminated buffer.
///
/// @param path  file to read (opened in binary mode)
/// @param out   receives a buffer allocated with new char[]; the caller owns
///              it and must delete[] it. Left untouched on failure.
/// @param len   receives the number of bytes read, not counting the
///              terminating zero. Left untouched on failure.
/// @return true on success; false if the file cannot be opened, its size
///         cannot be determined or does not fit in an int, or the read
///         comes up short.
bool fileToString(const char* path, char*& out, int& len)
{
    // Open positioned at the end so tellg() reports the file size.
    std::ifstream file(path, std::ios::ate | std::ios::binary);
    if (!file.is_open()) {
        return false;
    }

    // tellg() yields -1 on failure; using that as a length would allocate
    // zero bytes and then write the terminator before the buffer.
    const std::streamoff size = file.tellg();
    const int length = static_cast<int>(size);
    if (size < 0 || static_cast<std::streamoff>(length) != size) {
        return false;
    }

    char* buffer = new char[static_cast<size_t>(length) + 1];
    file.seekg(0, std::ios::beg);
    file.read(buffer, static_cast<std::streamsize>(length));
    if (file.gcount() != static_cast<std::streamsize>(length)) {
        // Short read: do not hand a partially filled buffer to the caller.
        delete[] buffer;
        return false;
    }
    file.close();

    buffer[length] = 0;
    out = buffer;
    len = length;
    return true;
}

Page 8: GPGPU labor  VIII

OpenCL program

/// Loads an OpenCL C source file, creates a program from it and builds it.
///
/// @param context    context the program is created in
/// @param device_id  device whose build log is printed if the build fails
/// @param fileName   path of the source file
/// @return the built program. Any failure (file not readable, program
///         creation failed, build failed) is reported on stderr and
///         terminates the process.
cl_program createProgram(cl_context context, cl_device_id device_id, const char* fileName)
{
    char* programSource = NULL;
    int sourceLength = 0;
    if (!fileToString(fileName, programSource, sourceLength)) {
        std::cerr << "Error loading program: " << fileName << std::endl;
        exit(EXIT_FAILURE);
    }

    cl_program program = clCreateProgramWithSource(context, 1, (const char**)&programSource, NULL, NULL);
    // fileToString() allocates with new char[]; the runtime has loaded the
    // text into the program object, so the host copy can go now instead of
    // leaking.
    delete[] programSource;
    if (!program) {
        std::cerr << "Error: Failed to create compute program!" << std::endl;
        exit(EXIT_FAILURE);
    }

    cl_int err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (err != CL_SUCCESS) {
        std::cerr << "Error: Failed to build program executable!" << std::endl;

        // Ask for the log size first: a fixed-size buffer makes the query
        // fail for long logs and would leave uninitialised bytes to print.
        size_t logSize = 0;
        clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
        char* buildLog = (char*)malloc(logSize + 1);
        if (buildLog) {
            buildLog[0] = 0;
            clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, logSize, buildLog, NULL);
            buildLog[logSize] = 0;
            std::cerr << buildLog << std::endl;
            free(buildLog);
        }
        exit(1);
    }

    return program;
}

Page 9: GPGPU labor  VIII

OpenCL kernel

// OpenCL kernel

/// Looks up a kernel entry point by name in a built program.
///
/// @param program     program containing the kernel
/// @param kernelName  name of the __kernel function
/// @return the kernel object; if creation fails a message is written to
///         stderr and the process exits with status 1.
cl_kernel createKernel(cl_program program, const char* kernelName)
{
    cl_int status;
    cl_kernel created = clCreateKernel(program, kernelName, &status);

    // Success requires both a non-null handle and a CL_SUCCESS status.
    if (created && status == CL_SUCCESS) {
        return created;
    }

    std::cerr << "Error: Failed to create compute kernel!" << std::endl;
    exit(1);
}

Page 10: GPGPU labor  VIII

main() // OpenCL init platform = createPlatform(); device_id = createDevice(platform, CL_DEVICE_TYPE_GPU); context = createContext(device_id); commands = createCommandQueue(context, device_id); program = createProgram(context, device_id, "programs.cl");

// OpenCL processing

// OpenCL cleanup clReleaseProgram(program); clReleaseCommandQueue(commands); clReleaseContext(context); return 0;

Page 11: GPGPU labor  VIII

Globális címzés

// simple global address

/// Runs the "globalAddress" kernel over 1024 work items and writes the
/// resulting buffer, one value per line, to globalAddress.txt.
/// Uses the file-level program, context, commands and device_id.
void globalAddress()
{
    cl_kernel globalAddressKernel = createKernel(program, "globalAddress");

    const int data_size = 1024;
    float* data = (float*)malloc(sizeof(float) * data_size);
    cl_mem clData = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * data_size, NULL, NULL);
    CL_SAFE_CALL( clSetKernelArg(globalAddressKernel, 0, sizeof(cl_mem), &clData) );

    // Use the largest work-group size the kernel supports on this device.
    size_t workgroupSize = 0;
    CL_SAFE_CALL( clGetKernelWorkGroupInfo(globalAddressKernel, device_id,
                                           CL_KERNEL_WORK_GROUP_SIZE, sizeof(workgroupSize), &workgroupSize, NULL) );

    size_t workSize = data_size;
    CL_SAFE_CALL( clEnqueueNDRangeKernel(commands, globalAddressKernel,
                                         1, NULL, &workSize, &workgroupSize, 0, NULL, NULL) );

    clFinish(commands);

    CL_SAFE_CALL( clEnqueueReadBuffer(commands, clData, CL_TRUE, 0, sizeof(float) * data_size, data, 0, NULL, NULL) );

    // fopen() can fail (e.g. read-only working directory); writing through
    // a null FILE* would crash.
    FILE* outFile = fopen("globalAddress.txt", "w");
    if (outFile) {
        for (int i = 0; i < data_size; ++i) {
            fprintf(outFile, "%f\n", data[i]);
        }
        fclose(outFile);
    } else {
        std::cerr << "Error: Failed to open globalAddress.txt for writing!" << std::endl;
    }

    // The device buffer was never released in the original; free it
    // together with the kernel and the host copy.
    clReleaseMemObject(clData);
    clReleaseKernel(globalAddressKernel);
    free(data);
}

Page 12: GPGPU labor  VIII

Globális címzés (programs.cl)

// Each work item stores its own global index at the matching position.
// The host must launch no more work items than `data` has elements.
__kernel void globalAddress(__global float* data)
{
    const int gid = get_global_id(0);
    data[gid] = (float)gid;
}

Page 13: GPGPU labor  VIII

Globális címzés

Page 14: GPGPU labor  VIII

Lokális címzés

// local address

/// Runs the "localAddress" kernel over 1024 work items, using a quarter of
/// the maximum work-group size, and writes the resulting buffer, one value
/// per line, to localAddress.txt.
/// Uses the file-level program, context, commands and device_id.
void localAddress()
{
    cl_kernel localAddressKernel = createKernel(program, "localAddress");

    const int data_size = 1024;
    float* data = (float*)malloc(sizeof(float) * data_size);
    cl_mem clData = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * data_size, NULL, NULL);
    CL_SAFE_CALL( clSetKernelArg(localAddressKernel, 0, sizeof(cl_mem), &clData) );

    size_t workgroupSize = 0;
    CL_SAFE_CALL( clGetKernelWorkGroupInfo(localAddressKernel, device_id,
                                           CL_KERNEL_WORK_GROUP_SIZE, sizeof(workgroupSize), &workgroupSize, NULL) );

    // A quarter of the maximum gives several groups, so the local ids
    // visibly repeat. Clamp to 1: a reported maximum below 4 would
    // otherwise yield an invalid group size of 0.
    workgroupSize = workgroupSize / 4;
    if (workgroupSize == 0) {
        workgroupSize = 1;
    }

    size_t workSize = data_size;
    CL_SAFE_CALL( clEnqueueNDRangeKernel(commands, localAddressKernel,
                                         1, NULL, &workSize, &workgroupSize, 0, NULL, NULL) );

    clFinish(commands);

    CL_SAFE_CALL( clEnqueueReadBuffer(commands, clData, CL_TRUE, 0, sizeof(float) * data_size, data, 0, NULL, NULL) );

    // fopen() can fail; writing through a null FILE* would crash.
    FILE* outFile = fopen("localAddress.txt", "w");
    if (outFile) {
        for (int i = 0; i < data_size; ++i) {
            fprintf(outFile, "%f\n", data[i]);
        }
        fclose(outFile);
    } else {
        std::cerr << "Error: Failed to open localAddress.txt for writing!" << std::endl;
    }

    // Release the device buffer as well; the original leaked it.
    clReleaseMemObject(clData);
    clReleaseKernel(localAddressKernel);
    free(data);
}

Page 15: GPGPU labor  VIII

Lokális címzés (programs.cl)

// Each work item stores its index *within its work group* at the position
// group_id * group_size + local_id, i.e. the slot belonging to that item.
__kernel void localAddress(__global float* data)
{
    const int lid = get_local_id(0);
    const size_t slot = get_local_id(0) + get_group_id(0) * get_local_size(0);

    data[slot] = lid;
}

Page 16: GPGPU labor  VIII

Lokális címzés

Page 17: GPGPU labor  VIII

2D címzés

// 2D address

/// Runs the "address2D" kernel over a 1024 x 1024 grid in 8 x 8 work groups
/// and writes the four components of every result element to 2DAddress.txt.
/// Uses the file-level program, context and commands.
void address2D()
{
    cl_kernel address2DKernel = createKernel(program, "address2D");

    const int data_size[2] = {1024, 1024};
    const size_t elementCount = static_cast<size_t>(data_size[0]) * data_size[1];
    const size_t byteCount = sizeof(cl_float4) * elementCount;

    cl_float4* data = (cl_float4*)malloc(byteCount);
    cl_mem clData = clCreateBuffer(context, CL_MEM_WRITE_ONLY, byteCount, NULL, NULL);
    CL_SAFE_CALL( clSetKernelArg(address2DKernel, 0, sizeof(cl_mem), &clData) );

    size_t workgroupSize[2] = {8, 8};
    // Explicit casts: brace-initialising size_t from int is a narrowing
    // conversion that stricter compilers reject.
    size_t workSize[2] = { static_cast<size_t>(data_size[0]), static_cast<size_t>(data_size[1]) };
    CL_SAFE_CALL( clEnqueueNDRangeKernel(commands, address2DKernel,
                                         2, NULL, workSize, workgroupSize, 0, NULL, NULL) );

    clFinish(commands);

    CL_SAFE_CALL( clEnqueueReadBuffer(commands, clData, CL_TRUE, 0, byteCount, data, 0, NULL, NULL) );

    // fopen() can fail; writing through a null FILE* would crash.
    FILE* outFile = fopen("2DAddress.txt", "w");
    if (outFile) {
        for (size_t i = 0; i < elementCount; ++i) {
            fprintf(outFile, "G: [%f, %f] L: [%f, %f]\n",
                    data[i].s[0], data[i].s[1], data[i].s[2], data[i].s[3]);
        }
        fclose(outFile);
    } else {
        std::cerr << "Error: Failed to open 2DAddress.txt for writing!" << std::endl;
    }

    // Release the device buffer as well; the original leaked it.
    clReleaseMemObject(clData);
    clReleaseKernel(address2DKernel);
    free(data);
}

Page 18: GPGPU labor  VIII

2D címzés (programs.cl)

// Each work item records its own coordinates: (global x, global y,
// local x, local y). Elements are laid out row by row, one row per
// global y value.
__kernel void address2D(__global float4* data)
{
    const int gx = get_global_id(0);
    const int gy = get_global_id(1);
    const int lx = get_local_id(0);
    const int ly = get_local_id(1);

    const size_t slot = gx + get_global_size(0) * gy;
    data[slot] = (float4)(gx, gy, lx, ly);
}

Page 19: GPGPU labor  VIII

Adatfeldolgozás

// square

/// Uploads the values 0..1023, runs the "square" kernel on them, reads the
/// results back and prints how many differ from the square computed on the
/// host. Uses the file-level program, context, commands and device_id.
void square()
{
    cl_kernel squareKernel = createKernel(program, "square");

    const int data_size = 1024;

    // Host input and its device copy.
    float* inputData = (float*)malloc(sizeof(float) * data_size);
    for (int i = 0; i < data_size; ++i) {
        inputData[i] = i;
    }
    cl_mem clInputData = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * data_size, NULL, NULL);
    CL_SAFE_CALL( clEnqueueWriteBuffer(commands, clInputData,
                                       CL_TRUE, 0, sizeof(float) * data_size, inputData, 0, NULL, NULL) );

    // Host output and its device counterpart.
    float* data = (float*)malloc(sizeof(float) * data_size);
    cl_mem clData = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * data_size, NULL, NULL);

    CL_SAFE_CALL( clSetKernelArg(squareKernel, 0, sizeof(cl_mem), &clInputData) );
    CL_SAFE_CALL( clSetKernelArg(squareKernel, 1, sizeof(cl_mem), &clData) );
    CL_SAFE_CALL( clSetKernelArg(squareKernel, 2, sizeof(int), &data_size) );

    size_t workgroupSize = 0;
    CL_SAFE_CALL( clGetKernelWorkGroupInfo(squareKernel, device_id,
                                           CL_KERNEL_WORK_GROUP_SIZE, sizeof(workgroupSize), &workgroupSize, NULL) );

    size_t workSize = data_size;
    CL_SAFE_CALL( clEnqueueNDRangeKernel(commands, squareKernel,
                                         1, NULL, &workSize, &workgroupSize, 0, NULL, NULL) );

    clFinish(commands);

    CL_SAFE_CALL( clEnqueueReadBuffer(commands, clData, CL_TRUE, 0, sizeof(float) * data_size, data, 0, NULL, NULL) );

    // Exact comparison is intended: the inputs are integers below 1024, so
    // their squares are exactly representable as float.
    int wrong = 0;
    for (int i = 0; i < data_size; ++i) {
        if (data[i] != inputData[i] * inputData[i]) {
            wrong++;
        }
    }
    std::cout << "Wrong squares: " << wrong << std::endl;

    // Both device buffers were leaked in the original; release them along
    // with the kernel and the host arrays.
    clReleaseMemObject(clInputData);
    clReleaseMemObject(clData);
    clReleaseKernel(squareKernel);
    free(data);
    free(inputData);
}

Page 20: GPGPU labor  VIII

Adatfeldolgozás (programs.cl)

// Writes the square of every input element to the output buffer.
//   inputData  - values to square
//   outputData - receives inputData[i] * inputData[i]
//   data_size  - number of valid elements; work items with an id at or
//                beyond it do nothing
__kernel void square(__global float* inputData,
                     __global float* outputData,
                     const int data_size)
{
    int id = get_global_id(0);
    if (id < data_size) {
        outputData[id] = inputData[id] * inputData[id];
    }
}

Page 21: GPGPU labor  VIII

2D függvény kiértékelés

// 2D function

/// Runs the "function2D" kernel over a 1024 x 1024 grid (work-group size
/// left to the runtime) and writes the x, y, z components of every result
/// element to function2D.txt, one element per line.
/// Uses the file-level program, context and commands.
void function2D()
{
    cl_kernel function2DKernel = createKernel(program, "function2D");

    const int data_size[2] = {1024, 1024};
    const size_t elementCount = static_cast<size_t>(data_size[0]) * data_size[1];
    const size_t byteCount = sizeof(cl_float4) * elementCount;

    cl_float4* data = (cl_float4*)malloc(byteCount);
    cl_mem clData = clCreateBuffer(context, CL_MEM_WRITE_ONLY, byteCount, NULL, NULL);
    CL_SAFE_CALL( clSetKernelArg(function2DKernel, 0, sizeof(cl_mem), &clData) );

    // Explicit casts: brace-initialising size_t from int is a narrowing
    // conversion that stricter compilers reject.
    size_t workSize[2] = { static_cast<size_t>(data_size[0]), static_cast<size_t>(data_size[1]) };
    // Local size NULL: the implementation picks the work-group shape.
    CL_SAFE_CALL( clEnqueueNDRangeKernel(commands, function2DKernel,
                                         2, NULL, workSize, NULL, 0, NULL, NULL) );

    clFinish(commands);

    CL_SAFE_CALL( clEnqueueReadBuffer(commands, clData, CL_TRUE, 0, byteCount, data, 0, NULL, NULL) );

    // fopen() can fail; writing through a null FILE* would crash.
    FILE* outFile = fopen("function2D.txt", "w");
    if (outFile) {
        for (size_t i = 0; i < elementCount; ++i) {
            fprintf(outFile, "%f %f %f\n", data[i].x, data[i].y, data[i].z);
        }
        fclose(outFile);
    } else {
        std::cerr << "Error: Failed to open function2D.txt for writing!" << std::endl;
    }

    // Release the device buffer as well; the original leaked it.
    clReleaseMemObject(clData);
    clReleaseKernel(function2DKernel);
    free(data);
}

Page 22: GPGPU labor  VIII

2D függvény kiértékelés (programs.cl)

// Evaluates sin(x) * cos(y) on a regular grid. Each work item maps its
// global id to a point in [0, 6) x [0, 6) and stores
// (id.x, id.y, sin(x) * cos(y), 0) at its row-major position.
__kernel void function2D(__global float4* data)
{
    int2 id = (int2)(get_global_id(0), get_global_id(1));
    int2 globalSize = (int2)(get_global_size(0), get_global_size(1));

    // Both scale factors are float literals: the original used the double
    // literal 6.0 for x, which is inconsistent with y and pulls
    // double-precision arithmetic into an otherwise float-only kernel.
    float2 point = (float2)(id.x / (float)globalSize.x * 6.0f,
                            id.y / (float)globalSize.y * 6.0f);

    data[id.x + id.y * globalSize.x] = (float4)(id.x, id.y, sin(point.x) * cos(point.y), 0);
}

Page 23: GPGPU labor  VIII

2D függvény kiértékelés

• GNUPlot
  – splot 'function2D.txt' every 1000 using 1:2:3 with dots