Presentation is loading. Please wait.

Presentation is loading. Please wait.

GPGPU Labor 8.. CLWrapper OpenCL Framework primitívek – ClWrapper(cl_device_type _device_type); – cl_device_id device_id() – cl_context context() – cl_command_queue.

Similar presentations


Presentation on theme: "GPGPU Labor 8.. CLWrapper OpenCL Framework primitívek – ClWrapper(cl_device_type _device_type); – cl_device_id device_id() – cl_context context() – cl_command_queue."— Presentation transcript:

1 GPGPU Labor 8.

2 CLWrapper OpenCL Framework primitívek – ClWrapper(cl_device_type _device_type); – cl_device_id device_id() – cl_context context() – cl_command_queue cqueue() – char* getPlatformInfo(cl_platform_info paramName) – void* getDeviceInfo(cl_device_info paramName) – cl_program createProgram(const char* fileName) – cl_kernel createKernel(cl_program program, const char* kernelName) – void printOpenCLInfo()

3 Kernel futási idő void printTimeStats(cl_event event){ cl_int err = CL_SUCCESS; if(event == NULL) { std::cerr << "No event object returned!" << std::endl; } else { clWaitForEvents(1, &event); } cl_ulong execStart, execEnd; err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &execStart, NULL); err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &execEnd, NULL); std::cout << "[start] " << execStart << " [end] " << execEnd << " [time] " << (execEnd - execStart) / 1e+06 << "ms." << std::endl; }

4 Párhuzamos primitívek Map Reduce Scan Histogram Compact

5 Map // TODO // // ID := get_global_id(0) // data[ID] := square(data[ID]) __kernel void map(__global float* data) { }

6 Reduce // TODO // // ID := get_global_id(0) // // FOR s = get_global_size(0) / 2 ; s > 0 ; s >>= 1 DO: // IF (ID < s) // data[ID] = max(data[ID], data[ID + s]) // BARRIER // __kernel void reduce_global(__global float* data) { }

7 Scan (exclusive) // TODO // // ID := get_global_id(0) // IF ID > 0 THEN data[ID] = data[ID - 1] // ELSE data[ID] = 0 // BARRIER // // FOR s = 1; s < get_global_size(0); s *= 2 DO: // tmp := data[ID] // IF ( ID + s < get_global_size(0) ) THEN // data[ID + s] += tmp; // BARRIER // // IF(ID = 0) THEN data[ID] = 0; __kernel void exscan_global(__global int* data) { }

8 Histogram // TODO // // ID := get_global_id(0) // histogram[data[ID]] := histogram[data[ID]] + 1 // // SYNCHRONIZATION! __kernel void histogram_global(__global int* data, __global int* histogram) { }

9 Histogram (lokális) Lokális memória allokálása host oldalról clSetKernelArg(histogramLocalKernel, 0, sizeof(cl_mem), &gData); clSetKernelArg(histogramLocalKernel, 1, sizeof(cl_mem), &gHist); clSetKernelArg(histogramLocalKernel, 2, sizeof(int) * histogramSize, NULL); clSetKernelArg(histogramLocalKernel, 3, sizeof(int), &histogramSize);

10 Histogram // TODO // // ID := get_global_id(0) // LID := get_local_id(0) // // IF LID < histogramSize DO: // lhistogram[LID] := 0 // BARRIER // // Add data to local histogram // // BARRIER // // IF LID < histogramSize DO: // histogram[LID] = lhistogram[LID] __kernel void histogram_local(__global int* data, __global int* histogram, __local int* lhistogram, const int histogramSize) { }

11 Compact // TODO // ID := get_global_id(0) // IF data[ID] < 50 THEN // pred[ID] = 1 // ELSE // pred[ID] = 0 __kernel void compact_predicate(__global int* data, __global int* pred) { }

12 Compact // TODO // // exclusive scan pred to prefSum __kernel void compact_exscan(__global int* pred, __global int* prefSum) { }

13 Compact // TODO // // ID := get_global_id(0) // VALUE := data[ID] // BARRIER // IF pred[ID] == 1 THEN // data[prefSum[ID]] = VALUE __kernel void compact_compact(__global int* data, __global int* pred, __global int* prefSum) { }


Download ppt "GPGPU Labor 8.. CLWrapper OpenCL Framework primitívek – ClWrapper(cl_device_type _device_type); – cl_device_id device_id() – cl_context context() – cl_command_queue."

Similar presentations


Ads by Google