Download presentation
Presentation is loading. Please wait.
Published by Lea Quimby. Modified over 10 years ago.
1
GPGPU Labor 8.
2
CLWrapper OpenCL Framework primitívek
– ClWrapper(cl_device_type _device_type);
– cl_device_id device_id()
– cl_context context()
– cl_command_queue cqueue()
– char* getPlatformInfo(cl_platform_info paramName)
– void* getDeviceInfo(cl_device_info paramName)
– cl_program createProgram(const char* fileName)
– cl_kernel createKernel(cl_program program, const char* kernelName)
– void printOpenCLInfo()
3
Kernel futási idő void printTimeStats(cl_event event){ cl_int err = CL_SUCCESS; if(event == NULL) { std::cerr << "No event object returned!" << std::endl; } else { clWaitForEvents(1, &event); } cl_ulong execStart, execEnd; err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &execStart, NULL); err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &execEnd, NULL); std::cout << "[start] " << execStart << " [end] " << execEnd << " [time] " << (execEnd - execStart) / 1e+06 << "ms." << std::endl; }
4
Párhuzamos primitívek Map Reduce Scan Histogram Compact
5
// Map
//
// Squares every element of `data` in place:
//
//   ID       := get_global_id(0)
//   data[ID] := square(data[ID])
//
// Each work-item handles exactly the element at its own global index. The
// kernel has no length parameter and does no bounds check, so the host has to
// enqueue no more work-items than `data` has elements.
__kernel void map(__global float* data)
{
    const size_t id = get_global_id(0);
    const float value = data[id];

    data[id] = value * value;
}
6
// Reduce
//
// In-place maximum reduction over global memory; the result ends up in data[0].
//
//   ID := get_global_id(0)
//
//   FOR s = get_global_size(0) / 2 ; s > 0 ; s >>= 1 DO:
//     IF (ID < s)
//       data[ID] = max(data[ID], data[ID + s])
//     BARRIER
//
// Limitations:
//  - barrier() only synchronises the work-items of one work-group, so the
//    result is only well defined when the whole NDRange is a single work-group.
//  - The halving scheme covers every element only when the global size is a
//    power of two.
//  - The contents of `data` beyond index 0 are partial results afterwards.
__kernel void reduce_global(__global float* data)
{
    const size_t id = get_global_id(0);

    for (size_t s = get_global_size(0) / 2; s > 0; s >>= 1) {
        if (id < s) {
            data[id] = max(data[id], data[id + s]);
        }
        // Outside the `if`: every work-item must reach the barrier in each round.
        barrier(CLK_GLOBAL_MEM_FENCE);
    }
}
7
// Scan (exclusive)
//
// In-place exclusive prefix sum of `data` over global memory.
//
//   ID := get_global_id(0)
//   IF ID > 0 THEN data[ID] = data[ID - 1]
//             ELSE data[ID] = 0
//   BARRIER
//
//   FOR s = 1; s < get_global_size(0); s *= 2 DO:
//     tmp := data[ID]
//     IF (ID + s < get_global_size(0)) THEN
//       data[ID + s] += tmp
//     BARRIER
//
//   IF (ID = 0) THEN data[ID] = 0
//
// Every "read a neighbour, then overwrite" step is split by an extra barrier
// (read - barrier - write - barrier). Without it a work-item could read a slot
// that its neighbour has already updated in the same round.
//
// Limitation: barrier() only synchronises the work-items of one work-group, so
// the whole NDRange has to be a single work-group for the result to be defined.
__kernel void exscan_global(__global int* data)
{
    const size_t id = get_global_id(0);
    const size_t size = get_global_size(0);

    // Shift right by one so that the inclusive scan below yields the exclusive result.
    const int shifted = (id > 0) ? data[id - 1] : 0;
    barrier(CLK_GLOBAL_MEM_FENCE);  // all reads of the original values are done
    data[id] = shifted;
    barrier(CLK_GLOBAL_MEM_FENCE);

    for (size_t s = 1; s < size; s *= 2) {
        const int tmp = data[id];
        barrier(CLK_GLOBAL_MEM_FENCE);  // everyone has read before anyone writes

        if (id + s < size) {
            data[id + s] += tmp;  // slot id + s is written by this work-item only
        }
        barrier(CLK_GLOBAL_MEM_FENCE);
    }

    // Slot 0 is never written by the loop; kept to mirror the reference algorithm.
    if (id == 0) {
        data[id] = 0;
    }
}
8
// Histogram
//
// Builds a histogram directly in global memory:
//
//   histogram[data[id]] := histogram[data[id]] + 1
//
// Several work-items may hit the same bin at the same time, so the increment
// is done with atomic_inc (built in from OpenCL 1.1 on) instead of a plain
// read-modify-write.
//
// NOTE(review): the kernel only adds to `histogram`, so it presumably has to
// be zeroed by the host before the launch, and every value in `data` has to be
// a valid bin index - neither is checked here; confirm against the host code.
__kernel void histogram_global(__global int* data, __global int* histogram)
{
    const size_t id = get_global_id(0);

    atomic_inc(&histogram[data[id]]);
}
9
Histogram (lokális) Lokális memória allokálása host oldalról clSetKernelArg(histogramLocalKernel, 0, sizeof(cl_mem), &gData); clSetKernelArg(histogramLocalKernel, 1, sizeof(cl_mem), &gHist); clSetKernelArg(histogramLocalKernel, 2, sizeof(int) * histogramSize, NULL); clSetKernelArg(histogramLocalKernel, 3, sizeof(int), &histogramSize);
10
// Histogram (local-memory variant)
//
// Accumulates the histogram in the work-group's local memory first and copies
// it to global memory at the end:
//
//   ID  := get_global_id(0)
//   LID := get_local_id(0)
//
//   IF LID < histogramSize DO:
//     lhistogram[LID] := 0
//   BARRIER
//
//   Add data to local histogram
//
//   BARRIER
//
//   IF LID < histogramSize DO:
//     histogram[LID] = lhistogram[LID]
//
// Parameters:
//   data          - input values; each one is used as a bin index.
//   histogram     - output bins in global memory.
//   lhistogram    - scratch bins in local memory (histogramSize ints).
//   histogramSize - number of bins.
//
// Initialisation and write-back use a loop strided by the work-group size.
// When the work-group has at least histogramSize work-items this is the same
// as the plain `LID < histogramSize` test, but it also covers all bins when
// the work-group is smaller.
//
// NOTE(review): the write-back is a plain store, as in the reference
// algorithm, so with more than one work-group the groups overwrite each
// other's results. Launch a single work-group, or switch the store to
// atomic_add on a zeroed `histogram` - confirm which the host expects.
__kernel void histogram_local(__global int* data,
                              __global int* histogram,
                              __local int* lhistogram,
                              const int histogramSize)
{
    const size_t id = get_global_id(0);
    const int lid = (int)get_local_id(0);
    const int lsize = (int)get_local_size(0);

    // Clear the local bins.
    for (int bin = lid; bin < histogramSize; bin += lsize) {
        lhistogram[bin] = 0;
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    // Several work-items may hit the same bin, hence the atomic increment.
    atomic_inc(&lhistogram[data[id]]);
    barrier(CLK_LOCAL_MEM_FENCE);

    // Publish the finished local bins to global memory.
    for (int bin = lid; bin < histogramSize; bin += lsize) {
        histogram[bin] = lhistogram[bin];
    }
}
11
// Compact, step 1: predicate
//
// Marks the elements that the compaction should keep:
//
//   ID := get_global_id(0)
//   IF data[ID] < 50 THEN
//     predicate = 1
//   ELSE
//     predicate = 0
//
// pred[ID] is written for every work-item, so `pred` needs no initialisation.
__kernel void compact_predicate(__global int* data, __global int* pred)
{
    const size_t id = get_global_id(0);

    pred[id] = (data[id] < 50) ? 1 : 0;
}
12
// Compact, step 2: exclusive scan of pred into prefSum
//
// After the kernel prefSum[ID] holds the sum of pred[0 .. ID-1].
//
// The scan runs in place on `prefSum`. Each round reads the current value,
// waits at a barrier, then adds it to the slot `s` positions further on, so
// no work-item reads a slot that was already updated in the same round.
//
// Limitation: barrier() only synchronises the work-items of one work-group, so
// the whole NDRange has to be a single work-group for the result to be defined.
// NOTE(review): assumes `pred` and `prefSum` are distinct buffers - confirm
// against the host code.
__kernel void compact_exscan(__global int* pred, __global int* prefSum)
{
    const size_t id = get_global_id(0);
    const size_t size = get_global_size(0);

    // Shifted copy: turns the inclusive scan below into an exclusive one.
    prefSum[id] = (id > 0) ? pred[id - 1] : 0;
    barrier(CLK_GLOBAL_MEM_FENCE);

    for (size_t s = 1; s < size; s *= 2) {
        const int tmp = prefSum[id];
        barrier(CLK_GLOBAL_MEM_FENCE);  // everyone has read before anyone writes

        if (id + s < size) {
            prefSum[id + s] += tmp;  // slot id + s is written by this work-item only
        }
        barrier(CLK_GLOBAL_MEM_FENCE);
    }
}
13
// Compact, step 3: scatter
//
// Moves every kept element to its final position at the front of `data`:
//
//   ID    := get_global_id(0)
//   VALUE := data[ID]
//   BARRIER
//   IF pred[ID] == 1 THEN
//     data[prefSum[ID]] = VALUE
//
// The value is read before the barrier because the scatter happens in place:
// the target slot of one work-item is the source slot of another.
//
// Limitation: barrier() only synchronises the work-items of one work-group, so
// the whole NDRange has to be a single work-group for the result to be defined.
// Slots of `data` beyond the number of kept elements keep their old contents.
__kernel void compact_compact(__global int* data, __global int* pred, __global int* prefSum)
{
    const size_t id = get_global_id(0);
    const int value = data[id];

    barrier(CLK_GLOBAL_MEM_FENCE);  // all source values are read before any write

    if (pred[id] == 1) {
        data[prefSum[id]] = value;
    }
}
Similar presentations
© 2025 SlidePlayer.com. Inc.
All rights reserved.