Kernel¶ÔÏó£º
Kernel¾ÍÊÇÔÚ³ÌÐò´úÂëÖеÄÒ»¸öº¯Êý£¬Õâ¸öº¯ÊýÄÜÔÚOpenCLÉ豸ÉÏÖ´ÐС£Ò»¸öKernel¶ÔÏó¾ÍÊÇkernelº¯ÊýÒÔ¼°ÆäÏà¹ØµÄÊäÈë²ÎÊý¡£

Kernel¶ÔÏóͨ¹ý³ÌÐò¶ÔÏóÒÔ¼°Ö¸¶¨µÄº¯ÊýÃû×Ö´´½¨¡£×¢Ò⣺º¯Êý±ØÐëÊdzÌÐòÔ´´úÂëÖдæÔڵĺ¯Êý¡£

ÔËÐÐʱ±àÒ룺
ÔÚÔËÐÐʱ£¬±àÒë³ÌÐòºÍ´´½¨kernel¶ÔÏóÊÇÓÐʱ¼ä¿ªÏúµÄ£¬µ«ÕâÑù±È½ÏÁé»î£¬Äܹ»ÊÊÓ¦²»Í¬µÄOpenCLÓ²¼þƽ̨¡£³ÌÐò¶¯Ì¬±àÒëÒ»°ãÖ»ÐèÒ»´Î£¬¶øKernel¶ÔÏóÔÚ´´½¨ºó£¬¿ÉÒÔ·´¸´µ÷Óá£

´´½¨Kernelºó£¬ÔËÐÐKernel֮ǰ£¬ÎÒÃÇ»¹ÒªÎªKernel¶ÔÏóÉèÖòÎÊý¡£ÎÒÃÇ¿ÉÒÔÔÚKernelÔËÐкó£¬ÖØÐÂÉèÖòÎÊýÔÙ´ÎÔËÐС£

arg_indexÖ¸¶¨¸Ã²ÎÊýΪKernelº¯ÊýÖеĵڼ¸¸ö²ÎÊý(±ÈÈçµÚÒ»¸ö²ÎÊýΪ0£¬µÚ¶þ¸öΪ1,¡)¡£ÄÚ´æ¶ÔÏóºÍµ¥¸öµÄÖµ¶¼¿ÉÒÔ×÷ΪKernel²ÎÊý¡£ÏÂÃæÊÇ2¸öÉèÖÃKernel²ÎÊýµÄÀý×Ó£º
clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&d_iImage);
clSetKernelArg(kernel, 1, sizeof(int), (void*)&a); |

ÔÚKernelÔËÐÐ֮ǰ£¬ÎÒÃÇÏÈ¿´¿´OpenCLÖеÄÏ߳̽ṹ£º
´ó¹æÄ£²¢ÐгÌÐòÖУ¬Í¨³£Ã¿¸öÏ̴߳¦ÀíÒ»¸öÎÊÌâµÄÒ»²¿·Ö£¬±ÈÈçÏòÁ¿¼Ó·¨£¬ÎÒÃÇ»á°ÑÁ½¸öÏòÁ¿ÖжÔÓ¦µÄÔªËØ¼ÓÆðÀ´£¬ÕâÑù£¬Ã¿¸öÏ߳̿ÉÒÔ´¦ÀíÒ»¸ö¼Ó·¨¡£
ÏÂÃæÎÒ¿´Ò»¸ö16¸öÔªËØµÄÏòÁ¿¼Ó·¨£ºÁ½¸öÊäÈ뻺³åA¡¢B£¬Ò»¸öÊä³ö»º³åC

ÔÚÕâÖÖÇé¿öÏ£¬ÎÒÃÇ¿ÉÒÔ´´½¨Ò»Î¬µÄÏ߳̽ṹȥƥÅäÕâ¸öÎÊÌâ¡£

ÿ¸öḬ̈߳Ñ×Ô¼ºµÄÏß³Ìid×÷ΪË÷Òý£¬°ÑÏàÓ¦ÔªËØ¼ÓÆðÀ´¡£

OpenCLÖеÄÏ߳̽ṹÊÇ¿ÉËõ·ÅµÄ£¬KernelµÄÿ¸öÔËÐÐʵÀý³Æ×÷WorkItem(Ò²¾ÍÊÇỊ̈߳©£¬WorkItem×éÖ¯ÔÚÒ»Æð³Æ×÷WorkGroup£¬OpenCLÖУ¬Ã¿¸öWorkgroupÖ®¼ä¶¼ÊÇÏ໥¶ÀÁ¢µÄ¡£
ͨ¹ýÒ»¸öglobal id(ÔÚË÷Òý¿Õ¼ä£¬ËüÊÇΨһµÄ£©»òÕßÒ»¸öworkgroup idºÍÒ»¸öwork groupÄÚµÄlocal
id£¬ÎÒ¾ÍÄܱ궨һ¸öworkitem¡£

ÔÚkernelº¯ÊýÖУ¬ÎÒÃÇÄܹ»Í¨¹ýAPIµ÷Óõõ½global idÒÔ¼°ÆäËûÐÅÏ¢£º
get_global_id(dim)
get_global_size(dim) |
ÕâÁ½¸öº¯ÊýÄܵõ½Ã¿¸öά¶ÈÉϵÄglobal id¡£
get_group_id(dim)
get_num_groups(dim)
get_local_id(dim)
get_local_size(dim£©
|
Õ⼸¸öº¯ÊýÓÃÀ´¼ÆËãgroup idÒÔ¼°ÔÚgroupÄÚµÄlocal id¡£
get_global_id(0) = column, get_global_id(1) = row
get_num_groups(0) * get_local_size(0) == get_global_size(0) |
AMD OpenCL´óѧ¿Î³Ì(5)
OpenCLÄÚ´æÄ£ÐÍ
OpenCLµÄÄÚ´æÄ£ÐͶ¨ÒåÁ˸÷ÖÖ¸÷ÑùÄÚ´æÀàÐÍ£¬¸÷ÖÖÄÚ´æÄ£ÐÍÖ®¼äÓв㼶¹ØÏµ¡£¸÷ÖÖÄÚ´æÖ®¼äµÄÊý¾Ý´«Ê䱨ÐëÊÇÏÔʽ½øÐе쬱ÈÈç´Óhost
memoryµ½device memory£¬´Óglobal memoryµ½local memoryµÈµÈ¡£


WorkGroup±»Ó³Éäµ½Ó²¼þµÄCUÉÏÖ´ÐУ¨ÔÚAMD 5xxxϵÁÐÏÔ¿¨ÉÏ£¬CU¾ÍÊÇsimd£¬Ò»¸ösimdÖÐÓÐ16¸öpe£¬»òÕß˵ÊÇstream
core£©£¬OpenCL²¢²»Ìṩ¸÷¸öworkgroupÖ®¼äµÄÒ»ÖÂÐÔ£¬Èç¹ûÎÒÃÇÐèÒªÔÚ¸÷¸öworkgroupÖ®¼ä¹²ÏíÊý¾Ý»òÕßͨÐÅÖ®ÀàµÄ£¬Òª×Ô¼ºÍ¨¹ýÈí¼þʵÏÖ¡£
Kernelº¯ÊýµÄд·¨
ÿ¸öỊ̈߳¨workitem£©¶¼ÓÐÒ»¸ökenerlº¯ÊýµÄʵÀý¡£ÏÂÃæÎÒÃÇ¿´ÏÂkernelµÄд·¨£º
__kernel void vecadd(__global const float* A, __global const float* B, __global float* C)
2: {
3: int id = get_global_id(0);
4: C[id] = A[id] + B[id];
5: }
|
ÿ¸öKernelº¯Êý¶¼±ØÐëÒÔ__kernel¿ªÊ¼£¬¶øÇÒ±ØÐë·µ»Øvoid¡£Ã¿¸öÊäÈë²ÎÊý¶¼±ØÐëÉùÃ÷ʹÓõÄÄÚ´æÀàÐÍ¡£Í¨¹ýһЩAPI£¬±ÈÈçget_global_idÖ®ÀàµÄµÃµ½Ïß³Ìid¡£
ÄÚ´æ¶ÔÏóµØÖ·¿Õ¼ä±êʶ·ûÓÐÒÔϼ¸ÖÖ£º
__global ¨C memory allocated from global address space
__constant ¨C a special type of read-only memory
__local ¨C memory shared by a work-group
__private ¨C private per work-item memory
__read_only/__write_only ¨C used for images
|
Kernelº¯Êý²ÎÊýÈç¹ûÊÇÄÚ´æ¶ÔÏó£¬ÄÇôһ¶¨ÊÇ__global,__local»òÕßconstant¡£
ÔËÐÐKernel
Ê×ÏÈÒªÉèÖÃÏß³ÌË÷Òý¿Õ¼äµÄάÊýÒÔ¼°workgroup´óСµÈ¡£
ÎÒÃÇͨ¹ýº¯ÊýclEnqueueNDRangeKerne°ÑKernel·ÅÔÚÒ»¸ö¶ÓÁÐÀµ«²»±£Ö¤ËüÂíÉÏÖ´ÐУ¬OpenCL
driver»á¹ÜÀí¶ÓÁУ¬µ÷¶ÈKernelµÄÖ´ÐС£×¢Ò⣺ÿ¸öÏß³ÌÖ´ÐеĴúÂë¶¼ÊÇÏàͬµÄ£¬µ«ÊÇËüÃÇÖ´ÐÐÊý¾ÝÈ´ÊDz»Í¬µÄ¡£



¸Ãº¯Êý°ÑÒªÖ´ÐеÄKernelº¯Êý·ÅÔÚÖ¸¶¨µÄÃüÁî¶ÓÁÐÖУ¬globald´óС£¨Ïß³ÌË÷Òý¿Õ¼ä£©±ØÐëÖ¸¶¨£¬local´óС£¨work
group£©¿ÉÒÔÖ¸¶¨£¬Ò²¿ÉÒÔΪ¿Õ¡£Èç¹ûΪ¿Õ£¬Ôòϵͳ»á×Ô¶¯¸ù¾ÝÓ²¼þÑ¡ÔñºÏÊʵĴóС¡£event_wait_listÓÃÀ´Ñ¡¶¨Ò»Ð©events£¬Ö»ÓÐÕâЩeventsÖ´ÐÐÍêºó£¬¸Ãkernel²Å¿ÉÄܱ»Ö´ÐУ¬Ò²¾ÍÊÇͨ¹ýʼþ»úÖÆÀ´ÊµÏÖ²»Í¬kernelº¯ÊýÖ®¼äµÄͬ²½¡£
µ±Kernelº¯ÊýÖ´ÐÐÍê±Ïºó£¬ÎÒÃÇÒª°ÑÊý¾Ý´Ódevice memoryÖп½±´µ½host memoryÖÐÈ¥¡£


ÊÍ·Å×ÊÔ´£º
´ó¶àÊýµÄOpenCL×ÊÔ´¶¼ÊÇÖ¸Õ룬²»Ê¹ÓõÄʱºòÐèÒªÊͷŵô¡£µ±È»£¬³ÌÐò¹Ø±ÕµÄʱºòÕâЩ¶ÔÏóÒ²»á±»×Ô¶¯Êͷŵô¡£
ÊÍ·Å×ÊÔ´µÄº¯ÊýÊÇ£ºclRelase{Resource} £¬±ÈÈç: clReleaseProgram(),
clReleaseMemObject()µÈ¡£
´íÎó²¶×½£º
Èç¹ûOpenCLº¯ÊýÖ´ÐÐʧ°Ü£¬»á·µ»ØÒ»¸ö´íÎóÂ룬һ°ãÊǸö¸ºÖµ£¬·µ»Ø0Ôò±íʾִÐгɹ¦¡£ÎÒÃÇ¿ÉÒÔ¸ù¾Ý¸Ã´íÎóÂëÖªµÀʲôµØ·½³ö´íÁË£¬ÐèÒªÐ޸ġ£´íÎóÂëÔÚcl.hÖж¨Ò壬ÏÂÃæÊǼ¸¸ö´íÎóÂëµÄÀý×Ó.
CL_DEVICE_NOT_FOUND -1
CL_DEVICE_NOT_AVAILABLE -2
CL_COMPILER_NOT_AVAILABLE -3
CL_MEM_OBJECT_ALLOCATION_FAILURE -4
¡ |
ÏÂÃæÊÇÒ»¸öOpenCL»úÖÆµÄʾÒâͼ

³ÌÐòÄ£ÐÍ
Êý¾Ý²¢ÐУºwork itemºÍÄÚ´æ¶ÔÏóÔªËØÖ®¼äÊÇÒ»Ò»Ó³Éä¹ØÏµ£»workgroup¿ÉÒÔÏÔʾָ¶¨£¬Ò²¿ÉÒÔÒþʽָ¶¨¡£
ÈÎÎñ²¢ÐУºkernelµÄÖ´ÐжÀÁ¢ÓÚÏß³ÌË÷Òý¿Õ¼ä£»ÓÃÆäËû·½·¨±íʾ²¢ÐУ¬±ÈÈç°Ñ²»Í¬µÄÈÎÎñ·ÅÈë¶ÓÁУ¬ÓÃÉ豸ָ¶¨µÄÌØÊâµÄÏòÁ¿ÀàÐ͵ȵȡ£
ͬ²½£ºworkgroupÄÚwork itemÖ®¼äµÄͬ²½£»ÃüÁî¶ÓÁÐÖв»Í¬ÃüÁîÖ®¼äµÄͬ²½¡£
ÍêÕû´úÂëÈçÏ£º
#include "stdafx.h"
2: #include
3: #include
4: #include
5: #include
6: #include
7: #include
8:
9: using namespace std;
10: #define NWITEMS 262144
11:
12: #pragma comment (lib,"OpenCL.lib")
13:
14: //°ÑÎı¾Îļþ¶ÁÈëÒ»¸östringÖÐ
15: int convertToString(const char *filename, std::string& s)
16: {
17: size_t size;
18: char* str;
19:
20: std::fstream f(filename, (std::fstream::in | std::fstream::binary));
21:
22: if(f.is_open())
23: {
24: size_t fileSize;
25: f.seekg(0, std::fstream::end);
26: size = fileSize = (size_t)f.tellg();
27: f.seekg(0, std::fstream::beg);
28:
29: str = new char[size+1];
30: if(!str)
31: {
32: f.close();
33: return NULL;
34: }
35:
36: f.read(str, fileSize);
37: f.close();
38: str[size] = '\0';
39:
40: s = str;
41: delete[] str;
42: return 0;
43: }
44: printf("Error: Failed to open file %s\n", filename);
45: return 1;
46: }
47:
48: int main(int argc, char* argv[])
49: {
50: //ÔÚhostÄÚ´æÖд´½¨Èý¸ö»º³åÇø
51: float *buf1 = 0;
52: float *buf2 = 0;
53: float *buf = 0;
54:
55: buf1 =(float *)malloc(NWITEMS * sizeof(float));
56: buf2 =(float *)malloc(NWITEMS * sizeof(float));
57: buf =(float *)malloc(NWITEMS * sizeof(float));
58:
59: //³õʼ»¯buf1ºÍbuf2µÄÄÚÈÝ
60: int i;
61: srand( (unsigned)time( NULL ) );
62: for(i = 0; i < NWITEMS; i++)
63: buf1[i] = rand()%65535;
64:
65: srand( (unsigned)time( NULL ) +1000);
66: for(i = 0; i < NWITEMS; i++)
67: buf2[i] = rand()%65535;
68:
69: for(i = 0; i < NWITEMS; i++)
70: buf[i] = buf1[i] + buf2[i];
71:
72: cl_uint status;
73: cl_platform_id platform;
74:
75: //´´½¨Æ½Ì¨¶ÔÏó
76: status = clGetPlatformIDs( 1, &platform, NULL );
77:
78: cl_device_id device;
79:
80: //´´½¨GPUÉ豸
81: clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU,
82: 1,
83: &device,
84: NULL);
85: //´´½¨context
86: cl_context context = clCreateContext( NULL,
87: 1,
88: &device,
89: NULL, NULL, NULL);
90: //´´½¨ÃüÁî¶ÓÁÐ
91: cl_command_queue queue = clCreateCommandQueue( context,
92: device,
93: CL_QUEUE_PROFILING_ENABLE, NULL );
94: //´´½¨Èý¸öOpenCLÄÚ´æ¶ÔÏ󣬲¢°Ñbuf1µÄÄÚÈÝͨ¹ýÒþʽ¿½±´µÄ·½Ê½
95: //¿½±´µ½clbuf1,buf2µÄÄÚÈÝͨ¹ýÏÔʾ¿½±´µÄ·½Ê½¿½±´µ½clbuf2
96: cl_mem clbuf1 = clCreateBuffer(context,
97: CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
98: NWITEMS*sizeof(cl_float),buf1,
99: NULL );
100:
101: cl_mem clbuf2 = clCreateBuffer(context,
102: CL_MEM_READ_ONLY ,
103: NWITEMS*sizeof(cl_float),NULL,
104: NULL );
105:
106: status = clEnqueueWriteBuffer(queue, clbuf2, 1,
107: 0, NWITEMS*sizeof(cl_float), buf2, 0, 0, 0);
108:
109: cl_mem buffer = clCreateBuffer( context,
110: CL_MEM_WRITE_ONLY,
111: NWITEMS * sizeof(cl_float),
112: NULL, NULL );
113:
114: const char * filename = "add.cl";
115: std::string sourceStr;
116: status = convertToString(filename, sourceStr);
117: const char * source = sourceStr.c_str();
118: size_t sourceSize[] = { strlen(source) };
119:
120: //´´½¨³ÌÐò¶ÔÏó
121: cl_program program = clCreateProgramWithSource(
122: context,
123: 1,
124: &source,
125: sourceSize,
126: NULL);
127: //±àÒë³ÌÐò¶ÔÏó
128: status = clBuildProgram( program, 1, &device, NULL, NULL, NULL );
129: if(status != 0)
130: {
131: printf("clBuild failed:%d\n", status);
132: char tbuf[0x10000];
133: clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0x10000, tbuf, NULL);
134: printf("\n%s\n", tbuf);
135: return -1;
136: }
137:
138: //´´½¨Kernel¶ÔÏó
139: cl_kernel kernel = clCreateKernel( program, "vecadd", NULL );
140: //ÉèÖÃKernel²ÎÊý
141: cl_int clnum = NWITEMS;
142: clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*) &clbuf1);
143: clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*) &clbuf2);
144: clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*) &buffer);
145:
146: //Ö´ÐÐkernel
147: cl_event ev;
148: size_t global_work_size = NWITEMS;
149: clEnqueueNDRangeKernel( queue,
150: kernel,
151: 1,
152: NULL,
153: &global_work_size,
154: NULL, 0, NULL, &ev);
155: clFinish( queue );
156:
157: //Êý¾Ý¿½»ØhostÄÚ´æ
158: cl_float *ptr;
159: ptr = (cl_float *) clEnqueueMapBuffer( queue,
160: buffer,
161: CL_TRUE,
162: CL_MAP_READ,
163: 0,
164: NWITEMS * sizeof(cl_float),
165: 0, NULL, NULL, NULL );
166: //½á¹ûÑéÖ¤£¬ºÍcpu¼ÆËãµÄ½á¹û±È½Ï
167: if(!memcmp(buf, ptr, NWITEMS))
168: printf("Verify passed\n");
169: else printf("verify failed");
170:
171: if(buf)
172: free(buf);
173: if(buf1)
174: free(buf1);
175: if(buf2)
176: free(buf2);
177:
178: //ɾ³ýOpenCL×ÊÔ´¶ÔÏó
179: clReleaseMemObject(clbuf1);
180: clReleaseMemObject(clbuf2);
181: clReleaseMemObject(buffer);
182: clReleaseProgram(program);
183: clReleaseCommandQueue(queue);
184: clReleaseContext(context);
185: return 0;
186: }
187:
|
|