从今天开始学习OpenCL……
安装AMD OpenCL APP 的过程我这里就不仔细说明了。
一个简单的OpenCL的程序
现在,我们开始写一个简单的OpenCL程序,计算两个数组相加的和,放到另一个数组中去。程序用cpu和gpu分别计算,最后验证它们是否相等。OpenCL程序的流程大致如下:

下面是source code中的主要代码:
int main(int argc, char* argv[])
{
//ÔÚhostÄÚ´æÖд´½¨Èý¸ö»º³åÇø
float *buf1 = 0;
float *buf2 = 0;
float *buf = 0;
buf1 =(float *)malloc(BUFSIZE * sizeof(float));
buf2 =(float *)malloc(BUFSIZE * sizeof(float));
buf =(float *)malloc(BUFSIZE * sizeof(float));
//ÓÃÒ»Ð©Ëæ»úÖµ³õʼ»¯buf1ºÍbuf2µÄÄÚÈÝ
int i;
srand( (unsigned)time( NULL ) );
for(i = 0; i < BUFSIZE; i++)
buf1[i] = rand()%65535;
srand( (unsigned)time( NULL ) +1000);
for(i = 0; i < BUFSIZE; i++)
buf2[i] = rand()%65535;
//cpu¼ÆËãbuf1,buf2µÄºÍ
for(i = 0; i < BUFSIZE; i++)
buf[i] = buf1[i] + buf2[i];
cl_uint status;
cl_platform_id platform;
//´´½¨Æ½Ì¨¶ÔÏó
status = clGetPlatformIDs( 1, &platform, NULL );
|
注意:如果我们系统中安装不止一个opencl平台,比如我的os中,有intel和amd两家opencl平台,用上面这行代码,有可能会出错,因为它得到了intel的opencl平台,而intel的平台只支持cpu,而我们后面的操作都是基于gpu,这时我们可以用下面的代码,得到AMD的opencl平台
cl_uint numPlatforms;
std::string platformVendor;
status = clGetPlatformIDs(0, NULL, &numPlatforms);
if(status != CL_SUCCESS)
{
return 0;
}
if (0 < numPlatforms)
{
cl_platform_id* platforms = new cl_platform_id[numPlatforms];
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
char platformName[100];
for (unsigned i = 0; i < numPlatforms; ++i)
{
status = clGetPlatformInfo(platforms[i],
CL_PLATFORM_VENDOR,
sizeof(platformName),
platformName,
NULL);
platform = platforms[i];
platformVendor.assign(platformName);
if (!strcmp(platformName, "Advanced Micro Devices, Inc."))
{
break;
}
}
std::cout << "Platform found : " << platformName << "\n";
delete[] platforms;
}
|
cl_device_id device;
//´´½¨GPUÉ豸
clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU,
1,
&device,
NULL);
//´´½¨context
cl_context context = clCreateContext( NULL,
1,
&device,
NULL, NULL, NULL);
//´´½¨ÃüÁî¶ÓÁÐ
cl_command_queue queue = clCreateCommandQueue( context,
device,
CL_QUEUE_PROFILING_ENABLE, NULL );
//´´½¨Èý¸öOpenCLÄÚ´æ¶ÔÏ󣬲¢°Ñbuf1µÄÄÚÈÝͨ¹ýÒþʽ¿½±´µÄ·½Ê½
//buf1ÄÚÈÝ¿½±´µ½clbuf1,buf2µÄÄÚÈÝͨ¹ýÏÔʾ¿½±´µÄ·½Ê½¿½±´µ½clbuf2
cl_mem clbuf1 = clCreateBuffer(context,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
BUFSIZE*sizeof(cl_float),buf1,
NULL );
cl_mem clbuf2 = clCreateBuffer(context,
CL_MEM_READ_ONLY ,
BUFSIZE*sizeof(cl_float),NULL,
NULL );
cl_event writeEvt;
status = clEnqueueWriteBuffer(queue, clbuf2, 1,
0, BUFSIZE*sizeof(cl_float), buf2, 0, 0, 0);
|
上面这行代码把buf2中的内容拷贝到clbuf2,因为buf2位于host端,clbuf2位于device端,所以这个函数会执行一次host到device的传输操作,或者说一次system memory到video memory的拷贝操作,所以我在该函数的后面放置了clFlush函数,表示把command queue中的所有命令提交到device(注意:该命令并不保证命令执行完成),所以我们调用函数waitForEventAndRelease来等待write缓冲的完成,waitForEventAndRelease是一个用户定义的函数,它的内容如下,主要代码就是通过event来查询我们的操作是否完成,没完成的话,程序就一直block在这行代码处,另外我们也可以用opencl中内置的函数clWaitForEvents来代替clFlush和waitForEventAndRelease。
//Wait for an event to complete, then release it.
//
// Polls the event's execution status until it reaches CL_COMPLETE.
// event: address of the event to wait on; the event is released before returning.
// Returns 0 when the command completed, 1 when the status query failed or the
// command terminated abnormally (negative execution status).
int waitForEventAndRelease(cl_event *event)
{
    cl_int status = CL_SUCCESS;
    cl_int eventStatus = CL_QUEUED;
    if (event == NULL)
        return 1;
    while(eventStatus != CL_COMPLETE)
    {
        status = clGetEventInfo(
                     *event,
                     CL_EVENT_COMMAND_EXECUTION_STATUS,
                     sizeof(cl_int),
                     &eventStatus,
                     NULL);
        // A failed query or a negative status never turns into CL_COMPLETE;
        // without this exit the loop would spin forever.
        if (status != CL_SUCCESS || eventStatus < 0)
            break;
    }
    const int failed = (status != CL_SUCCESS || eventStatus != CL_COMPLETE) ? 1 : 0;
    // Only release a handle the runtime accepted.
    if (status == CL_SUCCESS)
        clReleaseEvent(*event);
    return failed;
}
|
// Submit the queued commands to the device (this does not wait for them to finish).
status = clFlush(queue);
//Wait for the data transfer to finish before continuing
waitForEventAndRelease(&writeEvt);
// Output memory object holding BUFSIZE floats; created write-only, no host pointer.
cl_mem buffer = clCreateBuffer( context,
CL_MEM_WRITE_ONLY,
BUFSIZE * sizeof(cl_float),
NULL, NULL );
|
kernelÎļþÖзŵÄÊÇgpuÖÐÖ´ÐеĴúÂ룬Ëü±»·ÅÔÚÒ»¸öµ¥¶ÀµÄÎļþadd.clÖУ¬±¾³ÌÐòÖÐkernel´úÂë·Ç³£¼òµ¥£¬Ö»ÊÇÖ´ÐÐÁ½¸öÊý×éÏà¼Ó¡£kernelµÄ´úÂëΪ£º
// Element-wise vector addition: each work-item adds one pair of elements.
// A, B: input arrays; C: output array, written at the work-item's global index.
__kernel void vecadd(__global const float* A,
                     __global const float* B,
                     __global float* C)
{
    const int gid = get_global_id(0);
    C[gid] = A[gid] + B[gid];
}
|
//kernelÎļþΪadd.cl const char * filename = "add.cl"; std::string sourceStr; status = convertToString(filename, sourceStr);
|
convertToString也是用户定义的函数,该函数把kernel源文件读入到一个string中,它的代码如下:
/ f.read(str, fileSize);
f.close();
str[size] = '\0';
s = str;
delete[] str;
return 0;
}
printf("Error: Failed to open file %s\n", filename);
return 1;
} |
const char * source = sourceStr.c_str();
size_t sourceSize[] = { strlen(source) };
//Create the program object from the kernel source text
cl_program program = clCreateProgramWithSource(
                         context,
                         1,
                         &source,
                         sourceSize,
                         NULL);
//Build the program for the selected device
status = clBuildProgram( program, 1, &device, NULL, NULL, NULL );
if(status != 0)
{
    printf("clBuild failed:%d\n", (int)status);
    char tbuf[0x10000];
    // Keep the buffer printable even if the log query itself fails.
    tbuf[0] = '\0';
    clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(tbuf), tbuf, NULL);
    printf("\n%s\n", tbuf);
    return -1;
}
//Create the kernel object
cl_kernel kernel = clCreateKernel( program, "vecadd", NULL );
if (kernel == NULL)
{
    printf("clCreateKernel failed\n");
    return -1;
}
//Set the kernel arguments: two inputs and the output buffer
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*) &clbuf1);
if (status == CL_SUCCESS)
    status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*) &clbuf2);
if (status == CL_SUCCESS)
    status = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*) &buffer);
if (status != CL_SUCCESS)
{
    printf("clSetKernelArg failed:%d\n", (int)status);
    return -1;
}
|
注意:在执行kernel时候,我们只设置了global work items数量,没有设置group
size,这时候,系统会使用默认的work group size,通常可能是256之类的。
//Run the kernel over a 1-D range; the number of work-items is BUFSIZE
cl_event ev;
size_t global_work_size = BUFSIZE;
status = clEnqueueNDRangeKernel( queue,
                                 kernel,
                                 1,
                                 NULL,
                                 &global_work_size,
                                 NULL, 0, NULL, &ev);
if (status != CL_SUCCESS)
{
    // ev is not valid when the enqueue fails, so it must not be waited on.
    printf("clEnqueueNDRangeKernel failed:%d\n", (int)status);
    return -1;
}
status = clFlush( queue );
waitForEventAndRelease(&ev);
//Copy the result back to host memory by mapping the output buffer
cl_float *ptr;
cl_event mapevt;
// Pass &mapevt: the original passed NULL here and then waited on an
// uninitialised event handle.
ptr = (cl_float *) clEnqueueMapBuffer( queue,
                                       buffer,
                                       CL_TRUE,
                                       CL_MAP_READ,
                                       0,
                                       BUFSIZE * sizeof(cl_float),
                                       0, NULL, &mapevt, NULL );
if (ptr == NULL)
{
    printf("clEnqueueMapBuffer failed\n");
    return -1;
}
status = clFlush( queue );
waitForEventAndRelease(&mapevt);
//Verify: compare with the CPU result. The length is in bytes, so every
//float must be covered, not just the first BUFSIZE bytes.
if(!memcmp(buf, ptr, BUFSIZE * sizeof(cl_float)))
    printf("Verify passed\n");
else
    printf("verify failed");
// Hand the mapped region back before the buffer is released.
clEnqueueUnmapMemObject(queue, buffer, ptr, 0, NULL, NULL);
clFinish(queue);
// free(NULL) is a no-op, so no guards are needed.
free(buf);
free(buf1);
free(buf2);
|
程序结束后,这些opencl对象一般会自动释放,但是为了程序完整,养成一个好习惯,这儿我加上了手动释放opencl对象的代码。
//ɾ³ýOpenCL×ÊÔ´¶ÔÏó clReleaseMemObject(clbuf1); clReleaseMemObject(clbuf2); clReleaseMemObject(buffer); clReleaseProgram(program); clReleaseCommandQueue(queue); clReleaseContext(context); return 0; } |
程序执行后的界面如下:

存储kernel文件为二进制
在教程2中,我们通过函数convertToString,把kernel源文件读到一个string串中,然后用函数clCreateProgramWithSource装入程序对象,再调用函数clBuildProgram编译程序对象。其实我们也可以直接调用二进制kernel文件,这样,当不想把kernel文件给别人看的时候,起到一定的保密作用。在本教程中,我们会把读入的源文件存储到一个二进制文件中,并且还会建立一个计时器类,用来记录数组加法在cpu和gpu端分别执行的时间。
首先我们建立工程文件gclTutorial2,在其中增加类gclFile,该类主要用来读取文本kernel文件,或者读写二进制kernel文件。
// Helper for reading a text kernel source file and for reading/writing
// binary kernel files.
class gclFile
{
public:
    gclFile(void);
    ~gclFile(void);
    //Open an OpenCL kernel source file (text mode)
    bool open(const char* fileName);
    //Write / read a binary kernel file
    bool writeBinaryToFile(const char* fileName, const char* birary, size_t numBytes);
    bool readBinaryFromFile(const char* fileName);
    // ... remaining members are omitted in this excerpt (the calling code
    // also uses a source() accessor).
};
|
gclFile中三个读写kernel文件的函数代码为:
// Write numBytes bytes from birary to fileName, replacing any existing file.
//
// fileName: path of the output file (opened in binary mode).
// birary:   bytes to write; may be NULL only when numBytes is 0.
// numBytes: number of bytes to write.
// Returns true only if the file was opened, every byte was written and the
// file was closed without error.
bool gclFile::writeBinaryToFile(const char* fileName, const char* birary, size_t numBytes)
{
    if (fileName == NULL || (birary == NULL && numBytes != 0))
        return false;

    FILE *output = fopen(fileName, "wb");
    if (output == NULL)
        return false;

    size_t written = 0;
    if (numBytes != 0)
        written = fwrite(birary, sizeof(char), numBytes, output);

    // fclose flushes buffered data, so a failure there also means data loss.
    const bool closed = (fclose(output) == 0);
    return closed && written == numBytes;
}
|
现在,在main.cpp中,我们就可以用gclFile类的open函数来读入kernel源文件了:
//The kernel source file is add.cl
gclFile kernelFile;
if(!kernelFile.open("add.cl"))
{
printf("Failed to load kernel file \n");
exit(0);
}
// NOTE(review): this pointer is only safe if source() returns a reference to a
// string owned by kernelFile; if it returns by value the pointer dangles — confirm.
const char * source = kernelFile.source().c_str();
size_t sourceSize[] = {strlen(source)};
//Create the program object from the source text
cl_program program = clCreateProgramWithSource(
context,
1,
&source,
sourceSize,
NULL);
|
编译好kernel后,我们可以通过下面的代码,把编译好的kernel存储在一个二进制文件vecadd.bin中,在教程4中,我们将会直接装入这个二进制的kernel文件。
//´æ´¢±àÒëºÃµÄkernelÎļþ
char **binaries = (char **)malloc( sizeof(char *) * 1 ); //Ö»ÓÐÒ»¸öÉ豸
size_t *binarySizes = (size_t*)malloc( sizeof(size_t) * 1 );
status = clGetProgramInfo(program,
CL_PROGRAM_BINARY_SIZES,
sizeof(size_t) * 1,
binarySizes, NULL);
binaries[0] = (char *)malloc( sizeof(char) * binarySizes[0]);
status = clGetProgramInfo(program,
CL_PROGRAM_BINARIES,
sizeof(char *) * 1,
binaries,
NULL);
kernelFile.writeBinaryToFile("vecadd.bin", binaries[0],binarySizes[0]);
|
我们还会建立一个计时器类gclTimer,用来统计时间,这个类主要用QueryPerformanceFrequency得到时钟频率,用QueryPerformanceCounter得到流逝的ticks数,最终得到流逝的时间。函数非常简单,
// Simple stopwatch used to time the CPU and GPU code paths.
class gclTimer
{
public:
    gclTimer(void);
    ~gclTimer(void);

    // Start the timer.
    void Start(void);
    // Stop the timer.
    void Stop(void);
    // Reset the timer.
    void Reset(void);
    // Compute the elapsed time. Callers multiply the result by 1000 and
    // print it as milliseconds, so it is presumably in seconds — confirm.
    double GetElapsedTime(void);

private:
    double _freq;    // presumably the clock frequency — confirm against the implementation
    double _clocks;  // presumably the accumulated tick count — confirm
    double _start;   // presumably the tick value captured by Start() — confirm
};
|
下面我们在cpu端执行数组加法时,增加计时器的代码:
gclTimer clTimer; clTimer.Reset(); clTimer.Start(); //cpu¼ÆËãbuf1,buf2µÄºÍ for(i = 0; i < BUFSIZE; i++) buf[i] = buf1[i] + buf2[i]; clTimer.Stop(); printf("cpu costs time:%.6f ms \n ", clTimer.GetElapsedTime()*1000 );
|
同理在gpu执行kernel代码,以及copy gpu结果到cpu时候,增加计时器代码:
//Run the kernel over a 1-D range; the number of work-items is BUFSIZE.
//The timer brackets enqueue + flush + wait, so it measures the whole round trip.
cl_event ev;
size_t global_work_size = BUFSIZE;
clTimer.Reset();
clTimer.Start();
clEnqueueNDRangeKernel( queue,
kernel,
1,
NULL,
&global_work_size,
NULL, 0, NULL, &ev);
status = clFlush( queue );
waitForEventAndRelease(&ev);
//clWaitForEvents(1, &ev);
clTimer.Stop();
printf("kernal total time:%.6f ms \n ", clTimer.GetElapsedTime()*1000 );
//Copy the result back to host memory (timed separately from the kernel)
cl_float *ptr;
clTimer.Reset();
clTimer.Start();
cl_event mapevt;
// Blocking map (CL_TRUE) of the output buffer for reading; mapevt receives the map event.
ptr = (cl_float *) clEnqueueMapBuffer( queue,
buffer,
CL_TRUE,
CL_MAP_READ,
0,
BUFSIZE * sizeof(cl_float),
0, NULL, &mapevt, NULL );
status = clFlush( queue );
waitForEventAndRelease(&mapevt);
//clWaitForEvents(1, &mapevt);
clTimer.Stop();
printf("copy from device to host:%.6f ms \n ", clTimer.GetElapsedTime()*1000 );
|
×îÖÕ³ÌÐòÖ´ÐнçÃæÈçÏ£¬ÔÚbufsizeΪ262144ʱ£¬ÔÚÎÒµÄÏÔ¿¨ÉÏgpu»¹ÓÐcpu¿ìÄØ¡£¬ÔÚ³ÌÐòĿ¼£¬ÎÒÃÇ¿ÉÒÔ¿´µ½Ò²²úÉúÁËvecadd.binÎļþÁË¡£

读入二进制kernel文件
本教程中,我们使用上一篇教程中产生的二进制kernel文件vecadd.bin作为输入来创建程序对象,程序代码如下:
//The kernel file is vecadd.bin (a previously saved program binary)
gclFile kernelFile;
if(!kernelFile.readBinaryFromFile("vecadd.bin"))
{
printf("Failed to load binary file \n");
exit(0);
}
// NOTE(review): both lines below are only safe if source() returns a reference to a
// string owned by kernelFile; a by-value return would leave `binary` dangling — confirm.
const char * binary = kernelFile.source().c_str();
// Use size() rather than strlen(): binary data may contain zero bytes.
size_t binarySize = kernelFile.source().size();
// Create the program object from the binary for the single device.
cl_program program = clCreateProgramWithBinary(context,
1,
&device,
(const size_t *)&binarySize,
(const unsigned char**)&binary,
NULL,
NULL);
|
程序执行的界面和教程3中一模一样…
完整的代码请参考:工程文件gclTutorial3
代码下载:http://files.cnblogs.com/mikewolf2002/gclTutorial.zip
使用二维NDRange workgroup
在本教程中,我们使用二维NDRange来设置workgroup,这样在opencl中,workitem的组织形式是二维的,Kernel中
的代码也要做相应的改变,我们先看一下clEnqueueNDRangeKernel函数的变化。首先我们指定了workgroup
size为localx*localy,通常这个值为64的倍数,但最好不要超过256。
//Ö´ÐÐkernel,RangeÓÃ2ά£¬work itmes sizeΪwidth*height,
cl_event ev;
size_t globalThreads[] = {width, height};
size_t localx, localy;
if(width/8 > 4)
localx = 16;
else if(width < 8)
localx = width;
else localx = 8;
if(height/8 > 4)
localy = 16;
else if (height < 8)
localy = height;
else localy = 8;
size_t localThreads[] = {localx, localy}; // localx*localyÓ¦¸ÃÊÇ64µÄ±¶Êý
printf("global_work_size =(%d,%d), local_work_size=(%d, %d)\n",width,height,localx,localy);
clTimer.Reset();
clTimer.Start();
clEnqueueNDRangeKernel( queue,
kernel,
2,
NULL,
globalThreads,
localThreads, 0, NULL, &ev);
|
注意:在上面代码中,定义global threads以及local threads数量,都是通过二维数组的方式进行的。
新的Kernel代码如下:
#pragma OPENCL EXTENSION cl_amd_printf : enable
// 2-D element-wise addition: work-item (x, y) handles element x + y * width,
// where width is the global size in dimension 0.
__kernel void vecadd(__global const float* a, __global const float* b, __global float* c)
{
    int x = get_global_id(0);
    int y = get_global_id(1);
    int width = get_global_size(0);
    int idx = x + y * width;
    // Debug output from a single work-item. The work-item functions return
    // size_t, so convert to int to match the %d conversions.
    if(x == 1 && y ==1)
        printf("%d, %d,%d,%d,%d,%d\n",
               (int)get_local_size(0), (int)get_local_size(1),
               (int)get_local_id(0), (int)get_local_id(1),
               (int)get_group_id(0), (int)get_group_id(1));
    c[idx] = a[idx] + b[idx];
}
|
我们在kernel中增加了#pragma OPENCL EXTENSION cl_amd_printf
: enable,以便在kernel中通过printf函数进行debug,这是AMD的一个扩展。printf还可以直接打印出float4这样的向量,比如printf("%v4f",
vec)。
另外,在main.cpp中增加一行代码:
//Tell the driver to dump the IL and ISA files
_putenv("GPU_DUMP_DEVICE_KERNEL=3"); |
我们可以在程序目录dump出il和isa形式的kernel文件,对于熟悉isa汇编的人,这是一个很好的调试performance的方法。
在最新的app sdk 2.7中,在kernel中使用printf的时候,这个程序会hang在那儿,以前没这种情况。
程序执行界面。

|