Äú¿ÉÒÔ¾èÖú£¬Ö§³ÖÎÒÃǵĹ«ÒæÊÂÒµ¡£

1Ôª 10Ôª 50Ôª





ÈÏÖ¤Â룺  ÑéÖ¤Âë,¿´²»Çå³þ?Çëµã»÷Ë¢ÐÂÑéÖ¤Âë ±ØÌî



  ÇóÖª ÎÄÕ ÎÄ¿â Lib ÊÓÆµ iPerson ¿Î³Ì ÈÏÖ¤ ×Éѯ ¹¤¾ß ½²×ù Modeler   Code  
»áÔ±   
 
   
 
 
     
   
 ¶©ÔÄ
  ¾èÖú
OpenCL 学习 step by step（一）
 
作者：迈克老狼2012 来源：博客园 发布于：2015-01-12
  8801  次浏览      31
 

从今天开始学习OpenCL……

安装AMD OpenCL APP的过程我这里就不仔细说明了。

一个简单的OpenCL程序

现在，我们开始写一个简单的OpenCL程序，计算两个数组相加的和，放到另一个数组中去。程序用cpu和gpu分别计算，最后验证它们是否相等。OpenCL程序的流程大致如下：

下面是source code中的主要代码：

int main(int argc, char* argv[]) 
    { 
    //ÔÚhostÄÚ´æÖд´½¨Èý¸ö»º³åÇø 
    float *buf1 = 0; 
    float *buf2 = 0; 
    float *buf = 0;

    buf1 =(float *)malloc(BUFSIZE * sizeof(float)); 
    buf2 =(float *)malloc(BUFSIZE * sizeof(float)); 
    buf =(float *)malloc(BUFSIZE * sizeof(float));

    //ÓÃÒ»Ð©Ëæ»úÖµ³õʼ»¯buf1ºÍbuf2µÄÄÚÈÝ 
    int i; 
    srand( (unsigned)time( NULL ) ); 
    for(i = 0; i < BUFSIZE; i++) 
        buf1[i] = rand()%65535;

    srand( (unsigned)time( NULL ) +1000); 
    for(i = 0; i < BUFSIZE; i++) 
        buf2[i] = rand()%65535;

    //cpu¼ÆËãbuf1,buf2µÄºÍ 
    for(i = 0; i < BUFSIZE; i++) 
        buf[i] = buf1[i] + buf2[i];

    cl_uint status; 
    cl_platform_id platform;

    //´´½¨Æ½Ì¨¶ÔÏó 
    status = clGetPlatformIDs( 1, &platform, NULL );

注意：如果我们系统中安装了不止一个opencl平台，比如我的os中，有intel和amd两家opencl平台，用上面这行代码，有可能会出错，因为它得到了intel的opencl平台，而intel的平台只支持cpu，而我们后面的操作都是基于gpu，这时我们可以用下面的代码，得到AMD的opencl平台：

// Enumerate every installed OpenCL platform and pick the one whose vendor
// string is "Advanced Micro Devices, Inc.". If no such platform exists, the
// last platform whose vendor string could be read is kept.
cl_uint numPlatforms = 0;
std::string platformVendor;
status = clGetPlatformIDs(0, NULL, &numPlatforms);
if(status != CL_SUCCESS)
    {
    return 0;
    }
if (0 < numPlatforms)
    {
    cl_platform_id* platforms = new cl_platform_id[numPlatforms];
    status = clGetPlatformIDs(numPlatforms, platforms, NULL);
    if(status != CL_SUCCESS)
        {
        // The platform list is unusable; do not read from it.
        delete[] platforms;
        return 0;
        }

    // Zero-initialised so it is always a valid C string, even when the
    // vendor query below fails.
    char platformName[100] = "";
    for (unsigned i = 0; i < numPlatforms; ++i)
        {
        platformName[0] = '\0';
        status = clGetPlatformInfo(platforms[i],
            CL_PLATFORM_VENDOR,
            sizeof(platformName),
            platformName,
            NULL);
        if(status != CL_SUCCESS)
            {
            // Skip platforms whose vendor string cannot be queried.
            continue;
            }

        platform = platforms[i];
        platformVendor.assign(platformName);

        if (!strcmp(platformName, "Advanced Micro Devices, Inc."))
            {
            break;
            }
        }

    // platformVendor names the platform that was actually selected.
    std::cout << "Platform found : " << platformVendor << "\n";
    delete[] platforms;
    }



cl_device_id device; 
    // Pick one GPU device from the selected platform.
    clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 
        1, 
        &device, 
        NULL); 
    // Create a context containing that device.
    cl_context context = clCreateContext( NULL, 
        1, 
        &device, 
        NULL, NULL, NULL); 
    // Create the command queue (profiling enabled).
    cl_command_queue queue = clCreateCommandQueue( context, 
        device, 
        CL_QUEUE_PROFILING_ENABLE, NULL ); 
    // Create the input memory objects. buf1 is copied into clbuf1 implicitly
    // (CL_MEM_COPY_HOST_PTR); buf2 is copied into clbuf2 explicitly by the
    // write command below.
    cl_mem clbuf1 = clCreateBuffer(context, 
        CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 
        BUFSIZE*sizeof(cl_float),buf1, 
        NULL );

    cl_mem clbuf2 = clCreateBuffer(context, 
        CL_MEM_READ_ONLY , 
        BUFSIZE*sizeof(cl_float),NULL, 
        NULL );

    // The event must be handed to the enqueue call: the code that follows
    // waits on and releases writeEvt, which would otherwise be an
    // uninitialised handle.
    cl_event writeEvt;

    // Blocking (CL_TRUE) host-to-device copy of buf2 into clbuf2.
    status = clEnqueueWriteBuffer(queue, clbuf2, CL_TRUE, 
        0, BUFSIZE*sizeof(cl_float), buf2, 0, NULL, &writeEvt);

上面这行代码把buf2中的内容拷贝到clbuf2，因为buf2位于host端，clbuf2位于device端，所以这个函数会执行一次host到device的传输操作，或者说一次system memory到video memory的拷贝操作，所以我在该函数的后面放置了clFlush函数，表示把command queue中的所有命令提交到device（注意：该命令并不保证命令执行完成），所以我们调用函数waitForEventAndRelease来等待write缓冲的完成。waitForEventAndRelease是一个用户定义的函数，它的内容如下，主要代码就是通过event来查询我们的操作是否完成，没完成的话，程序就一直block在这行代码处。另外我们也可以用opencl中内置的函数clWaitForEvents来代替clFlush和waitForEventAndRelease。

//等待事件完成
/*
 * Polls an OpenCL event until its command has completed, then releases it.
 *
 * event: pointer to the event to wait on; the event is released before
 *        returning whenever the pointer is non-NULL.
 *
 * Returns 0 when the command reached CL_COMPLETE, and -1 when event is NULL,
 * when the status query fails, or when the command terminated abnormally
 * (OpenCL reports that as a negative execution status). Without these exits
 * the polling loop would never terminate on an error.
 */
int waitForEventAndRelease(cl_event *event)
    {
    if(event == NULL)
        {
        return -1;
        }

    int result = 0;
    cl_int status = CL_SUCCESS;
    cl_int eventStatus = CL_QUEUED;
    while(eventStatus != CL_COMPLETE)
        {
        status = clGetEventInfo(
            *event, 
            CL_EVENT_COMMAND_EXECUTION_STATUS, 
            sizeof(cl_int),
            &eventStatus,
            NULL);

        // A failed query or a negative execution status can never turn into
        // CL_COMPLETE, so stop polling instead of spinning forever.
        if(status != CL_SUCCESS || eventStatus < 0)
            {
            result = -1;
            break;
            }
        }

    status = clReleaseEvent(*event);
    if(status != CL_SUCCESS)
        {
        result = -1;
        }

    return result;
    }

// Submit all queued commands to the device; flushing alone does not wait for
// them to finish.
status = clFlush(queue); 
     // Wait for the data transfer to complete before continuing.
     waitForEventAndRelease(&writeEvt); 


    // Write-only device buffer of BUFSIZE floats; it is bound as the third
    // kernel argument further down.
    cl_mem buffer = clCreateBuffer( context, 
        CL_MEM_WRITE_ONLY, 
        BUFSIZE * sizeof(cl_float), 
        NULL, NULL );

kernel文件中放的是gpu中执行的代码，它被放在一个单独的文件add.cl中。本程序中kernel代码非常简单，只是执行两个数组相加。kernel的代码为：

/*
 * Element-wise vector addition: each work-item adds one pair of elements,
 * C[i] = A[i] + B[i], where i is the work-item's global id in dimension 0.
 */
__kernel void vecadd(__global const float* A, __global const float* B, __global float* C)
{
    // Index of the element handled by this work-item.
    const int gid = get_global_id(0);

    const float lhs = A[gid];
    const float rhs = B[gid];
    C[gid] = lhs + rhs;
}

// The kernel source lives in the file add.cl.
const char * filename = "add.cl";
std::string sourceStr;
// convertToString is a user-defined helper that reads the file into sourceStr.
status = convertToString(filename, sourceStr);

convertToString也是用户定义的函数，该函数把kernel源文件读入到一个string中，它的代码如下：

/ f.read(str, fileSize);
        f.close();
        str[size] = '\0';

        s = str;
        delete[] str;
        return 0;
        }
    printf("Error: Failed to open file %s\n", filename);
    return 1;
    }

// Raw pointer and length of the kernel source text, in the array form that
// clCreateProgramWithSource expects.
const char * source    = sourceStr.c_str(); 
    size_t sourceSize[]    = { strlen(source) }; 
    // Create the program object from the source text.
    cl_program program = clCreateProgramWithSource( 
        context, 
        1, 
        &source, 
        sourceSize, 
        NULL); 
    // Build (compile) the program for the selected device.
    status = clBuildProgram( program, 1, &device, NULL, NULL, NULL ); 
    if(status != 0) 
        { 
        // On failure print the status code followed by the build log, then
        // give up.
        printf("clBuild failed:%d\n", status); 
        char tbuf[0x10000]; 
        clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0x10000, tbuf, NULL); 
        printf("\n%s\n", tbuf); 
        return -1; 
        }

    // Create the kernel object for the "vecadd" entry point.
    cl_kernel kernel = clCreateKernel( program, "vecadd", NULL ); 
    // Set the kernel arguments: the two input buffers and the output buffer.
    // NOTE(review): clnum is not used anywhere in the visible code.
    cl_int clnum = BUFSIZE; 
    clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*) &clbuf1); 
    clSetKernelArg(kernel, 1, sizeof(cl_mem), (void*) &clbuf2); 
    clSetKernelArg(kernel, 2, sizeof(cl_mem), (void*) &buffer);

注意：在执行kernel的时候，我们只设置了global work items数量，没有设置group size，这时候，系统会使用默认的work group size，通常可能是256之类的。

// Run the kernel over a 1-dimensional range with BUFSIZE work-items. No
// local size is passed (NULL), so the work-group size is left to the runtime.
    cl_event ev; 
    size_t global_work_size = BUFSIZE; 
    clEnqueueNDRangeKernel( queue, 
        kernel, 
        1, 
        NULL, 
        &global_work_size, 
        NULL, 0, NULL, &ev); 
   // Submit the command, then block until the kernel's event completes.
   status = clFlush( queue ); 
   waitForEventAndRelease(&ev); 


    // Map the result buffer into host memory so it can be read.
    cl_float *ptr; 

    // The event is passed to the map call so that the wait below operates on
    // a valid handle rather than an uninitialised one.
    cl_event mapevt; 
    ptr = (cl_float *) clEnqueueMapBuffer( queue, 
        buffer, 
        CL_TRUE, 
        CL_MAP_READ, 
        0, 
        BUFSIZE * sizeof(cl_float), 
        0, NULL, &mapevt, NULL ); 

   status = clFlush( queue ); 
   waitForEventAndRelease(&mapevt); 



    // Verify the GPU result against the CPU result. memcmp takes a byte
    // count, so the element count is scaled by the element size; a failed
    // mapping (NULL) counts as a verification failure.
    if(ptr != NULL && !memcmp(buf, ptr, BUFSIZE * sizeof(cl_float))) 
        printf("Verify passed\n"); 
    else printf("verify failed");

    // Give the mapped region back to the runtime once it has been read.
    if(ptr != NULL)
        clEnqueueUnmapMemObject(queue, buffer, ptr, 0, NULL, NULL);

    // free(NULL) is a no-op, so no guards are needed.
    free(buf); 
    free(buf1); 
    free(buf2);

程序结束后，这些opencl对象一般会自动释放，但是为了程序完整，养成一个好习惯，这儿我加上了手动释放opencl对象的代码。

// Release the OpenCL objects created above. The kernel object is released
// as well; it is created with clCreateKernel but was previously never freed.
clReleaseKernel(kernel);
clReleaseMemObject(clbuf1);
clReleaseMemObject(clbuf2);
clReleaseMemObject(buffer);
clReleaseProgram(program);
clReleaseCommandQueue(queue);
clReleaseContext(context);
return 0;
}

程序执行后的界面如下：

存储kernel文件为二进制

在教程2中，我们通过函数convertToString，把kernel源文件读到一个string串中，然后用函数clCreateProgramWithSource装入程序对象，再调用函数clBuildProgram编译程序对象。其实我们也可以直接调用二进制kernel文件，这样，当不想把kernel文件给别人看的时候，能起到一定的保密作用。在本教程中，我们会把读入的源文件存储到一个二进制文件中，并且还会建立一个计时器类，用来记录数组加法在cpu和gpu端分别执行的时间。

首先我们建立工程文件gclTutorial2，在其中增加类gclFile，该类主要用来读取文本kernel文件，或者读写二进制kernel文件。

/*
 * gclFile: helper for OpenCL kernel files. It opens kernel source files in
 * text mode and writes / reads kernel files in binary form.
 */
class gclFile 
{ 
public: 
    gclFile(void); 
    ~gclFile(void);

    // Open an OpenCL kernel source file (text mode).
    bool open(const char* fileName); 

    // Write / read a binary kernel file.
    bool writeBinaryToFile(const char* fileName, const char* birary, size_t numBytes); 
    bool readBinaryFromFile(const char* fileName);

    // ... remaining members are elided in this listing (callers also use a
    // source() accessor that is not shown here).

};

gclFile中三个读写kernel文件的函数代码为：

/*
 * Writes numBytes bytes starting at birary to the file fileName, opened in
 * binary mode ("wb").
 *
 * Returns true only when the file was opened, every byte was written and the
 * file was closed without error; returns false otherwise, including when
 * fileName is NULL or birary is NULL while numBytes is non-zero.
 */
bool gclFile::writeBinaryToFile(const char* fileName, const char* birary, size_t numBytes)
{
    if(fileName == NULL || (birary == NULL && numBytes != 0))
        return false;

    FILE *output = fopen(fileName, "wb");
    if(output == NULL)
        return false;

    // fwrite may write fewer items than requested; treat that as a failure.
    size_t written = 0;
    if(numBytes != 0)
        written = fwrite(birary, sizeof(char), numBytes, output);

    // fclose flushes buffered data, so its result matters for a write stream.
    const bool closed = (fclose(output) == 0);

    return (written == numBytes) && closed;
}

现在，在main.cpp中，我们就可以用gclFile类的open函数来读入kernel源文件了：

// The kernel source file is add.cl.

gclFile kernelFile; 
// Stop the program if the kernel file cannot be opened.
if(!kernelFile.open("add.cl")) 
    { 
    printf("Failed to load kernel file \n"); 
    exit(0); 
    } 
// NOTE(review): source() is not shown in this file; the pointer stays valid
// only if source() returns a reference to a string owned by kernelFile —
// confirm.
const char * source = kernelFile.source().c_str(); 
size_t sourceSize[] = {strlen(source)}; 
// Create the program object from the source text.
cl_program program = clCreateProgramWithSource( 
    context, 
    1, 
    &source, 
    sourceSize, 
    NULL); 

编译好kernel后，我们可以通过下面的代码，把编译好的kernel存储在一个二进制文件vecadd.bin中，在教程4中，我们将会直接装入这个二进制的kernel文件。

//´æ´¢±àÒëºÃµÄkernelÎļþ
char **binaries = (char **)malloc( sizeof(char *) * 1 ); //Ö»ÓÐÒ»¸öÉ豸
size_t *binarySizes = (size_t*)malloc( sizeof(size_t) * 1 );

status = clGetProgramInfo(program, 
    CL_PROGRAM_BINARY_SIZES,
    sizeof(size_t) * 1, 
    binarySizes, NULL);
binaries[0] = (char *)malloc( sizeof(char) * binarySizes[0]);
status = clGetProgramInfo(program, 
    CL_PROGRAM_BINARIES,
    sizeof(char *) * 1, 
    binaries, 
    NULL);    
kernelFile.writeBinaryToFile("vecadd.bin", binaries[0],binarySizes[0]);

我们还会建立一个计时器类gclTimer，用来统计时间，这个类主要用QueryPerformanceFrequency得到时钟频率，用QueryPerformanceCounter得到流逝的ticks数，最终得到流逝的时间。函数非常简单：

/*
 * gclTimer: a simple stopwatch used to time the CPU and GPU code paths.
 * Typical use is Reset(), Start(), <work>, Stop(), GetElapsedTime().
 */
class gclTimer
{
public:
    gclTimer();
    ~gclTimer();

    void Start();              // start the timer
    void Stop();               // stop the timer
    void Reset();              // reset the timer
    double GetElapsedTime();   // compute the elapsed time

private:
    // NOTE(review): presumably the counter frequency, the accumulated tick
    // count and the tick value at Start() — confirm against the definitions.
    double _freq;
    double _clocks;
    double _start;
};

下面我们在cpu端执行数组加法时，增加计时器的代码：

// Time the CPU reference computation.
gclTimer clTimer; 
clTimer.Reset();
clTimer.Start();
// Compute the element-wise sum of buf1 and buf2 on the CPU.
for(i = 0; i < BUFSIZE; i++)
buf[i] = buf1[i] + buf2[i];
clTimer.Stop();
// The elapsed time is scaled by 1000 and reported as milliseconds.
printf("cpu costs time:%.6f ms \n ", clTimer.GetElapsedTime()*1000 );

同理，在gpu执行kernel代码，以及copy gpu结果到cpu的时候，增加计时器代码：

// Run the kernel over a 1-dimensional range with BUFSIZE work-items and time
// it from enqueue until the kernel's event completes.
cl_event ev;
size_t global_work_size = BUFSIZE;

clTimer.Reset();
clTimer.Start();
clEnqueueNDRangeKernel( queue,
    kernel,
    1,
    NULL,
    &global_work_size,
    NULL, 0, NULL, &ev);
status = clFlush( queue );
waitForEventAndRelease(&ev);
 // Alternative to the flush + wait pair: clWaitForEvents(1, &ev);

clTimer.Stop();
printf("kernal total time:%.6f ms \n ", clTimer.GetElapsedTime()*1000 );

// Map the result back into host memory and time the transfer.
cl_float *ptr;
clTimer.Reset();
clTimer.Start();
cl_event mapevt;
ptr = (cl_float *) clEnqueueMapBuffer( queue,
    buffer,
    CL_TRUE,
    CL_MAP_READ,
    0,
    BUFSIZE * sizeof(cl_float),
    0, NULL, &mapevt, NULL );
status = clFlush( queue );
waitForEventAndRelease(&mapevt);
 // Alternative to the flush + wait pair: clWaitForEvents(1, &mapevt);

clTimer.Stop();
printf("copy from device to host:%.6f ms \n ", clTimer.GetElapsedTime()*1000 );

最终程序执行界面如下，在bufsize为262144时，在我的显卡上gpu还没有cpu快呢…，在程序目录，我们可以看到也产生了vecadd.bin文件了。

读入二进制kernel文件

本教程中，我们使用上一篇教程中产生的二进制kernel文件vecadd.bin作为输入来创建程序对象，程序代码如下：

// The kernel is loaded from the binary file vecadd.bin.
gclFile kernelFile; 
// Stop the program if the binary file cannot be read.
if(!kernelFile.readBinaryFromFile("vecadd.bin")) 
    { 
    printf("Failed to load binary file \n"); 
    exit(0); 
    } 
// The size comes from size() rather than strlen().
// NOTE(review): source() is not shown in this file; presumably it returns the
// loaded bytes as a std::string — confirm.
const char * binary = kernelFile.source().c_str(); 
size_t binarySize = kernelFile.source().size();

// Create the program object directly from the device binary.
cl_program program = clCreateProgramWithBinary(context, 
    1, 
    &device, 
    (const size_t *)&binarySize, 
    (const unsigned char**)&binary, 
    NULL, 
    NULL); 

程序执行的界面和教程3中一模一样…

完整的代码请参考：工程文件gclTutorial3

代码下载：http://files.cnblogs.com/mikewolf2002/gclTutorial.zip

使用二维NDRange workgroup

在本教程中，我们使用二维NDRange来设置workgroup，这样在opencl中，workitem的组织形式是二维的，Kernel中的代码也要做相应的改变，我们先看一下clEnqueueNDRangeKernel函数的变化。首先我们指定了workgroup size为localx*localy，通常这个值为64的倍数，但最好不要超过256。

// Run the kernel over a 2-dimensional range of width*height work-items.
cl_event ev; 
size_t globalThreads[] = {width, height}; 
size_t localx, localy; 
// Choose the work-group extent per dimension: 16 for large ranges, the full
// extent for ranges smaller than 8, and 8 otherwise.
if(width/8 > 4) 
    localx = 16; 
else if(width < 8) 
    localx = width; 
else localx = 8;

if(height/8 > 4) 
    localy = 16; 
else if (height < 8) 
    localy = height; 
else localy = 8;

size_t localThreads[] = {localx, localy}; // localx*localy should be a multiple of 64
// %d expects an int, so the values are cast explicitly; passing a size_t to
// %d is undefined where size_t is wider than int.
printf("global_work_size =(%d,%d), local_work_size=(%d, %d)\n",(int)width,(int)height,(int)localx,(int)localy);

clTimer.Reset(); 
clTimer.Start(); 
clEnqueueNDRangeKernel( queue, 
    kernel, 
    2, 
    NULL, 
    globalThreads, 
    localThreads, 0, NULL, &ev); 

注意：在上面代码中，定义global threads以及local threads数量，都是通过二维数组的方式进行的。

新的Kernel代码如下：

// Enables printf inside kernels (AMD extension), used here for debugging.
#pragma OPENCL EXTENSION cl_amd_printf : enable

/*
 * Element-wise vector addition over a 2-dimensional range. The work-item at
 * (x, y) handles the element at linear index x + y * width, where width is
 * the global size of dimension 0.
 */
__kernel void vecadd(__global const float* a, __global const float* b, __global float* c)
{
    int x = get_global_id(0);
    int y = get_global_id(1);
    int width = get_global_size(0);

    // Linear index of this work-item's element, computed once.
    int idx = x + y * width;

    // Debug output from a single work-item. The work-item functions return
    // size_t, so the values are cast to int to match the %d conversions.
    if(x == 1 && y ==1)
         printf("%d, %d,%d,%d,%d,%d\n",(int)get_local_size(0),(int)get_local_size(1),(int)get_local_id(0),(int)get_local_id(1),(int)get_group_id(0),(int)get_group_id(1));

    c[idx] = a[idx] + b[idx];
}

我们在kernel中增加了#pragma OPENCL EXTENSION cl_amd_printf : enable，以便在kernel中通过printf函数进行debug，这是AMD的一个扩展。printf还可以直接打印出float4这样的向量，比如printf("%v4f", vec)。

另外，在main.cpp中增加一行代码：

// Tell the driver to dump the kernel's IL and ISA files.
_putenv("GPU_DUMP_DEVICE_KERNEL=3"); 

我们可以在程序目录dump出il和isa形式的kernel文件，对于熟悉isa汇编的人，这是一个很好的调试performance的方法。

在最新的app sdk 2.7中，在kernel中使用printf的时候，这个程序会hang在那儿，以前没这种情况。

³ÌÐòÖ´ÐнçÃæ¡£

   
8801 ´Îä¯ÀÀ       31
Ïà¹ØÎÄÕÂ

ÆóÒµ¼Ü¹¹¡¢TOGAFÓëArchiMate¸ÅÀÀ
¼Ü¹¹Ê¦Ö®Â·-ÈçºÎ×öºÃÒµÎñ½¨Ä££¿
´óÐÍÍøÕ¾µçÉÌÍøÕ¾¼Ü¹¹°¸ÀýºÍ¼¼Êõ¼Ü¹¹µÄʾÀý
ÍêÕûµÄArchimateÊÓµãÖ¸ÄÏ£¨°üÀ¨Ê¾Àý£©
Ïà¹ØÎĵµ

Êý¾ÝÖÐ̨¼¼Êõ¼Ü¹¹·½·¨ÂÛÓëʵ¼ù
ÊÊÓÃArchiMate¡¢EA ºÍ iSpace½øÐÐÆóÒµ¼Ü¹¹½¨Ä£
ZachmanÆóÒµ¼Ü¹¹¿ò¼Ü¼ò½é
ÆóÒµ¼Ü¹¹ÈÃSOAÂ䵨
Ïà¹Ø¿Î³Ì

ÔÆÆ½Ì¨Óë΢·þÎñ¼Ü¹¹Éè¼Æ
ÖÐ̨սÂÔ¡¢ÖÐ̨½¨ÉèÓëÊý×ÖÉÌÒµ
ÒÚ¼¶Óû§¸ß²¢·¢¡¢¸ß¿ÉÓÃϵͳ¼Ü¹¹
¸ß¿ÉÓ÷ֲ¼Ê½¼Ü¹¹Éè¼ÆÓëʵ¼ù
×îл¼Æ»®
DeepSeekÔÚÈí¼þ²âÊÔÓ¦ÓÃʵ¼ù 4-12[ÔÚÏß]
DeepSeek´óÄ£ÐÍÓ¦Óÿª·¢Êµ¼ù 4-19[ÔÚÏß]
UAF¼Ü¹¹ÌåϵÓëʵ¼ù 4-11[±±¾©]
AIÖÇÄÜ»¯Èí¼þ²âÊÔ·½·¨Óëʵ¼ù 5-23[ÉϺ£]
»ùÓÚ UML ºÍEA½øÐзÖÎöÉè¼Æ 4-26[±±¾©]
ÒµÎñ¼Ü¹¹Éè¼ÆÓ뽨ģ 4-18[±±¾©]

ר¼ÒÊӽǿ´ITÓë¼Ü¹¹
Èí¼þ¼Ü¹¹Éè¼Æ
ÃæÏò·þÎñÌåϵ¼Ü¹¹ºÍÒµÎñ×é¼þ
ÈËÈËÍøÒÆ¶¯¿ª·¢¼Ü¹¹
¼Ü¹¹¸¯»¯Ö®ÃÕ
̸ƽ̨¼´·þÎñPaaS


ÃæÏòÓ¦Óõļܹ¹Éè¼ÆÊµ¼ù
µ¥Ôª²âÊÔ+ÖØ¹¹+Éè¼ÆÄ£Ê½
Èí¼þ¼Ü¹¹Ê¦¡ª¸ß¼¶Êµ¼ù
Èí¼þ¼Ü¹¹Éè¼Æ·½·¨¡¢°¸ÀýÓëʵ¼ù
ǶÈëʽÈí¼þ¼Ü¹¹Éè¼Æ¡ª¸ß¼¶Êµ¼ù
SOAÌåϵ½á¹¹Êµ¼ù


Èñ°²¿Æ¼¼ Èí¼þ¼Ü¹¹Éè¼Æ·½·¨
³É¶¼ ǶÈëʽÈí¼þ¼Ü¹¹Éè¼Æ
ÉϺ£Æû³µ ǶÈëʽÈí¼þ¼Ü¹¹Éè¼Æ
±±¾© Èí¼þ¼Ü¹¹Éè¼Æ
ÉϺ£ Èí¼þ¼Ü¹¹Éè¼Æ°¸ÀýÓëʵ¼ù
±±¾© ¼Ü¹¹Éè¼Æ·½·¨°¸ÀýÓëʵ¼ù
ÉîÛÚ ¼Ü¹¹Éè¼Æ·½·¨°¸ÀýÓëʵ¼ù
ǶÈëʽÈí¼þ¼Ü¹¹Éè¼Æ¡ª¸ß¼¶Êµ¼ù