I am currently using an Intel i3 CPU with an AMD Radeon RX 560 graphics card.
In my program I want to overlap transfers by using multiple command queues in parallel, and I have run into a problem. The test below uses three queues and issues three asynchronous write calls in parallel, one for each of three pieces of data (the Y, U and V components, 2 MB each); the program repeats these writes in a loop three times.
Why does clEnqueueWriteBuffer take dozens of milliseconds? That delay is far too long.
The program is as follows:
Int main ()//main_v2
{
Cl_platform_id * platformIds;
Cl_device_id device;//TODO: the extend for multi devices on platform
//should release below after use
Cl_context ocl_ctx;
Cl_command_queue mvQueue;
Cl_command_queue yuvQueue [4].
Cl_command_queue queue;//"according to CUDA 's default stream?
Int err=0;
//access platform
PlatformIds=(cl_platform_id *) alloca (sizeof (cl_platform_id));//application stack space for list platform
Err=clGetPlatformIDs (1, platformIds, NULL);
//to get equipment
Err=clGetDeviceIDs (platformIds [0], CL_DEVICE_TYPE_GPU, 1, & amp; Device, NULL);
If (err!={CL_SUCCESS)
Printf (" can 't get the gpu device, try the CPU... \n");
Err=clGetDeviceIDs (platformIds [0], CL_DEVICE_TYPE_CPU, 1, & amp; Device, NULL);
}
//create the OpenCL Context
Ocl_ctx=clCreateContext (NULL, 1, & amp; Device, NULL, NULL, & amp; Err);
If (ocl_ctx==NULL) {
Printf (" create OpenCL context fail \ n ");
The exit (EXIT_FAILURE);
}
//create the command queue
Cl_queue_properties props []={
CL_QUEUE_PROPERTIES,
CL_QUEUE_PROFILING_ENABLE,
0
};
for(int i=0; I<4. I++)
{
YuvQueue [I]=clCreateCommandQueueWithProperties (ocl_ctx, device, NULL/* */props, & amp; Err);
If (yuvQueue [I]==NULL) {
Printf (" the create command queue fail % d \ n ", err);
The exit (EXIT_FAILURE);
}
}
//create a memory object
Cl_mem pCurFrameObj;
Char * orig_buffer=(char *) malloc (1920 * 1080 * 3);
PCurFrameObj=clCreateBuffer (ocl_ctx, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
1920 * 1080 * 3, orig_buffer, & amp; Err);
//pCurFrameObj=clCreateBuffer (ocl_ctx, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
//1920 * 1080 * 3, NULL, & amp; Err);
int i=0;
While (i++ & lt; 3)
{
Unsigned char * y=(unsigned char *) malloc * WIDTH (HEIGHT);
Unsigned char * u=(unsigned char *) malloc * WIDTH (HEIGHT);
Unsigned char * v=(unsigned char *) malloc * WIDTH (HEIGHT);
ReadFrameFromYUVFile (" z: \ \ test_files \ \ test_mv yuv ", y, u, v, 0, WIDTH, HEIGHT);
Err=clEnqueueWriteBuffer (yuvQueue [0], pCurFrameObj, CL_FALSE, 0, 1080 * 1920, (void *) y, 0, NULL, NULL);
Err=clEnqueueWriteBuffer (yuvQueue [1], pCurFrameObj, CL_FALSE, 1080 * 1920, 1080 * 1920, (void *) u, 0, NULL, NULL);
Err=clEnqueueWriteBuffer (yuvQueue [2], pCurFrameObj, CL_FALSE, 1080 * 1920 * 2, 1080 * 1920, (void *) v, 0, NULL, NULL);
//Sleep (10);
//free (v);
//free (u);
//free (y);
}
Sleep (60);
//free all opencl resources, like the queues, the memory objs, etc.
ClReleaseMemObject (pCurFrameObj);
for(int i=0; I<4. I++)
ClReleaseCommandQueue (yuvQueue [I]);
ClReleaseContext (ocl_ctx);
Printf (" Press anykey to quit... ");
getchar();
return 0;
}