I am currently trying to implement the Sobel Edge Detection method in C using OpenCL to implement part of the code in parallel. I am managing to detect the edges of an input image correctly, however, my output image is a rotated and reflected version of in the input image. Please see the images below as a reference:
Input Image
Output Image
I have tried debugging my code by looking at how the image is being read into an array or how the output array is being written back to the image files but have been unsuccessful.
Does anyone have any suggestions to outputting a correctly orientated image?
The main file code is below:
/* Commands needed to run this file:
* g sobel.cpp -o sobel.out -lOpenCL ----> compiles file and creates an executable file
* ./sobel.out chess.pgm 100 35 ----> runs the executable file on the chess image for a high threshold of 100 and
* low threshold value of 35
*******************************************************************************************************************/
#include<stdio.h>
#include<CL/cl.h>
#include<iostream>
#include<fstream>
#include<string>
#include<cmath>
#include <tuple>
using namespace std;
int main(int argc, char **argv)
{
if (argc != 4)
{
cout << "Proper syntax: ./a.out <input_filename> <high_threshold> <low_threshold>" << endl;
return 0;
}
// Exit program if file doesn't open
string filename(argv[1]);
string path = "./input_images/" filename;
ifstream infile(path, ios::binary);
if (!infile.is_open())
{
cout << "File " << path << " not found in directory." << endl;
return 0;
}
ofstream img_mag("./output_images/sobel_mag.pgm", ios::binary);
ofstream img_hi("./output_images/sobel_hi.pgm", ios::binary);
ofstream img_lo("./output_images/sobel_lo.pgm", ios::binary);
ofstream img_x("./output_images/sobel_x.pgm", ios::binary);
ofstream img_y("./output_images/sobel_y.pgm", ios::binary);
char buffer[1024];
int width, height, intensity, hi = stoi(argv[2]), lo = stoi(argv[3]);
int sumx, sumy;
// Storing header information and copying into the new ouput images
infile >> buffer >> width >> height >> intensity;
img_mag << buffer << endl << width << " " << height << endl << intensity << endl;
img_hi << buffer << endl << width << " " << height << endl << intensity << endl;
img_lo << buffer << endl << width << " " << height << endl << intensity << endl;
img_x << buffer << endl << width << " " << height << endl << intensity << endl;
img_y << buffer << endl << width << " " << height << endl << intensity << endl;
// These matrices will hold the integer values of the input image
int Size = width * height;
int pic[Size];
// Reading in the input image
for (int i = 0; i < Size; i ){
pic[i] = (int)infile.get();
}
// setting up the OpenCL
clock_t start, end; //Timers to for execution timing & performance
//Initialize Buffers, memory space the allows for communication between the host and the target device
cl_mem width_buffer, height_buffer, input_buffer, xConv_buffer, yConv_buffer, size_buffer, magOutput_buffer;
//Get the platform you want to use
cl_uint platformCount; //keeps track of the number of platforms you have installed on your device
cl_platform_id *platforms;
// get platform count
clGetPlatformIDs(5, NULL, &platformCount); //sets platformCount to the number of platforms
// get all platforms
platforms = (cl_platform_id*) malloc(sizeof(cl_platform_id) * platformCount);
clGetPlatformIDs(platformCount, platforms, NULL); //saves a list of platforms in the platforms variable
//Select the platform you would like to use in this program (change index to do this). If you would like to see all available platforms run platform.cpp.
cl_platform_id platform = platforms[0];
//Outputs the information of the chosen platform
char* Info = (char*)malloc(0x1000*sizeof(char));
clGetPlatformInfo(platform, CL_PLATFORM_NAME , 0x1000, Info, 0);
printf("Name : %s\n", Info);
clGetPlatformInfo(platform, CL_PLATFORM_VENDOR , 0x1000, Info, 0);
printf("Vendor : %s\n", Info);
clGetPlatformInfo(platform, CL_PLATFORM_VERSION , 0x1000, Info, 0);
printf("Version : %s\n", Info);
clGetPlatformInfo(platform, CL_PLATFORM_PROFILE , 0x1000, Info, 0);
printf("Profile : %s\n", Info);
// get device ID must first get platform
cl_device_id device; //this is your deviceID
cl_int err, err1, err2;
// Access a device
//The if statement checks to see if the chosen platform uses a GPU, if not it setups the device using the CPU
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
if(err == CL_DEVICE_NOT_FOUND) {
err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &device, NULL);
}
printf("Device ID = %i\n",err);
// creates a context that allows devices to send and receive kernels and transfer data
cl_context context; //This is your contextID, the line below must just run
context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
// get details about the kernel.cl file in order to create it (read the kernel.cl file and place it in a buffer)
//read file in
FILE *program_handle;
program_handle = fopen("OpenCL/Kernel.cl", "r");
//get program size
size_t program_size;//, log_size;
fseek(program_handle, 0, SEEK_END);
program_size = ftell(program_handle);
rewind(program_handle);
//sort buffer out
char *program_buffer;//, *program_log;
program_buffer = (char*)malloc(program_size 1);
program_buffer[program_size] = '\0';
fread(program_buffer, sizeof(char), program_size, program_handle);
fclose(program_handle);
// create program from source because the kernel is in a separate file 'kernel.cl', therefore the compiler must run twice once on main and once on kernel
cl_program program = clCreateProgramWithSource(context, 1, (const char**)&program_buffer, &program_size, NULL); //this compiles the kernels code
// build the program, this compiles the source code from above for the devices that the code has to run on (ie GPU or CPU)
cl_int err3= clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
printf("program ID = %i\n", err3);
// creates the kernel, this creates a kernel from one of the functions in the cl_program you just built
// select the kernel you are running
cl_kernel kernel = clCreateKernel(program, "sobelEdgeDetection", &err);
// create command queue to the target device. This is the queue that the kernels get dispatched too, to get the the desired device.
cl_command_queue queue = clCreateCommandQueueWithProperties(context, device, 0, NULL);
// create data buffers for memory management between the host and the target device
size_t global_size = Size; //total number of work items
size_t local_size = height; //Size of each work group
cl_int num_groups = global_size/local_size; //number of work groups needed
int magOutput[global_size];
int xConv[global_size];
int yConv[global_size];
//Buffer (memory block) that both the host and target device can access
width_buffer = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,sizeof(int), &width, &err);
height_buffer = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,sizeof(int), &height, &err);
input_buffer = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,global_size*sizeof(int), &pic, &err);
xConv_buffer = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,global_size*sizeof(int), &xConv, &err);
yConv_buffer = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,global_size*sizeof(int), &yConv, &err);
size_buffer = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,sizeof(int), &Size, &err);
magOutput_buffer = clCreateBuffer(context,CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,global_size*sizeof(int), &magOutput, &err);
// create the arguments for the kernel (link these to the buffers set above, using the pointers for the respective buffers)
clSetKernelArg(kernel, 0, sizeof(cl_mem), &width_buffer);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &height_buffer);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &input_buffer);
clSetKernelArg(kernel, 3, sizeof(cl_mem), &xConv_buffer);
clSetKernelArg(kernel, 4, sizeof(cl_mem), &yConv_buffer);
clSetKernelArg(kernel, 5, sizeof(cl_mem), &size_buffer);
clSetKernelArg(kernel, 6, sizeof(cl_mem), &magOutput_buffer);
//enqueue kernel, deploys the kernels and determines the number of work-items that should be generated to execute the kernel (global_size) and the number of work-items in each work-group (local_size).
cl_int err4 = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL);
printf("\nKernel check: %i \n",err4);
// Allows the host to read from the buffer object
err = clEnqueueReadBuffer(queue, xConv_buffer, CL_TRUE, 0, sizeof(xConv), xConv, 0, NULL, NULL);
err = clEnqueueReadBuffer(queue, yConv_buffer, CL_TRUE, 0, sizeof(yConv), yConv, 0, NULL, NULL);
err = clEnqueueReadBuffer(queue, magOutput_buffer, CL_TRUE, 0, sizeof(magOutput), magOutput, 0, NULL, NULL);
//This command stops the program here until everything in the queue has been run
clFinish(queue);
// Once OpenCL has been used finish off the processing by normalising the magOutput array
// Make sure all the x,y and output magnitude values are between 0-255
int maxVal = 0;
int maxx = 0;
int maxy = 0;
for (int j = 0; j < Size; j ){
if (xConv[j] > maxx)
maxx = xConv[j];
if (yConv[j] > maxy)
maxy = yConv[j];
if (magOutput[j] > maxy)
maxVal = magOutput[j];
}
int tempx;
// Make sure all the magnitude values are between 0-255
for (int z = 0; z < Size; z ){
xConv[z] = xConv[z] * 255 / maxx;
yConv[z] = yConv[z] * 255 / maxy;
magOutput[z] = magOutput[z] * 255 / maxVal;
}
printf("\nMaxx: %i \n",maxx);
printf("Maxy: %i \n",maxy);
printf("MaxVal: %i \n",maxVal);
// Make sure to cast back to char before outputting
// Also to avoid any wonky results, get rid of any decimals by casting to int first
for (int j = 0; j < Size; j ){
// Output the x image
img_x << (char)((int)xConv[j]);
// Output the y image
img_y << (char)((int)yConv[j]);
// Output the magnitude image
img_mag << (char)((int)magOutput[j]);
// Ouput the low threshold image
if (magOutput[j] > lo)
img_lo << (char)255;
else
img_lo << (char)0;
// Ouput the high threshold image
if (magOutput[j] > hi)
img_hi << (char)255;
else
img_hi << (char)0;
}
// Deallocate all the OpenCL resources
clReleaseKernel(kernel);
clReleaseMemObject(width_buffer);
clReleaseMemObject(height_buffer);
clReleaseMemObject(input_buffer);
clReleaseMemObject(xConv_buffer);
clReleaseMemObject(yConv_buffer);
clReleaseMemObject(size_buffer);
clReleaseMemObject(magOutput_buffer);
clReleaseCommandQueue(queue);
clReleaseProgram(program);
clReleaseContext(context);
return 0;;
}
The following kernel code was also used:
__kernel void sobelEdgeDetection(__global int* width,__global int* height, __global int* pic, __global int* xConv, __global int* yConv, __global int* Size, __global int* magOutput){
int workItemNum = get_global_id(0); //Work item ID
int workGroupNum = get_group_id(0); //Work group ID
int localGroupID = get_local_id(0); //Work items ID within each work group
// size refers to the total size of a matrix. So for a 3x3 size = 9
int dim = *Size;
int row = *height; // only square matrices are used and as such the sqrt of size produces the row length
int col = *width; // only square matrices are used and as such the sqrt of size produces the column length
int current_row = workItemNum/col; // the current row is calculated by using the current workitem number divided by the total size of the matrix
int current_col = workItemNum % col; // the current column is calculated by using the current workitem number modulus by the total size of the matrix
if (workItemNum == dim-1)
{
printf("\nColumn size: %i \n",col);
printf("Row size: %i \n",row);
printf("Image Size: %i \n",dim);
}
// This if statement excludes all boundary pixels from the calculation as you require the neighbouring pixel cells
// for this calculation
if (current_col == 0 || current_col == col-1 || current_row == 0 || current_row == row - 1){
xConv[workItemNum] = 0;
yConv[workItemNum] = 0;
magOutput[workItemNum] = 0; // do not assess the bondary pixels and just set the value of the output array to zero
//printf("Workitemnum: %i \n", workItemNum);
}
else{
/****************************************************************************************************************
* The xConv array performs the kernal convultion of the input grey scale values with the following matrix:
*
* [-1 0 1]
* X - Directional Kernel = [-2 0 2]
* [-1 0 1]
*
* This scans across the X direction of the image and enhances all edges in the X-direction
*****************************************************************************************************************/
xConv[workItemNum] = pic[(current_col - 1)*col current_row - 1]*-1
pic[(current_col)*col current_row - 1]*-2
pic[(current_col 1)*col current_row - 1]*-1
pic[(current_col - 1)*col current_row]*0
pic[(current_col)*col current_row]*0
pic[(current_col 1)*col current_row]*0
pic[(current_col - 1)*col current_row 1]*1
pic[(current_col)*col current_row 1]*2
pic[(current_col 1)*col current_row 1]*1;
/****************************************************************************************************************
* The xConv array performs the kernal convultion of the input grey scale values with the following matrix:
*
* [ 1 2 1]
* Y - Directional Kernel = [ 0 0 0]
* [-1 -2 -1]
*
* This scans across the Y direction of the image and enhances all edges in the Y-direction
*****************************************************************************************************************/
yConv[workItemNum] = pic[(current_col - 1)*col current_row - 1]*1
pic[(current_col)*col current_row - 1]*0
pic[(current_col 1)*col current_row - 1]*-1
pic[(current_col - 1)*col current_row]*2
pic[(current_col)*col current_row]*0
pic[(current_col 1)*col current_row]*-2
pic[(current_col - 1)*col current_row 1]*1
pic[(current_col)*col current_row 1]*0
pic[(current_col 1)*col current_row 1]*-1;
/*****************************************************************************************************************
* Calculates the convolution matrix of the X and Y arrays. Does so by squaring each item of the X and Y arrays,
* adding them and taking the square root. This is the basic magnitude formula. This is done for by each workItem
******************************************************************************************************************/
const float xConvf = (float)xConv[workItemNum], yConvf = (float)yConv[workItemNum];
magOutput[workItemNum] = (int)(sqrt(xConvf*xConvf yConvf*yConvf) 0.5f);
}
}
CodePudding user response:
Your host (c ) code looks fine but your kernel code contains an error:
xConv[workItemNum] = pic[(current_col - 1)*col current_row - 1]*-1
pic[(current_col)*col current_row - 1]*-2
pic[(current_col 1)*col current_row - 1]*-1
pic[(current_col - 1)*col current_row]*0
pic[(current_col)*col current_row]*0
pic[(current_col 1)*col current_row]*0
pic[(current_col - 1)*col current_row 1]*1
pic[(current_col)*col current_row 1]*2
pic[(current_col 1)*col current_row 1]*1;
/****************************************************************************************************************
* The xConv array performs the kernal convultion of the input grey scale values with the following matrix:
*
* [ 1 2 1]
* Y - Directional Kernel = [ 0 0 0]
* [-1 -2 -1]
*
* This scans across the Y direction of the image and enhances all edges in the Y-direction
*****************************************************************************************************************/
yConv[workItemNum] = pic[(current_col - 1)*col current_row - 1]*1
pic[(current_col)*col current_row - 1]*0
pic[(current_col 1)*col current_row - 1]*-1
pic[(current_col - 1)*col current_row]*2
pic[(current_col)*col current_row]*0
pic[(current_col 1)*col current_row]*-2
pic[(current_col - 1)*col current_row 1]*1
pic[(current_col)*col current_row 1]*0
pic[(current_col 1)*col current_row 1]*-1;
I am not familiar with the sobel algorithm, but it seems you are indexing the pic
array incorrectly.
If your intention was to select the pixel at (row=current_row,col=current_col)
,
then you should index your array like pic[(current_row)*col current_col]
.
If your intention was to index the pixel at (row=current_col,col=current_row)
, then your original code would work, however you could only index (row=current_col,col=current_row)
if row
and col
were identical. With the image you provided, you would end up indexing past the bounds of the array.
Please reexamine your kernel code.
P.S. I strongly recommend renaming row
to numRows
and col
to numCols
.