Blurring creates stripes on the output image-CodePudding

I wrote this piece of code to make a median Blur in CUDA but I am running into an issue, where the channel of image is blurred but it creates stripes which look unusual for blurring.

#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>

using namespace std;
using namespace cv;

#define BLOCK_SIZE      16
#define TILE_SIZE       14
#define FILTER_WIDTH    3
#define FILTER_HEIGHT   3


__device__ void sort(unsigned char* filterVector)
{
    for (int i = 0; i < FILTER_WIDTH*FILTER_HEIGHT; i  ) {
        for (int j = i   1; j < FILTER_WIDTH*FILTER_HEIGHT; j  ) {
            if (filterVector[i] > filterVector[j]) {
                unsigned char tmp = filterVector[i];
                filterVector[i] = filterVector[j];
                filterVector[j] = tmp;
            }
        }
    }
}

__global__ void medianFilter(unsigned char *srcImage, unsigned char *dstImage, unsigned int width, unsigned int height)
{

    int x_o = TILE_SIZE * blockIdx.x   threadIdx.x;
    int y_o = TILE_SIZE * blockIdx.y   threadIdx.y;

    int x_i = x_o - (FILTER_HEIGHT / 2);
    int y_i = y_o - (FILTER_WIDTH / 2);

    __shared__ unsigned char sBuffer[BLOCK_SIZE][BLOCK_SIZE];

    if ((x_i >= 0) && (x_i < width) && (y_i >= 0) && (y_i < height)) {
        sBuffer[threadIdx.y][threadIdx.x] = srcImage[y_i * width   x_i];
    } else {
        sBuffer[threadIdx.y][threadIdx.x] = 0;
    }

    __syncthreads();

    unsigned char filterVector[FILTER_WIDTH*FILTER_HEIGHT];

    // int size_vec = sizeof(filterVector) / sizeof(filterVector[0]);

    // printf("%d \n", size_vec);

    if (threadIdx.x < TILE_SIZE && threadIdx.y < TILE_SIZE) {
        for (int r = 0; r < FILTER_HEIGHT; r  ) {
            for (int c = 0; c < FILTER_HEIGHT; c  ) {
                filterVector[r*FILTER_HEIGHT c] = sBuffer[threadIdx.y   r][threadIdx.x   c];
            }
        }
    }

    sort(filterVector);

    if (x_o < width && y_o < height) {
        dstImage[y_o * width   x_o] =  filterVector[4]; // (FILTER_WIDTH*FILTER_HEIGHT)/2
    }

}

int main(int argc, char **argv)
{

    std::string image_path = "./test.jpg";
    cv::Mat img = imread(image_path, IMREAD_COLOR);
    std::string output_file = "test_gpu.jpg";

    if(img.empty())
    {
        std::cout << "Couldn't read img:" << image_path << std::endl;
    }

    Mat bgr[3];
    split(img, bgr);
    
    cv::Mat dstImg (bgr[1].size(), bgr[1].type());

    const int inputSize = img.cols * img.rows;
    const int outputSize = dstImg.cols * dstImg.rows; 
    unsigned char *d_input, *d_output;

    cudaMalloc<unsigned char>(&d_input, inputSize);
    cudaMalloc<unsigned char>(&d_output, outputSize);

    cudaMemcpy(d_input, bgr[1].ptr(), inputSize, cudaMemcpyHostToDevice);

    const dim3 block(BLOCK_SIZE, BLOCK_SIZE);
    const dim3 grid((dstImg.cols   TILE_SIZE - 1)/TILE_SIZE, (dstImg.rows   TILE_SIZE - 1)/TILE_SIZE);

    medianFilter<<<grid,block>>>(d_input, d_output, dstImg.cols, dstImg.rows);

    cudaMemcpy(dstImg.ptr(), d_output, outputSize, cudaMemcpyDeviceToHost);

    cudaFree(d_input);
    cudaFree(d_output);

    imwrite(output_file, dstImg);
}

This is my original image:

and here is one blurred channel:

For some reason I get those stripes on the output image, which is just one of the channels for now. Any idea why this is happening?

CodePudding user response：

Your intention is that even though you are launching a block of dimension (BLOCK_SIZE, BLOCK_SIZE), you only intend (TILE_SIZE, TILE_SIZE) threads in that block to actually compute the values for output pixels.

However you are not properly accounting for that here:

if (x_o < width && y_o < height) {

that should be, instead:

if (x_o < width && y_o < height  && threadIdx.x < TILE_SIZE && threadIdx.y < TILE_SIZE) {

(In fact, everything after the __syncthreads() in your kernel can be conditioned to only execute if threadIdx.x < TILE_SIZE && threadIdx.y < TILE_SIZE if you wish.)