// Gaussian blur
//
// Smooths an image stored as one uchar per pixel in row-major order
// (index = y * width + x) with a fixed 5x5 integer-weighted kernel and writes
// the normalised result to `output`.
//
// Launch as a 2D NDRange with one work-item per pixel: dimension 0 is x,
// dimension 1 is y. Work-items outside the image (e.g. when the global size
// is rounded up to a work-group multiple) return without touching memory.
// Neighbours outside the image are replaced by the nearest border pixel.
__kernel void gaussian_blur(__global uchar* input, __global uchar* output, int width, int height) {
    const int x = get_global_id(0);
    const int y = get_global_id(1);

    // Guard against padded launches; also covers width/height <= 0.
    if (x >= width || y >= height) {
        return;
    }

    // 5x5 smoothing weights (they sum to 159).
    const float weights[5][5] = {
        {2, 4, 5, 4, 2},
        {4, 9,12, 9, 4},
        {5,12,15,12, 5},
        {4, 9,12, 9, 4},
        {2, 4, 5, 4, 2}
    };

    float sum = 0.0f;
    float weight = 0.0f;
    for (int dy = -2; dy <= 2; dy++) {
        // Row clamp does not depend on dx, so compute it once per row.
        const int ny = clamp(y + dy, 0, height - 1);
        for (int dx = -2; dx <= 2; dx++) {
            const int nx = clamp(x + dx, 0, width - 1);
            const float k = weights[dy + 2][dx + 2];
            sum += input[ny * width + nx] * k;
            weight += k;
        }
    }

    // sum / weight is a weighted average of uchar values, so it stays in 0..255.
    output[y * width + x] = (uchar)(sum / weight);
}
// Sobel gradient
//
// Computes the 3x3 Sobel response of `input` (one uchar per pixel, row-major,
// index = y * width + x) and stores, per pixel:
//   grad[idx]  = hypot(gx, gy)   gradient magnitude
//   angle[idx] = atan2(gy, gx)   gradient direction in radians
// gx is (right column - left column) and gy is (bottom row - top row), so a
// positive gy means intensity increases with increasing y.
//
// Launch as a 2D NDRange with one work-item per pixel (dimension 0 = x,
// dimension 1 = y). Work-items outside the image return immediately.
// Neighbours outside the image are replaced by the nearest border pixel, so
// border pixels never read outside the buffer.
__kernel void sobel_gradient(__global uchar* input, __global float* grad, __global float* angle, int width, int height) {
    const int x = get_global_id(0);
    const int y = get_global_id(1);

    if (x >= width || y >= height) {
        return;
    }

    // Clamped neighbour columns and row offsets.
    const int xl = max(x - 1, 0);
    const int xr = min(x + 1, width - 1);
    const int row_t = max(y - 1, 0) * width;
    const int row_m = y * width;
    const int row_b = min(y + 1, height - 1) * width;

    // Load the eight neighbours as int so the signed arithmetic below is exact.
    const int tl = input[row_t + xl];
    const int tm = input[row_t + x];
    const int tr = input[row_t + xr];
    const int ml = input[row_m + xl];
    const int mr = input[row_m + xr];
    const int bl = input[row_b + xl];
    const int bm = input[row_b + x];
    const int br = input[row_b + xr];

    const int gx = (tr + 2 * mr + br) - (tl + 2 * ml + bl);
    const int gy = (bl + 2 * bm + br) - (tl + 2 * tm + tr);

    grad[row_m + x] = hypot((float)gx, (float)gy);
    angle[row_m + x] = atan2((float)gy, (float)gx);
}
// Non-maximum suppression
//
// Keeps a pixel's gradient magnitude only if it is at least as large as both
// neighbours along the gradient direction; otherwise writes 0.
//
// Inputs (row-major, index = y * width + x):
//   grad  - gradient magnitude per pixel
//   angle - gradient direction in radians, expected to be atan2(gy, gx) with
//           gx positive towards +x and gy positive towards +y (the convention
//           used by sobel_gradient in this file)
// Output:
//   output - magnitude saturated to 0..255, or 0 when suppressed
//
// Launch as a 2D NDRange with one work-item per pixel (dimension 0 = x,
// dimension 1 = y). Work-items outside the image return immediately.
// Neighbours that fall outside the image are treated as magnitude 0, so
// border pixels never read outside the buffer.
__kernel void non_maximum_suppression(__global float* grad, __global float* angle, __global uchar* output, int width, int height) {
    const int x = get_global_id(0);
    const int y = get_global_id(1);

    if (x >= width || y >= height) {
        return;
    }

    const int idx = y * width + x;

    // Fold the direction into [0, 180] degrees: opposite directions share
    // the same pair of neighbours.
    float dir = degrees(angle[idx]);
    if (dir < 0.0f) {
        dir += 180.0f;
    }

    // Quantise to one of four step vectors (dx, dy) along the gradient.
    // With y growing downwards, 45 degrees points to (+x, +y) and
    // 135 degrees points to (-x, +y).
    int dx;
    int dy;
    if (dir >= 22.5f && dir < 67.5f) {
        dx = 1;  dy = 1;
    } else if (dir >= 67.5f && dir < 112.5f) {
        dx = 0;  dy = 1;
    } else if (dir >= 112.5f && dir < 157.5f) {
        dx = -1; dy = 1;
    } else {
        // [0, 22.5) and [157.5, 180]: horizontal gradient.
        dx = 1;  dy = 0;
    }

    const int ax = x + dx, ay = y + dy;
    const int bx = x - dx, by = y - dy;
    const bool a_inside = ax >= 0 && ax < width && ay >= 0 && ay < height;
    const bool b_inside = bx >= 0 && bx < width && by >= 0 && by < height;

    const float g  = grad[idx];
    const float g1 = a_inside ? grad[ay * width + ax] : 0.0f;
    const float g2 = b_inside ? grad[by * width + bx] : 0.0f;

    // The magnitude can exceed 255, so saturate instead of using a plain cast,
    // whose result is not defined for out-of-range floats.
    output[idx] = (g >= g1 && g >= g2) ? convert_uchar_sat(g) : (uchar)0;
}
// Double-threshold edge linking (hysteresis)
//
// Classifies each pixel of `input` (one uchar per pixel, row-major):
//   value >= high          -> 255 (strong edge)
//   low <= value < high    -> 255 if any pixel in the 3x3 neighbourhood is
//                             >= high, otherwise 0
//   value < low            -> 0
// This is a single pass: a weak pixel is promoted only by a directly adjacent
// strong pixel; chains of weak pixels are not followed.
//
// Launch as a 2D NDRange with one work-item per pixel (dimension 0 = x,
// dimension 1 = y). Work-items outside the image return immediately.
// Neighbour coordinates are clamped to the image, so no read is out of bounds.
__kernel void hysteresis(__global uchar* input, __global uchar* output, int width, int height, uchar low, uchar high) {
    const int x = get_global_id(0);
    const int y = get_global_id(1);

    if (x >= width || y >= height) {
        return;
    }

    const int idx = y * width + x;
    const uchar val = input[idx];

    if (val >= high) {
        output[idx] = 255;
    } else if (val >= low) {
        // Search the 8-neighbourhood for a strong edge. The centre pixel is
        // visited too, but it is below `high` in this branch.
        bool connected = false;
        for (int dy = -1; dy <= 1; dy++) {
            const int ny = clamp(y + dy, 0, height - 1);
            for (int dx = -1; dx <= 1; dx++) {
                const int nx = clamp(x + dx, 0, width - 1);
                if (input[ny * width + nx] >= high) {
                    connected = true;
                }
            }
        }
        output[idx] = connected ? 255 : 0;
    } else {
        output[idx] = 0;
    }
}
#ifndef COMPUTERVISION_CANNYMAIN_H
#define COMPUTERVISION_CANNYMAIN_H
#include <opencv2/opencv.hpp>
#include "../helper/opencl_helper.h"
int runCanny() {
cv::Mat image = cv::imread("../src/opencl/sources/img.png", cv::IMREAD_GRAYSCALE);
if (image.empty()) {
printf("Image load failed!\n");
return -1;
}
int width = image.cols;
int height = image.rows;
size_t imgSize = width * height;
OpenCLObjects ocl = init_opencl("canny.cl", "gaussian_blur");
cl_int err;
cl_mem buf_input = clCreateBuffer(ocl.context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
imgSize, image.data, &err);
cl_mem buf_blur = clCreateBuffer(ocl.context, CL_MEM_READ_WRITE, imgSize, NULL, &err);
cl_mem buf_grad = clCreateBuffer(ocl.context, CL_MEM_READ_WRITE, sizeof(float) * imgSize, NULL, &err);
cl_mem buf_angle = clCreateBuffer(ocl.context, CL_MEM_READ_WRITE, sizeof(float) * imgSize, NULL, &err);
cl_mem buf_nms = clCreateBuffer(ocl.context, CL_MEM_READ_WRITE, imgSize, NULL, &err);
cl_mem buf_output = clCreateBuffer(ocl.context, CL_MEM_WRITE_ONLY, imgSize, NULL, &err);
ocl.kernel = clCreateKernel(ocl.program, "gaussian_blur", &err);
clSetKernelArg(ocl.kernel, 0, sizeof(cl_mem), &buf_input);
clSetKernelArg(ocl.kernel, 1, sizeof(cl_mem), &buf_blur);
clSetKernelArg(ocl.kernel, 2, sizeof(int), &width);
clSetKernelArg(ocl.kernel, 3, sizeof(int), &height);
size_t gsize[] = { (size_t)width, (size_t)height };
clEnqueueNDRangeKernel(ocl.queue, ocl.kernel, 2, NULL, gsize, NULL, 0, NULL, NULL);
clFinish(ocl.queue);
ocl.kernel = clCreateKernel(ocl.program, "sobel_gradient", &err);
clSetKernelArg(ocl.kernel, 0, sizeof(cl_mem), &buf_blur);
clSetKernelArg(ocl.kernel, 1, sizeof(cl_mem), &buf_grad);
clSetKernelArg(ocl.kernel, 2, sizeof(cl_mem), &buf_angle);
clSetKernelArg(ocl.kernel, 3, sizeof(int), &width);
clSetKernelArg(ocl.kernel, 4, sizeof(int), &height);
clEnqueueNDRangeKernel(ocl.queue, ocl.kernel, 2, NULL, gsize, NULL, 0, NULL, NULL);
clFinish(ocl.queue);
ocl.kernel = clCreateKernel(ocl.program, "non_maximum_suppression", &err);
clSetKernelArg(ocl.kernel, 0, sizeof(cl_mem), &buf_grad);
clSetKernelArg(ocl.kernel, 1, sizeof(cl_mem), &buf_angle);
clSetKernelArg(ocl.kernel, 2, sizeof(cl_mem), &buf_nms);
clSetKernelArg(ocl.kernel, 3, sizeof(int), &width);
clSetKernelArg(ocl.kernel, 4, sizeof(int), &height);
clEnqueueNDRangeKernel(ocl.queue, ocl.kernel, 2, NULL, gsize, NULL, 0, NULL, NULL);
clFinish(ocl.queue);
uchar low = 50, high = 100;
ocl.kernel = clCreateKernel(ocl.program, "hysteresis", &err);
clSetKernelArg(ocl.kernel, 0, sizeof(cl_mem), &buf_nms);
clSetKernelArg(ocl.kernel, 1, sizeof(cl_mem), &buf_output);
clSetKernelArg(ocl.kernel, 2, sizeof(int), &width);
clSetKernelArg(ocl.kernel, 3, sizeof(int), &height);
clSetKernelArg(ocl.kernel, 4, sizeof(uchar), &low);
clSetKernelArg(ocl.kernel, 5, sizeof(uchar), &high);
clEnqueueNDRangeKernel(ocl.queue, ocl.kernel, 2, NULL, gsize, NULL, 0, NULL, NULL);
clFinish(ocl.queue);
std::vector<uchar> result(imgSize);
clEnqueueReadBuffer(ocl.queue, buf_output, CL_TRUE, 0, imgSize, result.data(), 0, NULL, NULL);
cv::Mat outputImg(height, width, CV_8UC1, result.data());
cv::imshow("Original", image);
cv::imshow("Canny Edge", outputImg);
cv::waitKey(0);
release_opencl(&ocl);
clReleaseMemObject(buf_input);
clReleaseMemObject(buf_blur);
clReleaseMemObject(buf_grad);
clReleaseMemObject(buf_angle);
clReleaseMemObject(buf_nms);
clReleaseMemObject(buf_output);
return 0;
}
#endif
#ifndef COMPUTERVISION_OPENCL_HELPER_H
#define COMPUTERVISION_OPENCL_HELPER_H
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <OpenCL/opencl.h>
/*
 * Aborts the process if an OpenCL call failed.
 *
 * err - status code to test against CL_SUCCESS (evaluated more than once, so
 *       pass a plain variable, not an expression with side effects)
 * msg - C string naming the call; printed to stderr with the error code
 *
 * Wrapped in do { } while (0) so that `CHECK_ERROR(...);` behaves as a single
 * statement, including inside an unbraced if/else.
 */
#define CHECK_ERROR(err, msg)                                               \
    do {                                                                    \
        if ((err) != CL_SUCCESS) {                                          \
            fprintf(stderr, "%s failed with error %d\n", (msg), (err));     \
            exit(1);                                                        \
        }                                                                   \
    } while (0)
/*
 * Bundle of the OpenCL handles that init_opencl() creates and
 * release_opencl() releases.
 */
typedef struct {
cl_platform_id platform;   /* platform returned by clGetPlatformIDs */
cl_device_id device;       /* device returned by clGetDeviceIDs on that platform */
cl_context context;        /* context created for `device` */
cl_command_queue queue;    /* command queue on `context` / `device` */
cl_program program;        /* program built from the kernel source file */
cl_kernel kernel;          /* kernel object created from `program` */
} OpenCLObjects;
/*
 * Reads a whole file into a newly allocated, NUL-terminated buffer.
 *
 * filename - path of the file to read
 *
 * Returns a malloc'ed string that the caller must free(). On any failure
 * (open, seek, size query, allocation, read) a message is printed to stderr
 * and the process exits with status 1; the function never returns NULL.
 */
char *read_source(const char *filename) {
    /* Binary mode keeps the size reported by ftell equal to what fread returns. */
    FILE *fp = fopen(filename, "rb");
    if (!fp) {
        perror("Failed to open kernel file");
        exit(1);
    }

    if (fseek(fp, 0, SEEK_END) != 0) {
        perror("Failed to seek in kernel file");
        fclose(fp);
        exit(1);
    }

    /* ftell reports failure as -1; check before converting to size_t. */
    long length = ftell(fp);
    if (length < 0) {
        perror("Failed to determine kernel file size");
        fclose(fp);
        exit(1);
    }
    rewind(fp);

    char *source = (char *)malloc((size_t)length + 1);
    if (!source) {
        fprintf(stderr, "Failed to allocate %ld bytes for kernel source\n", length + 1);
        fclose(fp);
        exit(1);
    }

    size_t bytes_read = fread(source, 1, (size_t)length, fp);
    if (bytes_read != (size_t)length && ferror(fp)) {
        perror("Failed to read kernel file");
        free(source);
        fclose(fp);
        exit(1);
    }

    /* Terminate after the bytes actually read, not the expected size. */
    source[bytes_read] = '\0';
    fclose(fp);
    return source;
}
/*
 * Sets up OpenCL and builds one kernel.
 *
 * Takes the first platform and its default device, creates a context and a
 * command queue, builds the program in `source_file`, and creates the kernel
 * called `kernel_name`.
 *
 * source_file - path of the OpenCL C source file to build
 * kernel_name - name of the kernel to create from the built program
 *
 * Returns the populated OpenCLObjects; release it with release_opencl().
 * On any failure a message is printed to stderr (including the build log for
 * compile errors) and the process exits with status 1.
 */
OpenCLObjects init_opencl(const char *source_file, const char *kernel_name) {
    OpenCLObjects ocl;
    cl_int err;

    /* Start from all-NULL handles so the struct never holds garbage. */
    memset(&ocl, 0, sizeof(ocl));

    err = clGetPlatformIDs(1, &ocl.platform, NULL);
    CHECK_ERROR(err, "clGetPlatformIDs");

    err = clGetDeviceIDs(ocl.platform, CL_DEVICE_TYPE_DEFAULT, 1, &ocl.device, NULL);
    CHECK_ERROR(err, "clGetDeviceIDs");

    ocl.context = clCreateContext(NULL, 1, &ocl.device, NULL, NULL, &err);
    CHECK_ERROR(err, "clCreateContext");

    ocl.queue = clCreateCommandQueue(ocl.context, ocl.device, 0, &err);
    CHECK_ERROR(err, "clCreateCommandQueue");

    char *source = read_source(source_file);
    ocl.program = clCreateProgramWithSource(ocl.context, 1, (const char **)&source, NULL, &err);
    /* The program object keeps its own copy of the source text. */
    free(source);
    CHECK_ERROR(err, "clCreateProgramWithSource");

    err = clBuildProgram(ocl.program, 1, &ocl.device, NULL, NULL, NULL);
    if (err != CL_SUCCESS) {
        /* Ask for the log size first: a fixed buffer that is too small makes
         * the query fail and leaves the buffer unwritten. */
        size_t log_size = 0;
        char *log = NULL;
        cl_int log_err = clGetProgramBuildInfo(ocl.program, ocl.device, CL_PROGRAM_BUILD_LOG,
                                               0, NULL, &log_size);
        if (log_err == CL_SUCCESS) {
            log = (char *)malloc(log_size + 1);
        }
        if (log != NULL &&
            clGetProgramBuildInfo(ocl.program, ocl.device, CL_PROGRAM_BUILD_LOG,
                                  log_size, log, NULL) == CL_SUCCESS) {
            log[log_size] = '\0';
            fprintf(stderr, "Build Error:\n%s\n", log);
        } else {
            fprintf(stderr, "Build Error: clBuildProgram failed with error %d (build log unavailable)\n", err);
        }
        free(log);
        exit(1);
    }

    ocl.kernel = clCreateKernel(ocl.program, kernel_name, &err);
    CHECK_ERROR(err, "clCreateKernel");

    return ocl;
}
/*
 * Releases the kernel, program, command queue and context held in `ocl`,
 * in that order, and resets each released handle to NULL so that a second
 * call is a no-op instead of a double release.
 *
 * ocl - objects to release; a NULL pointer is ignored, as are NULL handles
 *
 * The platform and device ids are left untouched.
 */
void release_opencl(OpenCLObjects *ocl) {
    if (ocl == NULL) {
        return;
    }
    if (ocl->kernel != NULL) {
        clReleaseKernel(ocl->kernel);
        ocl->kernel = NULL;
    }
    if (ocl->program != NULL) {
        clReleaseProgram(ocl->program);
        ocl->program = NULL;
    }
    if (ocl->queue != NULL) {
        clReleaseCommandQueue(ocl->queue);
        ocl->queue = NULL;
    }
    if (ocl->context != NULL) {
        clReleaseContext(ocl->context);
        ocl->context = NULL;
    }
}
#endif