[gegl] box-blur using auxiliary cl-iterator to keep intermediate results
- From: Ãyvind KolÃs <ok src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gegl] box-blur using auxiliary cl-iterator to keep intermediate results
- Date: Tue, 20 Mar 2012 13:54:26 +0000 (UTC)
commit 671b66ed43bc62a03bbcdab1032f6e6043a076b7
Author: Victor Oliveira <victormatheus gmail com>
Date: Mon Mar 5 16:41:37 2012 -0300
box-blur using auxiliary cl-iterator to keep intermediate results
gegl/opencl/gegl-cl-init.c | 8 ++++
gegl/opencl/gegl-cl-init.h | 5 ++-
operations/common/box-blur.c | 92 +++++++++++++++++++++++-------------------
3 files changed, 62 insertions(+), 43 deletions(-)
---
diff --git a/gegl/opencl/gegl-cl-init.c b/gegl/opencl/gegl-cl-init.c
index 7a4bc90..2f846d6 100644
--- a/gegl/opencl/gegl-cl-init.c
+++ b/gegl/opencl/gegl-cl-init.c
@@ -111,6 +111,12 @@ gegl_cl_get_command_queue (void)
return cl_state.cq;
}
+cl_ulong
+gegl_cl_get_local_mem_size (void)
+{
+ return cl_state.local_mem_size;
+}
+
#ifdef G_OS_WIN32
#include <windows.h>
@@ -232,6 +238,7 @@ gegl_cl_init (GError **error)
gegl_clGetDeviceInfo (cl_state.device, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool), &cl_state.image_support, NULL);
gegl_clGetDeviceInfo (cl_state.device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &cl_state.max_mem_alloc, NULL);
+ gegl_clGetDeviceInfo (cl_state.device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &cl_state.local_mem_size, NULL);
cl_state.max_image_width = 4096;
cl_state.max_image_height = 4096;
@@ -241,6 +248,7 @@ gegl_cl_init (GError **error)
g_printf("[OpenCL] Extensions:%s\n", cl_state.platform_ext);
g_printf("[OpenCL] Default Device Name:%s\n", cl_state.device_name);
g_printf("[OpenCL] Max Alloc: %lu bytes\n", cl_state.max_mem_alloc);
+ g_printf("[OpenCL] Local Mem: %lu bytes\n", cl_state.local_mem_size);
while (cl_state.max_image_width * cl_state.max_image_height * 16 > cl_state.max_mem_alloc)
{
diff --git a/gegl/opencl/gegl-cl-init.h b/gegl/opencl/gegl-cl-init.h
index 583e76a..f77984b 100644
--- a/gegl/opencl/gegl-cl-init.h
+++ b/gegl/opencl/gegl-cl-init.h
@@ -22,6 +22,7 @@ typedef struct
size_t max_image_height;
size_t max_image_width;
cl_ulong max_mem_alloc;
+ cl_ulong local_mem_size;
char platform_name [1024];
char platform_version[1024];
@@ -44,6 +45,8 @@ cl_context gegl_cl_get_context (void);
cl_command_queue gegl_cl_get_command_queue (void);
+cl_ulong gegl_cl_get_local_mem_size (void);
+
typedef struct
{
cl_program program;
@@ -55,7 +58,7 @@ gegl_cl_run_data *gegl_cl_compile_and_build (const char *program_source,
#ifdef __GEGL_CL_INIT_MAIN__
-gegl_cl_state cl_state = {FALSE, NULL, NULL, NULL, NULL, FALSE, 0, 0, 0, "", "", "", ""};
+gegl_cl_state cl_state = {FALSE, NULL, NULL, NULL, NULL, FALSE, 0, 0, 0, 0, "", "", "", ""};
GHashTable *cl_program_hash = NULL;
t_clGetPlatformIDs gegl_clGetPlatformIDs = NULL;
diff --git a/operations/common/box-blur.c b/operations/common/box-blur.c
index 7f26cdf..8a0e4d2 100644
--- a/operations/common/box-blur.c
+++ b/operations/common/box-blur.c
@@ -226,89 +226,97 @@ static void prepare (GeglOperation *operation)
#include "buffer/gegl-buffer-cl-iterator.h"
static const char* kernel_source =
-"__kernel void kernel_blur(__global const float4 *in, \n"
-" __global float4 *out, \n"
-" __local float4 *shared_roi, \n"
-" int width, int radius) \n"
+"__kernel void kernel_blur_hor (__global const float4 *in, \n"
+" __global float4 *aux, \n"
+" int width, int radius) \n"
"{ \n"
+" const int in_index = get_global_id(0) * (width + 2 * radius) \n"
+" + (radius + get_global_id (1)); \n"
" \n"
-" const int out_index = get_global_id(0) * width + get_global_id(1); \n"
-" const int in_top_index = (get_group_id (0) * get_local_size (0)) * (width + 2 * radius) \n"
-" + (get_group_id (1) * get_local_size (1)); \n"
-" \n"
-" const int local_width = (2 * radius + get_local_size (1)); \n"
-" const int local_index = (radius + get_local_id (0)) * local_width + (radius + get_local_id (1)); \n"
-" int i, x, y; \n"
-" \n"
+" const int aux_index = get_global_id(0) * width + get_global_id (1); \n"
+" int i; \n"
" float4 mean; \n"
" \n"
-" for (y = get_local_id (0); y < get_local_size (0) + 2 * radius; y += get_local_size (0)) \n"
-" for (x = get_local_id (1); x < get_local_size (1) + 2 * radius; x += get_local_size (1)) \n"
-" shared_roi[y*local_width+x] = in[in_top_index + y * (width + 2 * radius) + x]; \n"
-" \n"
-" barrier(CLK_LOCAL_MEM_FENCE); \n"
-" \n"
" mean = (float4)(0.0f); \n"
" \n"
" for (i=-radius; i <= radius; i++) \n"
" { \n"
-" mean += shared_roi[local_index + i]; \n"
+" mean += in[in_index + i]; \n"
" } \n"
" \n"
-" shared_roi[local_index] = mean / (2 * radius + 1); \n"
+" aux[aux_index] = mean / (2 * radius + 1); \n"
+"} \n"
+
+"__kernel void kernel_blur_ver (__global const float4 *aux, \n"
+" __global float4 *out, \n"
+" int width, int radius) \n"
+"{ \n"
+" const int aux_index = (radius + get_global_id(0)) * width + get_global_id (1); \n"
" \n"
-" barrier(CLK_LOCAL_MEM_FENCE); \n"
+" const int out_index = get_global_id(0) * width + get_global_id (1); \n"
+" int i; \n"
+" float4 mean; \n"
" \n"
" mean = (float4)(0.0f); \n"
" \n"
" for (i=-radius; i <= radius; i++) \n"
" { \n"
-" mean += shared_roi[local_index + i * local_width]; \n"
+" mean += aux[aux_index + i * width]; \n"
" } \n"
" \n"
-" shared_roi[local_index] = mean / (2 * radius + 1); \n"
-" \n"
-" barrier(CLK_LOCAL_MEM_FENCE); \n"
-" \n"
-" out[out_index] = shared_roi[local_index]; \n"
+" out[out_index] = mean / (2 * radius + 1); \n"
"} \n";
static gegl_cl_run_data *cl_data = NULL;
static cl_int
cl_box_blur (cl_mem in_tex,
+ cl_mem aux_tex,
cl_mem out_tex,
size_t global_worksize,
const GeglRectangle *roi,
gint radius)
{
cl_int cl_err = 0;
- size_t local_ws[2], global_ws[2], local_mem_size;
+ size_t global_ws_hor[2], global_ws_ver[2];
if (!cl_data)
{
- const char *kernel_name[] = {"kernel_blur", NULL};
+ const char *kernel_name[] = {"kernel_blur_hor", "kernel_blur_ver", NULL};
cl_data = gegl_cl_compile_and_build (kernel_source, kernel_name);
}
if (!cl_data) return 1;
- local_ws[0] = 16;
- local_ws[1] = 16;
- global_ws[0] = roi->height;
- global_ws[1] = roi->width;
- local_mem_size = sizeof(cl_float4) * (local_ws[0] + 2 * radius) * (local_ws[1] + 2 * radius);
+ global_ws_hor[0] = roi->height + 2 * radius;
+ global_ws_hor[1] = roi->width;
+
+ global_ws_ver[0] = roi->height;
+ global_ws_ver[1] = roi->width;
cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem), (void*)&out_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 2, local_mem_size, NULL);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_int), (void*)&roi->width);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_int), (void*)&radius);
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem), (void*)&aux_tex);
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_int), (void*)&roi->width);
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_int), (void*)&radius);
if (cl_err != CL_SUCCESS) return cl_err;
cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
cl_data->kernel[0], 2,
- NULL, global_ws, local_ws,
+ NULL, global_ws_hor, NULL,
+ 0, NULL, NULL);
+ if (cl_err != CL_SUCCESS) return cl_err;
+
+ gegl_clEnqueueBarrier (gegl_cl_get_command_queue ());
+
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem), (void*)&aux_tex);
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_mem), (void*)&out_tex);
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 2, sizeof(cl_int), (void*)&roi->width);
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 3, sizeof(cl_int), (void*)&radius);
+ if (cl_err != CL_SUCCESS) return cl_err;
+
+ cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
+ cl_data->kernel[1], 2,
+ NULL, global_ws_ver, NULL,
0, NULL, NULL);
if (cl_err != CL_SUCCESS) return cl_err;
@@ -332,16 +340,16 @@ cl_process (GeglOperation *operation,
GeglBufferClIterator *i = gegl_buffer_cl_iterator_new (output, result, out_format, GEGL_CL_BUFFER_WRITE);
gint read = gegl_buffer_cl_iterator_add_2 (i, input, result, in_format, GEGL_CL_BUFFER_READ, op_area->left, op_area->right, op_area->top, op_area->bottom);
+ gint aux = gegl_buffer_cl_iterator_add_2 (i, NULL, result, in_format, GEGL_CL_BUFFER_AUX, 0, 0, op_area->top, op_area->bottom);
while (gegl_buffer_cl_iterator_next (i, &err))
{
if (err) return FALSE;
for (j=0; j < i->n; j++)
{
- cl_err = cl_box_blur(i->tex[read][j], i->tex[0][j], i->size[0][j], &i->roi[0][j], o->radius);
+ cl_err = cl_box_blur(i->tex[read][j], i->tex[aux][j], i->tex[0][j], i->size[0][j], &i->roi[0][j], ceil (o->radius));
if (cl_err != CL_SUCCESS)
{
- g_warning("[OpenCL] Error in %s [GeglOperationPointFilter] Kernel\n",
- GEGL_OPERATION_CLASS (operation)->name);
+ g_warning("[OpenCL] Error in box-blur: %s\n", gegl_cl_errstring(cl_err));
return FALSE;
}
}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]