[gegl] opencl: many changes
- From: Victor Matheus de Araujo Oliveira <vmaolive src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gegl] opencl: many changes
- Date: Mon, 14 Jan 2013 19:08:21 +0000 (UTC)
commit a3b19bc8c3f5582818021453b24d269c0195e010
Author: Victor Oliveira <victormatheus gmail com>
Date: Mon Jan 14 17:05:47 2013 -0200
opencl: many changes
- splitting kernels from remaining filters
- removing unnecessary opencl barriers
- stop using cl_int to report GEGL errors (just opencl ones)
gegl/opencl/gegl-cl-color.c | 6 -
gegl/operation/gegl-operation-point-composer.c | 6 +-
gegl/operation/gegl-operation-point-composer.h | 2 +-
gegl/operation/gegl-operation-point-filter.c | 6 +-
gegl/operation/gegl-operation-point-filter.h | 2 +-
opencl/levels.cl | 13 ++
opencl/levels.cl.h | 15 ++
opencl/motion-blur.cl | 75 +++++++++
opencl/motion-blur.cl.h | 77 +++++++++
opencl/noise-reduction.cl | 77 +++++++++
opencl/noise-reduction.cl.h | 79 ++++++++++
opencl/oilify.cl | 125 +++++++++++++++
opencl/oilify.cl.h | 127 +++++++++++++++
opencl/pixelize.cl | 48 ++++++
opencl/pixelize.cl.h | 50 ++++++
opencl/snn-mean.cl | 115 ++++++++++++++
opencl/snn-mean.cl.h | 117 ++++++++++++++
opencl/vignette.cl | 62 ++++++++
opencl/vignette.cl.h | 64 ++++++++
operations/common/bilateral-filter.c | 46 +++---
operations/common/box-blur.c | 49 ++++---
operations/common/c2g.c | 92 +++++++-----
operations/common/color-temperature.c | 25 ++-
operations/common/edge-laplace.c | 3 -
operations/common/gaussian-blur.c | 98 +++++++-----
operations/common/grey.c | 8 +-
operations/common/levels.c | 46 +++---
operations/common/motion-blur.c | 151 +++++-------------
operations/common/noise-reduction.c | 148 +++++-------------
operations/common/oilify.c | 199 +++++-------------------
operations/common/opacity.c | 9 +-
operations/common/pixelize.c | 137 +++++++----------
operations/common/snn-mean.c | 160 +++----------------
operations/common/vignette.c | 123 +++++----------
34 files changed, 1505 insertions(+), 855 deletions(-)
---
diff --git a/gegl/opencl/gegl-cl-color.c b/gegl/opencl/gegl-cl-color.c
index aed1d77..3aa6c65 100644
--- a/gegl/opencl/gegl-cl-color.c
+++ b/gegl/opencl/gegl-cl-color.c
@@ -297,9 +297,6 @@ gegl_cl_color_conv (cl_mem in_tex,
in_tex, out_tex, 0, 0, size * s,
0, NULL, NULL);
CL_CHECK;
-
- cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
- CL_CHECK;
}
else
{
@@ -316,9 +313,6 @@ gegl_cl_color_conv (cl_mem in_tex,
NULL, &size, NULL,
0, NULL, NULL);
CL_CHECK;
-
- cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
- CL_CHECK;
}
return FALSE;
diff --git a/gegl/operation/gegl-operation-point-composer.c b/gegl/operation/gegl-operation-point-composer.c
index 2ace71a..a95f381 100644
--- a/gegl/operation/gegl-operation-point-composer.c
+++ b/gegl/operation/gegl-operation-point-composer.c
@@ -189,7 +189,11 @@ gegl_operation_point_composer_cl_process (GeglOperation *operation,
err = point_composer_class->cl_process(operation, i->tex[read][j],
(aux)? i->tex[foo][j] : NULL,
i->tex[0][j], i->size[0][j], &i->roi[0][j], level);
- if (err) return FALSE;
+ if (err)
+ {
+ GEGL_NOTE (GEGL_DEBUG_OPENCL, "Error: %s", operation_class->name);
+ return FALSE;
+ }
}
else if (operation_class->cl_data)
{
diff --git a/gegl/operation/gegl-operation-point-composer.h b/gegl/operation/gegl-operation-point-composer.h
index 9882877..5e5b0c2 100644
--- a/gegl/operation/gegl-operation-point-composer.h
+++ b/gegl/operation/gegl-operation-point-composer.h
@@ -58,7 +58,7 @@ struct _GeglOperationPointComposerClass
const GeglRectangle *roi, /* rectangular region in output buffer */
gint level);
- cl_int (* cl_process) (GeglOperation *self,
+ gboolean (* cl_process) (GeglOperation *self,
cl_mem in_tex,
cl_mem aux_tex,
cl_mem out_tex,
diff --git a/gegl/operation/gegl-operation-point-filter.c b/gegl/operation/gegl-operation-point-filter.c
index 6b5c0ac..b718708 100644
--- a/gegl/operation/gegl-operation-point-filter.c
+++ b/gegl/operation/gegl-operation-point-filter.c
@@ -115,7 +115,11 @@ gegl_operation_point_filter_cl_process (GeglOperation *operation,
{
err = point_filter_class->cl_process(operation, i->tex[read][j], i->tex[0][j],
i->size[0][j], &i->roi[0][j], level);
- if (err) return FALSE;
+ if (err)
+ {
+ GEGL_NOTE (GEGL_DEBUG_OPENCL, "Error: %s", operation_class->name);
+ return FALSE;
+ }
}
else if (operation_class->cl_data)
{
diff --git a/gegl/operation/gegl-operation-point-filter.h b/gegl/operation/gegl-operation-point-filter.h
index 938da7b..4fde7ef 100644
--- a/gegl/operation/gegl-operation-point-filter.h
+++ b/gegl/operation/gegl-operation-point-filter.h
@@ -59,7 +59,7 @@ struct _GeglOperationPointFilterClass
checkerboard op for
semantics */
gint level);
- cl_int (* cl_process) (GeglOperation *self,
+ gboolean (* cl_process) (GeglOperation *self,
cl_mem in_tex,
cl_mem out_tex,
size_t global_worksize,
diff --git a/opencl/levels.cl b/opencl/levels.cl
new file mode 100644
index 0000000..80b91d2
--- /dev/null
+++ b/opencl/levels.cl
@@ -0,0 +1,13 @@
+__kernel void kernel_levels(__global const float4 *in, /* source pixels, one float4 per work-item */
+ __global float4 *out, /* destination pixels, indexed exactly like in */
+ float in_offset, /* subtracted from x/y/z before scaling */
+ float out_offset, /* added to x/y/z after scaling */
+ float scale) /* factor applied to the offset-corrected x/y/z */
+{ /* Linear remap of the first three components of every pixel; the fourth component is copied through. */
+ int gid = get_global_id(0);
+ float4 in_v = in[gid];
+ float4 out_v;
+ out_v.xyz = (in_v.xyz - in_offset) * scale + out_offset;
+ out_v.w = in_v.w; /* .w (presumably alpha) is passed through unchanged */
+ out[gid] = out_v;
+}
diff --git a/opencl/levels.cl.h b/opencl/levels.cl.h
new file mode 100644
index 0000000..88e3876
--- /dev/null
+++ b/opencl/levels.cl.h
@@ -0,0 +1,15 @@
+static const char* levels_cl_source =
+"__kernel void kernel_levels(__global const float4 *in, \n"
+" __global float4 *out, \n"
+" float in_offset, \n"
+" float out_offset, \n"
+" float scale) \n"
+"{ \n"
+" int gid = get_global_id(0); \n"
+" float4 in_v = in[gid]; \n"
+" float4 out_v; \n"
+" out_v.xyz = (in_v.xyz - in_offset) * scale + out_offset; \n"
+" out_v.w = in_v.w; \n"
+" out[gid] = out_v; \n"
+"} \n"
+;
diff --git a/opencl/motion-blur.cl b/opencl/motion-blur.cl
new file mode 100644
index 0000000..a4cd49c
--- /dev/null
+++ b/opencl/motion-blur.cl
@@ -0,0 +1,75 @@
+int CLAMP(int val,int lo,int hi)
+{ /* Clamp val to [lo, hi]: yields lo when val < lo, hi when hi < val, otherwise val. */
+ return (val < lo) ? lo : ((hi < val) ? hi : val);
+}
+/* Return the pixel at position (x, y): the coordinates are made relative to (rect_x, rect_y) and clamped to the rect_width x rect_height buffer in_buf. */
+float4 get_pixel_color(const __global float4 *in_buf,
+ int rect_width,
+ int rect_height,
+ int rect_x,
+ int rect_y,
+ int x,
+ int y)
+{
+ int ix = x - rect_x;
+ int iy = y - rect_y;
+ /* lookups outside the buffer repeat the nearest edge pixel */
+ ix = CLAMP(ix, 0, rect_width-1);
+ iy = CLAMP(iy, 0, rect_height-1);
+
+ return in_buf[iy * rect_width + ix];
+}
+/* Linear motion blur: each work-item averages num_steps bilinearly interpolated samples of src_buf taken along the vector (offset_x, offset_y), centred on its own pixel. */
+__kernel void motion_blur(const __global float4 *src_buf,
+ int src_width,
+ int src_height,
+ int src_x,
+ int src_y,
+ __global float4 *dst_buf,
+ int dst_x,
+ int dst_y,
+ int num_steps,
+ float offset_x,
+ float offset_y)
+{
+ int gidx = get_global_id(0);
+ int gidy = get_global_id(1);
+
+ float4 sum = 0.0f;
+ int px = gidx + dst_x; /* position of this output pixel shifted by the destination origin */
+ int py = gidy + dst_y;
+
+ for(int step = 0; step < num_steps; ++step)
+ {
+ float t = num_steps == 1 ? 0.0f : /* t runs from -0.5 to 0.5 across the steps; a single step samples at 0 */
+ step / (float)(num_steps - 1) - 0.5f;
+ /* sample position along the blur vector */
+ float xx = px + t * offset_x;
+ float yy = py + t * offset_y;
+
+ int ix = (int)floor(xx);
+ int iy = (int)floor(yy);
+ /* fractional parts serve as the bilinear weights */
+ float dx = xx - floor(xx);
+ float dy = yy - floor(yy);
+
+ float4 mixy0,mixy1,pix0,pix1,pix2,pix3;
+ /* fetch the 2x2 neighbourhood; get_pixel_color clamps at the source edges */
+ pix0 = get_pixel_color(src_buf, src_width,
+ src_height, src_x, src_y, ix, iy);
+ pix1 = get_pixel_color(src_buf, src_width,
+ src_height, src_x, src_y, ix+1, iy);
+ pix2 = get_pixel_color(src_buf, src_width,
+ src_height, src_x, src_y, ix, iy+1);
+ pix3 = get_pixel_color(src_buf, src_width,
+ src_height, src_x, src_y, ix+1, iy+1);
+ /* interpolate along y first, then along x */
+ mixy0 = dy * (pix2 - pix0) + pix0;
+ mixy1 = dy * (pix3 - pix1) + pix1;
+
+ sum += dx * (mixy1 - mixy0) + mixy0;
+ }
+ /* store the mean of all samples; rows of dst_buf are get_global_size(0) pixels wide */
+ dst_buf[gidy * get_global_size(0) + gidx] =
+ sum / num_steps;
+}
diff --git a/opencl/motion-blur.cl.h b/opencl/motion-blur.cl.h
new file mode 100644
index 0000000..807fdeb
--- /dev/null
+++ b/opencl/motion-blur.cl.h
@@ -0,0 +1,77 @@
+static const char* motion_blur_cl_source =
+"int CLAMP(int val,int lo,int hi) \n"
+"{ \n"
+" return (val < lo) ? lo : ((hi < val) ? hi : val); \n"
+"} \n"
+" \n"
+"float4 get_pixel_color(const __global float4 *in_buf, \n"
+" int rect_width, \n"
+" int rect_height, \n"
+" int rect_x, \n"
+" int rect_y, \n"
+" int x, \n"
+" int y) \n"
+"{ \n"
+" int ix = x - rect_x; \n"
+" int iy = y - rect_y; \n"
+" \n"
+" ix = CLAMP(ix, 0, rect_width-1); \n"
+" iy = CLAMP(iy, 0, rect_height-1); \n"
+" \n"
+" return in_buf[iy * rect_width + ix]; \n"
+"} \n"
+" \n"
+"__kernel void motion_blur(const __global float4 *src_buf, \n"
+" int src_width, \n"
+" int src_height, \n"
+" int src_x, \n"
+" int src_y, \n"
+" __global float4 *dst_buf, \n"
+" int dst_x, \n"
+" int dst_y, \n"
+" int num_steps, \n"
+" float offset_x, \n"
+" float offset_y) \n"
+"{ \n"
+" int gidx = get_global_id(0); \n"
+" int gidy = get_global_id(1); \n"
+" \n"
+" float4 sum = 0.0f; \n"
+" int px = gidx + dst_x; \n"
+" int py = gidy + dst_y; \n"
+" \n"
+" for(int step = 0; step < num_steps; ++step) \n"
+" { \n"
+" float t = num_steps == 1 ? 0.0f : \n"
+" step / (float)(num_steps - 1) - 0.5f; \n"
+" \n"
+" float xx = px + t * offset_x; \n"
+" float yy = py + t * offset_y; \n"
+" \n"
+" int ix = (int)floor(xx); \n"
+" int iy = (int)floor(yy); \n"
+" \n"
+" float dx = xx - floor(xx); \n"
+" float dy = yy - floor(yy); \n"
+" \n"
+" float4 mixy0,mixy1,pix0,pix1,pix2,pix3; \n"
+" \n"
+" pix0 = get_pixel_color(src_buf, src_width, \n"
+" src_height, src_x, src_y, ix, iy); \n"
+" pix1 = get_pixel_color(src_buf, src_width, \n"
+" src_height, src_x, src_y, ix+1, iy); \n"
+" pix2 = get_pixel_color(src_buf, src_width, \n"
+" src_height, src_x, src_y, ix, iy+1); \n"
+" pix3 = get_pixel_color(src_buf, src_width, \n"
+" src_height, src_x, src_y, ix+1, iy+1); \n"
+" \n"
+" mixy0 = dy * (pix2 - pix0) + pix0; \n"
+" mixy1 = dy * (pix3 - pix1) + pix1; \n"
+" \n"
+" sum += dx * (mixy1 - mixy0) + mixy0; \n"
+" } \n"
+" \n"
+" dst_buf[gidy * get_global_size(0) + gidx] = \n"
+" sum / num_steps; \n"
+"} \n"
+;
diff --git a/opencl/noise-reduction.cl b/opencl/noise-reduction.cl
new file mode 100644
index 0000000..312087b
--- /dev/null
+++ b/opencl/noise-reduction.cl
@@ -0,0 +1,77 @@
+#define NEIGHBOURS 8
+#define AXES (NEIGHBOURS/2)
+/* square of a; note that the argument is expanded twice */
+#define POW2(a) ((a)*(a))
+/* squared second difference across one axis: (2*center - before - after)^2 */
+#define GEN_METRIC(before, center, after) POW2((center) * 2 - (before) - (after))
+/* true where the candidate metric is strictly below the reference metric */
+#define BAIL_CONDITION(new,original) ((new) < (original))
+/* index of the entry that lies opposite to entry a in the offsets table */
+#define SYMMETRY(a) (NEIGHBOURS - (a) - 1)
+/* linear buffer offset of neighbour (u, v); expects src_stride to be in scope */
+#define O(u,v) (((u)+((v) * (src_stride))))
+/* Each work-item writes one pixel: the centre is averaged with every centre/neighbour midpoint whose metric is strictly below the centre's reference metric on all four axes (tested per component). */
+__kernel void noise_reduction_cl (__global float4 *src_buf,
+ int src_stride,
+ __global float4 *dst_buf,
+ int dst_stride)
+{
+ int gidx = get_global_id(0);
+ int gidy = get_global_id(1);
+ /* source is read one pixel further right/down than the output index - presumably a 1-pixel border, confirm against the host code */
+ __global float4 *center_pix = src_buf + (gidy + 1) * src_stride + gidx + 1;
+ int dst_offset = dst_stride * gidy + gidx;
+ /* 8 neighbours, ordered so that entries a and SYMMETRY(a) are opposite each other */
+ int offsets[NEIGHBOURS] = {
+ O(-1, -1), O( 0, -1), O( 1, -1),
+ O(-1, 0), O( 1, 0),
+ O(-1, 1), O( 0, 1), O( 1, 1)
+ };
+
+ float4 sum;
+ int4 count;
+ float4 cur;
+ float4 metric_reference[AXES];
+ /* reference metric of the unmodified centre pixel on each axis */
+ for (int axis = 0; axis < AXES; axis++)
+ {
+ float4 before_pix = *(center_pix + offsets[axis]);
+ float4 after_pix = *(center_pix + offsets[SYMMETRY(axis)]);
+ metric_reference[axis] = GEN_METRIC (before_pix, *center_pix, after_pix);
+ }
+
+ cur = sum = *center_pix;
+ count = 1; /* the centre pixel itself is always part of the average */
+ /* try the midpoint between the centre and each neighbour in turn */
+ for (int direction = 0; direction < NEIGHBOURS; direction++)
+ {
+ float4 pix = *(center_pix + offsets[direction]);
+ float4 value = (pix + cur) * (0.5f);
+ int axis;
+ int4 mask = {1, 1, 1, 0}; /* .w starts at 0, so the fourth component is never accumulated */
+
+ for (axis = 0; axis < AXES; axis++)
+ {
+ float4 before_pix = *(center_pix + offsets[axis]);
+ float4 after_pix = *(center_pix + offsets[SYMMETRY(axis)]);
+
+ float4 metric_new = GEN_METRIC (before_pix,
+ value,
+ after_pix);
+ mask = BAIL_CONDITION (metric_new, metric_reference[axis]) & mask; /* a component stays set only if it passes on every axis */
+ }
+ sum += mask >0 ? value : 0;
+ count += mask >0 ? 1 : 0;
+ }
+ dst_buf[dst_offset] = (sum/convert_float4(count));
+ dst_buf[dst_offset].w = cur.w; /* fourth component is taken from the centre pixel unchanged */
+}
+__kernel void transfer(__global float4 * in,
+ int in_width, /* row stride of in, in pixels */
+ __global float4 * out)
+{ /* Copy a region get_global_size(0) pixels wide from in (row stride in_width) into the tightly packed buffer out. */
+ int gidx = get_global_id(0);
+ int gidy = get_global_id(1);
+ int width = get_global_size(0); /* row stride of out */
+ out[gidy * width + gidx] = in[gidy * in_width + gidx];
+}
diff --git a/opencl/noise-reduction.cl.h b/opencl/noise-reduction.cl.h
new file mode 100644
index 0000000..969fe2a
--- /dev/null
+++ b/opencl/noise-reduction.cl.h
@@ -0,0 +1,79 @@
+static const char* noise_reduction_cl_source =
+"#define NEIGHBOURS 8 \n"
+"#define AXES (NEIGHBOURS/2) \n"
+" \n"
+"#define POW2(a) ((a)*(a)) \n"
+" \n"
+"#define GEN_METRIC(before, center, after) POW2((center) * 2 - (before) - (after))\n"
+" \n"
+"#define BAIL_CONDITION(new,original) ((new) < (original)) \n"
+" \n"
+"#define SYMMETRY(a) (NEIGHBOURS - (a) - 1) \n"
+" \n"
+"#define O(u,v) (((u)+((v) * (src_stride)))) \n"
+" \n"
+"__kernel void noise_reduction_cl (__global float4 *src_buf, \n"
+" int src_stride, \n"
+" __global float4 *dst_buf, \n"
+" int dst_stride) \n"
+"{ \n"
+" int gidx = get_global_id(0); \n"
+" int gidy = get_global_id(1); \n"
+" \n"
+" __global float4 *center_pix = src_buf + (gidy + 1) * src_stride + gidx + 1;\n"
+" int dst_offset = dst_stride * gidy + gidx; \n"
+" \n"
+" int offsets[NEIGHBOURS] = { \n"
+" O(-1, -1), O( 0, -1), O( 1, -1), \n"
+" O(-1, 0), O( 1, 0), \n"
+" O(-1, 1), O( 0, 1), O( 1, 1) \n"
+" }; \n"
+" \n"
+" float4 sum; \n"
+" int4 count; \n"
+" float4 cur; \n"
+" float4 metric_reference[AXES]; \n"
+" \n"
+" for (int axis = 0; axis < AXES; axis++) \n"
+" { \n"
+" float4 before_pix = *(center_pix + offsets[axis]); \n"
+" float4 after_pix = *(center_pix + offsets[SYMMETRY(axis)]); \n"
+" metric_reference[axis] = GEN_METRIC (before_pix, *center_pix, after_pix);\n"
+" } \n"
+" \n"
+" cur = sum = *center_pix; \n"
+" count = 1; \n"
+" \n"
+" for (int direction = 0; direction < NEIGHBOURS; direction++) \n"
+" { \n"
+" float4 pix = *(center_pix + offsets[direction]); \n"
+" float4 value = (pix + cur) * (0.5f); \n"
+" int axis; \n"
+" int4 mask = {1, 1, 1, 0}; \n"
+" \n"
+" for (axis = 0; axis < AXES; axis++) \n"
+" { \n"
+" float4 before_pix = *(center_pix + offsets[axis]); \n"
+" float4 after_pix = *(center_pix + offsets[SYMMETRY(axis)]); \n"
+" \n"
+" float4 metric_new = GEN_METRIC (before_pix, \n"
+" value, \n"
+" after_pix); \n"
+" mask = BAIL_CONDITION (metric_new, metric_reference[axis]) & mask;\n"
+" } \n"
+" sum += mask >0 ? value : 0; \n"
+" count += mask >0 ? 1 : 0; \n"
+" } \n"
+" dst_buf[dst_offset] = (sum/convert_float4(count)); \n"
+" dst_buf[dst_offset].w = cur.w; \n"
+"} \n"
+"__kernel void transfer(__global float4 * in, \n"
+" int in_width, \n"
+" __global float4 * out) \n"
+"{ \n"
+" int gidx = get_global_id(0); \n"
+" int gidy = get_global_id(1); \n"
+" int width = get_global_size(0); \n"
+" out[gidy * width + gidx] = in[gidy * in_width + gidx]; \n"
+"} \n"
+;
diff --git a/opencl/oilify.cl b/opencl/oilify.cl
new file mode 100644
index 0000000..29237bf
--- /dev/null
+++ b/opencl/oilify.cl
@@ -0,0 +1,125 @@
+/* two small different kernels are better than one big */
+
+#define NUM_INTENSITIES 256
+/* Oilify, per-component variant: builds one histogram per component over a circular neighbourhood and writes the histogram-weighted mean bin, scaled back by (intensities - 1). */
+kernel void kernel_oilify(global float4 *in,
+ global float4 *out,
+ const int mask_radius,
+ const int intensities,
+ const float exponent)
+{
+ int gidx = get_global_id(0);
+ int gidy = get_global_id(1);
+ int x = gidx + mask_radius; /* centre of the neighbourhood inside the source buffer */
+ int y = gidy + mask_radius;
+ int dst_width = get_global_size(0);
+ int src_width = dst_width + mask_radius * 2; /* source rows are 2 * mask_radius wider than output rows */
+ float4 hist[NUM_INTENSITIES];
+ float4 hist_max = 1.0;
+ int i, j, intensity; /* intensity is not used in this kernel */
+ int radius_sq = mask_radius * mask_radius;
+ float4 temp_pixel;
+ for (i = 0; i < intensities; i++)
+ hist[i] = 0.0;
+ /* count every pixel inside the circle; NOTE(review): the bin index is not range-checked - assumes components lie in [0, 1] and intensities <= NUM_INTENSITIES, confirm with the caller */
+ for (i = -mask_radius; i <= mask_radius; i++)
+ {
+ for (j = -mask_radius; j <= mask_radius; j++)
+ {
+ if (i*i + j*j <= radius_sq)
+ {
+ temp_pixel = in[x + i + (y + j) * src_width];
+ hist[(int)(temp_pixel.x * (intensities - 1))].x+=1;
+ hist[(int)(temp_pixel.y * (intensities - 1))].y+=1;
+ hist[(int)(temp_pixel.z * (intensities - 1))].z+=1;
+ hist[(int)(temp_pixel.w * (intensities - 1))].w+=1;
+ }
+ }
+ }
+ /* per-component histogram maximum; never below the initial 1.0 */
+ for (i = 0; i < intensities; i++) {
+ if(hist_max.x < hist[i].x)
+ hist_max.x = hist[i].x;
+ if(hist_max.y < hist[i].y)
+ hist_max.y = hist[i].y;
+ if(hist_max.z < hist[i].z)
+ hist_max.z = hist[i].z;
+ if(hist_max.w < hist[i].w)
+ hist_max.w = hist[i].w;
+ }
+ float4 div = 0.0;
+ float4 sum = 0.0;
+ float4 ratio, weight;
+ for (i = 0; i < intensities; i++)
+ {
+ ratio = hist[i] / hist_max;
+ weight = pow(ratio, (float4)exponent); /* weight of bin i = (count / max count) ^ exponent */
+ sum += weight * (float4)i;
+ div += weight;
+ }
+ out[gidx + gidy * dst_width] = sum / div / (float)(intensities - 1);
+}
+/* Oilify, intensity variant: bins neighbourhood pixels by a weighted x/y/z intensity, accumulates the pixel values per bin, and writes the weight-blended mean of the per-bin average colours. */
+kernel void kernel_oilify_inten(global float4 *in,
+ global float4 *out,
+ const int mask_radius,
+ const int intensities,
+ const float exponent)
+{
+ int gidx = get_global_id(0);
+ int gidy = get_global_id(1);
+ int x = gidx + mask_radius; /* centre of the neighbourhood inside the source buffer */
+ int y = gidy + mask_radius;
+ int dst_width = get_global_size(0);
+ int src_width = dst_width + mask_radius * 2; /* source rows are 2 * mask_radius wider than output rows */
+ float4 cumulative_rgb[NUM_INTENSITIES];
+ int hist_inten[NUM_INTENSITIES], inten_max;
+ int i, j, intensity;
+ int radius_sq = mask_radius * mask_radius;
+ float4 temp_pixel;
+ for (i = 0; i < intensities; i++)
+ {
+ hist_inten[i] = 0;
+ cumulative_rgb[i] = 0.0;
+ }
+ for (i = -mask_radius; i <= mask_radius; i++)
+ {
+ for (j = -mask_radius; j <= mask_radius; j++)
+ {
+ if (i*i + j*j <= radius_sq)
+ {
+ temp_pixel = in[x + i + (y + j) * src_width];
+ /* Intensity is computed here from x/y/z. NOTE(review): the resulting bin index is not range-checked - assumes it lands in [0, intensities - 1], confirm with the caller */
+ intensity = (int)((0.299 * temp_pixel.x
+ +0.587 * temp_pixel.y
+ +0.114 * temp_pixel.z) * (float)(intensities-1));
+ hist_inten[intensity] += 1;
+ cumulative_rgb[intensity] += temp_pixel;
+ }
+ }
+ }
+ inten_max = 1;
+
+ /* find the largest bin count; it never drops below the initial 1 */
+ for (i = 0; i < intensities; i++) {
+ if(hist_inten[i] > inten_max)
+ inten_max = hist_inten[i];
+ }
+ float div = 0.0;
+ float ratio, weight, mult_inten;
+
+ float4 color = 0.0;
+ for (i = 0; i < intensities; i++)
+ {
+ if (hist_inten[i] > 0)
+ {
+ ratio = (float)(hist_inten[i]) / (float)(inten_max);
+ weight = pow(ratio, exponent);
+ mult_inten = weight / (float)(hist_inten[i]); /* weight times 1/count turns the per-bin sum into a weighted mean */
+
+ div += weight;
+ color += mult_inten * cumulative_rgb[i];
+ }
+ }
+ out[gidx + gidy * dst_width] = color/div;
+}
diff --git a/opencl/oilify.cl.h b/opencl/oilify.cl.h
new file mode 100644
index 0000000..b1d3f11
--- /dev/null
+++ b/opencl/oilify.cl.h
@@ -0,0 +1,127 @@
+static const char* oilify_cl_source =
+"/* two small different kernels are better than one big */ \n"
+" \n"
+"#define NUM_INTENSITIES 256 \n"
+" \n"
+"kernel void kernel_oilify(global float4 *in, \n"
+" global float4 *out, \n"
+" const int mask_radius, \n"
+" const int intensities, \n"
+" const float exponent) \n"
+"{ \n"
+" int gidx = get_global_id(0); \n"
+" int gidy = get_global_id(1); \n"
+" int x = gidx + mask_radius; \n"
+" int y = gidy + mask_radius; \n"
+" int dst_width = get_global_size(0); \n"
+" int src_width = dst_width + mask_radius * 2; \n"
+" float4 hist[NUM_INTENSITIES]; \n"
+" float4 hist_max = 1.0; \n"
+" int i, j, intensity; \n"
+" int radius_sq = mask_radius * mask_radius; \n"
+" float4 temp_pixel; \n"
+" for (i = 0; i < intensities; i++) \n"
+" hist[i] = 0.0; \n"
+" \n"
+" for (i = -mask_radius; i <= mask_radius; i++) \n"
+" { \n"
+" for (j = -mask_radius; j <= mask_radius; j++) \n"
+" { \n"
+" if (i*i + j*j <= radius_sq) \n"
+" { \n"
+" temp_pixel = in[x + i + (y + j) * src_width]; \n"
+" hist[(int)(temp_pixel.x * (intensities - 1))].x+=1; \n"
+" hist[(int)(temp_pixel.y * (intensities - 1))].y+=1; \n"
+" hist[(int)(temp_pixel.z * (intensities - 1))].z+=1; \n"
+" hist[(int)(temp_pixel.w * (intensities - 1))].w+=1; \n"
+" } \n"
+" } \n"
+" } \n"
+" \n"
+" for (i = 0; i < intensities; i++) { \n"
+" if(hist_max.x < hist[i].x) \n"
+" hist_max.x = hist[i].x; \n"
+" if(hist_max.y < hist[i].y) \n"
+" hist_max.y = hist[i].y; \n"
+" if(hist_max.z < hist[i].z) \n"
+" hist_max.z = hist[i].z; \n"
+" if(hist_max.w < hist[i].w) \n"
+" hist_max.w = hist[i].w; \n"
+" } \n"
+" float4 div = 0.0; \n"
+" float4 sum = 0.0; \n"
+" float4 ratio, weight; \n"
+" for (i = 0; i < intensities; i++) \n"
+" { \n"
+" ratio = hist[i] / hist_max; \n"
+" weight = pow(ratio, (float4)exponent); \n"
+" sum += weight * (float4)i; \n"
+" div += weight; \n"
+" } \n"
+" out[gidx + gidy * dst_width] = sum / div / (float)(intensities - 1); \n"
+"} \n"
+" \n"
+"kernel void kernel_oilify_inten(global float4 *in, \n"
+" global float4 *out, \n"
+" const int mask_radius, \n"
+" const int intensities, \n"
+" const float exponent) \n"
+"{ \n"
+" int gidx = get_global_id(0); \n"
+" int gidy = get_global_id(1); \n"
+" int x = gidx + mask_radius; \n"
+" int y = gidy + mask_radius; \n"
+" int dst_width = get_global_size(0); \n"
+" int src_width = dst_width + mask_radius * 2; \n"
+" float4 cumulative_rgb[NUM_INTENSITIES]; \n"
+" int hist_inten[NUM_INTENSITIES], inten_max; \n"
+" int i, j, intensity; \n"
+" int radius_sq = mask_radius * mask_radius; \n"
+" float4 temp_pixel; \n"
+" for (i = 0; i < intensities; i++) \n"
+" { \n"
+" hist_inten[i] = 0; \n"
+" cumulative_rgb[i] = 0.0; \n"
+" } \n"
+" for (i = -mask_radius; i <= mask_radius; i++) \n"
+" { \n"
+" for (j = -mask_radius; j <= mask_radius; j++) \n"
+" { \n"
+" if (i*i + j*j <= radius_sq) \n"
+" { \n"
+" temp_pixel = in[x + i + (y + j) * src_width]; \n"
+" /*Calculate intensity on the fly, GPU does it fast*/ \n"
+" intensity = (int)((0.299 * temp_pixel.x \n"
+" +0.587 * temp_pixel.y \n"
+" +0.114 * temp_pixel.z) * (float)(intensities-1)); \n"
+" hist_inten[intensity] += 1; \n"
+" cumulative_rgb[intensity] += temp_pixel; \n"
+" } \n"
+" } \n"
+" } \n"
+" inten_max = 1; \n"
+" \n"
+" /* calculated maximums */ \n"
+" for (i = 0; i < intensities; i++) { \n"
+" if(hist_inten[i] > inten_max) \n"
+" inten_max = hist_inten[i]; \n"
+" } \n"
+" float div = 0.0; \n"
+" float ratio, weight, mult_inten; \n"
+" \n"
+" float4 color = 0.0; \n"
+" for (i = 0; i < intensities; i++) \n"
+" { \n"
+" if (hist_inten[i] > 0) \n"
+" { \n"
+" ratio = (float)(hist_inten[i]) / (float)(inten_max); \n"
+" weight = pow(ratio, exponent); \n"
+" mult_inten = weight / (float)(hist_inten[i]); \n"
+" \n"
+" div += weight; \n"
+" color += mult_inten * cumulative_rgb[i]; \n"
+" } \n"
+" } \n"
+" out[gidx + gidy * dst_width] = color/div; \n"
+"} \n"
+;
diff --git a/opencl/pixelize.cl b/opencl/pixelize.cl
new file mode 100644
index 0000000..2e22476
--- /dev/null
+++ b/opencl/pixelize.cl
@@ -0,0 +1,48 @@
+__kernel void calc_block_color(__global float4 *in,
+ __global float4 *out,
+ int xsize,
+ int ysize,
+ int roi_x,
+ int roi_y,
+ int line_width,
+ int block_count_x )
+{ /* Each work-item averages one xsize x ysize block of in and stores the mean at out[gidy * block_count_x + gidx]. */
+ int gidx = get_global_id(0);
+ int gidy = get_global_id(1);
+ int cx = roi_x / xsize + gidx; /* block index of this work-item, counted from block roi_x / xsize */
+ int cy = roi_y / ysize + gidy;
+ /* every pixel of the block contributes equally to the mean */
+ float weight = 1.0f / (xsize * ysize);
+ /* first pixel of the block inside in; NOTE(review): the extra + xsize / + ysize presumably skips a one-block border in the input - confirm against the host code */
+ int px = cx * xsize + xsize - roi_x;
+ int py = cy * ysize + ysize - roi_y;
+ /* line_width is used as the row stride of in, in pixels */
+ int i,j;
+ float4 col = 0.0f;
+ for (j = py;j < py + ysize; ++j)
+ {
+ for (i = px;i < px + xsize; ++i)
+ {
+ col += in[j * line_width + i];
+ }
+ }
+ out[gidy * block_count_x + gidx] = col * weight;
+
+}
+/* Expand block colours back to pixels: every output pixel copies the entry of in (one float4 per block, block_count_x entries per row) that belongs to the block it falls in. */
+__kernel void kernel_pixelise (__global float4 *in,
+ __global float4 *out,
+ int xsize,
+ int ysize,
+ int roi_x,
+ int roi_y,
+ int block_count_x)
+{
+ int gidx = get_global_id(0);
+ int gidy = get_global_id(1);
+ /* src_width is used as the row stride of out; cx/cy are block indices relative to the block containing (roi_x, roi_y) */
+ int src_width = get_global_size(0);
+ int cx = (gidx + roi_x) / xsize - roi_x / xsize;
+ int cy = (gidy + roi_y) / ysize - roi_y / ysize;
+ out[gidx + gidy * src_width] = in[cx + cy * block_count_x];
+}
diff --git a/opencl/pixelize.cl.h b/opencl/pixelize.cl.h
new file mode 100644
index 0000000..3715450
--- /dev/null
+++ b/opencl/pixelize.cl.h
@@ -0,0 +1,50 @@
+static const char* pixelize_cl_source =
+"__kernel void calc_block_color(__global float4 *in, \n"
+" __global float4 *out, \n"
+" int xsize, \n"
+" int ysize, \n"
+" int roi_x, \n"
+" int roi_y, \n"
+" int line_width, \n"
+" int block_count_x ) \n"
+"{ \n"
+" int gidx = get_global_id(0); \n"
+" int gidy = get_global_id(1); \n"
+" int cx = roi_x / xsize + gidx; \n"
+" int cy = roi_y / ysize + gidy; \n"
+" \n"
+" float weight = 1.0f / (xsize * ysize); \n"
+" \n"
+" int px = cx * xsize + xsize - roi_x; \n"
+" int py = cy * ysize + ysize - roi_y; \n"
+" \n"
+" int i,j; \n"
+" float4 col = 0.0f; \n"
+" for (j = py;j < py + ysize; ++j) \n"
+" { \n"
+" for (i = px;i < px + xsize; ++i) \n"
+" { \n"
+" col += in[j * line_width + i]; \n"
+" } \n"
+" } \n"
+" out[gidy * block_count_x + gidx] = col * weight; \n"
+" \n"
+"} \n"
+" \n"
+"__kernel void kernel_pixelise (__global float4 *in, \n"
+" __global float4 *out, \n"
+" int xsize, \n"
+" int ysize, \n"
+" int roi_x, \n"
+" int roi_y, \n"
+" int block_count_x) \n"
+"{ \n"
+" int gidx = get_global_id(0); \n"
+" int gidy = get_global_id(1); \n"
+" \n"
+" int src_width = get_global_size(0); \n"
+" int cx = (gidx + roi_x) / xsize - roi_x / xsize; \n"
+" int cy = (gidy + roi_y) / ysize - roi_y / ysize; \n"
+" out[gidx + gidy * src_width] = in[cx + cy * block_count_x]; \n"
+"} \n"
+;
diff --git a/opencl/snn-mean.cl b/opencl/snn-mean.cl
new file mode 100644
index 0000000..9802b46
--- /dev/null
+++ b/opencl/snn-mean.cl
@@ -0,0 +1,115 @@
+float colordiff (float4 pixA,
+ float4 pixB)
+{ /* Squared distance between two pixels over the x, y and z components; the w component is ignored. */
+ float4 pix = pixA-pixB;
+ pix *= pix;
+ return pix.x+pix.y+pix.z;
+}
+/* Symmetric nearest neighbour mean: for every sampled window offset, pick among the mirror-symmetric candidates the pixel closest in colour to the centre (colordiff) and write the mean of all picks. pairs selects 4 candidates (2) or 2 candidates (1) per offset; any other value writes nothing. */
+__kernel void snn_mean (__global const float4 *src_buf,
+ int src_width,
+ int src_height,
+ __global float4 *dst_buf,
+ int radius,
+ int pairs)
+{
+ int gidx =get_global_id(0);
+ int gidy =get_global_id(1);
+ int offset =gidy * get_global_size(0) + gidx;
+ /* the centre pixel sits at (gidx+radius, gidy+radius) inside the source buffer */
+ __global const float4 *center_pix=
+ src_buf + ((radius+gidx) + (gidy+radius)* src_width);
+ float4 accumulated=0;
+ /* count = number of picks summed into accumulated */
+ int count=0;
+ if(pairs==2)
+ {
+ for(int i=-radius;i<0;i++)
+ {
+ for(int j=-radius;j<0;j++)
+ {
+ __global const float4 *selected_pix = center_pix;
+ float best_diff = 1000.0f;
+ /* the four mirror images of offset (j, i) around the centre */
+ int xs[4]={
+ gidx+j+radius, gidx-j+radius,
+ gidx-j+radius, gidx+j+radius
+ };
+ int ys[4]={
+ gidy+i+radius, gidy-i+radius,
+ gidy+i+radius, gidy-i+radius};
+ /* keep the in-bounds candidate closest in colour to the centre */
+ for (int k=0;k<4;k++)
+ {
+ if (xs[k] >= 0 && xs[k] < src_width &&
+ ys[k] >= 0 && ys[k] < src_height)
+ {
+ __global const float4 *tpix =
+ src_buf + (xs[k] + ys[k] * src_width);
+ float diff=colordiff(*tpix, *center_pix);
+ if (diff < best_diff)
+ {
+ best_diff = diff;
+ selected_pix = tpix;
+ }
+ }
+ }
+
+ accumulated += *selected_pix;
+
+ ++count;
+ if (i==0 && j==0) /* never true in this branch: both loops stop before 0 */
+ break;
+ }
+ }
+ dst_buf[offset] = accumulated/count;
+ return;
+ }
+ else if(pairs==1)
+ {
+ for(int i=-radius;i<=0;i++)
+ {
+ for(int j=-radius;j<=radius;j++)
+ {
+ __global const float4 *selected_pix = center_pix;
+ float best_diff = 1000.0f;
+
+ /* no search on the centre row and column: selected_pix stays the centre pixel there */
+ if (i != 0 && j != 0)
+ {
+ int xs[4]={
+ gidx+i+radius, gidx-i+radius,
+ gidx-i+radius, gidx+i+radius
+ };
+ int ys[4]={
+ gidy+j+radius, gidy-j+radius,
+ gidy+j+radius, gidy-j+radius
+ };
+ /* one pair: compare offset (i, j) with its mirror (-i, -j); k is a dedicated index so the outer loop variable i is not overwritten */
+ for (int k=0;k<2;k++)
+ {
+ if (xs[k] >= 0 && xs[k] < src_width &&
+ ys[k] >= 0 && ys[k] < src_height)
+ {
+ __global const float4 *tpix =
+ src_buf + (xs[k] + ys[k] * src_width);
+ float diff=colordiff (*tpix, *center_pix);
+ if (diff < best_diff)
+ {
+ best_diff = diff;
+ selected_pix = tpix;
+ }
+ }
+ }
+ }
+ accumulated += *selected_pix;
+ ++count;
+ if (i==0 && j==0) /* stop once the centre pixel has been added */
+ break;
+ }
+ }
+ dst_buf[offset] = accumulated/count;
+ return;
+ }
+ return;
+}
diff --git a/opencl/snn-mean.cl.h b/opencl/snn-mean.cl.h
new file mode 100644
index 0000000..21b4935
--- /dev/null
+++ b/opencl/snn-mean.cl.h
@@ -0,0 +1,117 @@
+static const char* snn_mean_cl_source =
+"float colordiff (float4 pixA, \n"
+" float4 pixB) \n"
+"{ /* Squared distance between two pixels over the x, y and z components; the w component is ignored. */ \n"
+" float4 pix = pixA-pixB; \n"
+" pix *= pix; \n"
+" return pix.x+pix.y+pix.z; \n"
+"} \n"
+"/* Symmetric nearest neighbour mean: for every sampled window offset, pick among the mirror-symmetric candidates the pixel closest in colour to the centre (colordiff) and write the mean of all picks. pairs selects 4 candidates (2) or 2 candidates (1) per offset; any other value writes nothing. */ \n"
+"__kernel void snn_mean (__global const float4 *src_buf, \n"
+" int src_width, \n"
+" int src_height, \n"
+" __global float4 *dst_buf, \n"
+" int radius, \n"
+" int pairs) \n"
+"{ \n"
+" int gidx =get_global_id(0); \n"
+" int gidy =get_global_id(1); \n"
+" int offset =gidy * get_global_size(0) + gidx; \n"
+" /* the centre pixel sits at (gidx+radius, gidy+radius) inside the source buffer */ \n"
+" __global const float4 *center_pix= \n"
+" src_buf + ((radius+gidx) + (gidy+radius)* src_width); \n"
+" float4 accumulated=0; \n"
+" /* count = number of picks summed into accumulated */ \n"
+" int count=0; \n"
+" if(pairs==2) \n"
+" { \n"
+" for(int i=-radius;i<0;i++) \n"
+" { \n"
+" for(int j=-radius;j<0;j++) \n"
+" { \n"
+" __global const float4 *selected_pix = center_pix; \n"
+" float best_diff = 1000.0f; \n"
+" /* the four mirror images of offset (j, i) around the centre */ \n"
+" int xs[4]={ \n"
+" gidx+j+radius, gidx-j+radius, \n"
+" gidx-j+radius, gidx+j+radius \n"
+" }; \n"
+" int ys[4]={ \n"
+" gidy+i+radius, gidy-i+radius, \n"
+" gidy+i+radius, gidy-i+radius}; \n"
+" /* keep the in-bounds candidate closest in colour to the centre */ \n"
+" for (int k=0;k<4;k++) \n"
+" { \n"
+" if (xs[k] >= 0 && xs[k] < src_width && \n"
+" ys[k] >= 0 && ys[k] < src_height) \n"
+" { \n"
+" __global const float4 *tpix = \n"
+" src_buf + (xs[k] + ys[k] * src_width); \n"
+" float diff=colordiff(*tpix, *center_pix); \n"
+" if (diff < best_diff) \n"
+" { \n"
+" best_diff = diff; \n"
+" selected_pix = tpix; \n"
+" } \n"
+" } \n"
+" } \n"
+" \n"
+" accumulated += *selected_pix; \n"
+" \n"
+" ++count; \n"
+" if (i==0 && j==0) /* never true in this branch: both loops stop before 0 */ \n"
+" break; \n"
+" } \n"
+" } \n"
+" dst_buf[offset] = accumulated/count; \n"
+" return; \n"
+" } \n"
+" else if(pairs==1) \n"
+" { \n"
+" for(int i=-radius;i<=0;i++) \n"
+" { \n"
+" for(int j=-radius;j<=radius;j++) \n"
+" { \n"
+" __global const float4 *selected_pix = center_pix; \n"
+" float best_diff = 1000.0f; \n"
+" \n"
+" /* no search on the centre row and column: selected_pix stays the centre pixel there */ \n"
+" if (i != 0 && j != 0) \n"
+" { \n"
+" int xs[4]={ \n"
+" gidx+i+radius, gidx-i+radius, \n"
+" gidx-i+radius, gidx+i+radius \n"
+" }; \n"
+" int ys[4]={ \n"
+" gidy+j+radius, gidy-j+radius, \n"
+" gidy+j+radius, gidy-j+radius \n"
+" }; \n"
+" /* one pair: compare offset (i, j) with its mirror (-i, -j); k is a dedicated index so the outer loop variable i is not overwritten */ \n"
+" for (int k=0;k<2;k++) \n"
+" { \n"
+" if (xs[k] >= 0 && xs[k] < src_width && \n"
+" ys[k] >= 0 && ys[k] < src_height) \n"
+" { \n"
+" __global const float4 *tpix = \n"
+" src_buf + (xs[k] + ys[k] * src_width); \n"
+" float diff=colordiff (*tpix, *center_pix); \n"
+" if (diff < best_diff) \n"
+" { \n"
+" best_diff = diff; \n"
+" selected_pix = tpix; \n"
+" } \n"
+" } \n"
+" } \n"
+" } \n"
+" accumulated += *selected_pix; \n"
+" ++count; \n"
+" if (i==0 && j==0) /* stop once the centre pixel has been added */ \n"
+" break; \n"
+" } \n"
+" } \n"
+" dst_buf[offset] = accumulated/count; \n"
+" return; \n"
+" } \n"
+" return; \n"
+"} \n"
+;
diff --git a/opencl/vignette.cl b/opencl/vignette.cl
new file mode 100644
index 0000000..4d7561a
--- /dev/null
+++ b/opencl/vignette.cl
@@ -0,0 +1,62 @@
+__kernel void vignette_cl (__global const float4 *in,
+ __global float4 *out,
+ float4 color,
+ float scale,
+ float cost,
+ float sint,
+ int roi_x,
+ int roi_y,
+ int midx,
+ int midy,
+ int o_shape,
+ float gamma,
+ float length,
+ float radius0,
+ float rdiff)
+{ /* Vignette: blends every pixel of in toward color by a strength derived from its distance to (midx, midy). */
+ int gidx = get_global_id(0);
+ int gidy = get_global_id(1);
+ int gid = gidx + gidy * get_global_size(0);
+ float strength = 0.0f;
+ float u,v,costy,sinty;
+ int x,y;
+ x = gidx + roi_x;
+ y = gidy + roi_y;
+ sinty = sint * (y-midy) - midx;
+ costy = cost * (y-midy) + midy;
+ /* (u, v) is (x, y) rotated about (midx, midy) using cost / sint */
+ u = cost * (x-midx) - sinty;
+ v = sint * (x-midx) + costy;
+ /* o_shape picks the distance metric: 0 Euclidean, 1 maximum, 2 sum of absolute offsets; the u offset is divided by scale */
+ if (length == 0.0f)
+ strength = 0.0f;
+ else
+ {
+ switch (o_shape)
+ {
+ case 0:
+ strength = hypot ((u-midx) / scale, v-midy);
+ break;
+
+ case 1:
+ strength = fmax (fabs(u-midx)/scale, fabs(v-midy));
+ break;
+
+ case 2:
+ strength = fabs (u-midx) / scale + fabs(v-midy);
+ break;
+ }
+ strength /= length;
+ strength = (strength-radius0) / rdiff;
+ }
+ /* clamp strength to [0, 1] */
+ if (strength < 0.0f) strength = 0.0f;
+ if (strength > 1.0f) strength = 1.0f;
+ /* only gamma ~= 2 may be squared directly; gamma == 1 leaves strength as is, every other gamma goes through pow */
+ if (gamma > 1.9999f && gamma < 2.0001f)
+ strength *= strength;
+ else if (gamma != 1.0f)
+ strength = pow(strength, gamma);
+ /* linear blend between the input pixel and color */
+ out[gid] = in[gid]*(1.0f-strength) + color * strength;
+}
diff --git a/opencl/vignette.cl.h b/opencl/vignette.cl.h
new file mode 100644
index 0000000..243433c
--- /dev/null
+++ b/opencl/vignette.cl.h
@@ -0,0 +1,64 @@
+static const char* vignette_cl_source = /* OpenCL C text of the vignette_cl kernel (see opencl/vignette.cl), one literal per kernel line */
+"__kernel void vignette_cl (__global const float4 *in, \n"
+" __global float4 *out, \n"
+" float4 color, \n"
+" float scale, \n"
+" float cost, \n"
+" float sint, \n"
+" int roi_x, \n"
+" int roi_y, \n"
+" int midx, \n"
+" int midy, \n"
+" int o_shape, \n"
+" float gamma, \n"
+" float length, \n"
+" float radius0, \n"
+" float rdiff) \n"
+"{ \n"
+" int gidx = get_global_id(0); \n"
+" int gidy = get_global_id(1); \n"
+" int gid = gidx + gidy * get_global_size(0); \n"
+" float strength = 0.0f; \n"
+" float u,v,costy,sinty; \n"
+" int x,y; \n"
+" x = gidx + roi_x; \n"
+" y = gidy + roi_y; \n"
+" sinty = sint * (y-midy) - midx; \n"
+" costy = cost * (y-midy) + midy; \n"
+" \n"
+" u = cost * (x-midx) - sinty; \n"
+" v = sint * (x-midx) + costy; \n"
+" \n"
+" if (length == 0.0f) \n"
+" strength = 0.0f; \n"
+" else \n"
+" { \n"
+" switch (o_shape) \n"
+" { \n"
+" case 0: \n"
+" strength = hypot ((u-midx) / scale, v-midy); \n"
+" break; \n"
+" \n"
+" case 1: \n"
+" strength = fmax (fabs(u-midx)/scale, fabs(v-midy)); \n"
+" break; \n"
+" \n"
+" case 2: \n"
+" strength = fabs (u-midx) / scale + fabs(v-midy); \n"
+" break; \n"
+" } \n"
+" strength /= length; \n"
+" strength = (strength-radius0) / rdiff; \n"
+" } \n"
+" \n"
+" if (strength < 0.0f) strength = 0.0f; \n" /* clamp the blend weight to [0,1] */
+" if (strength > 1.0f) strength = 1.0f; \n"
+" \n"
+" if (gamma > 1.9999f && gamma < 2.0001f) \n" /* squaring shortcut is valid only for gamma == 2 */
+" strength *= strength; \n"
+" else if (gamma != 1.0f) \n" /* gamma == 1 leaves the weight unchanged */
+" strength = pow(strength, gamma); \n"
+" \n"
+" out[gid] = in[gid]*(1.0f-strength) + color * strength; \n"
+"} \n"
+;
diff --git a/operations/common/bilateral-filter.c b/operations/common/bilateral-filter.c
index 97cafd4..de4f775 100644
--- a/operations/common/bilateral-filter.c
+++ b/operations/common/bilateral-filter.c
@@ -64,7 +64,7 @@ static void prepare (GeglOperation *operation)
static GeglClRunData *cl_data = NULL;
-static cl_int
+static gboolean
cl_bilateral_filter (cl_mem in_tex,
cl_mem out_tex,
size_t global_worksize,
@@ -80,25 +80,30 @@ cl_bilateral_filter (cl_mem in_tex,
const char *kernel_name[] = {"bilateral_filter", NULL};
cl_data = gegl_cl_compile_and_build (bilateral_filter_cl_source, kernel_name);
}
-
- if (!cl_data) return 1;
+ if (!cl_data) return TRUE;
global_ws[0] = roi->width;
global_ws[1] = roi->height;
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem), (void*)&out_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_float), (void*)&radius);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_float), (void*)&preserve);
- if (cl_err != CL_SUCCESS) return cl_err;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem), (void*)&out_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_float), (void*)&radius);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_float), (void*)&preserve);
+ CL_CHECK;
cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
cl_data->kernel[0], 2,
NULL, global_ws, NULL,
0, NULL, NULL);
- if (cl_err != CL_SUCCESS) return cl_err;
+ CL_CHECK;
+
+ return FALSE;
- return cl_err;
+error:
+ return TRUE;
}
static gboolean
@@ -119,18 +124,19 @@ cl_process (GeglOperation *operation,
GeglBufferClIterator *i = gegl_buffer_cl_iterator_new (output, result, out_format, GEGL_CL_BUFFER_WRITE);
gint read = gegl_buffer_cl_iterator_add_2 (i, input, result, in_format, GEGL_CL_BUFFER_READ, op_area->left, op_area->right, op_area->top, op_area->bottom, GEGL_ABYSS_NONE);
while (gegl_buffer_cl_iterator_next (i, &err))
- {
- if (err) return FALSE;
- for (j=0; j < i->n; j++)
{
- cl_err = cl_bilateral_filter(i->tex[read][j], i->tex[0][j], i->size[0][j], &i->roi[0][j], ceil(o->blur_radius), o->edge_preservation);
- if (cl_err != CL_SUCCESS)
- {
- g_warning("[OpenCL] Error in gegl:bilateral-filter: %s", gegl_cl_errstring(cl_err));
- return FALSE;
- }
+ if (err) return FALSE;
+ for (j=0; j < i->n; j++)
+ {
+ err = cl_bilateral_filter(i->tex[read][j], i->tex[0][j], i->size[0][j], &i->roi[0][j], ceil(o->blur_radius), o->edge_preservation);
+ if (err)
+ {
+ g_warning("[OpenCL] Error in gegl:bilateral-filter");
+ return FALSE;
+ }
+ }
}
- }
+
return TRUE;
}
diff --git a/operations/common/box-blur.c b/operations/common/box-blur.c
index 760c3a7..1553897 100644
--- a/operations/common/box-blur.c
+++ b/operations/common/box-blur.c
@@ -31,7 +31,6 @@ gegl_chant_double_ui (radius, _("Radius"), 0.0, 1000.0, 4.0, 0.0, 100.0, 1.5,
#define GEGL_CHANT_C_FILE "box-blur.c"
#include "gegl-chant.h"
-#include "gegl/gegl-debug.h"
#include <stdio.h>
#include <math.h>
@@ -184,7 +183,7 @@ static void prepare (GeglOperation *operation)
static GeglClRunData *cl_data = NULL;
-static cl_int
+static gboolean
cl_box_blur (cl_mem in_tex,
cl_mem aux_tex,
cl_mem out_tex,
@@ -201,8 +200,7 @@ cl_box_blur (cl_mem in_tex,
const char *kernel_name[] = {"kernel_blur_hor", "kernel_blur_ver", NULL};
cl_data = gegl_cl_compile_and_build (box_blur_cl_source, kernel_name);
}
-
- if (!cl_data) return 1;
+ if (!cl_data) return TRUE;
local_ws_hor[0] = 1;
local_ws_hor[1] = 256;
@@ -214,33 +212,40 @@ cl_box_blur (cl_mem in_tex,
global_ws_ver[0] = roi->height;
global_ws_ver[1] = ((roi->width + local_ws_ver[1] -1)/local_ws_ver[1]) * local_ws_ver[1];
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem), (void*)&aux_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_int), (void*)&roi->width);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_int), (void*)&radius);
- if (cl_err != CL_SUCCESS) return cl_err;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem), (void*)&aux_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_int), (void*)&roi->width);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_int), (void*)&radius);
+ CL_CHECK;
cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
cl_data->kernel[0], 2,
NULL, global_ws_hor, local_ws_hor,
0, NULL, NULL);
- if (cl_err != CL_SUCCESS) return cl_err;
-
- gegl_clEnqueueBarrier (gegl_cl_get_command_queue ());
+ CL_CHECK;
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem), (void*)&aux_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_mem), (void*)&out_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 2, sizeof(cl_int), (void*)&roi->width);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 3, sizeof(cl_int), (void*)&radius);
- if (cl_err != CL_SUCCESS) return cl_err;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem), (void*)&aux_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_mem), (void*)&out_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 2, sizeof(cl_int), (void*)&roi->width);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 3, sizeof(cl_int), (void*)&radius);
+ CL_CHECK;
cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
cl_data->kernel[1], 2,
NULL, global_ws_ver, local_ws_ver,
0, NULL, NULL);
- if (cl_err != CL_SUCCESS) return cl_err;
+ CL_CHECK;
- return cl_err;
+ return FALSE;
+
+error:
+ return TRUE;
}
static gboolean
@@ -266,10 +271,10 @@ cl_process (GeglOperation *operation,
if (err) return FALSE;
for (j=0; j < i->n; j++)
{
- cl_err = cl_box_blur(i->tex[read][j], i->tex[aux][j], i->tex[0][j], i->size[0][j], &i->roi[0][j], ceil (o->radius));
- if (cl_err != CL_SUCCESS)
+ err = cl_box_blur(i->tex[read][j], i->tex[aux][j], i->tex[0][j], i->size[0][j], &i->roi[0][j], ceil (o->radius));
+ if (err)
{
- GEGL_NOTE (GEGL_DEBUG_OPENCL, "Error in gegl:box-blur: %s", gegl_cl_errstring(cl_err));
+ g_warning("[OpenCL] Error in gegl:box-blur");
return FALSE;
}
}
diff --git a/operations/common/c2g.c b/operations/common/c2g.c
index 742cf07..5df7b4a 100644
--- a/operations/common/c2g.c
+++ b/operations/common/c2g.c
@@ -156,7 +156,7 @@ get_bounding_box (GeglOperation *operation)
static GeglClRunData *cl_data = NULL;
-static cl_int
+static gboolean
cl_c2g (cl_mem in_tex,
cl_mem out_tex,
size_t global_worksize,
@@ -176,36 +176,39 @@ cl_c2g (cl_mem in_tex,
const char *kernel_name[] ={"c2g", NULL};
cl_data = gegl_cl_compile_and_build(c2g_cl_source, kernel_name);
}
- if (!cl_data) return 0;
+ if (!cl_data) return TRUE;
compute_luts(rgamma);
cl_lut_cos = gegl_clCreateBuffer(gegl_cl_get_context(),
CL_MEM_READ_ONLY,
ANGLE_PRIME * sizeof(cl_float), NULL, &cl_err);
+ CL_CHECK;
- cl_err |= gegl_clEnqueueWriteBuffer(gegl_cl_get_command_queue(), cl_lut_cos,
+ cl_err = gegl_clEnqueueWriteBuffer(gegl_cl_get_command_queue(), cl_lut_cos,
CL_TRUE, 0, ANGLE_PRIME * sizeof(cl_float), lut_cos,
0, NULL, NULL);
- if (CL_SUCCESS != cl_err) return cl_err;
+ CL_CHECK;
cl_lut_sin = gegl_clCreateBuffer(gegl_cl_get_context(),
CL_MEM_READ_ONLY,
ANGLE_PRIME * sizeof(cl_float), NULL, &cl_err);
+ CL_CHECK; /* check the sin-LUT allocation before cl_err is overwritten by the write below */
- cl_err |= gegl_clEnqueueWriteBuffer(gegl_cl_get_command_queue(), cl_lut_sin,
+ cl_err = gegl_clEnqueueWriteBuffer(gegl_cl_get_command_queue(), cl_lut_sin,
CL_TRUE, 0, ANGLE_PRIME * sizeof(cl_float), lut_sin,
0, NULL, NULL);
- if (CL_SUCCESS != cl_err) return cl_err;
+ CL_CHECK;
cl_radiuses = gegl_clCreateBuffer(gegl_cl_get_context(),
CL_MEM_READ_ONLY,
RADIUS_PRIME * sizeof(cl_float), NULL, &cl_err);
+ CL_CHECK;
- cl_err |= gegl_clEnqueueWriteBuffer(gegl_cl_get_command_queue(), cl_radiuses,
+ cl_err = gegl_clEnqueueWriteBuffer(gegl_cl_get_command_queue(), cl_radiuses,
CL_TRUE, 0, RADIUS_PRIME * sizeof(cl_float), radiuses,
0, NULL, NULL);
- if (CL_SUCCESS != cl_err) return cl_err;
+ CL_CHECK;
{
cl_int cl_src_width = src_roi->width;
@@ -214,34 +216,47 @@ cl_c2g (cl_mem in_tex,
cl_int cl_samples = samples;
cl_int cl_iterations = iterations;
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_int), (void*)&cl_src_width);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_int), (void*)&cl_src_height);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_mem), (void*)&cl_radiuses);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_mem), (void*)&cl_lut_cos);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_mem), (void*)&cl_lut_sin);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 6, sizeof(cl_mem), (void*)&out_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 7, sizeof(cl_int), (void*)&cl_radius);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 8, sizeof(cl_int), (void*)&cl_samples);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 9, sizeof(cl_int), (void*)&cl_iterations);
- if (cl_err != CL_SUCCESS) return cl_err;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_int), (void*)&cl_src_width);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_int), (void*)&cl_src_height);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_mem), (void*)&cl_radiuses);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_mem), (void*)&cl_lut_cos);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_mem), (void*)&cl_lut_sin);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 6, sizeof(cl_mem), (void*)&out_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 7, sizeof(cl_int), (void*)&cl_radius);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 8, sizeof(cl_int), (void*)&cl_samples);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 9, sizeof(cl_int), (void*)&cl_iterations);
+ CL_CHECK;
}
cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue(), cl_data->kernel[0],
2, NULL, gbl_size, NULL,
0, NULL, NULL);
- if (cl_err != CL_SUCCESS) return cl_err;
+ CL_CHECK;
- cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
- if (CL_SUCCESS != cl_err) return cl_err;
+ cl_err = gegl_clFinish(gegl_cl_get_command_queue ());
+ CL_CHECK;
- gegl_clFinish(gegl_cl_get_command_queue ());
+ cl_err = gegl_clReleaseMemObject(cl_radiuses);
+ CL_CHECK;
+ cl_err = gegl_clReleaseMemObject(cl_lut_cos);
+ CL_CHECK;
+ cl_err = gegl_clReleaseMemObject(cl_lut_sin);
+ CL_CHECK;
- gegl_clReleaseMemObject(cl_radiuses);
- gegl_clReleaseMemObject(cl_lut_cos);
- gegl_clReleaseMemObject(cl_lut_sin);
+ return FALSE;
- return cl_err;
+error:
+ return TRUE;
}
static gboolean
@@ -253,8 +268,8 @@ cl_process (GeglOperation *operation,
const Babl *in_format = babl_format("RGBA float");
const Babl *out_format = gegl_operation_get_format (operation, "output");
gint err;
- gint j;
cl_int cl_err;
+ gint j;
GeglOperationAreaFilter *op_area = GEGL_OPERATION_AREA_FILTER (operation);
GeglChantO *o = GEGL_CHANT_PROPERTIES (operation);
@@ -263,19 +278,18 @@ cl_process (GeglOperation *operation,
gint read = gegl_buffer_cl_iterator_add_2 (i, input, result, in_format, GEGL_CL_BUFFER_READ,
op_area->left, op_area->right, op_area->top, op_area->bottom, GEGL_ABYSS_NONE);
while (gegl_buffer_cl_iterator_next (i, &err))
- {
- if (err) return FALSE;
- for (j=0; j < i->n; j++)
{
- cl_err = cl_c2g(i->tex[read][j], i->tex[0][j],i->size[0][j], &i->roi[read][j],&i->roi[0][j],
- o->radius,o->samples,o->iterations,RGAMMA);
- if (cl_err != CL_SUCCESS)
- {
- g_warning("[OpenCL] Error in gegl:c2g: %s", gegl_cl_errstring(cl_err));
- return FALSE;
- }
+ if (err) return FALSE;
+ for (j=0; j < i->n; j++)
+ {
+ err = cl_c2g(i->tex[read][j], i->tex[0][j],i->size[0][j], &i->roi[read][j], &i->roi[0][j], o->radius, o->samples, o->iterations, RGAMMA);
+ if (err)
+ {
+ g_warning("[OpenCL] Error in gegl:c2g");
+ return FALSE;
+ }
+ }
}
- }
return TRUE;
}
diff --git a/operations/common/color-temperature.c b/operations/common/color-temperature.c
index a177380..b52bb08 100644
--- a/operations/common/color-temperature.c
+++ b/operations/common/color-temperature.c
@@ -177,7 +177,7 @@ process (GeglOperation *op,
static GeglClRunData *cl_data = NULL;
/* OpenCL processing function */
-static cl_int
+static gboolean
cl_process (GeglOperation *op,
cl_mem in_tex,
cl_mem out_tex,
@@ -207,20 +207,27 @@ cl_process (GeglOperation *op,
if (!cl_data) return 1;
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem), (void*)&out_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_float), (void*)&coeffs[0]);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_float), (void*)&coeffs[1]);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_float), (void*)&coeffs[2]);
- if (cl_err != CL_SUCCESS) return cl_err;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem), (void*)&out_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_float), (void*)&coeffs[0]);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_float), (void*)&coeffs[1]);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_float), (void*)&coeffs[2]);
+ CL_CHECK;
cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
cl_data->kernel[0], 1,
NULL, &global_worksize, NULL,
0, NULL, NULL);
- if (cl_err != CL_SUCCESS) return cl_err;
+ CL_CHECK;
+
+ return FALSE;
- return cl_err;
+error:
+ return TRUE;
}
diff --git a/operations/common/edge-laplace.c b/operations/common/edge-laplace.c
index 6d20398..b12e5e4 100644
--- a/operations/common/edge-laplace.c
+++ b/operations/common/edge-laplace.c
@@ -452,9 +452,6 @@ cl_edge_laplace (cl_mem in_tex,
0, NULL, NULL);
if (cl_err != CL_SUCCESS) return cl_err;
- cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
- if (CL_SUCCESS != cl_err) return cl_err;
-
cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem), (void*)&aux_tex);
cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_mem), (void*)&out_tex);
if (cl_err != CL_SUCCESS) return cl_err;
diff --git a/operations/common/gaussian-blur.c b/operations/common/gaussian-blur.c
index 20f8723..a158342 100644
--- a/operations/common/gaussian-blur.c
+++ b/operations/common/gaussian-blur.c
@@ -422,7 +422,7 @@ static void prepare (GeglOperation *operation)
static GeglClRunData *cl_data = NULL;
-static cl_int
+static gboolean
cl_gaussian_blur (cl_mem in_tex,
cl_mem out_tex,
cl_mem aux_tex,
@@ -448,72 +448,88 @@ cl_gaussian_blur (cl_mem in_tex,
const char *kernel_name[] = {"fir_ver_blur", "fir_hor_blur", NULL};
cl_data = gegl_cl_compile_and_build (gaussian_blur_cl_source, kernel_name);
}
- if (!cl_data) return 1;
+ if (!cl_data) return TRUE;
cl_matrix_x = gegl_clCreateBuffer(gegl_cl_get_context(),
CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY,
matrix_length_x * sizeof(cl_float), NULL, &cl_err);
- if (cl_err != CL_SUCCESS) return cl_err;
+ CL_CHECK;
cl_err = gegl_clEnqueueWriteBuffer(gegl_cl_get_command_queue(), cl_matrix_x,
CL_TRUE, 0, matrix_length_x * sizeof(cl_float), dmatrix_x,
0, NULL, NULL);
- if (cl_err != CL_SUCCESS) return cl_err;
+ CL_CHECK;
cl_matrix_y = gegl_clCreateBuffer(gegl_cl_get_context(),
CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY,
matrix_length_y * sizeof(cl_float), NULL, &cl_err);
- if (cl_err != CL_SUCCESS) return cl_err;
+ CL_CHECK;
cl_err = gegl_clEnqueueWriteBuffer(gegl_cl_get_command_queue(), cl_matrix_y,
CL_TRUE, 0, matrix_length_y * sizeof(cl_float), dmatrix_y,
0, NULL, NULL);
- if (cl_err != CL_SUCCESS) return cl_err;
+ CL_CHECK;
{
global_ws[0] = aux_rect->width;
global_ws[1] = aux_rect->height;
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem), (void*)&in_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_int), (void*)&src_rect->width);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 2, sizeof(cl_mem), (void*)&aux_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 3, sizeof(cl_mem), (void*)&cl_matrix_x);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 4, sizeof(cl_int), (void*)&matrix_length_x);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 5, sizeof(cl_int), (void*)&xoff);
- if (cl_err != CL_SUCCESS) return cl_err;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem), (void*)&in_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_int), (void*)&src_rect->width);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 2, sizeof(cl_mem), (void*)&aux_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 3, sizeof(cl_mem), (void*)&cl_matrix_x);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 4, sizeof(cl_int), (void*)&matrix_length_x);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 5, sizeof(cl_int), (void*)&xoff);
+ CL_CHECK;
cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
cl_data->kernel[1], 2,
NULL, global_ws, NULL,
0, NULL, NULL);
- if (cl_err != CL_SUCCESS) return cl_err;
+ CL_CHECK;
}
{
global_ws[0] = roi->width;
global_ws[1] = roi->height;
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&aux_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_int), (void*)&aux_rect->width);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_mem), (void*)&out_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_mem), (void*)&cl_matrix_y);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_int), (void*)&matrix_length_y);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_int), (void*)&yoff);
- if (cl_err != CL_SUCCESS) return cl_err;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&aux_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_int), (void*)&aux_rect->width);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_mem), (void*)&out_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_mem), (void*)&cl_matrix_y);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_int), (void*)&matrix_length_y);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_int), (void*)&yoff);
+ CL_CHECK;
cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
cl_data->kernel[0], 2,
NULL, global_ws, NULL,
0, NULL, NULL);
- if (cl_err != CL_SUCCESS) return cl_err;
+ CL_CHECK;
}
- gegl_clFinish(gegl_cl_get_command_queue ());
+ cl_err = gegl_clFinish(gegl_cl_get_command_queue ());
+ CL_CHECK;
+
+ cl_err = gegl_clReleaseMemObject(cl_matrix_x);
+ CL_CHECK;
+ cl_err = gegl_clReleaseMemObject(cl_matrix_y);
+ CL_CHECK;
- gegl_clReleaseMemObject(cl_matrix_x);
- gegl_clReleaseMemObject(cl_matrix_y);
+ return FALSE;
- return CL_SUCCESS;
+error:
+ return TRUE;
}
static gboolean
@@ -560,22 +576,22 @@ cl_process (GeglOperation *operation,
if (err) return FALSE;
for (j=0; j < i->n; j++)
{
- cl_err = cl_gaussian_blur(i->tex[read][j],
- i->tex[0][j],
- i->tex[aux][j],
- i->size[0][j],
- &i->roi[0][j],
- &i->roi[read][j],
- &i->roi[aux][j],
- fmatrix_x,
- cmatrix_len_x,
- op_area->left,
- fmatrix_y,
- cmatrix_len_y,
- op_area->top);
- if (cl_err != CL_SUCCESS)
+ err = cl_gaussian_blur(i->tex[read][j],
+ i->tex[0][j],
+ i->tex[aux][j],
+ i->size[0][j],
+ &i->roi[0][j],
+ &i->roi[read][j],
+ &i->roi[aux][j],
+ fmatrix_x,
+ cmatrix_len_x,
+ op_area->left,
+ fmatrix_y,
+ cmatrix_len_y,
+ op_area->top);
+ if (err)
{
- g_warning("[OpenCL] Error in gegl:gaussian-blur: %s", gegl_cl_errstring(cl_err));
+ g_warning("[OpenCL] Error in gegl:gaussian-blur");
return FALSE;
}
}
diff --git a/operations/common/grey.c b/operations/common/grey.c
index d2bb3b1..bc79dc7 100644
--- a/operations/common/grey.c
+++ b/operations/common/grey.c
@@ -62,7 +62,7 @@ process (GeglOperation *op,
#include "opencl/gegl-cl.h"
-static cl_int
+static gboolean
cl_process (GeglOperation *op,
cl_mem in_tex,
cl_mem out_tex,
@@ -76,10 +76,12 @@ cl_process (GeglOperation *op,
in_tex , out_tex , 0 , 0 ,
global_worksize * sizeof (cl_float2),
0, NULL, NULL);
+ CL_CHECK;
- if (CL_SUCCESS != cl_err) return cl_err;
+ return FALSE;
- return cl_err;
+error:
+ return TRUE;
}
diff --git a/operations/common/levels.c b/operations/common/levels.c
index 2c3f9f0..c3dd955 100644
--- a/operations/common/levels.c
+++ b/operations/common/levels.c
@@ -87,25 +87,12 @@ process (GeglOperation *op,
#include "opencl/gegl-cl.h"
-static const char* kernel_source =
-"__kernel void kernel_levels(__global const float4 *in, \n"
-" __global float4 *out, \n"
-" float in_offset, \n"
-" float out_offset, \n"
-" float scale) \n"
-"{ \n"
-" int gid = get_global_id(0); \n"
-" float4 in_v = in[gid]; \n"
-" float4 out_v; \n"
-" out_v.xyz = (in_v.xyz - in_offset) * scale + out_offset; \n"
-" out_v.w = in_v.w; \n"
-" out[gid] = out_v; \n"
-"} \n";
+#include "opencl/levels.cl.h"
static GeglClRunData *cl_data = NULL;
/* OpenCL processing function */
-static cl_int
+static gboolean
cl_process (GeglOperation *op,
cl_mem in_tex,
cl_mem out_tex,
@@ -140,24 +127,31 @@ cl_process (GeglOperation *op,
if (!cl_data)
{
const char *kernel_name[] = {"kernel_levels", NULL};
- cl_data = gegl_cl_compile_and_build (kernel_source, kernel_name);
+ cl_data = gegl_cl_compile_and_build (levels_cl_source, kernel_name);
}
- if (!cl_data) return 1;
-
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem), (void*)&out_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_float), (void*)&in_offset);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_float), (void*)&out_offset);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_float), (void*)&scale);
- if (cl_err != CL_SUCCESS) return cl_err;
+ if (!cl_data) return TRUE;
+
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem), (void*)&out_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_float), (void*)&in_offset);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_float), (void*)&out_offset);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_float), (void*)&scale);
+ CL_CHECK;
cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
cl_data->kernel[0], 1,
NULL, &global_worksize, NULL,
0, NULL, NULL);
- if (cl_err != CL_SUCCESS) return cl_err;
+ CL_CHECK;
- return cl_err;
+ return FALSE;
+
+error:
+ return TRUE;
}
diff --git a/operations/common/motion-blur.c b/operations/common/motion-blur.c
index 2a23dc6..b6ac612 100644
--- a/operations/common/motion-blur.c
+++ b/operations/common/motion-blur.c
@@ -51,93 +51,18 @@ prepare (GeglOperation *operation)
op_area->top =
op_area->bottom = (gint)ceil(0.5 * offset_y);
- gegl_operation_set_format (operation, "input", babl_format ("RaGaBaA float"));
+ gegl_operation_set_format (operation, "input", babl_format ("RaGaBaA float"));
gegl_operation_set_format (operation, "output", babl_format ("RaGaBaA float"));
}
#include "opencl/gegl-cl.h"
#include "buffer/gegl-buffer-cl-iterator.h"
-static const char* kernel_source =
-"int CLAMP(int val,int lo,int hi) \n"
-"{ \n"
-" return (val < lo) ? lo : ((hi < val) ? hi : val); \n"
-"} \n"
-" \n"
-"float4 get_pixel_color_CL(const __global float4 *in_buf, \n"
-" int rect_width, \n"
-" int rect_height, \n"
-" int rect_x, \n"
-" int rect_y, \n"
-" int x, \n"
-" int y) \n"
-"{ \n"
-" int ix = x - rect_x; \n"
-" int iy = y - rect_y; \n"
-" \n"
-" ix = CLAMP(ix, 0, rect_width-1); \n"
-" iy = CLAMP(iy, 0, rect_height-1); \n"
-" \n"
-" return in_buf[iy * rect_width + ix]; \n"
-"} \n"
-" \n"
-"__kernel void motion_blur_CL(const __global float4 *src_buf, \n"
-" int src_width, \n"
-" int src_height, \n"
-" int src_x, \n"
-" int src_y, \n"
-" __global float4 *dst_buf, \n"
-" int dst_x, \n"
-" int dst_y, \n"
-" int num_steps, \n"
-" float offset_x, \n"
-" float offset_y) \n"
-"{ \n"
-" int gidx = get_global_id(0); \n"
-" int gidy = get_global_id(1); \n"
-" \n"
-" float4 sum = 0.0f; \n"
-" int px = gidx + dst_x; \n"
-" int py = gidy + dst_y; \n"
-" \n"
-" for(int step = 0; step < num_steps; ++step) \n"
-" { \n"
-" float t = num_steps == 1 ? 0.0f : \n"
-" step / (float)(num_steps - 1) - 0.5f; \n"
-" \n"
-" float xx = px + t * offset_x; \n"
-" float yy = py + t * offset_y; \n"
-" \n"
-" int ix = (int)floor(xx); \n"
-" int iy = (int)floor(yy); \n"
-" \n"
-" float dx = xx - floor(xx); \n"
-" float dy = yy - floor(yy); \n"
-" \n"
-" float4 mixy0,mixy1,pix0,pix1,pix2,pix3; \n"
-" \n"
-" pix0 = get_pixel_color_CL(src_buf, src_width, \n"
-" src_height, src_x, src_y, ix, iy); \n"
-" pix1 = get_pixel_color_CL(src_buf, src_width, \n"
-" src_height, src_x, src_y, ix+1, iy); \n"
-" pix2 = get_pixel_color_CL(src_buf, src_width, \n"
-" src_height, src_x, src_y, ix, iy+1); \n"
-" pix3 = get_pixel_color_CL(src_buf, src_width, \n"
-" src_height, src_x, src_y, ix+1, iy+1); \n"
-" \n"
-" mixy0 = dy * (pix2 - pix0) + pix0; \n"
-" mixy1 = dy * (pix3 - pix1) + pix1; \n"
-" \n"
-" sum += dx * (mixy1 - mixy0) + mixy0; \n"
-" } \n"
-" \n"
-" dst_buf[gidy * get_global_size(0) + gidx] = \n"
-" sum / num_steps; \n"
-"} \n";
+#include "opencl/motion-blur.cl.h"
static GeglClRunData *cl_data = NULL;
-static cl_int
+static gboolean
cl_motion_blur (cl_mem in_tex,
cl_mem out_tex,
size_t global_worksize,
@@ -152,35 +77,47 @@ cl_motion_blur (cl_mem in_tex,
if (!cl_data)
{
- const char *kernel_name[] = {"motion_blur_CL", NULL};
- cl_data = gegl_cl_compile_and_build (kernel_source, kernel_name);
+ const char *kernel_name[] = {"motion_blur", NULL};
+ cl_data = gegl_cl_compile_and_build (motion_blur_cl_source, kernel_name);
}
-
- if (!cl_data) return 1;
+ if (!cl_data) return TRUE;
global_ws[0] = roi->width;
global_ws[1] = roi->height;
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_int), (void*)&src_rect->width);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_int), (void*)&src_rect->height);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_int), (void*)&src_rect->x);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_int), (void*)&src_rect->y);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_mem), (void*)&out_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 6, sizeof(cl_int), (void*)&roi->x);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 7, sizeof(cl_int), (void*)&roi->y);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 8, sizeof(cl_int), (void*)&num_steps);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 9, sizeof(cl_float), (void*)&offset_x);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 10, sizeof(cl_float), (void*)&offset_y);
- if (cl_err != CL_SUCCESS) return cl_err;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_int), (void*)&src_rect->width);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_int), (void*)&src_rect->height);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_int), (void*)&src_rect->x);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_int), (void*)&src_rect->y);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_mem), (void*)&out_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 6, sizeof(cl_int), (void*)&roi->x);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 7, sizeof(cl_int), (void*)&roi->y);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 8, sizeof(cl_int), (void*)&num_steps);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 9, sizeof(cl_float), (void*)&offset_x);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 10, sizeof(cl_float), (void*)&offset_y);
+ CL_CHECK;
cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
cl_data->kernel[0], 2,
NULL, global_ws, NULL,
0, NULL, NULL);
- if (cl_err != CL_SUCCESS) return cl_err;
+ CL_CHECK;
+
+ return FALSE;
- return cl_err;
+error:
+ return TRUE;
}
static gboolean
@@ -193,8 +130,8 @@ cl_process (GeglOperation *operation,
const Babl *in_format = gegl_operation_get_format (operation, "input");
const Babl *out_format = gegl_operation_get_format (operation, "output");
gint err;
- gint j;
cl_int cl_err;
+ gint j;
GeglOperationAreaFilter *op_area = GEGL_OPERATION_AREA_FILTER (operation);
GeglChantO *o = GEGL_CHANT_PROPERTIES (operation);
@@ -208,18 +145,18 @@ cl_process (GeglOperation *operation,
gint read = gegl_buffer_cl_iterator_add_2 (i, input, result, in_format, GEGL_CL_BUFFER_READ,
op_area->left, op_area->right, op_area->top, op_area->bottom, GEGL_ABYSS_NONE);
while (gegl_buffer_cl_iterator_next (i, &err))
- {
- if (err) return FALSE;
- for (j=0; j < i->n; j++)
{
- cl_err = cl_motion_blur(i->tex[read][j], i->tex[0][j], i->size[0][j], &i->roi[0][j], &i->roi[read][j], num_steps, offset_x, offset_y);
- if (cl_err != CL_SUCCESS)
- {
- g_warning("[OpenCL] Error in gegl:motion-blur: %s", gegl_cl_errstring(cl_err));
- return FALSE;
- }
+ if (err) return FALSE;
+ for (j=0; j < i->n; j++)
+ {
+ err = cl_motion_blur(i->tex[read][j], i->tex[0][j], i->size[0][j], &i->roi[0][j], &i->roi[read][j], num_steps, offset_x, offset_y);
+ if (err)
+ {
+ g_warning("[OpenCL] Error in gegl:motion-blur");
+ return FALSE;
+ }
+ }
}
- }
return TRUE;
}
diff --git a/operations/common/noise-reduction.c b/operations/common/noise-reduction.c
index b8c4337..b0f1063 100644
--- a/operations/common/noise-reduction.c
+++ b/operations/common/noise-reduction.c
@@ -152,88 +152,11 @@ static void prepare (GeglOperation *operation)
#include "opencl/gegl-cl.h"
#include "buffer/gegl-buffer-cl-iterator.h"
-static const char* kernel_source =
-"#define NEIGHBOURS 8 \n"
-"#define AXES (NEIGHBOURS/2) \n"
-" \n"
-"#define POW2(a) ((a)*(a)) \n"
-" \n"
-"#define GEN_METRIC(before, center, after) POW2((center) * 2 - (before) - (after)) \n"
-" \n"
-"#define BAIL_CONDITION(new,original) ((new) < (original)) \n"
-" \n"
-"#define SYMMETRY(a) (NEIGHBOURS - (a) - 1) \n"
-" \n"
-"#define O(u,v) (((u)+((v) * (src_stride)))) \n"
-" \n"
-"__kernel void noise_reduction_cl (__global float4 *src_buf, \n"
-" int src_stride, \n"
-" __global float4 *dst_buf, \n"
-" int dst_stride) \n"
-"{ \n"
-" int gidx = get_global_id(0); \n"
-" int gidy = get_global_id(1); \n"
-" \n"
-" __global float4 *center_pix = src_buf + (gidy + 1) * src_stride + gidx + 1; \n"
-" int dst_offset = dst_stride * gidy + gidx; \n"
-" \n"
-" int offsets[NEIGHBOURS] = { \n"
-" O(-1, -1), O( 0, -1), O( 1, -1), \n"
-" O(-1, 0), O( 1, 0), \n"
-" O(-1, 1), O( 0, 1), O( 1, 1) \n"
-" }; \n"
-" \n"
-" float4 sum; \n"
-" int4 count; \n"
-" float4 cur; \n"
-" float4 metric_reference[AXES]; \n"
-" \n"
-" for (int axis = 0; axis < AXES; axis++) \n"
-" { \n"
-" float4 before_pix = *(center_pix + offsets[axis]); \n"
-" float4 after_pix = *(center_pix + offsets[SYMMETRY(axis)]); \n"
-" metric_reference[axis] = GEN_METRIC (before_pix, *center_pix, after_pix); \n"
-" } \n"
-" \n"
-" cur = sum = *center_pix; \n"
-" count = 1; \n"
-" \n"
-" for (int direction = 0; direction < NEIGHBOURS; direction++) \n"
-" { \n"
-" float4 pix = *(center_pix + offsets[direction]); \n"
-" float4 value = (pix + cur) * (0.5f); \n"
-" int axis; \n"
-" int4 mask = {1, 1, 1, 0}; \n"
-" \n"
-" for (axis = 0; axis < AXES; axis++) \n"
-" { \n"
-" float4 before_pix = *(center_pix + offsets[axis]); \n"
-" float4 after_pix = *(center_pix + offsets[SYMMETRY(axis)]); \n"
-" \n"
-" float4 metric_new = GEN_METRIC (before_pix, \n"
-" value, \n"
-" after_pix); \n"
-" mask = BAIL_CONDITION (metric_new, metric_reference[axis]) & mask; \n"
-" } \n"
-" sum += mask >0 ? value : 0; \n"
-" count += mask >0 ? 1 : 0; \n"
-" } \n"
-" dst_buf[dst_offset] = (sum/convert_float4(count)); \n"
-" dst_buf[dst_offset].w = cur.w; \n"
-"} \n"
-"__kernel void transfer(__global float4 * in, \n"
-" int in_width, \n"
-" __global float4 * out) \n"
-"{ \n"
-" int gidx = get_global_id(0); \n"
-" int gidy = get_global_id(1); \n"
-" int width = get_global_size(0); \n"
-" out[gidy * width + gidx] = in[gidy * in_width + gidx]; \n"
-"} \n";
+#include "opencl/noise-reduction.cl.h"
static GeglClRunData *cl_data = NULL;
-static cl_int
+static gboolean
cl_noise_reduction (cl_mem in_tex,
cl_mem aux_tex,
cl_mem out_tex,
@@ -255,24 +178,21 @@ cl_noise_reduction (cl_mem in_tex,
if (!cl_data)
{
const char *kernel_name[] ={"noise_reduction_cl","transfer", NULL};
- cl_data = gegl_cl_compile_and_build(kernel_source, kernel_name);
+ cl_data = gegl_cl_compile_and_build(noise_reduction_cl_source, kernel_name);
}
- if (!cl_data) return 0;
+ if (!cl_data) return TRUE;
temp_tex = gegl_clCreateBuffer (gegl_cl_get_context(),
CL_MEM_READ_WRITE,
src_roi->width * src_roi->height * stride,
NULL, &cl_err);
- if (cl_err != CL_SUCCESS) return cl_err;
-
+ CL_CHECK;
cl_err = gegl_clEnqueueCopyBuffer(gegl_cl_get_command_queue(),
in_tex , temp_tex , 0 , 0 ,
src_roi->width * src_roi->height * stride,
0, NULL, NULL);
-
- cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
- if (CL_SUCCESS != cl_err) return cl_err;
+ CL_CHECK;
tmptex = temp_tex;
for (i = 0;i<iterations;i++)
@@ -286,37 +206,46 @@ cl_noise_reduction (cl_mem in_tex,
gbl_size_tmp[0] = roi->width + 2 * (iterations - 1 -i);
gbl_size_tmp[1] = roi->height + 2 * (iterations - 1 -i);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&temp_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_int), (void*)&n_src_stride);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_mem), (void*)&aux_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_int), (void*)&n_src_stride);
- if (cl_err != CL_SUCCESS) return cl_err;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&temp_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_int), (void*)&n_src_stride);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_mem), (void*)&aux_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_int), (void*)&n_src_stride);
+ CL_CHECK;
cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue(), cl_data->kernel[0],
2, NULL, gbl_size_tmp, NULL,
0, NULL, NULL);
- cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
- if (CL_SUCCESS != cl_err) return cl_err;
+ CL_CHECK;
}
gbl_size_tmp[0] = roi->width ;
gbl_size_tmp[1] = roi->height;
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem), (void*)&aux_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_int), (void*)&n_src_stride);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 2, sizeof(cl_mem), (void*)&out_tex);
- if (cl_err != CL_SUCCESS) return cl_err;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem), (void*)&aux_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_int), (void*)&n_src_stride);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 2, sizeof(cl_mem), (void*)&out_tex);
+ CL_CHECK;
cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue(), cl_data->kernel[1],
2, NULL, gbl_size_tmp, NULL,
0, NULL, NULL);
+ CL_CHECK;
cl_err = gegl_clFinish(gegl_cl_get_command_queue());
- if (CL_SUCCESS != cl_err) return cl_err;
+ CL_CHECK;
- if (tmptex) gegl_clReleaseMemObject (tmptex);
+ if (tmptex) cl_err = gegl_clReleaseMemObject (tmptex);
+ CL_CHECK;
- return cl_err;
+ return FALSE;
+
+error:
+ return TRUE;
}
static gboolean
@@ -329,7 +258,6 @@ cl_process (GeglOperation *operation,
const Babl *out_format = gegl_operation_get_format (operation, "output");
gint err;
gint j;
- cl_int cl_err;
GeglOperationAreaFilter *op_area = GEGL_OPERATION_AREA_FILTER (operation);
GeglChantO *o = GEGL_CHANT_PROPERTIES (operation);
@@ -345,16 +273,16 @@ cl_process (GeglOperation *operation,
if (err) return FALSE;
for (j=0; j < i->n; j++)
{
- cl_err = cl_noise_reduction(i->tex[read][j],
- i->tex[aux][j],
- i->tex[0][j],
- i->size[0][j],
- &i->roi[read][j],
- &i->roi[0][j],
- o->iterations);
- if (cl_err != CL_SUCCESS)
+ err = cl_noise_reduction(i->tex[read][j],
+ i->tex[aux][j],
+ i->tex[0][j],
+ i->size[0][j],
+ &i->roi[read][j],
+ &i->roi[0][j],
+ o->iterations);
+ if (err)
{
- g_warning("[OpenCL] Error in gegl:noise-reduction: %s", gegl_cl_errstring(cl_err));
+ g_warning("[OpenCL] Error in gegl:noise-reduction");
return FALSE;
}
}
diff --git a/operations/common/oilify.c b/operations/common/oilify.c
index 81e318d..22d9074 100644
--- a/operations/common/oilify.c
+++ b/operations/common/oilify.c
@@ -278,135 +278,11 @@ prepare (GeglOperation *operation)
#include "opencl/gegl-cl.h"
#include "buffer/gegl-buffer-cl-iterator.h"
-/* two small different kernels are better than one big */
-static const char* kernel_source =
-"#define NUM_INTENSITIES 256 \n"
-"kernel void kernel_oilify(global float4 *in, \n"
-" global float4 *out, \n"
-" const int mask_radius, \n"
-" const int intensities, \n"
-" const float exponent) \n"
-"{ \n"
-" int gidx = get_global_id(0); \n"
-" int gidy = get_global_id(1); \n"
-" int x = gidx + mask_radius; \n"
-" int y = gidy + mask_radius; \n"
-" int dst_width = get_global_size(0); \n"
-" int src_width = dst_width + mask_radius * 2; \n"
-" float4 hist[NUM_INTENSITIES]; \n"
-" float4 hist_max = 1.0; \n"
-" int i, j, intensity; \n"
-" int radius_sq = mask_radius * mask_radius; \n"
-" float4 temp_pixel; \n"
-" for (i = 0; i < intensities; i++) \n"
-" hist[i] = 0.0; \n"
-" \n"
-" for (i = -mask_radius; i <= mask_radius; i++) \n"
-" { \n"
-" for (j = -mask_radius; j <= mask_radius; j++) \n"
-" { \n"
-" if (i*i + j*j <= radius_sq) \n"
-" { \n"
-" temp_pixel = in[x + i + (y + j) * src_width]; \n"
-" hist[(int)(temp_pixel.x * (intensities - 1))].x+=1; \n"
-" hist[(int)(temp_pixel.y * (intensities - 1))].y+=1; \n"
-" hist[(int)(temp_pixel.z * (intensities - 1))].z+=1; \n"
-" hist[(int)(temp_pixel.w * (intensities - 1))].w+=1; \n"
-" } \n"
-" } \n"
-" } \n"
-" \n"
-" for (i = 0; i < intensities; i++) { \n"
-" if(hist_max.x < hist[i].x) \n"
-" hist_max.x = hist[i].x; \n"
-" if(hist_max.y < hist[i].y) \n"
-" hist_max.y = hist[i].y; \n"
-" if(hist_max.z < hist[i].z) \n"
-" hist_max.z = hist[i].z; \n"
-" if(hist_max.w < hist[i].w) \n"
-" hist_max.w = hist[i].w; \n"
-" } \n"
-" float4 div = 0.0; \n"
-" float4 sum = 0.0; \n"
-" float4 ratio, weight; \n"
-" for (i = 0; i < intensities; i++) \n"
-" { \n"
-" ratio = hist[i] / hist_max; \n"
-" weight = pow(ratio, (float4)exponent); \n"
-" sum += weight * (float4)i; \n"
-" div += weight; \n"
-" } \n"
-" out[gidx + gidy * dst_width] = sum / div / (float)(intensities - 1); \n"
-"} \n"
-" \n"
-"kernel void kernel_oilify_inten(global float4 *in, \n"
-" global float4 *out, \n"
-" const int mask_radius, \n"
-" const int intensities, \n"
-" const float exponent) \n"
-"{ \n"
-" int gidx = get_global_id(0); \n"
-" int gidy = get_global_id(1); \n"
-" int x = gidx + mask_radius; \n"
-" int y = gidy + mask_radius; \n"
-" int dst_width = get_global_size(0); \n"
-" int src_width = dst_width + mask_radius * 2; \n"
-" float4 cumulative_rgb[NUM_INTENSITIES]; \n"
-" int hist_inten[NUM_INTENSITIES], inten_max; \n"
-" int i, j, intensity; \n"
-" int radius_sq = mask_radius * mask_radius; \n"
-" float4 temp_pixel; \n"
-" for (i = 0; i < intensities; i++) \n"
-" { \n"
-" hist_inten[i] = 0; \n"
-" cumulative_rgb[i] = 0.0; \n"
-" } \n"
-" for (i = -mask_radius; i <= mask_radius; i++) \n"
-" { \n"
-" for (j = -mask_radius; j <= mask_radius; j++) \n"
-" { \n"
-" if (i*i + j*j <= radius_sq) \n"
-" { \n"
-" temp_pixel = in[x + i + (y + j) * src_width]; \n"
-" /*Calculate intensity on the fly, GPU does it fast*/ \n"
-" intensity = (int)((0.299 * temp_pixel.x \n"
-" +0.587 * temp_pixel.y \n"
-" +0.114 * temp_pixel.z) * (float)(intensities-1)); \n"
-" hist_inten[intensity] += 1; \n"
-" cumulative_rgb[intensity] += temp_pixel; \n"
-" } \n"
-" } \n"
-" } \n"
-" inten_max = 1; \n"
-" \n"
-" /* calculated maximums */ \n"
-" for (i = 0; i < intensities; i++) { \n"
-" if(hist_inten[i] > inten_max) \n"
-" inten_max = hist_inten[i]; \n"
-" } \n"
-" float div = 0.0; \n"
-" float ratio, weight, mult_inten; \n"
-" \n"
-" float4 color = 0.0; \n"
-" for (i = 0; i < intensities; i++) \n"
-" { \n"
-" if (hist_inten[i] > 0) \n"
-" { \n"
-" ratio = (float)(hist_inten[i]) / (float)(inten_max); \n"
-" weight = pow(ratio, exponent); \n"
-" mult_inten = weight / (float)(hist_inten[i]); \n"
-" \n"
-" div += weight; \n"
-" color += mult_inten * cumulative_rgb[i]; \n"
-" } \n"
-" } \n"
-" out[gidx + gidy * dst_width] = color/div; \n"
-"} \n";
-
+#include "opencl/oilify.cl.h"
static GeglClRunData *cl_data = NULL;
-static cl_int
+static gboolean
cl_oilify (cl_mem in_tex,
cl_mem out_tex,
size_t global_worksize,
@@ -416,12 +292,6 @@ cl_oilify (cl_mem in_tex,
gint exponent,
gboolean use_inten)
{
- if (!cl_data)
- {
- const char *kernel_name[] = {"kernel_oilify", "kernel_oilify_inten", NULL};
- cl_data = gegl_cl_compile_and_build(kernel_source, kernel_name);
- }
- if (!cl_data) return 0;
const size_t gbl_size[2] = {roi->width,roi->height};
cl_int radius = mask_radius;
@@ -430,21 +300,35 @@ cl_oilify (cl_mem in_tex,
cl_int cl_err = 0;
gint arg = 0;
+ if (!cl_data)
+ {
+ const char *kernel_name[] = {"kernel_oilify", "kernel_oilify_inten", NULL};
+ cl_data = gegl_cl_compile_and_build(oilify_cl_source, kernel_name);
+ }
+ if (!cl_data) return TRUE;
+
/* simple hack: select suitable kernel using boolean, 0 - no intensity mode, 1 - intensity mode */
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[use_inten], arg++, sizeof(cl_mem), (void*)&in_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[use_inten], arg++, sizeof(cl_mem), (void*)&out_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[use_inten], arg++, sizeof(cl_int), (void*)&radius);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[use_inten], arg++, sizeof(cl_int), (void*)&intensities);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[use_inten], arg++, sizeof(cl_float), (void*)&exp);
- if (cl_err != CL_SUCCESS) return cl_err;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[use_inten], arg++, sizeof(cl_mem), (void*)&in_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[use_inten], arg++, sizeof(cl_mem), (void*)&out_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[use_inten], arg++, sizeof(cl_int), (void*)&radius);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[use_inten], arg++, sizeof(cl_int), (void*)&intensities);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[use_inten], arg++, sizeof(cl_float), (void*)&exp);
+ CL_CHECK;
cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue(),
cl_data->kernel[use_inten], 2,
NULL, gbl_size, NULL,
0, NULL, NULL);
- if (cl_err != CL_SUCCESS) return cl_err;
+ CL_CHECK;
- return CL_SUCCESS;
+ return FALSE;
+
+error:
+ return TRUE;
}
static gboolean
@@ -457,33 +341,32 @@ cl_process (GeglOperation *operation,
const Babl *out_format = gegl_operation_get_format (operation, "output");
gint err;
gint j;
- cl_int cl_err;
- GeglOperationAreaFilter *op_area = GEGL_OPERATION_AREA_FILTER (operation);
GeglChantO *o = GEGL_CHANT_PROPERTIES (operation);
GeglBufferClIterator *i = gegl_buffer_cl_iterator_new (output,result, out_format, GEGL_CL_BUFFER_WRITE);
gint read = gegl_buffer_cl_iterator_add_2 (i, input, result, in_format, GEGL_CL_BUFFER_READ,
o->mask_radius, o->mask_radius, o->mask_radius, o->mask_radius, GEGL_ABYSS_NONE);
while (gegl_buffer_cl_iterator_next (i, &err))
- {
- if (err) return FALSE;
- for (j=0; j < i->n; j++)
{
- cl_err = cl_oilify(i->tex[read][j],
- i->tex[0][j],
- i->size[0][j],&i->roi[0][j],
- o->mask_radius,
- o->intensities,
- o->exponent,
- o->use_inten);
- if (cl_err != CL_SUCCESS)
- {
- g_warning("[OpenCL] Error in gegl:oilify: %s", gegl_cl_errstring(cl_err));
- return FALSE;
- }
+ if (err) return FALSE;
+ for (j=0; j < i->n; j++)
+ {
+ err = cl_oilify(i->tex[read][j],
+ i->tex[0][j],
+ i->size[0][j],&i->roi[0][j],
+ o->mask_radius,
+ o->intensities,
+ o->exponent,
+ o->use_inten);
+ if (err)
+ {
+ g_warning("[OpenCL] Error in gegl:oilify");
+ return FALSE;
+ }
+ }
}
- }
+
return TRUE;
}
diff --git a/operations/common/opacity.c b/operations/common/opacity.c
index dec9d80..3314e7f 100644
--- a/operations/common/opacity.c
+++ b/operations/common/opacity.c
@@ -214,8 +214,7 @@ cl_process (GeglOperation *op,
const char *kernel_name[] = {"gegl_opacity_RaGaBaA_float", "gegl_opacity_RGBA_float", NULL};
cl_data = gegl_cl_compile_and_build (opacity_cl_source, kernel_name);
}
-
- if (!cl_data) return FALSE;
+ if (!cl_data) return TRUE;
kernel = (GEGL_CHANT_PROPERTIES (op)->chant_data != NULL);
@@ -232,10 +231,10 @@ cl_process (GeglOperation *op,
0, NULL, NULL);
CL_CHECK;
- return TRUE;
+ return FALSE;
error:
- return FALSE;
+ return TRUE;
}
/* Fast path when opacity is a no-op
@@ -282,6 +281,8 @@ gegl_chant_class_init (GeglChantClass *klass)
point_composer_class->process = process;
point_composer_class->cl_process = cl_process;
+ operation_class->opencl_support = TRUE;
+
gegl_operation_class_set_keys (operation_class,
"name" , "gegl:opacity",
"categories" , "transparency",
diff --git a/operations/common/pixelize.c b/operations/common/pixelize.c
index 57c5e03..4292b8c 100644
--- a/operations/common/pixelize.c
+++ b/operations/common/pixelize.c
@@ -135,59 +135,11 @@ pixelize (gfloat* buf,
#include "opencl/gegl-cl.h"
#include "buffer/gegl-buffer-cl-iterator.h"
-static const char* kernel_source =
-"__kernel void calc_block_color(__global float4 *in, \n"
-" __global float4 *out, \n"
-" int xsize, \n"
-" int ysize, \n"
-" int roi_x, \n"
-" int roi_y, \n"
-" int line_width, \n"
-" int block_count_x ) \n"
-"{ \n"
-" int gidx = get_global_id(0); \n"
-" int gidy = get_global_id(1); \n"
-" int cx = roi_x / xsize + gidx; \n"
-" int cy = roi_y / ysize + gidy; \n"
-" \n"
-" float weight = 1.0f / (xsize * ysize); \n"
-" \n"
-" int px = cx * xsize + xsize - roi_x; \n"
-" int py = cy * ysize + ysize - roi_y; \n"
-" \n"
-" int i,j; \n"
-" float4 col = 0.0f; \n"
-" for (j = py;j < py + ysize; ++j) \n"
-" { \n"
-" for (i = px;i < px + xsize; ++i) \n"
-" { \n"
-" col += in[j * line_width + i]; \n"
-" } \n"
-" } \n"
-" out[gidy * block_count_x + gidx] = col * weight; \n"
-" \n"
-"} \n"
-" \n"
-"__kernel void kernel_pixelise (__global float4 *in, \n"
-" __global float4 *out, \n"
-" int xsize, \n"
-" int ysize, \n"
-" int roi_x, \n"
-" int roi_y, \n"
-" int block_count_x) \n"
-"{ \n"
-" int gidx = get_global_id(0); \n"
-" int gidy = get_global_id(1); \n"
-" \n"
-" int src_width = get_global_size(0); \n"
-" int cx = (gidx + roi_x) / xsize - roi_x / xsize; \n"
-" int cy = (gidy + roi_y) / ysize - roi_y / ysize; \n"
-" out[gidx + gidy * src_width] = in[cx + cy * block_count_x]; \n"
-"} \n";
+#include "opencl/pixelize.cl.h"
static GeglClRunData *cl_data = NULL;
-static cl_int
+static gboolean
cl_pixelise (cl_mem in_tex,
cl_mem aux_tex,
cl_mem out_tex,
@@ -205,45 +157,62 @@ cl_pixelise (cl_mem in_tex,
gint block_count_y = CELL_Y(roi->y+roi->height - 1, ysize)-cy0 + 1;
cl_int line_width = roi->width + 2 * xsize;
- size_t gbl_size_tmp[2]={block_count_x,block_count_y};
+ size_t gbl_size_tmp[2] = {block_count_x,block_count_y};
if (!cl_data)
{
const char *kernel_name[] = {"calc_block_color", "kernel_pixelise", NULL};
- cl_data = gegl_cl_compile_and_build (kernel_source, kernel_name);
+ cl_data = gegl_cl_compile_and_build (pixelize_cl_source, kernel_name);
}
if (!cl_data) return 1;
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem), (void*)&aux_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_int), (void*)&xsize);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_int), (void*)&ysize);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_int), (void*)&roi->x);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_int), (void*)&roi->y);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 6, sizeof(cl_int), (void*)&line_width);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 7, sizeof(cl_int), (void*)&block_count_x);
- if (cl_err != CL_SUCCESS) return cl_err;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem), (void*)&aux_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_int), (void*)&xsize);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_int), (void*)&ysize);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_int), (void*)&roi->x);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_int), (void*)&roi->y);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 6, sizeof(cl_int), (void*)&line_width);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 7, sizeof(cl_int), (void*)&block_count_x);
+ CL_CHECK;
cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
cl_data->kernel[0], 2,
NULL, gbl_size_tmp, NULL,
0, NULL, NULL);
- if (cl_err != CL_SUCCESS) return cl_err;
-
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem), (void*)&aux_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_mem), (void*)&out_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 2, sizeof(cl_int), (void*)&xsize);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 3, sizeof(cl_int), (void*)&ysize);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 4, sizeof(cl_int), (void*)&roi->x);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 5, sizeof(cl_int), (void*)&roi->y);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 6, sizeof(cl_int), (void*)&block_count_x);
- if (cl_err != CL_SUCCESS) return cl_err;
+ CL_CHECK;
+
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem), (void*)&aux_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_mem), (void*)&out_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 2, sizeof(cl_int), (void*)&xsize);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 3, sizeof(cl_int), (void*)&ysize);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 4, sizeof(cl_int), (void*)&roi->x);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 5, sizeof(cl_int), (void*)&roi->y);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 6, sizeof(cl_int), (void*)&block_count_x);
+ CL_CHECK;
cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
cl_data->kernel[1], 2,
NULL, gbl_size, NULL,
0, NULL, NULL);
- if (cl_err != CL_SUCCESS) return cl_err;
- return cl_err;
+ CL_CHECK;
+
+ return FALSE;
+
+error:
+ return TRUE;
}
static gboolean
@@ -255,8 +224,8 @@ cl_process (GeglOperation *operation,
const Babl *in_format = gegl_operation_get_format (operation, "input");
const Babl *out_format = gegl_operation_get_format (operation, "output");
gint err;
- gint j;
cl_int cl_err;
+ gint j;
GeglOperationAreaFilter *op_area = GEGL_OPERATION_AREA_FILTER (operation);
GeglChantO *o = GEGL_CHANT_PROPERTIES (operation);
@@ -265,18 +234,18 @@ cl_process (GeglOperation *operation,
gint read = gegl_buffer_cl_iterator_add_2 (i, input, roi, in_format, GEGL_CL_BUFFER_READ, op_area->left, op_area->right, op_area->top, op_area->bottom, GEGL_ABYSS_NONE);
gint aux = gegl_buffer_cl_iterator_add_2 (i, NULL, roi, in_format, GEGL_CL_BUFFER_AUX, op_area->left, op_area->right, op_area->top, op_area->bottom, GEGL_ABYSS_NONE);
while (gegl_buffer_cl_iterator_next (i, &err))
- {
- if (err) return FALSE;
- for (j=0; j < i->n; j++)
{
- cl_err = cl_pixelise(i->tex[read][j], i->tex[aux][j], i->tex[0][j],&i->roi[read][j], &i->roi[0][j], o->size_x,o->size_y);
- if (cl_err != CL_SUCCESS)
- {
- g_warning("[OpenCL] Error in gegl:pixelise: %s", gegl_cl_errstring(cl_err));
- return FALSE;
- }
+ if (err) return FALSE; /* err was filled in by gegl_buffer_cl_iterator_next() */
+ for (j=0; j < i->n; j++)
+ {
+ err = cl_pixelise(i->tex[read][j], i->tex[aux][j], i->tex[0][j],&i->roi[read][j], &i->roi[0][j], o->size_x, o->size_y);
+ if (err) /* cl_pixelise() now returns a gboolean stored in err; cl_err is never assigned in this function, so it must not be tested */
+ {
+ g_warning("[OpenCL] Error in gegl:pixelize");
+ return FALSE;
+ }
+ }
}
- }
return TRUE;
}
diff --git a/operations/common/snn-mean.c b/operations/common/snn-mean.c
index 4a0877e..b76d56b 100644
--- a/operations/common/snn-mean.c
+++ b/operations/common/snn-mean.c
@@ -222,127 +222,11 @@ snn_mean (GeglBuffer *src,
#include "opencl/gegl-cl.h"
#include "buffer/gegl-buffer-cl-iterator.h"
-static const char* kernel_source =
-"float colordiff (float4 pixA, \n"
-" float4 pixB) \n"
-"{ \n"
-" float4 pix = pixA-pixB; \n"
-" pix *= pix; \n"
-" return pix.x+pix.y+pix.z; \n"
-"} \n"
-" \n"
-"__kernel void snn_mean_CL (__global const float4 *src_buf, \n"
-" int src_width, \n"
-" int src_height, \n"
-" __global float4 *dst_buf, \n"
-" int radius, \n"
-" int pairs) \n"
-"{ \n"
-" int gidx =get_global_id(0); \n"
-" int gidy =get_global_id(1); \n"
-" int offset =gidy * get_global_size(0) + gidx; \n"
-" \n"
-" __global const float4 *center_pix= \n"
-" src_buf + ((radius+gidx) + (gidy+radius)* src_width); \n"
-" float4 accumulated=0; \n"
-" \n"
-" int count=0; \n"
-" if(pairs==2) \n"
-" { \n"
-" for(int i=-radius;i<0;i++) \n"
-" { \n"
-" for(int j=-radius;j<0;j++) \n"
-" { \n"
-" __global const float4 *selected_pix = center_pix; \n"
-" float best_diff = 1000.0f; \n"
-" \n"
-" int xs[4]={ \n"
-" gidx+j+radius, gidx-j+radius, \n"
-" gidx-j+radius, gidx+j+radius \n"
-" }; \n"
-" int ys[4]={ \n"
-" gidy+i+radius, gidy-i+radius, \n"
-" gidy+i+radius, gidy-i+radius}; \n"
-" \n"
-" for (int k=0;k<4;k++) \n"
-" { \n"
-" if (xs[k] >= 0 && xs[k] < src_width && \n"
-" ys[k] >= 0 && ys[k] < src_height) \n"
-" { \n"
-" __global const float4 *tpix = \n"
-" src_buf + (xs[k] + ys[k] * src_width);\n"
-" float diff=colordiff(*tpix, *center_pix); \n"
-" if (diff < best_diff) \n"
-" { \n"
-" best_diff = diff; \n"
-" selected_pix = tpix; \n"
-" } \n"
-" } \n"
-" } \n"
-" \n"
-" accumulated += *selected_pix; \n"
-" \n"
-" ++count; \n"
-" if (i==0 && j==0) \n"
-" break; \n"
-" } \n"
-" } \n"
-" dst_buf[offset] = accumulated/count; \n"
-" return; \n"
-" } \n"
-" else if(pairs==1) \n"
-" { \n"
-" for(int i=-radius;i<=0;i++) \n"
-" { \n"
-" for(int j=-radius;j<=radius;j++) \n"
-" { \n"
-" __global const float4 *selected_pix = center_pix; \n"
-" float best_diff = 1000.0f; \n"
-" \n"
-" /* skip computations for the center pixel */ \n"
-" if (i != 0 && j != 0) \n"
-" { \n"
-" int xs[4]={ \n"
-" gidx+i+radius, gidx-i+radius, \n"
-" gidx-i+radius, gidx+i+radius \n"
-" }; \n"
-" int ys[4]={ \n"
-" gidy+j+radius, gidy-j+radius, \n"
-" gidy+j+radius, gidy-j+radius \n"
-" }; \n"
-" \n"
-" for (i=0;i<2;i++) \n"
-" { \n"
-" if (xs[i] >= 0 && xs[i] < src_width && \n"
-" ys[i] >= 0 && ys[i] < src_height) \n"
-" { \n"
-" __global const float4 *tpix = \n"
-" src_buf + (xs[i] + ys[i] * src_width);\n"
-" float diff=colordiff (*tpix, *center_pix);\n"
-" if (diff < best_diff) \n"
-" { \n"
-" best_diff = diff; \n"
-" selected_pix = tpix; \n"
-" } \n"
-" } \n"
-" } \n"
-" } \n"
-" accumulated += *selected_pix; \n"
-" ++count; \n"
-" if (i==0 && j==0) \n"
-" break; \n"
-" } \n"
-" } \n"
-" dst_buf[offset] = accumulated/count; \n"
-" return; \n"
-" } \n"
-" return; \n"
-"} \n";
-
+#include "opencl/snn-mean.cl.h"
static GeglClRunData *cl_data = NULL;
-static cl_int
+static gboolean
cl_snn_mean (cl_mem in_tex,
cl_mem out_tex,
const GeglRectangle *src_rect,
@@ -355,31 +239,38 @@ cl_snn_mean (cl_mem in_tex,
if (!cl_data)
{
- const char *kernel_name[] = {"snn_mean_CL", NULL};
- cl_data = gegl_cl_compile_and_build (kernel_source, kernel_name);
+ const char *kernel_name[] = {"snn_mean", NULL};
+ cl_data = gegl_cl_compile_and_build (snn_mean_cl_source, kernel_name);
}
-
- if (!cl_data) return 1;
+ if (!cl_data) return TRUE;
global_ws[0] = roi->width;
global_ws[1] = roi->height;
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_int), (void*)&src_rect->width);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_int), (void*)&src_rect->height);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_mem), (void*)&out_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_int), (void*)&radius);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_int), (void*)&pairs);
- if (cl_err != CL_SUCCESS) return cl_err;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_int), (void*)&src_rect->width);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_int), (void*)&src_rect->height);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_mem), (void*)&out_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_int), (void*)&radius);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_int), (void*)&pairs);
+ CL_CHECK;
cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
cl_data->kernel[0], 2,
NULL, global_ws, NULL,
0, NULL, NULL);
- if (cl_err != CL_SUCCESS) return cl_err;
+ CL_CHECK;
- return cl_err;
+ return FALSE;
+
+error:
+ return TRUE;
}
static gboolean
@@ -392,7 +283,6 @@ cl_process (GeglOperation *operation,
const Babl *out_format = gegl_operation_get_format (operation, "output");
gint err;
gint j;
- cl_int cl_err;
GeglOperationAreaFilter *op_area = GEGL_OPERATION_AREA_FILTER (operation);
GeglChantO *o = GEGL_CHANT_PROPERTIES (operation);
@@ -404,10 +294,10 @@ cl_process (GeglOperation *operation,
if (err) return FALSE;
for (j=0; j < i->n; j++)
{
- cl_err = cl_snn_mean(i->tex[read][j], i->tex[0][j], &i->roi[read][j], &i->roi[0][j], ceil(o->radius), o->pairs);
- if (cl_err != CL_SUCCESS)
+ err = cl_snn_mean(i->tex[read][j], i->tex[0][j], &i->roi[read][j], &i->roi[0][j], ceil(o->radius), o->pairs);
+ if (err)
{
- g_warning("[OpenCL] Error in gegl:snn-mean: %s", gegl_cl_errstring(cl_err));
+ g_warning("[OpenCL] Error in gegl:snn-mean");
return FALSE;
}
}
diff --git a/operations/common/vignette.c b/operations/common/vignette.c
index 59e711d..71abace 100644
--- a/operations/common/vignette.c
+++ b/operations/common/vignette.c
@@ -82,73 +82,11 @@ static float scale_to_aspect (float scale)
#include "opencl/gegl-cl.h"
-static const char* kernel_source =
-"__kernel void vignette_cl (__global const float4 *in, \n"
-" __global float4 *out, \n"
-" float4 color, \n"
-" float scale, \n"
-" float cost, \n"
-" float sint, \n"
-" int roi_x, \n"
-" int roi_y, \n"
-" int midx, \n"
-" int midy, \n"
-" int o_shape, \n"
-" float gamma, \n"
-" float length, \n"
-" float radius0, \n"
-" float rdiff) \n"
-"{ \n"
-" int gidx = get_global_id(0); \n"
-" int gidy = get_global_id(1); \n"
-" int gid = gidx + gidy * get_global_size(0); \n"
-" float strength = 0.0f; \n"
-" float u,v,costy,sinty; \n"
-" int x,y; \n"
-" x = gidx + roi_x; \n"
-" y = gidy + roi_y; \n"
-" sinty = sint * (y-midy) - midx; \n"
-" costy = cost * (y-midy) + midy; \n"
-" \n"
-" u = cost * (x-midx) - sinty; \n"
-" v = sint * (x-midx) + costy; \n"
-" \n"
-" if (length == 0.0f) \n"
-" strength = 0.0f; \n"
-" else \n"
-" { \n"
-" switch (o_shape) \n"
-" { \n"
-" case 0: \n"
-" strength = hypot ((u-midx) / scale, v-midy); \n"
-" break; \n"
-" \n"
-" case 1: \n"
-" strength = fmax (fabs(u-midx)/scale, fabs(v-midy)); \n"
-" break; \n"
-" \n"
-" case 2: \n"
-" strength = fabs (u-midx) / scale + fabs(v-midy); \n"
-" break; \n"
-" } \n"
-" strength /= length; \n"
-" strength = (strength-radius0) / rdiff; \n"
-" } \n"
-" \n"
-" if (strength < 0.0f) strength = 0.0f; \n"
-" if (strength > 1.0f) strength = 1.0f; \n"
-" \n"
-" if (gamma > 0.9999f && gamma < 2.0001f) \n"
-" strength *= strength; \n"
-" else if (gamma != 1.0f) \n"
-" strength = pow(strength, gamma); \n"
-" \n"
-" out[gid] = in[gid]*(1.0f-strength) + color * strength; \n"
-"} \n";
+#include "opencl/vignette.cl.h"
static GeglClRunData * cl_data = NULL;
-static cl_int
+static gboolean
cl_process (GeglOperation *operation,
cl_mem in_tex,
cl_mem out_tex,
@@ -200,9 +138,9 @@ cl_process (GeglOperation *operation,
if (!cl_data)
{
const char *kernel_name[] = {"vignette_cl",NULL};
- cl_data = gegl_cl_compile_and_build (kernel_source, kernel_name);
+ cl_data = gegl_cl_compile_and_build (vignette_cl_source, kernel_name);
}
- if (!cl_data) return 1;
+ if (!cl_data) return TRUE;
{
const size_t gbl_size[2] = {roi->width, roi->height};
@@ -217,31 +155,48 @@ cl_process (GeglOperation *operation,
f_color.s[2] = color[2];
f_color.s[3] = color[3];
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem), (void*)&out_tex);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_float4),(void*)&f_color);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_float), (void*)&scale);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_float), (void*)&cost);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_float), (void*)&sint);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 6, sizeof(cl_int), (void*)&roi_x);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 7, sizeof(cl_int), (void*)&roi_y);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 8, sizeof(cl_int), (void*)&midx);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 9, sizeof(cl_int), (void*)&midy);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 10, sizeof(cl_int), (void*)&shape);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 11, sizeof(cl_float), (void*)&gamma);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 12, sizeof(cl_float), (void*)&length);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 13, sizeof(cl_float), (void*)&radius0);
- cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 14, sizeof(cl_float), (void*)&rdiff);
- if (cl_err != CL_SUCCESS) return cl_err;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem), (void*)&out_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_float4),(void*)&f_color);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_float), (void*)&scale);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_float), (void*)&cost);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_float), (void*)&sint);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 6, sizeof(cl_int), (void*)&roi_x);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 7, sizeof(cl_int), (void*)&roi_y);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 8, sizeof(cl_int), (void*)&midx);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 9, sizeof(cl_int), (void*)&midy);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 10, sizeof(cl_int), (void*)&shape);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 11, sizeof(cl_float), (void*)&gamma);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 12, sizeof(cl_float), (void*)&length);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 13, sizeof(cl_float), (void*)&radius0);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 14, sizeof(cl_float), (void*)&rdiff);
+ CL_CHECK;
cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
cl_data->kernel[0], 2,
NULL, gbl_size, NULL,
0, NULL, NULL);
- if (cl_err != CL_SUCCESS) return cl_err;
+ CL_CHECK;
}
- return CL_SUCCESS;
+ return FALSE;
+
+error:
+ return TRUE;
}
static gboolean
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]