[gegl] opencl: many changes



commit a3b19bc8c3f5582818021453b24d269c0195e010
Author: Victor Oliveira <victormatheus gmail com>
Date:   Mon Jan 14 17:05:47 2013 -0200

    opencl: many changes
    
    - splitting kernels from remaining filters
    - removing unnecessary opencl barriers
    - stop using cl_int to report GEGL errors (just opencl ones)

 gegl/opencl/gegl-cl-color.c                    |    6 -
 gegl/operation/gegl-operation-point-composer.c |    6 +-
 gegl/operation/gegl-operation-point-composer.h |    2 +-
 gegl/operation/gegl-operation-point-filter.c   |    6 +-
 gegl/operation/gegl-operation-point-filter.h   |    2 +-
 opencl/levels.cl                               |   13 ++
 opencl/levels.cl.h                             |   15 ++
 opencl/motion-blur.cl                          |   75 +++++++++
 opencl/motion-blur.cl.h                        |   77 +++++++++
 opencl/noise-reduction.cl                      |   77 +++++++++
 opencl/noise-reduction.cl.h                    |   79 ++++++++++
 opencl/oilify.cl                               |  125 +++++++++++++++
 opencl/oilify.cl.h                             |  127 +++++++++++++++
 opencl/pixelize.cl                             |   48 ++++++
 opencl/pixelize.cl.h                           |   50 ++++++
 opencl/snn-mean.cl                             |  115 ++++++++++++++
 opencl/snn-mean.cl.h                           |  117 ++++++++++++++
 opencl/vignette.cl                             |   62 ++++++++
 opencl/vignette.cl.h                           |   64 ++++++++
 operations/common/bilateral-filter.c           |   46 +++---
 operations/common/box-blur.c                   |   49 ++++---
 operations/common/c2g.c                        |   92 +++++++-----
 operations/common/color-temperature.c          |   25 ++-
 operations/common/edge-laplace.c               |    3 -
 operations/common/gaussian-blur.c              |   98 +++++++-----
 operations/common/grey.c                       |    8 +-
 operations/common/levels.c                     |   46 +++---
 operations/common/motion-blur.c                |  151 +++++-------------
 operations/common/noise-reduction.c            |  148 +++++-------------
 operations/common/oilify.c                     |  199 +++++-------------------
 operations/common/opacity.c                    |    9 +-
 operations/common/pixelize.c                   |  137 +++++++----------
 operations/common/snn-mean.c                   |  160 +++----------------
 operations/common/vignette.c                   |  123 +++++----------
 34 files changed, 1505 insertions(+), 855 deletions(-)
---
diff --git a/gegl/opencl/gegl-cl-color.c b/gegl/opencl/gegl-cl-color.c
index aed1d77..3aa6c65 100644
--- a/gegl/opencl/gegl-cl-color.c
+++ b/gegl/opencl/gegl-cl-color.c
@@ -297,9 +297,6 @@ gegl_cl_color_conv (cl_mem         in_tex,
                                          in_tex, out_tex, 0, 0, size * s,
                                          0, NULL, NULL);
       CL_CHECK;
-
-      cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
-      CL_CHECK;
     }
   else
     {
@@ -316,9 +313,6 @@ gegl_cl_color_conv (cl_mem         in_tex,
                                            NULL, &size, NULL,
                                            0, NULL, NULL);
       CL_CHECK;
-
-      cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
-      CL_CHECK;
     }
 
   return FALSE;
diff --git a/gegl/operation/gegl-operation-point-composer.c b/gegl/operation/gegl-operation-point-composer.c
index 2ace71a..a95f381 100644
--- a/gegl/operation/gegl-operation-point-composer.c
+++ b/gegl/operation/gegl-operation-point-composer.c
@@ -189,7 +189,11 @@ gegl_operation_point_composer_cl_process (GeglOperation       *operation,
                 err = point_composer_class->cl_process(operation, i->tex[read][j],
                                                        (aux)? i->tex[foo][j] : NULL,
                                                        i->tex[0][j], i->size[0][j], &i->roi[0][j], level);
-                if (err) return FALSE;
+                if (err)
+                  {
+                    GEGL_NOTE (GEGL_DEBUG_OPENCL, "Error: %s", operation_class->name);
+                    return FALSE;
+                  }
               }
             else if (operation_class->cl_data)
               {
diff --git a/gegl/operation/gegl-operation-point-composer.h b/gegl/operation/gegl-operation-point-composer.h
index 9882877..5e5b0c2 100644
--- a/gegl/operation/gegl-operation-point-composer.h
+++ b/gegl/operation/gegl-operation-point-composer.h
@@ -58,7 +58,7 @@ struct _GeglOperationPointComposerClass
                         const GeglRectangle *roi,     /* rectangular region in output buffer */
                         gint                 level);
 
-  cl_int   (* cl_process) (GeglOperation       *self,
+  gboolean (* cl_process) (GeglOperation       *self,
                            cl_mem               in_tex,
                            cl_mem               aux_tex,
                            cl_mem               out_tex,
diff --git a/gegl/operation/gegl-operation-point-filter.c b/gegl/operation/gegl-operation-point-filter.c
index 6b5c0ac..b718708 100644
--- a/gegl/operation/gegl-operation-point-filter.c
+++ b/gegl/operation/gegl-operation-point-filter.c
@@ -115,7 +115,11 @@ gegl_operation_point_filter_cl_process (GeglOperation       *operation,
               {
                 err = point_filter_class->cl_process(operation, i->tex[read][j], i->tex[0][j],
                                                      i->size[0][j], &i->roi[0][j], level);
-                if (err) return FALSE;
+                if (err)
+                  {
+                    GEGL_NOTE (GEGL_DEBUG_OPENCL, "Error: %s", operation_class->name);
+                    return FALSE;
+                  }
               }
             else if (operation_class->cl_data)
               {
diff --git a/gegl/operation/gegl-operation-point-filter.h b/gegl/operation/gegl-operation-point-filter.h
index 938da7b..4fde7ef 100644
--- a/gegl/operation/gegl-operation-point-filter.h
+++ b/gegl/operation/gegl-operation-point-filter.h
@@ -59,7 +59,7 @@ struct _GeglOperationPointFilterClass
                                                         checkerboard op for
                                                         semantics */
                         gint                 level);
-  cl_int   (* cl_process) (GeglOperation       *self,
+  gboolean (* cl_process) (GeglOperation       *self,
                            cl_mem               in_tex,
                            cl_mem               out_tex,
                            size_t               global_worksize,
diff --git a/opencl/levels.cl b/opencl/levels.cl
new file mode 100644
index 0000000..80b91d2
--- /dev/null
+++ b/opencl/levels.cl
@@ -0,0 +1,13 @@
+__kernel void kernel_levels(__global const float4     *in,   /* input pixels, read at this work-item's index */
+                            __global       float4     *out,  /* output pixels, written at the same index */
+                            float in_offset,                  /* subtracted from x/y/z before scaling */
+                            float out_offset,                 /* added to x/y/z after scaling */
+                            float scale)                      /* multiplier applied between the two offsets */
+{
+  int gid = get_global_id(0);   /* one work-item per float4 element, indexed along dimension 0 */
+  float4 in_v  = in[gid];
+  float4 out_v;
+  out_v.xyz = (in_v.xyz - in_offset) * scale + out_offset;   /* linear remap of the first three components */
+  out_v.w   =  in_v.w;   /* fourth component is copied through unchanged */
+  out[gid]  =  out_v;
+}
diff --git a/opencl/levels.cl.h b/opencl/levels.cl.h
new file mode 100644
index 0000000..88e3876
--- /dev/null
+++ b/opencl/levels.cl.h
@@ -0,0 +1,15 @@
+static const char* levels_cl_source = /* OpenCL C text of kernel_levels, stored as adjacent string literals (one per kernel source line) */
+"__kernel void kernel_levels(__global const float4     *in,                    \n"
+"                            __global       float4     *out,                   \n"
+"                            float in_offset,                                  \n"
+"                            float out_offset,                                 \n"
+"                            float scale)                                      \n"
+"{                                                                             \n"
+"  int gid = get_global_id(0);                                                 \n"
+"  float4 in_v  = in[gid];                                                     \n"
+"  float4 out_v;                                                               \n"
+"  out_v.xyz = (in_v.xyz - in_offset) * scale + out_offset;                    \n"
+"  out_v.w   =  in_v.w;                                                        \n"
+"  out[gid]  =  out_v;                                                         \n"
+"}                                                                             \n"
+;
diff --git a/opencl/motion-blur.cl b/opencl/motion-blur.cl
new file mode 100644
index 0000000..a4cd49c
--- /dev/null
+++ b/opencl/motion-blur.cl
@@ -0,0 +1,75 @@
+int CLAMP(int val,int lo,int hi)   /* limits val to the range [lo, hi] */
+{
+    return (val < lo) ? lo : ((hi < val) ? hi : val);   /* the lo test runs first, so lo is returned whenever val < lo */
+}
+
+float4 get_pixel_color(const __global float4 *in_buf,   /* pixel buffer indexed row-major, rect_width elements per row */
+                       int     rect_width,
+                       int     rect_height,
+                       int     rect_x,   /* subtracted from x to obtain the buffer column */
+                       int     rect_y,   /* subtracted from y to obtain the buffer row */
+                       int     x,
+                       int     y)
+{
+    int ix = x - rect_x;
+    int iy = y - rect_y;
+
+    ix = CLAMP(ix, 0, rect_width-1);    /* columns outside the buffer reuse the nearest edge column */
+    iy = CLAMP(iy, 0, rect_height-1);   /* rows outside the buffer reuse the nearest edge row */
+
+    return in_buf[iy * rect_width + ix];
+}
+
+__kernel void motion_blur(const __global float4 *src_buf,   /* source pixels, sampled through get_pixel_color() */
+                          int     src_width,
+                          int     src_height,
+                          int     src_x,
+                          int     src_y,
+                          __global float4 *dst_buf,   /* output, one element per work-item */
+                          int     dst_x,   /* added to the work-item x index to form the sampling x coordinate */
+                          int     dst_y,   /* added to the work-item y index to form the sampling y coordinate */
+                          int     num_steps,   /* number of samples averaged per output pixel */
+                          float   offset_x,   /* x extent of the sampling segment, scaled by t below */
+                          float   offset_y)   /* y extent of the sampling segment, scaled by t below */
+{
+    int gidx = get_global_id(0);
+    int gidy = get_global_id(1);
+
+    float4 sum = 0.0f;
+    int px = gidx + dst_x;
+    int py = gidy + dst_y;
+
+    for(int step = 0; step < num_steps; ++step)
+    {
+        float t = num_steps == 1 ? 0.0f :   /* a single step samples the pixel position itself */
+            step / (float)(num_steps - 1) - 0.5f;   /* otherwise t runs from -0.5 to +0.5 across the steps */
+
+        float xx = px + t * offset_x;
+        float yy = py + t * offset_y;
+
+        int   ix = (int)floor(xx);
+        int   iy = (int)floor(yy);
+
+        float dx = xx - floor(xx);   /* fractional part of the sample position in x */
+        float dy = yy - floor(yy);   /* fractional part of the sample position in y */
+
+        float4 mixy0,mixy1,pix0,pix1,pix2,pix3;
+
+        pix0 = get_pixel_color(src_buf, src_width,   /* the four pixels surrounding (xx, yy) */
+            src_height, src_x, src_y, ix,   iy);
+        pix1 = get_pixel_color(src_buf, src_width,
+            src_height, src_x, src_y, ix+1, iy);
+        pix2 = get_pixel_color(src_buf, src_width,
+            src_height, src_x, src_y, ix,   iy+1);
+        pix3 = get_pixel_color(src_buf, src_width,
+            src_height, src_x, src_y, ix+1, iy+1);
+
+        mixy0 = dy * (pix2 - pix0) + pix0;   /* blend along y in column ix */
+        mixy1 = dy * (pix3 - pix1) + pix1;   /* blend along y in column ix+1 */
+
+        sum  += dx * (mixy1 - mixy0) + mixy0;   /* blend the two columns along x and accumulate */
+    }
+
+    dst_buf[gidy * get_global_size(0) + gidx] =   /* destination row pitch is the global size in dimension 0 */
+        sum / num_steps;   /* mean of all samples */
+}
diff --git a/opencl/motion-blur.cl.h b/opencl/motion-blur.cl.h
new file mode 100644
index 0000000..807fdeb
--- /dev/null
+++ b/opencl/motion-blur.cl.h
@@ -0,0 +1,77 @@
+static const char* motion_blur_cl_source = /* OpenCL C text of CLAMP, get_pixel_color and the motion_blur kernel, stored as adjacent string literals */
+"int CLAMP(int val,int lo,int hi)                                              \n"
+"{                                                                             \n"
+"    return (val < lo) ? lo : ((hi < val) ? hi : val);                         \n"
+"}                                                                             \n"
+"                                                                              \n"
+"float4 get_pixel_color(const __global float4 *in_buf,                         \n"
+"                       int     rect_width,                                    \n"
+"                       int     rect_height,                                   \n"
+"                       int     rect_x,                                        \n"
+"                       int     rect_y,                                        \n"
+"                       int     x,                                             \n"
+"                       int     y)                                             \n"
+"{                                                                             \n"
+"    int ix = x - rect_x;                                                      \n"
+"    int iy = y - rect_y;                                                      \n"
+"                                                                              \n"
+"    ix = CLAMP(ix, 0, rect_width-1);                                          \n"
+"    iy = CLAMP(iy, 0, rect_height-1);                                         \n"
+"                                                                              \n"
+"    return in_buf[iy * rect_width + ix];                                      \n"
+"}                                                                             \n"
+"                                                                              \n"
+"__kernel void motion_blur(const __global float4 *src_buf,                     \n"
+"                          int     src_width,                                  \n"
+"                          int     src_height,                                 \n"
+"                          int     src_x,                                      \n"
+"                          int     src_y,                                      \n"
+"                          __global float4 *dst_buf,                           \n"
+"                          int     dst_x,                                      \n"
+"                          int     dst_y,                                      \n"
+"                          int     num_steps,                                  \n"
+"                          float   offset_x,                                   \n"
+"                          float   offset_y)                                   \n"
+"{                                                                             \n"
+"    int gidx = get_global_id(0);                                              \n"
+"    int gidy = get_global_id(1);                                              \n"
+"                                                                              \n"
+"    float4 sum = 0.0f;                                                        \n"
+"    int px = gidx + dst_x;                                                    \n"
+"    int py = gidy + dst_y;                                                    \n"
+"                                                                              \n"
+"    for(int step = 0; step < num_steps; ++step)                               \n"
+"    {                                                                         \n"
+"        float t = num_steps == 1 ? 0.0f :                                     \n"
+"            step / (float)(num_steps - 1) - 0.5f;                             \n"
+"                                                                              \n"
+"        float xx = px + t * offset_x;                                         \n"
+"        float yy = py + t * offset_y;                                         \n"
+"                                                                              \n"
+"        int   ix = (int)floor(xx);                                            \n"
+"        int   iy = (int)floor(yy);                                            \n"
+"                                                                              \n"
+"        float dx = xx - floor(xx);                                            \n"
+"        float dy = yy - floor(yy);                                            \n"
+"                                                                              \n"
+"        float4 mixy0,mixy1,pix0,pix1,pix2,pix3;                               \n"
+"                                                                              \n"
+"        pix0 = get_pixel_color(src_buf, src_width,                            \n"
+"            src_height, src_x, src_y, ix,   iy);                              \n"
+"        pix1 = get_pixel_color(src_buf, src_width,                            \n"
+"            src_height, src_x, src_y, ix+1, iy);                              \n"
+"        pix2 = get_pixel_color(src_buf, src_width,                            \n"
+"            src_height, src_x, src_y, ix,   iy+1);                            \n"
+"        pix3 = get_pixel_color(src_buf, src_width,                            \n"
+"            src_height, src_x, src_y, ix+1, iy+1);                            \n"
+"                                                                              \n"
+"        mixy0 = dy * (pix2 - pix0) + pix0;                                    \n"
+"        mixy1 = dy * (pix3 - pix1) + pix1;                                    \n"
+"                                                                              \n"
+"        sum  += dx * (mixy1 - mixy0) + mixy0;                                 \n"
+"    }                                                                         \n"
+"                                                                              \n"
+"    dst_buf[gidy * get_global_size(0) + gidx] =                               \n"
+"        sum / num_steps;                                                      \n"
+"}                                                                             \n"
+;
diff --git a/opencl/noise-reduction.cl b/opencl/noise-reduction.cl
new file mode 100644
index 0000000..312087b
--- /dev/null
+++ b/opencl/noise-reduction.cl
@@ -0,0 +1,77 @@
+#define NEIGHBOURS 8   /* number of entries in the kernel's offsets[] table */
+#define AXES       (NEIGHBOURS/2)   /* pairs formed by table entry k and entry SYMMETRY(k) */
+
+#define POW2(a) ((a)*(a))
+
+#define GEN_METRIC(before, center, after) POW2((center) * 2 - (before) - (after))   /* squared (2*center - before - after) */
+
+#define BAIL_CONDITION(new,original) ((new) < (original))   /* holds where the new metric is strictly smaller */
+
+#define SYMMETRY(a)  (NEIGHBOURS - (a) - 1)   /* mirrors a table index: 0<->7, 1<->6, 2<->5, 3<->4 */
+
+#define O(u,v) (((u)+((v) * (src_stride))))   /* element offset of (u,v); relies on src_stride being in scope at the point of use */
+
+__kernel void noise_reduction_cl (__global       float4 *src_buf,   /* source pixels, src_stride elements per row */
+                                  int src_stride,
+                                  __global       float4 *dst_buf,   /* destination pixels, dst_stride elements per row */
+                                  int dst_stride)
+{
+    int gidx = get_global_id(0);
+    int gidy = get_global_id(1);
+
+    __global float4 *center_pix = src_buf + (gidy + 1) * src_stride + gidx + 1;   /* source is read one row and one column in from the work-item index; presumably src carries a 1-pixel border - confirm with the host code */
+    int dst_offset = dst_stride * gidy + gidx;
+
+    int offsets[NEIGHBOURS] = {   /* element offsets of the 8 surrounding pixels, ordered so entry k and SYMMETRY(k) are opposite */
+        O(-1, -1), O( 0, -1), O( 1, -1),
+        O(-1,  0),            O( 1,  0),
+        O(-1,  1), O( 0,  1), O( 1,  1)
+    };
+
+    float4 sum;
+    int4   count;
+    float4 cur;
+    float4 metric_reference[AXES];
+
+    for (int axis = 0; axis < AXES; axis++)   /* reference metric per axis, computed with the unmodified centre pixel */
+      {
+        float4 before_pix = *(center_pix + offsets[axis]);
+        float4 after_pix  = *(center_pix + offsets[SYMMETRY(axis)]);
+        metric_reference[axis] = GEN_METRIC (before_pix, *center_pix, after_pix);
+      }
+
+    cur = sum = *center_pix;   /* the centre pixel is always part of the average */
+    count = 1;
+
+    for (int direction = 0; direction < NEIGHBOURS; direction++)
+      {
+        float4 pix   = *(center_pix + offsets[direction]);
+        float4 value = (pix + cur) * (0.5f);   /* candidate: midpoint of the centre and this neighbour */
+        int    axis;
+        int4   mask = {1, 1, 1, 0};   /* fourth component starts cleared, so it is never accumulated below */
+
+        for (axis = 0; axis < AXES; axis++)
+          {
+            float4 before_pix = *(center_pix + offsets[axis]);
+            float4 after_pix  = *(center_pix + offsets[SYMMETRY(axis)]);
+
+            float4 metric_new = GEN_METRIC (before_pix,
+                                            value,
+                                            after_pix);
+            mask = BAIL_CONDITION (metric_new, metric_reference[axis]) & mask;   /* a component stays set only while every axis so far gave a smaller metric */
+          }
+        sum   += mask >0 ? value : 0;   /* accumulate the candidate only in components that passed on all axes */
+        count += mask >0 ? 1     : 0;
+      }
+    dst_buf[dst_offset]   = (sum/convert_float4(count));   /* per-component mean of the centre and the accepted candidates */
+    dst_buf[dst_offset].w = cur.w;   /* fourth component is taken from the centre pixel unchanged */
+}
+__kernel void transfer(__global float4 * in,   /* source, in_width elements per row */
+              int               in_width,
+              __global float4 * out)   /* destination, get_global_size(0) elements per row */
+{
+    int gidx = get_global_id(0);
+    int gidy = get_global_id(1);
+    int width = get_global_size(0);
+    out[gidy * width + gidx] = in[gidy * in_width + gidx];   /* copies element (gidx, gidy) between buffers with different row pitches */
+}
diff --git a/opencl/noise-reduction.cl.h b/opencl/noise-reduction.cl.h
new file mode 100644
index 0000000..969fe2a
--- /dev/null
+++ b/opencl/noise-reduction.cl.h
@@ -0,0 +1,79 @@
+static const char* noise_reduction_cl_source = /* OpenCL C text of the noise_reduction_cl and transfer kernels plus their macros, stored as adjacent string literals */
+"#define NEIGHBOURS 8                                                          \n"
+"#define AXES       (NEIGHBOURS/2)                                             \n"
+"                                                                              \n"
+"#define POW2(a) ((a)*(a))                                                     \n"
+"                                                                              \n"
+"#define GEN_METRIC(before, center, after) POW2((center) * 2 - (before) - (after))\n"
+"                                                                              \n"
+"#define BAIL_CONDITION(new,original) ((new) < (original))                     \n"
+"                                                                              \n"
+"#define SYMMETRY(a)  (NEIGHBOURS - (a) - 1)                                   \n"
+"                                                                              \n"
+"#define O(u,v) (((u)+((v) * (src_stride))))                                   \n"
+"                                                                              \n"
+"__kernel void noise_reduction_cl (__global       float4 *src_buf,             \n"
+"                                  int src_stride,                             \n"
+"                                  __global       float4 *dst_buf,             \n"
+"                                  int dst_stride)                             \n"
+"{                                                                             \n"
+"    int gidx = get_global_id(0);                                              \n"
+"    int gidy = get_global_id(1);                                              \n"
+"                                                                              \n"
+"    __global float4 *center_pix = src_buf + (gidy + 1) * src_stride + gidx + 1;\n"
+"    int dst_offset = dst_stride * gidy + gidx;                                \n"
+"                                                                              \n"
+"    int offsets[NEIGHBOURS] = {                                               \n"
+"        O(-1, -1), O( 0, -1), O( 1, -1),                                      \n"
+"        O(-1,  0),            O( 1,  0),                                      \n"
+"        O(-1,  1), O( 0,  1), O( 1,  1)                                       \n"
+"    };                                                                        \n"
+"                                                                              \n"
+"    float4 sum;                                                               \n"
+"    int4   count;                                                             \n"
+"    float4 cur;                                                               \n"
+"    float4 metric_reference[AXES];                                            \n"
+"                                                                              \n"
+"    for (int axis = 0; axis < AXES; axis++)                                   \n"
+"      {                                                                       \n"
+"        float4 before_pix = *(center_pix + offsets[axis]);                    \n"
+"        float4 after_pix  = *(center_pix + offsets[SYMMETRY(axis)]);          \n"
+"        metric_reference[axis] = GEN_METRIC (before_pix, *center_pix, after_pix);\n"
+"      }                                                                       \n"
+"                                                                              \n"
+"    cur = sum = *center_pix;                                                  \n"
+"    count = 1;                                                                \n"
+"                                                                              \n"
+"    for (int direction = 0; direction < NEIGHBOURS; direction++)              \n"
+"      {                                                                       \n"
+"        float4 pix   = *(center_pix + offsets[direction]);                    \n"
+"        float4 value = (pix + cur) * (0.5f);                                  \n"
+"        int    axis;                                                          \n"
+"        int4   mask = {1, 1, 1, 0};                                           \n"
+"                                                                              \n"
+"        for (axis = 0; axis < AXES; axis++)                                   \n"
+"          {                                                                   \n"
+"            float4 before_pix = *(center_pix + offsets[axis]);                \n"
+"            float4 after_pix  = *(center_pix + offsets[SYMMETRY(axis)]);      \n"
+"                                                                              \n"
+"            float4 metric_new = GEN_METRIC (before_pix,                       \n"
+"                                            value,                            \n"
+"                                            after_pix);                       \n"
+"            mask = BAIL_CONDITION (metric_new, metric_reference[axis]) & mask;\n"
+"          }                                                                   \n"
+"        sum   += mask >0 ? value : 0;                                         \n"
+"        count += mask >0 ? 1     : 0;                                         \n"
+"      }                                                                       \n"
+"    dst_buf[dst_offset]   = (sum/convert_float4(count));                      \n"
+"    dst_buf[dst_offset].w = cur.w;                                            \n"
+"}                                                                             \n"
+"__kernel void transfer(__global float4 * in,                                  \n"
+"              int               in_width,                                     \n"
+"              __global float4 * out)                                          \n"
+"{                                                                             \n"
+"    int gidx = get_global_id(0);                                              \n"
+"    int gidy = get_global_id(1);                                              \n"
+"    int width = get_global_size(0);                                           \n"
+"    out[gidy * width + gidx] = in[gidy * in_width + gidx];                    \n"
+"}                                                                             \n"
+;
diff --git a/opencl/oilify.cl b/opencl/oilify.cl
new file mode 100644
index 0000000..29237bf
--- /dev/null
+++ b/opencl/oilify.cl
@@ -0,0 +1,125 @@
+/* two small different kernels are better than one big */
+
+#define NUM_INTENSITIES 256
+
+/* Per-channel oilify: for every output pixel, histogram each of the four
+ * channels over a disc of radius mask_radius and emit, per channel, the
+ * weighted mean bin (weight = (count / max count) ^ exponent) divided by
+ * (bins - 1).  `in` is read with a row pitch of get_global_size(0) +
+ * 2*mask_radius, `out` is written with a pitch of get_global_size(0).
+ * Fixes: channel values are clamped to [0,1] and the bin count to
+ * NUM_INTENSITIES so no pixel value or `intensities` setting can index
+ * outside hist[]; float literals avoid implicit double arithmetic. */
+kernel void kernel_oilify(global float4 *in,
+                             global float4 *out,
+                             const int mask_radius,
+                             const int intensities,
+                             const float exponent)
+{
+  int gidx = get_global_id(0);
+  int gidy = get_global_id(1);
+  int x = gidx + mask_radius;
+  int y = gidy + mask_radius;
+  int dst_width = get_global_size(0);
+  int src_width = dst_width + mask_radius * 2;
+  /* never use more bins than hist[] holds, and always at least one */
+  int bins = clamp(intensities, 1, NUM_INTENSITIES);
+  float4 hist[NUM_INTENSITIES];
+  float4 hist_max = 1.0f;
+  int i, j;
+  int radius_sq = mask_radius * mask_radius;
+  for (i = 0; i < bins; i++)
+    hist[i] = 0.0f;
+
+  for (i = -mask_radius; i <= mask_radius; i++)
+    for (j = -mask_radius; j <= mask_radius; j++)
+      if (i*i + j*j <= radius_sq)
+        {
+          /* clamping first keeps every bin index inside [0, bins-1] */
+          float4 p = clamp(in[x + i + (y + j) * src_width], 0.0f, 1.0f);
+          int4 bin = convert_int4(p * (float)(bins - 1));
+          hist[bin.x].x += 1.0f;
+          hist[bin.y].y += 1.0f;
+          hist[bin.z].z += 1.0f;
+          hist[bin.w].w += 1.0f;
+        }
+
+  /* hist_max starts at 1, so the ratio below never divides by zero */
+  for (i = 0; i < bins; i++)
+    hist_max = fmax(hist_max, hist[i]);
+
+  float4 div = 0.0f;
+  float4 sum = 0.0f;
+  /* weighted mean of the bin indices, computed per channel */
+  for (i = 0; i < bins; i++)
+  {
+    float4 weight = pow(hist[i] / hist_max, (float4)exponent);
+    sum += weight * (float4)i;
+    div += weight;
+  }
+  out[gidx + gidy * dst_width] = sum / div / (float)(bins - 1);
+}
+
+/* Luma-binned oilify: pixels within mask_radius of the output pixel are binned
+ * by 0.299*x + 0.587*y + 0.114*z; the result is the per-bin mean colour
+ * averaged with weights (count / max count) ^ exponent.  `in` is read with a
+ * row pitch of get_global_size(0) + 2*mask_radius, `out` with get_global_size(0).
+ * Fixes: luma is clamped to [0,1] and the bin count to NUM_INTENSITIES so no
+ * pixel value or `intensities` setting can index outside the local arrays;
+ * float literals avoid implicit double arithmetic (optional in OpenCL). */
+kernel void kernel_oilify_inten(global float4 *in,
+                             global float4 *out,
+                             const int mask_radius,
+                             const int intensities,
+                             const float exponent)
+{
+  int gidx = get_global_id(0);
+  int gidy = get_global_id(1);
+  int x = gidx + mask_radius;
+  int y = gidy + mask_radius;
+  int dst_width = get_global_size(0);
+  int src_width = dst_width + mask_radius * 2;
+  int bins = clamp(intensities, 1, NUM_INTENSITIES);
+  float4 cumulative_rgb[NUM_INTENSITIES];
+  int hist_inten[NUM_INTENSITIES], inten_max;
+  int i, j;
+  int radius_sq = mask_radius * mask_radius;
+  for (i = 0; i < bins; i++)
+  {
+    hist_inten[i] = 0;
+    cumulative_rgb[i] = 0.0f;
+  }
+  for (i = -mask_radius; i <= mask_radius; i++)
+    for (j = -mask_radius; j <= mask_radius; j++)
+      if (i*i + j*j <= radius_sq)
+        {
+          float4 temp_pixel = in[x + i + (y + j) * src_width];
+          float luma = 0.299f * temp_pixel.x
+                     + 0.587f * temp_pixel.y
+                     + 0.114f * temp_pixel.z;
+          /* clamping first keeps the bin index inside [0, bins-1] */
+          int intensity = (int)(clamp(luma, 0.0f, 1.0f) * (float)(bins - 1));
+          hist_inten[intensity] += 1;
+          cumulative_rgb[intensity] += temp_pixel;
+        }
+
+  /* largest bin population; starts at 1 so the ratio below is well defined */
+  inten_max = 1;
+  for (i = 0; i < bins; i++)
+    inten_max = max(inten_max, hist_inten[i]);
+
+  float div = 0.0f;
+  float4 color = 0.0f;
+  for (i = 0; i < bins; i++)
+  {
+    if (hist_inten[i] > 0)
+    {
+      float ratio = (float)(hist_inten[i]) / (float)(inten_max);
+      float weight = pow(ratio, exponent);
+      float mult_inten = weight / (float)(hist_inten[i]);
+      div += weight;
+      color += mult_inten * cumulative_rgb[i];
+    }
+  }
+  out[gidx + gidy * dst_width] = color/div;
+}
diff --git a/opencl/oilify.cl.h b/opencl/oilify.cl.h
new file mode 100644
index 0000000..b1d3f11
--- /dev/null
+++ b/opencl/oilify.cl.h
@@ -0,0 +1,127 @@
+static const char* oilify_cl_source = /* OpenCL C program text held as one C string; it defines kernel_oilify and kernel_oilify_inten. NOTE(review): appears to mirror opencl/oilify.cl -- confirm before editing by hand. */
+"/* two small different kernels are better than one big */                     \n"
+"                                                                              \n"
+"#define NUM_INTENSITIES 256                                                   \n" /* fixed length of the per-work-item histogram arrays declared below */
+"                                                                              \n"
+"kernel void kernel_oilify(global float4 *in,                                  \n" /* variant with one histogram per component (x, y, z, w); output is the weighted mean bin index divided by (intensities - 1) */
+"                             global float4 *out,                              \n"
+"                             const int mask_radius,                           \n"
+"                             const int intensities,                           \n"
+"                             const float exponent)                            \n"
+"{                                                                             \n"
+"  int gidx = get_global_id(0);                                                \n"
+"  int gidy = get_global_id(1);                                                \n"
+"  int x = gidx + mask_radius;                                                 \n"
+"  int y = gidy + mask_radius;                                                 \n"
+"  int dst_width = get_global_size(0);                                         \n"
+"  int src_width = dst_width + mask_radius * 2;                                \n"
+"  float4 hist[NUM_INTENSITIES];                                               \n"
+"  float4 hist_max = 1.0;                                                      \n"
+"  int i, j, intensity;                                                        \n"
+"  int radius_sq = mask_radius * mask_radius;                                  \n"
+"  float4 temp_pixel;                                                          \n"
+"  for (i = 0; i < intensities; i++)                                           \n"
+"    hist[i] = 0.0;                                                            \n"
+"                                                                              \n"
+"  for (i = -mask_radius; i <= mask_radius; i++)                               \n"
+"  {                                                                           \n"
+"    for (j = -mask_radius; j <= mask_radius; j++)                             \n"
+"      {                                                                       \n"
+"        if (i*i + j*j <= radius_sq)                                           \n"
+"          {                                                                   \n"
+"            temp_pixel = in[x + i + (y + j) * src_width];                     \n"
+"            hist[(int)(temp_pixel.x * (intensities - 1))].x+=1;               \n" /* NOTE(review): the bin index is not range-checked; assumes components lie in [0, 1] and intensities <= NUM_INTENSITIES -- confirm the host guarantees this */
+"            hist[(int)(temp_pixel.y * (intensities - 1))].y+=1;               \n"
+"            hist[(int)(temp_pixel.z * (intensities - 1))].z+=1;               \n"
+"            hist[(int)(temp_pixel.w * (intensities - 1))].w+=1;               \n"
+"          }                                                                   \n"
+"      }                                                                       \n"
+"  }                                                                           \n"
+"                                                                              \n"
+"  for (i = 0; i < intensities; i++) {                                         \n"
+"    if(hist_max.x < hist[i].x)                                                \n"
+"      hist_max.x = hist[i].x;                                                 \n"
+"    if(hist_max.y < hist[i].y)                                                \n"
+"      hist_max.y = hist[i].y;                                                 \n"
+"    if(hist_max.z < hist[i].z)                                                \n"
+"      hist_max.z = hist[i].z;                                                 \n"
+"    if(hist_max.w < hist[i].w)                                                \n"
+"      hist_max.w = hist[i].w;                                                 \n"
+"  }                                                                           \n"
+"  float4 div = 0.0;                                                           \n"
+"  float4 sum = 0.0;                                                           \n"
+"  float4 ratio, weight;                                                       \n"
+"  for (i = 0; i < intensities; i++)                                           \n"
+"  {                                                                           \n"
+"    ratio = hist[i] / hist_max;                                               \n"
+"    weight = pow(ratio, (float4)exponent);                                    \n"
+"    sum += weight * (float4)i;                                                \n"
+"    div += weight;                                                            \n"
+"  }                                                                           \n"
+"  out[gidx + gidy * dst_width] = sum / div / (float)(intensities - 1);        \n"
+"}                                                                             \n"
+"                                                                              \n"
+"kernel void kernel_oilify_inten(global float4 *in,                            \n" /* variant with a single histogram keyed by a 0.299/0.587/0.114 weighted sum of x, y, z; output is the weighted average of the colours accumulated per bin */
+"                             global float4 *out,                              \n"
+"                             const int mask_radius,                           \n"
+"                             const int intensities,                           \n"
+"                             const float exponent)                            \n"
+"{                                                                             \n"
+"  int gidx = get_global_id(0);                                                \n"
+"  int gidy = get_global_id(1);                                                \n"
+"  int x = gidx + mask_radius;                                                 \n"
+"  int y = gidy + mask_radius;                                                 \n"
+"  int dst_width = get_global_size(0);                                         \n"
+"  int src_width = dst_width + mask_radius * 2;                                \n"
+"  float4 cumulative_rgb[NUM_INTENSITIES];                                     \n"
+"  int hist_inten[NUM_INTENSITIES], inten_max;                                 \n"
+"  int i, j, intensity;                                                        \n"
+"  int radius_sq = mask_radius * mask_radius;                                  \n"
+"  float4 temp_pixel;                                                          \n"
+"  for (i = 0; i < intensities; i++)                                           \n"
+"  {                                                                           \n"
+"    hist_inten[i] = 0;                                                        \n"
+"    cumulative_rgb[i] = 0.0;                                                  \n"
+"  }                                                                           \n"
+"  for (i = -mask_radius; i <= mask_radius; i++)                               \n"
+"  {                                                                           \n"
+"    for (j = -mask_radius; j <= mask_radius; j++)                             \n"
+"      {                                                                       \n"
+"        if (i*i + j*j <= radius_sq)                                           \n"
+"          {                                                                   \n"
+"            temp_pixel = in[x + i + (y + j) * src_width];                     \n"
+"            /*Calculate intensity on the fly, GPU does it fast*/              \n"
+"            intensity = (int)((0.299 * temp_pixel.x                           \n"
+"                      +0.587 * temp_pixel.y                                   \n"
+"                      +0.114 * temp_pixel.z) * (float)(intensities-1));       \n"
+"            hist_inten[intensity] += 1;                                       \n"
+"            cumulative_rgb[intensity] += temp_pixel;                          \n"
+"          }                                                                   \n"
+"      }                                                                       \n"
+"  }                                                                           \n"
+"  inten_max = 1;                                                              \n"
+"                                                                              \n"
+"  /* calculated maximums */                                                   \n"
+"  for (i = 0; i < intensities; i++) {                                         \n"
+"    if(hist_inten[i] > inten_max)                                             \n"
+"      inten_max = hist_inten[i];                                              \n"
+"  }                                                                           \n"
+"  float div = 0.0;                                                            \n"
+"  float ratio, weight, mult_inten;                                            \n"
+"                                                                              \n"
+"  float4 color = 0.0;                                                         \n"
+"  for (i = 0; i < intensities; i++)                                           \n"
+"  {                                                                           \n"
+"    if (hist_inten[i] > 0)                                                    \n"
+"    {                                                                         \n"
+"      ratio = (float)(hist_inten[i]) / (float)(inten_max);                    \n"
+"      weight = pow(ratio, exponent);                                          \n"
+"      mult_inten = weight / (float)(hist_inten[i]);                           \n"
+"                                                                              \n"
+"      div += weight;                                                          \n"
+"      color += mult_inten * cumulative_rgb[i];                                \n"
+"    }                                                                         \n"
+"  }                                                                           \n"
+"  out[gidx + gidy * dst_width] = color/div;                                   \n"
+"}                                                                             \n"
+;
diff --git a/opencl/pixelize.cl b/opencl/pixelize.cl
new file mode 100644
index 0000000..2e22476
--- /dev/null
+++ b/opencl/pixelize.cl
@@ -0,0 +1,48 @@
+__kernel void calc_block_color(__global float4 *in,
+                             __global float4 *out,
+                             int xsize,
+                             int ysize,
+                             int roi_x,
+                             int roi_y,
+                             int line_width,
+                             int block_count_x )
+{
+    /* One work-item per block: the global id is the block's column / row. */
+    const int block_col = get_global_id(0);
+    const int block_row = get_global_id(1);
+
+    /* First pixel of the block as x / y offsets into `in` (row stride line_width). */
+    const int first_x = (roi_x / xsize + block_col) * xsize + xsize - roi_x;
+    const int first_y = (roi_y / ysize + block_row) * ysize + ysize - roi_y;
+    const int end_x   = first_x + xsize;
+    const int end_y   = first_y + ysize;
+
+    /* Sum the xsize * ysize pixels row by row, then scale by 1 / pixel count. */
+    float4 total = 0.0f;
+    int row = first_y;
+    while (row < end_y)
+    {
+        for (int col = first_x; col < end_x; ++col)
+            total += in[row * line_width + col];
+        ++row;
+    }
+
+    out[block_row * block_count_x + block_col] = total * (1.0f / (xsize * ysize));
+}
+
+__kernel void kernel_pixelise (__global float4 *in,
+                             __global float4 *out,
+                             int xsize,
+                             int ysize,
+                             int roi_x,
+                             int roi_y,
+                             int block_count_x)
+{
+    /* Each output pixel copies the entry of `in` for the block that contains it. */
+    const int px = get_global_id(0);
+    const int py = get_global_id(1);
+    const int block_col = (px + roi_x) / xsize - roi_x / xsize;
+    const int block_row = (py + roi_y) / ysize - roi_y / ysize;
+    const int out_stride = get_global_size(0);   /* output rows are one global width apart */
+    out[py * out_stride + px] = in[block_row * block_count_x + block_col];
+}
diff --git a/opencl/pixelize.cl.h b/opencl/pixelize.cl.h
new file mode 100644
index 0000000..3715450
--- /dev/null
+++ b/opencl/pixelize.cl.h
@@ -0,0 +1,50 @@
+static const char* pixelize_cl_source = /* OpenCL C program text held as one C string; it defines calc_block_color and kernel_pixelise. NOTE(review): appears to mirror opencl/pixelize.cl -- confirm before editing by hand. */
+"__kernel void calc_block_color(__global float4 *in,                           \n" /* first kernel: one work-item per block, writes the mean of that block's xsize * ysize pixels */
+"                             __global float4 *out,                            \n"
+"                             int xsize,                                       \n"
+"                             int ysize,                                       \n"
+"                             int roi_x,                                       \n"
+"                             int roi_y,                                       \n"
+"                             int line_width,                                  \n"
+"                             int block_count_x )                              \n"
+"{                                                                             \n"
+"    int gidx = get_global_id(0);                                              \n"
+"    int gidy = get_global_id(1);                                              \n"
+"    int cx = roi_x / xsize + gidx;                                            \n"
+"    int cy = roi_y / ysize + gidy;                                            \n"
+"                                                                              \n"
+"    float weight   = 1.0f / (xsize * ysize);                                  \n"
+"                                                                              \n"
+"    int px = cx * xsize + xsize - roi_x;                                      \n"
+"    int py = cy * ysize + ysize - roi_y;                                      \n"
+"                                                                              \n"
+"    int i,j;                                                                  \n"
+"    float4 col = 0.0f;                                                        \n"
+"    for (j = py;j < py + ysize; ++j)                                          \n"
+"    {                                                                         \n"
+"        for (i = px;i < px + xsize; ++i)                                      \n"
+"        {                                                                     \n"
+"            col += in[j * line_width + i];                                    \n"
+"        }                                                                     \n"
+"    }                                                                         \n"
+"    out[gidy * block_count_x + gidx] = col * weight;                          \n"
+"                                                                              \n"
+"}                                                                             \n"
+"                                                                              \n"
+"__kernel void kernel_pixelise (__global float4 *in,                           \n" /* second kernel: one work-item per output pixel, copies the `in` entry of the block containing it */
+"                             __global float4 *out,                            \n"
+"                             int xsize,                                       \n"
+"                             int ysize,                                       \n"
+"                             int roi_x,                                       \n"
+"                             int roi_y,                                       \n"
+"                             int block_count_x)                               \n"
+"{                                                                             \n"
+"    int gidx = get_global_id(0);                                              \n"
+"    int gidy = get_global_id(1);                                              \n"
+"                                                                              \n"
+"    int src_width  = get_global_size(0);                                      \n"
+"    int cx = (gidx + roi_x) / xsize - roi_x / xsize;                          \n"
+"    int cy = (gidy + roi_y) / ysize - roi_y / ysize;                          \n"
+"    out[gidx + gidy * src_width] = in[cx + cy * block_count_x];               \n"
+"}                                                                             \n"
+;
diff --git a/opencl/snn-mean.cl b/opencl/snn-mean.cl
new file mode 100644
index 0000000..9802b46
--- /dev/null
+++ b/opencl/snn-mean.cl
@@ -0,0 +1,115 @@
+float colordiff (float4 pixA,
+                 float4 pixB)
+{
+    const float4 delta   = pixA - pixB;      /* per-component difference */
+    const float4 squared = delta * delta;
+    return squared.x + squared.y + squared.z; /* the w component is left out */
+}
+
+__kernel void snn_mean (__global const   float4 *src_buf,    /* input, read with a +radius offset in x and y */
+                                         int src_width,      /* row stride of src_buf; also the x bound for candidates */
+                                         int src_height,     /* y bound for candidate pixels */
+                        __global         float4 *dst_buf,    /* output, row stride get_global_size(0) */
+                                         int radius,         /* half-size of the neighbourhood */
+                                         int pairs)          /* 1 or 2; any other value writes nothing */
+{
+    int gidx   =get_global_id(0);
+    int gidy   =get_global_id(1);
+    int offset =gidy * get_global_size(0) + gidx;
+    /* For every group of mirrored neighbours keep the one closest in colour to the centre, then average the kept pixels. */
+    __global const float4 *center_pix=
+        src_buf + ((radius+gidx) + (gidy+radius)* src_width);
+    float4 accumulated=0;
+
+    int count=0;
+    if(pairs==2)
+    {
+        for(int i=-radius;i<0;i++)
+        {
+            for(int j=-radius;j<0;j++)
+            {
+                __global const float4 *selected_pix = center_pix;
+                float  best_diff = 1000.0f;
+                    /* the four mirror images of offset (j, i) about the centre pixel */
+                    int xs[4]={
+                        gidx+j+radius, gidx-j+radius,
+                        gidx-j+radius, gidx+j+radius
+                    };
+                    int ys[4]={
+                        gidy+i+radius, gidy-i+radius,
+                        gidy+i+radius, gidy-i+radius};
+
+                    for (int k=0;k<4;k++)
+                    {
+                        if (xs[k] >= 0 && xs[k] < src_width &&
+                            ys[k] >= 0 && ys[k] < src_height)
+                        {
+                            __global const float4 *tpix =
+                                src_buf + (xs[k] + ys[k] * src_width);
+                            float diff=colordiff(*tpix, *center_pix);
+                            if (diff < best_diff)
+                            {
+                                best_diff = diff;
+                                selected_pix = tpix;
+                            }
+                        }
+                    }
+
+                accumulated += *selected_pix;
+
+                ++count;
+                if (i==0 && j==0) /* NOTE: i and j stay negative in this branch, so this never fires */
+                    break;
+            }
+        }
+        dst_buf[offset] = accumulated/count;
+        return;
+    }
+    else if(pairs==1)
+    {
+        for(int i=-radius;i<=0;i++)
+        {
+            for(int j=-radius;j<=radius;j++)
+            {
+                __global const float4 *selected_pix = center_pix;
+                float  best_diff = 1000.0f;
+
+                /* skip computations for the center pixel */
+                if (i != 0 && j != 0)
+                {
+                    int xs[4]={
+                        gidx+i+radius, gidx-i+radius,
+                        gidx-i+radius, gidx+i+radius
+                    };
+                    int ys[4]={
+                        gidy+j+radius, gidy-j+radius,
+                        gidy+j+radius, gidy-j+radius
+                    };
+                    /* use k here: reusing i would clobber the outer loop */
+                    for (int k=0;k<2;k++)
+                    {
+                        if (xs[k] >= 0 && xs[k] < src_width &&
+                            ys[k] >= 0 && ys[k] < src_height)
+                        {
+                            __global const float4 *tpix =
+                                src_buf + (xs[k] + ys[k] * src_width);
+                            float diff=colordiff (*tpix, *center_pix);
+                            if (diff < best_diff)
+                            {
+                                best_diff = diff;
+                                selected_pix = tpix;
+                            }
+                        }
+                    }
+                }
+                accumulated += *selected_pix;
+                ++count;
+                if (i==0 && j==0) /* stop at the centre: the remaining offsets mirror ones already visited */
+                    break;
+            }
+        }
+        dst_buf[offset] = accumulated/count;
+        return;
+    }
+    return;
+}
diff --git a/opencl/snn-mean.cl.h b/opencl/snn-mean.cl.h
new file mode 100644
index 0000000..21b4935
--- /dev/null
+++ b/opencl/snn-mean.cl.h
@@ -0,0 +1,117 @@
+static const char* snn_mean_cl_source = /* OpenCL C program text held as one C string; it defines colordiff() and the snn_mean kernel. The pairs==1 candidate loop uses its own index k so the outer row counter i is not overwritten. */
+"float colordiff (float4 pixA,                                                 \n"
+"                 float4 pixB)                                                 \n"
+"{                                                                             \n"
+"    float4 pix = pixA-pixB;                                                   \n"
+"    pix *= pix;                                                               \n"
+"    return pix.x+pix.y+pix.z;                                                 \n"
+"}                                                                             \n"
+"                                                                              \n"
+"__kernel void snn_mean (__global const   float4 *src_buf,                     \n"
+"                                         int src_width,                       \n"
+"                                         int src_height,                      \n"
+"                        __global         float4 *dst_buf,                     \n"
+"                                         int radius,                          \n"
+"                                         int pairs)                           \n"
+"{                                                                             \n"
+"    int gidx   =get_global_id(0);                                             \n"
+"    int gidy   =get_global_id(1);                                             \n"
+"    int offset =gidy * get_global_size(0) + gidx;                             \n"
+"                                                                              \n"
+"    __global const float4 *center_pix=                                        \n"
+"        src_buf + ((radius+gidx) + (gidy+radius)* src_width);                 \n"
+"    float4 accumulated=0;                                                     \n"
+"                                                                              \n"
+"    int count=0;                                                              \n"
+"    if(pairs==2)                                                              \n"
+"    {                                                                         \n"
+"        for(int i=-radius;i<0;i++)                                            \n"
+"        {                                                                     \n"
+"            for(int j=-radius;j<0;j++)                                        \n"
+"            {                                                                 \n"
+"                __global const float4 *selected_pix = center_pix;             \n"
+"                float  best_diff = 1000.0f;                                   \n"
+"                                                                              \n"
+"                    int xs[4]={                                               \n"
+"                        gidx+j+radius, gidx-j+radius,                         \n"
+"                        gidx-j+radius, gidx+j+radius                          \n"
+"                    };                                                        \n"
+"                    int ys[4]={                                               \n"
+"                        gidy+i+radius, gidy-i+radius,                         \n"
+"                        gidy+i+radius, gidy-i+radius};                        \n"
+"                                                                              \n"
+"                    for (int k=0;k<4;k++)                                     \n"
+"                    {                                                         \n"
+"                        if (xs[k] >= 0 && xs[k] < src_width &&                \n"
+"                            ys[k] >= 0 && ys[k] < src_height)                 \n"
+"                        {                                                     \n"
+"                            __global const float4 *tpix =                     \n"
+"                                src_buf + (xs[k] + ys[k] * src_width);        \n"
+"                            float diff=colordiff(*tpix, *center_pix);         \n"
+"                            if (diff < best_diff)                             \n"
+"                            {                                                 \n"
+"                                best_diff = diff;                             \n"
+"                                selected_pix = tpix;                          \n"
+"                            }                                                 \n"
+"                        }                                                     \n"
+"                    }                                                         \n"
+"                                                                              \n"
+"                accumulated += *selected_pix;                                 \n"
+"                                                                              \n"
+"                ++count;                                                      \n"
+"                if (i==0 && j==0)                                             \n"
+"                    break;                                                    \n"
+"            }                                                                 \n"
+"        }                                                                     \n"
+"        dst_buf[offset] = accumulated/count;                                  \n"
+"        return;                                                               \n"
+"    }                                                                         \n"
+"    else if(pairs==1)                                                         \n"
+"    {                                                                         \n"
+"        for(int i=-radius;i<=0;i++)                                           \n"
+"        {                                                                     \n"
+"            for(int j=-radius;j<=radius;j++)                                  \n"
+"            {                                                                 \n"
+"                __global const float4 *selected_pix = center_pix;             \n"
+"                float  best_diff = 1000.0f;                                   \n"
+"                                                                              \n"
+"                /* skip computations for the center pixel */                  \n"
+"                if (i != 0 && j != 0)                                         \n"
+"                {                                                             \n"
+"                    int xs[4]={                                               \n"
+"                        gidx+i+radius, gidx-i+radius,                         \n"
+"                        gidx-i+radius, gidx+i+radius                          \n"
+"                    };                                                        \n"
+"                    int ys[4]={                                               \n"
+"                        gidy+j+radius, gidy-j+radius,                         \n"
+"                        gidy+j+radius, gidy-j+radius                          \n"
+"                    };                                                        \n"
+"                    /* use k here: reusing i would clobber the outer loop */  \n"
+"                    for (int k=0;k<2;k++)                                     \n"
+"                    {                                                         \n"
+"                        if (xs[k] >= 0 && xs[k] < src_width &&                \n"
+"                            ys[k] >= 0 && ys[k] < src_height)                 \n"
+"                        {                                                     \n"
+"                            __global const float4 *tpix =                     \n"
+"                                src_buf + (xs[k] + ys[k] * src_width);        \n"
+"                            float diff=colordiff (*tpix, *center_pix);        \n"
+"                            if (diff < best_diff)                             \n"
+"                            {                                                 \n"
+"                                best_diff = diff;                             \n"
+"                                selected_pix = tpix;                          \n"
+"                            }                                                 \n"
+"                        }                                                     \n"
+"                    }                                                         \n"
+"                }                                                             \n"
+"                accumulated += *selected_pix;                                 \n"
+"                ++count;                                                      \n"
+"                if (i==0 && j==0)                                             \n"
+"                    break;                                                    \n"
+"            }                                                                 \n"
+"        }                                                                     \n"
+"        dst_buf[offset] = accumulated/count;                                  \n"
+"        return;                                                               \n"
+"    }                                                                         \n"
+"    return;                                                                   \n"
+"}                                                                             \n"
+;
diff --git a/opencl/vignette.cl b/opencl/vignette.cl
new file mode 100644
index 0000000..4d7561a
--- /dev/null
+++ b/opencl/vignette.cl
@@ -0,0 +1,62 @@
+__kernel void vignette_cl (__global const float4 *in,
+                           __global       float4 *out,
+                                          float4 color,    /* colour blended in as strength approaches 1 */
+                                          float  scale,    /* divides the u distance term */
+                                          float  cost,     /* rotation coefficients; presumably cos / sin of the angle */
+                                          float  sint,
+                                          int    roi_x,    /* added to the work-item id to form x / y */
+                                          int    roi_y,
+                                          int    midx,     /* centre used by the distance terms */
+                                          int    midy,
+                                          int    o_shape,  /* 0: hypot, 1: max of |du|/scale and |dv|, 2: their sum */
+                                          float  gamma,    /* exponent applied to the clamped strength */
+                                          float  length,   /* distance normaliser; 0 forces strength 0 */
+                                          float  radius0,  /* strength = (distance / length - radius0) / rdiff */
+                                          float  rdiff)
+{ /* Blends each pixel toward `color` by a strength derived from its rotated distance to (midx, midy). */
+  int gidx = get_global_id(0);
+  int gidy = get_global_id(1);
+  int gid = gidx + gidy * get_global_size(0);
+  float strength = 0.0f;
+  float u,v,costy,sinty;
+  int x,y;
+  x = gidx + roi_x;
+  y = gidy + roi_y;
+  sinty = sint * (y-midy) - midx;
+  costy = cost * (y-midy) + midy;
+
+  u = cost * (x-midx) - sinty;
+  v = sint * (x-midx) + costy;
+
+  if (length == 0.0f)
+    strength = 0.0f;
+  else
+    {
+      switch (o_shape) /* any other value keeps the initial 0.0f */
+        {
+          case 0:
+          strength = hypot ((u-midx) / scale, v-midy);
+          break;
+
+          case 1:
+          strength = fmax (fabs(u-midx)/scale, fabs(v-midy));
+          break;
+
+          case 2:
+          strength = fabs (u-midx) / scale + fabs(v-midy);
+          break;
+        }
+      strength /= length;
+      strength = (strength-radius0) / rdiff;
+    }
+
+  if (strength < 0.0f) strength = 0.0f;
+  if (strength > 1.0f) strength = 1.0f;
+  /* s * s equals pow(s, gamma) only for gamma == 2, so the shortcut is limited to gamma ~ 2; gamma == 1 needs no work. */
+  if (gamma > 1.9999f && gamma < 2.0001f)
+    strength *= strength;
+  else if (gamma != 1.0f)
+    strength = pow(strength, gamma);
+
+  out[gid] = in[gid]*(1.0f-strength) + color * strength;
+}
diff --git a/opencl/vignette.cl.h b/opencl/vignette.cl.h
new file mode 100644
index 0000000..243433c
--- /dev/null
+++ b/opencl/vignette.cl.h
@@ -0,0 +1,64 @@
+static const char* vignette_cl_source = /* OpenCL C text of the vignette_cl kernel; the same text appears in opencl/vignette.cl, keep both in sync */
+"__kernel void vignette_cl (__global const float4 *in,                         \n" /* in:  source pixels, read at the flattened global id */
+"                           __global       float4 *out,                        \n" /* out: destination pixels, written at the same index */
+"                                          float4 color,                       \n" /* value blended over the input in proportion to strength */
+"                                          float  scale,                       \n" /* divides the u axis distance only */
+"                                          float  cost,                        \n" /* cost/sint: presumably cos/sin of a rotation angle - confirm in host code */
+"                                          float  sint,                        \n"
+"                                          int    roi_x,                       \n" /* roi_x/roi_y: added to the work-item id to get x/y */
+"                                          int    roi_y,                       \n"
+"                                          int    midx,                        \n" /* midx/midy: centre the distances are measured from */
+"                                          int    midy,                        \n"
+"                                          int    o_shape,                     \n" /* selects the distance metric, see the switch below */
+"                                          float  gamma,                       \n" /* exponent applied to the clamped strength */
+"                                          float  length,                      \n" /* normalises the distance; 0 disables the effect */
+"                                          float  radius0,                     \n" /* radius0/rdiff: linear remap (d - radius0) / rdiff */
+"                                          float  rdiff)                       \n"
+"{                                                                             \n"
+"  int gidx = get_global_id(0);                                                \n"
+"  int gidy = get_global_id(1);                                                \n"
+"  int gid = gidx + gidy * get_global_size(0);                                 \n" /* row-major index into in/out */
+"  float strength = 0.0f;                                                      \n"
+"  float u,v,costy,sinty;                                                      \n"
+"  int x,y;                                                                    \n"
+"  x = gidx + roi_x;                                                           \n"
+"  y = gidy + roi_y;                                                           \n"
+"  sinty = sint * (y-midy) - midx;                                             \n"
+"  costy = cost * (y-midy) + midy;                                             \n"
+"                                                                              \n"
+"  u = cost * (x-midx) - sinty;                                                \n" /* (u,v): (x,y) transformed about (midx,midy) with cost/sint */
+"  v = sint * (x-midx) + costy;                                                \n"
+"                                                                              \n"
+"  if (length == 0.0f)                                                         \n" /* guard: avoids dividing by a zero length below */
+"    strength = 0.0f;                                                          \n"
+"  else                                                                        \n"
+"    {                                                                         \n"
+"      switch (o_shape)                                                        \n" /* any other o_shape leaves strength at 0 before the remap */
+"        {                                                                     \n"
+"          case 0:                                                             \n" /* Euclidean distance */
+"          strength = hypot ((u-midx) / scale, v-midy);                        \n"
+"          break;                                                              \n"
+"                                                                              \n"
+"          case 1:                                                             \n" /* maximum of the two axis distances */
+"          strength = fmax (fabs(u-midx)/scale, fabs(v-midy));                 \n"
+"          break;                                                              \n"
+"                                                                              \n"
+"          case 2:                                                             \n" /* sum of the two axis distances */
+"          strength = fabs (u-midx) / scale + fabs(v-midy);                    \n"
+"          break;                                                              \n"
+"        }                                                                     \n"
+"      strength /= length;                                                     \n"
+"      strength = (strength-radius0) / rdiff;                                  \n"
+"    }                                                                         \n"
+"                                                                              \n"
+"  if (strength < 0.0f) strength = 0.0f;                                       \n" /* clamp to [0,1] before applying gamma */
+"  if (strength > 1.0f) strength = 1.0f;                                       \n"
+"                                                                              \n"
+"  if (gamma > 1.9999f && gamma < 2.0001f)                                     \n" /* x*x shortcut is only valid for gamma ~= 2 (lower bound was 0.9999f, which squared every gamma in [1,2)) */
+"    strength *= strength;                                                     \n"
+"  else if (gamma != 1.0f)                                                     \n" /* gamma == 1 leaves strength unchanged */
+"    strength = pow(strength, gamma);                                          \n"
+"                                                                              \n"
+"  out[gid] = in[gid]*(1.0f-strength) + color * strength;                      \n" /* linear blend between the input pixel and color */
+"}                                                                             \n"
+;
diff --git a/operations/common/bilateral-filter.c b/operations/common/bilateral-filter.c
index 97cafd4..de4f775 100644
--- a/operations/common/bilateral-filter.c
+++ b/operations/common/bilateral-filter.c
@@ -64,7 +64,7 @@ static void prepare (GeglOperation *operation)
 
 static GeglClRunData *cl_data = NULL;
 
-static cl_int
+static gboolean
 cl_bilateral_filter (cl_mem                in_tex,
                      cl_mem                out_tex,
                      size_t                global_worksize,
@@ -80,25 +80,30 @@ cl_bilateral_filter (cl_mem                in_tex,
     const char *kernel_name[] = {"bilateral_filter", NULL};
     cl_data = gegl_cl_compile_and_build (bilateral_filter_cl_source, kernel_name);
   }
-
-  if (!cl_data) return 1;
+  if (!cl_data) return TRUE;
 
   global_ws[0] = roi->width;
   global_ws[1] = roi->height;
 
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem),   (void*)&in_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem),   (void*)&out_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_float), (void*)&radius);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_float), (void*)&preserve);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem),   (void*)&in_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem),   (void*)&out_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_float), (void*)&radius);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_float), (void*)&preserve);
+  CL_CHECK;
 
   cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
                                        cl_data->kernel[0], 2,
                                        NULL, global_ws, NULL,
                                        0, NULL, NULL);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  CL_CHECK;
+
+  return FALSE;
 
-  return cl_err;
+error:
+  return TRUE;
 }
 
 static gboolean
@@ -119,18 +124,19 @@ cl_process (GeglOperation       *operation,
   GeglBufferClIterator *i = gegl_buffer_cl_iterator_new (output,   result, out_format, GEGL_CL_BUFFER_WRITE);
                 gint read = gegl_buffer_cl_iterator_add_2 (i, input, result, in_format, GEGL_CL_BUFFER_READ, op_area->left, op_area->right, op_area->top, op_area->bottom, GEGL_ABYSS_NONE);
   while (gegl_buffer_cl_iterator_next (i, &err))
-  {
-    if (err) return FALSE;
-    for (j=0; j < i->n; j++)
     {
-      cl_err = cl_bilateral_filter(i->tex[read][j], i->tex[0][j], i->size[0][j], &i->roi[0][j], ceil(o->blur_radius), o->edge_preservation);
-      if (cl_err != CL_SUCCESS)
-      {
-        g_warning("[OpenCL] Error in gegl:bilateral-filter: %s", gegl_cl_errstring(cl_err));
-        return FALSE;
-      }
+      if (err) return FALSE;
+      for (j=0; j < i->n; j++)
+        {
+          err = cl_bilateral_filter(i->tex[read][j], i->tex[0][j], i->size[0][j], &i->roi[0][j], ceil(o->blur_radius), o->edge_preservation);
+          if (err)
+            {
+              g_warning("[OpenCL] Error in gegl:bilateral-filter");
+              return FALSE;
+            }
+        }
     }
-  }
+
   return TRUE;
 }
 
diff --git a/operations/common/box-blur.c b/operations/common/box-blur.c
index 760c3a7..1553897 100644
--- a/operations/common/box-blur.c
+++ b/operations/common/box-blur.c
@@ -31,7 +31,6 @@ gegl_chant_double_ui (radius, _("Radius"), 0.0, 1000.0, 4.0, 0.0, 100.0, 1.5,
 #define GEGL_CHANT_C_FILE       "box-blur.c"
 
 #include "gegl-chant.h"
-#include "gegl/gegl-debug.h"
 #include <stdio.h>
 #include <math.h>
 
@@ -184,7 +183,7 @@ static void prepare (GeglOperation *operation)
 
 static GeglClRunData *cl_data = NULL;
 
-static cl_int
+static gboolean
 cl_box_blur (cl_mem                in_tex,
              cl_mem                aux_tex,
              cl_mem                out_tex,
@@ -201,8 +200,7 @@ cl_box_blur (cl_mem                in_tex,
       const char *kernel_name[] = {"kernel_blur_hor", "kernel_blur_ver", NULL};
       cl_data = gegl_cl_compile_and_build (box_blur_cl_source, kernel_name);
     }
-
-  if (!cl_data) return 1;
+  if (!cl_data) return TRUE;
 
   local_ws_hor[0] = 1;
   local_ws_hor[1] = 256;
@@ -214,33 +212,40 @@ cl_box_blur (cl_mem                in_tex,
   global_ws_ver[0] = roi->height;
   global_ws_ver[1] = ((roi->width + local_ws_ver[1] -1)/local_ws_ver[1]) * local_ws_ver[1];
 
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem),   (void*)&in_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem),   (void*)&aux_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_int),   (void*)&roi->width);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_int),   (void*)&radius);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem),   (void*)&in_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem),   (void*)&aux_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_int),   (void*)&roi->width);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_int),   (void*)&radius);
+  CL_CHECK;
 
   cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
                                         cl_data->kernel[0], 2,
                                         NULL, global_ws_hor, local_ws_hor,
                                         0, NULL, NULL);
-  if (cl_err != CL_SUCCESS) return cl_err;
-
-  gegl_clEnqueueBarrier (gegl_cl_get_command_queue ());
+  CL_CHECK;
 
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem),   (void*)&aux_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_mem),   (void*)&out_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 2, sizeof(cl_int),   (void*)&roi->width);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 3, sizeof(cl_int),   (void*)&radius);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem),   (void*)&aux_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_mem),   (void*)&out_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 2, sizeof(cl_int),   (void*)&roi->width);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 3, sizeof(cl_int),   (void*)&radius);
+  CL_CHECK;
 
   cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
                                         cl_data->kernel[1], 2,
                                         NULL, global_ws_ver, local_ws_ver,
                                         0, NULL, NULL);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  CL_CHECK;
 
-  return cl_err;
+  return FALSE;
+
+error:
+  return TRUE;
 }
 
 static gboolean
@@ -266,10 +271,10 @@ cl_process (GeglOperation       *operation,
       if (err) return FALSE;
       for (j=0; j < i->n; j++)
         {
-          cl_err = cl_box_blur(i->tex[read][j], i->tex[aux][j], i->tex[0][j], i->size[0][j], &i->roi[0][j], ceil (o->radius));
-          if (cl_err != CL_SUCCESS)
+          err = cl_box_blur(i->tex[read][j], i->tex[aux][j], i->tex[0][j], i->size[0][j], &i->roi[0][j], ceil (o->radius));
+          if (err)
             {
-              GEGL_NOTE (GEGL_DEBUG_OPENCL, "Error in gegl:box-blur: %s", gegl_cl_errstring(cl_err));
+              g_warning("[OpenCL] Error in gegl:box-blur");
               return FALSE;
             }
         }
diff --git a/operations/common/c2g.c b/operations/common/c2g.c
index 742cf07..5df7b4a 100644
--- a/operations/common/c2g.c
+++ b/operations/common/c2g.c
@@ -156,7 +156,7 @@ get_bounding_box (GeglOperation *operation)
 
 static GeglClRunData *cl_data = NULL;
 
-static cl_int
+static gboolean
 cl_c2g (cl_mem                in_tex,
     cl_mem                    out_tex,
     size_t                    global_worksize,
@@ -176,36 +176,38 @@ cl_c2g (cl_mem                in_tex,
       const char *kernel_name[] ={"c2g", NULL};
       cl_data = gegl_cl_compile_and_build(c2g_cl_source, kernel_name);
     }
-  if (!cl_data)  return 0;
+  if (!cl_data) return TRUE;
 
   compute_luts(rgamma);
 
   cl_lut_cos = gegl_clCreateBuffer(gegl_cl_get_context(),
                                    CL_MEM_READ_ONLY,
                                    ANGLE_PRIME * sizeof(cl_float), NULL, &cl_err);
+  CL_CHECK;
 
-  cl_err |= gegl_clEnqueueWriteBuffer(gegl_cl_get_command_queue(), cl_lut_cos,
+  cl_err = gegl_clEnqueueWriteBuffer(gegl_cl_get_command_queue(), cl_lut_cos,
                                       CL_TRUE, 0, ANGLE_PRIME * sizeof(cl_float), lut_cos,
                                       0, NULL, NULL);
-  if (CL_SUCCESS != cl_err)   return cl_err;
+  CL_CHECK;
 
   cl_lut_sin = gegl_clCreateBuffer(gegl_cl_get_context(),
                                    CL_MEM_READ_ONLY,
                                    ANGLE_PRIME * sizeof(cl_float), NULL, &cl_err);
 
-  cl_err |= gegl_clEnqueueWriteBuffer(gegl_cl_get_command_queue(), cl_lut_sin,
+  cl_err = gegl_clEnqueueWriteBuffer(gegl_cl_get_command_queue(), cl_lut_sin,
                                       CL_TRUE, 0, ANGLE_PRIME * sizeof(cl_float), lut_sin,
                                       0, NULL, NULL);
-  if (CL_SUCCESS != cl_err)    return cl_err;
+  CL_CHECK;
 
   cl_radiuses = gegl_clCreateBuffer(gegl_cl_get_context(),
                                     CL_MEM_READ_ONLY,
                                     RADIUS_PRIME * sizeof(cl_float), NULL, &cl_err);
+  CL_CHECK;
 
-  cl_err |= gegl_clEnqueueWriteBuffer(gegl_cl_get_command_queue(), cl_radiuses,
+  cl_err = gegl_clEnqueueWriteBuffer(gegl_cl_get_command_queue(), cl_radiuses,
                                       CL_TRUE, 0, RADIUS_PRIME * sizeof(cl_float), radiuses,
                                       0, NULL, NULL);
-  if (CL_SUCCESS != cl_err)    return cl_err;
+  CL_CHECK;
 
   {
   cl_int cl_src_width  = src_roi->width;
@@ -214,34 +216,47 @@ cl_c2g (cl_mem                in_tex,
   cl_int cl_samples    = samples;
   cl_int cl_iterations = iterations;
 
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_int), (void*)&cl_src_width);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_int), (void*)&cl_src_height);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_mem), (void*)&cl_radiuses);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_mem), (void*)&cl_lut_cos);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_mem), (void*)&cl_lut_sin);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 6, sizeof(cl_mem), (void*)&out_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 7, sizeof(cl_int), (void*)&cl_radius);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 8, sizeof(cl_int), (void*)&cl_samples);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 9, sizeof(cl_int), (void*)&cl_iterations);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_int), (void*)&cl_src_width);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_int), (void*)&cl_src_height);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_mem), (void*)&cl_radiuses);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_mem), (void*)&cl_lut_cos);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_mem), (void*)&cl_lut_sin);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 6, sizeof(cl_mem), (void*)&out_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 7, sizeof(cl_int), (void*)&cl_radius);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 8, sizeof(cl_int), (void*)&cl_samples);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 9, sizeof(cl_int), (void*)&cl_iterations);
+  CL_CHECK;
   }
 
   cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue(), cl_data->kernel[0],
                                        2, NULL, gbl_size, NULL,
                                        0, NULL, NULL);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  CL_CHECK;
 
-  cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
-  if (CL_SUCCESS != cl_err) return cl_err;
+  cl_err = gegl_clFinish(gegl_cl_get_command_queue ());
+  CL_CHECK;
 
-  gegl_clFinish(gegl_cl_get_command_queue ());
+  cl_err = gegl_clReleaseMemObject(cl_radiuses);
+  CL_CHECK;
+  cl_err = gegl_clReleaseMemObject(cl_lut_cos);
+  CL_CHECK;
+  cl_err = gegl_clReleaseMemObject(cl_lut_sin);
+  CL_CHECK;
 
-  gegl_clReleaseMemObject(cl_radiuses);
-  gegl_clReleaseMemObject(cl_lut_cos);
-  gegl_clReleaseMemObject(cl_lut_sin);
+  return FALSE;
 
-  return cl_err;
+error:
+  return TRUE;
 }
 
 static gboolean
@@ -253,8 +268,8 @@ cl_process (GeglOperation *operation,
   const Babl *in_format  = babl_format("RGBA float");
   const Babl *out_format = gegl_operation_get_format (operation, "output");
   gint err;
-  gint j;
   cl_int cl_err;
+  gint j;
 
   GeglOperationAreaFilter *op_area = GEGL_OPERATION_AREA_FILTER (operation);
   GeglChantO *o = GEGL_CHANT_PROPERTIES (operation);
@@ -263,19 +278,18 @@ cl_process (GeglOperation *operation,
                 gint read = gegl_buffer_cl_iterator_add_2 (i, input, result, in_format, GEGL_CL_BUFFER_READ,
                                                            op_area->left, op_area->right, op_area->top, op_area->bottom, GEGL_ABYSS_NONE);
   while (gegl_buffer_cl_iterator_next (i, &err))
-  {
-    if (err) return FALSE;
-    for (j=0; j < i->n; j++)
     {
-      cl_err = cl_c2g(i->tex[read][j], i->tex[0][j],i->size[0][j], &i->roi[read][j],&i->roi[0][j],
-                      o->radius,o->samples,o->iterations,RGAMMA);
-      if (cl_err != CL_SUCCESS)
-      {
-        g_warning("[OpenCL] Error in gegl:c2g: %s", gegl_cl_errstring(cl_err));
-        return FALSE;
-      }
+      if (err) return FALSE;
+      for (j=0; j < i->n; j++)
+        {
+          err = cl_c2g(i->tex[read][j], i->tex[0][j],i->size[0][j], &i->roi[read][j], &i->roi[0][j], o->radius, o->samples, o->iterations, RGAMMA);
+          if (err)
+           {
+             g_warning("[OpenCL] Error in gegl:c2g");
+             return FALSE;
+           }
+        }
     }
-  }
   return TRUE;
 }
 
diff --git a/operations/common/color-temperature.c b/operations/common/color-temperature.c
index a177380..b52bb08 100644
--- a/operations/common/color-temperature.c
+++ b/operations/common/color-temperature.c
@@ -177,7 +177,7 @@ process (GeglOperation       *op,
 static GeglClRunData *cl_data = NULL;
 
 /* OpenCL processing function */
-static cl_int
+static gboolean
 cl_process (GeglOperation       *op,
             cl_mem               in_tex,
             cl_mem               out_tex,
@@ -207,20 +207,27 @@ cl_process (GeglOperation       *op,
 
   if (!cl_data) return 1;
 
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem),    (void*)&in_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem),    (void*)&out_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_float),  (void*)&coeffs[0]);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_float),  (void*)&coeffs[1]);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_float),  (void*)&coeffs[2]);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem),    (void*)&in_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem),    (void*)&out_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_float),  (void*)&coeffs[0]);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_float),  (void*)&coeffs[1]);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_float),  (void*)&coeffs[2]);
+  CL_CHECK;
 
   cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
                                         cl_data->kernel[0], 1,
                                         NULL, &global_worksize, NULL,
                                         0, NULL, NULL);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  CL_CHECK;
+
+  return FALSE;
 
-  return cl_err;
+error:
+  return TRUE;
 }
 
 
diff --git a/operations/common/edge-laplace.c b/operations/common/edge-laplace.c
index 6d20398..b12e5e4 100644
--- a/operations/common/edge-laplace.c
+++ b/operations/common/edge-laplace.c
@@ -452,9 +452,6 @@ cl_edge_laplace (cl_mem                in_tex,
                                        0, NULL, NULL);
   if (cl_err != CL_SUCCESS) return cl_err;
 
-  cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
-  if (CL_SUCCESS != cl_err) return cl_err;
-
   cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem),   (void*)&aux_tex);
   cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_mem),   (void*)&out_tex);
   if (cl_err != CL_SUCCESS) return cl_err;
diff --git a/operations/common/gaussian-blur.c b/operations/common/gaussian-blur.c
index 20f8723..a158342 100644
--- a/operations/common/gaussian-blur.c
+++ b/operations/common/gaussian-blur.c
@@ -422,7 +422,7 @@ static void prepare (GeglOperation *operation)
 
 static GeglClRunData *cl_data = NULL;
 
-static cl_int
+static gboolean
 cl_gaussian_blur (cl_mem                in_tex,
                   cl_mem                out_tex,
                   cl_mem                aux_tex,
@@ -448,72 +448,88 @@ cl_gaussian_blur (cl_mem                in_tex,
       const char *kernel_name[] = {"fir_ver_blur", "fir_hor_blur", NULL};
       cl_data = gegl_cl_compile_and_build (gaussian_blur_cl_source, kernel_name);
     }
-  if (!cl_data) return 1;
+  if (!cl_data) return TRUE;
 
   cl_matrix_x = gegl_clCreateBuffer(gegl_cl_get_context(),
                                     CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY,
                                     matrix_length_x * sizeof(cl_float), NULL, &cl_err);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  CL_CHECK;
 
   cl_err = gegl_clEnqueueWriteBuffer(gegl_cl_get_command_queue(), cl_matrix_x,
                                      CL_TRUE, 0, matrix_length_x * sizeof(cl_float), dmatrix_x,
                                      0, NULL, NULL);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  CL_CHECK;
 
   cl_matrix_y = gegl_clCreateBuffer(gegl_cl_get_context(),
                                     CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY,
                                     matrix_length_y * sizeof(cl_float), NULL, &cl_err);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  CL_CHECK;
 
   cl_err = gegl_clEnqueueWriteBuffer(gegl_cl_get_command_queue(), cl_matrix_y,
                                      CL_TRUE, 0, matrix_length_y * sizeof(cl_float), dmatrix_y,
                                      0, NULL, NULL);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  CL_CHECK;
 
   {
   global_ws[0] = aux_rect->width;
   global_ws[1] = aux_rect->height;
 
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem), (void*)&in_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_int), (void*)&src_rect->width);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 2, sizeof(cl_mem), (void*)&aux_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 3, sizeof(cl_mem), (void*)&cl_matrix_x);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 4, sizeof(cl_int), (void*)&matrix_length_x);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 5, sizeof(cl_int), (void*)&xoff);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem), (void*)&in_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_int), (void*)&src_rect->width);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 2, sizeof(cl_mem), (void*)&aux_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 3, sizeof(cl_mem), (void*)&cl_matrix_x);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 4, sizeof(cl_int), (void*)&matrix_length_x);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 5, sizeof(cl_int), (void*)&xoff);
+  CL_CHECK;
 
   cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
                                        cl_data->kernel[1], 2,
                                        NULL, global_ws, NULL,
                                        0, NULL, NULL);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  CL_CHECK;
   }
 
   {
   global_ws[0] = roi->width;
   global_ws[1] = roi->height;
 
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&aux_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_int), (void*)&aux_rect->width);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_mem), (void*)&out_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_mem), (void*)&cl_matrix_y);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_int), (void*)&matrix_length_y);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_int), (void*)&yoff);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&aux_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_int), (void*)&aux_rect->width);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_mem), (void*)&out_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_mem), (void*)&cl_matrix_y);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_int), (void*)&matrix_length_y);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_int), (void*)&yoff);
+  CL_CHECK;
 
   cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
                                        cl_data->kernel[0], 2,
                                        NULL, global_ws, NULL,
                                        0, NULL, NULL);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  CL_CHECK;
   }
 
-  gegl_clFinish(gegl_cl_get_command_queue ());
+  cl_err = gegl_clFinish(gegl_cl_get_command_queue ());
+  CL_CHECK;
+
+  cl_err = gegl_clReleaseMemObject(cl_matrix_x);
+  CL_CHECK;
+  cl_err = gegl_clReleaseMemObject(cl_matrix_y);
+  CL_CHECK;
 
-  gegl_clReleaseMemObject(cl_matrix_x);
-  gegl_clReleaseMemObject(cl_matrix_y);
+  return FALSE;
 
-  return CL_SUCCESS;
+error:
+  return TRUE;
 }
 
 static gboolean
@@ -560,22 +576,22 @@ cl_process (GeglOperation       *operation,
       if (err) return FALSE;
       for (j=0; j < i->n; j++)
         {
-           cl_err = cl_gaussian_blur(i->tex[read][j],
-                                     i->tex[0][j],
-                                     i->tex[aux][j],
-                                     i->size[0][j],
-                                     &i->roi[0][j],
-                                     &i->roi[read][j],
-                                     &i->roi[aux][j],
-                                     fmatrix_x,
-                                     cmatrix_len_x,
-                                     op_area->left,
-                                     fmatrix_y,
-                                     cmatrix_len_y,
-                                     op_area->top);
-          if (cl_err != CL_SUCCESS)
+           err = cl_gaussian_blur(i->tex[read][j],
+                                  i->tex[0][j],
+                                  i->tex[aux][j],
+                                  i->size[0][j],
+                                  &i->roi[0][j],
+                                  &i->roi[read][j],
+                                  &i->roi[aux][j],
+                                  fmatrix_x,
+                                  cmatrix_len_x,
+                                  op_area->left,
+                                  fmatrix_y,
+                                  cmatrix_len_y,
+                                  op_area->top);
+          if (err)
             {
-              g_warning("[OpenCL] Error in gegl:gaussian-blur: %s", gegl_cl_errstring(cl_err));
+              g_warning("[OpenCL] Error in gegl:gaussian-blur");
               return FALSE;
             }
         }
diff --git a/operations/common/grey.c b/operations/common/grey.c
index d2bb3b1..bc79dc7 100644
--- a/operations/common/grey.c
+++ b/operations/common/grey.c
@@ -62,7 +62,7 @@ process (GeglOperation       *op,
 
 #include "opencl/gegl-cl.h"
 
-static cl_int
+static gboolean
 cl_process (GeglOperation       *op,
             cl_mem               in_tex,
             cl_mem               out_tex,
@@ -76,10 +76,12 @@ cl_process (GeglOperation       *op,
                                     in_tex , out_tex , 0 , 0 ,
                                     global_worksize * sizeof (cl_float2),
                                     0, NULL, NULL);
+  CL_CHECK;
 
-  if (CL_SUCCESS != cl_err) return cl_err;
+  return FALSE;
 
-  return cl_err;
+error:
+  return TRUE;
 }
 
 
diff --git a/operations/common/levels.c b/operations/common/levels.c
index 2c3f9f0..c3dd955 100644
--- a/operations/common/levels.c
+++ b/operations/common/levels.c
@@ -87,25 +87,12 @@ process (GeglOperation       *op,
 
 #include "opencl/gegl-cl.h"
 
-static const char* kernel_source =
-"__kernel void kernel_levels(__global const float4     *in,      \n"
-"                            __global       float4     *out,     \n"
-"                            float in_offset,                    \n"
-"                            float out_offset,                   \n"
-"                            float scale)                        \n"
-"{                                                               \n"
-"  int gid = get_global_id(0);                                   \n"
-"  float4 in_v  = in[gid];                                       \n"
-"  float4 out_v;                                                 \n"
-"  out_v.xyz = (in_v.xyz - in_offset) * scale + out_offset;      \n"
-"  out_v.w   =  in_v.w;                                          \n"
-"  out[gid]  =  out_v;                                           \n"
-"}                                                               \n";
+#include "opencl/levels.cl.h"
 
 static GeglClRunData *cl_data = NULL;
 
 /* OpenCL processing function */
-static cl_int
+static gboolean
 cl_process (GeglOperation       *op,
             cl_mem               in_tex,
             cl_mem               out_tex,
@@ -140,24 +127,31 @@ cl_process (GeglOperation       *op,
   if (!cl_data)
     {
       const char *kernel_name[] = {"kernel_levels", NULL};
-      cl_data = gegl_cl_compile_and_build (kernel_source, kernel_name);
+      cl_data = gegl_cl_compile_and_build (levels_cl_source, kernel_name);
     }
-  if (!cl_data) return 1;
-
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem),   (void*)&in_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem),   (void*)&out_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_float), (void*)&in_offset);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_float), (void*)&out_offset);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_float), (void*)&scale);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  if (!cl_data) return TRUE;
+
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem),   (void*)&in_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem),   (void*)&out_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_float), (void*)&in_offset);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_float), (void*)&out_offset);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_float), (void*)&scale);
+  CL_CHECK;
 
   cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
                                         cl_data->kernel[0], 1,
                                         NULL, &global_worksize, NULL,
                                         0, NULL, NULL);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  CL_CHECK;
 
-  return cl_err;
+  return FALSE;
+
+error:
+  return TRUE;
 }
 
 
diff --git a/operations/common/motion-blur.c b/operations/common/motion-blur.c
index 2a23dc6..b6ac612 100644
--- a/operations/common/motion-blur.c
+++ b/operations/common/motion-blur.c
@@ -51,93 +51,18 @@ prepare (GeglOperation *operation)
   op_area->top    =
   op_area->bottom = (gint)ceil(0.5 * offset_y);
 
-  gegl_operation_set_format (operation, "input", babl_format ("RaGaBaA float"));
+  gegl_operation_set_format (operation, "input",  babl_format ("RaGaBaA float"));
   gegl_operation_set_format (operation, "output", babl_format ("RaGaBaA float"));
 }
 
 #include "opencl/gegl-cl.h"
 #include "buffer/gegl-buffer-cl-iterator.h"
 
-static const char* kernel_source =
-"int CLAMP(int val,int lo,int hi)                                      \n"
-"{                                                                     \n"
-"    return (val < lo) ? lo : ((hi < val) ? hi : val);                 \n"
-"}                                                                     \n"
-"                                                                      \n"
-"float4 get_pixel_color_CL(const __global float4 *in_buf,              \n"
-"                          int     rect_width,                         \n"
-"                          int     rect_height,                        \n"
-"                          int     rect_x,                             \n"
-"                          int     rect_y,                             \n"
-"                          int     x,                                  \n"
-"                          int     y)                                  \n"
-"{                                                                     \n"
-"    int ix = x - rect_x;                                              \n"
-"    int iy = y - rect_y;                                              \n"
-"                                                                      \n"
-"    ix = CLAMP(ix, 0, rect_width-1);                                  \n"
-"    iy = CLAMP(iy, 0, rect_height-1);                                 \n"
-"                                                                      \n"
-"    return in_buf[iy * rect_width + ix];                              \n"
-"}                                                                     \n"
-"                                                                      \n"
-"__kernel void motion_blur_CL(const __global float4 *src_buf,          \n"
-"                             int     src_width,                       \n"
-"                             int     src_height,                      \n"
-"                             int     src_x,                           \n"
-"                             int     src_y,                           \n"
-"                             __global float4 *dst_buf,                \n"
-"                             int     dst_x,                           \n"
-"                             int     dst_y,                           \n"
-"                             int     num_steps,                       \n"
-"                             float   offset_x,                        \n"
-"                             float   offset_y)                        \n"
-"{                                                                     \n"
-"    int gidx = get_global_id(0);                                      \n"
-"    int gidy = get_global_id(1);                                      \n"
-"                                                                      \n"
-"    float4 sum = 0.0f;                                                \n"
-"    int px = gidx + dst_x;                                            \n"
-"    int py = gidy + dst_y;                                            \n"
-"                                                                      \n"
-"    for(int step = 0; step < num_steps; ++step)                       \n"
-"    {                                                                 \n"
-"        float t = num_steps == 1 ? 0.0f :                             \n"
-"            step / (float)(num_steps - 1) - 0.5f;                     \n"
-"                                                                      \n"
-"        float xx = px + t * offset_x;                                 \n"
-"        float yy = py + t * offset_y;                                 \n"
-"                                                                      \n"
-"        int   ix = (int)floor(xx);                                    \n"
-"        int   iy = (int)floor(yy);                                    \n"
-"                                                                      \n"
-"        float dx = xx - floor(xx);                                    \n"
-"        float dy = yy - floor(yy);                                    \n"
-"                                                                      \n"
-"        float4 mixy0,mixy1,pix0,pix1,pix2,pix3;                       \n"
-"                                                                      \n"
-"        pix0 = get_pixel_color_CL(src_buf, src_width,                 \n"
-"            src_height, src_x, src_y, ix,   iy);                      \n"
-"        pix1 = get_pixel_color_CL(src_buf, src_width,                 \n"
-"            src_height, src_x, src_y, ix+1, iy);                      \n"
-"        pix2 = get_pixel_color_CL(src_buf, src_width,                 \n"
-"            src_height, src_x, src_y, ix,   iy+1);                    \n"
-"        pix3 = get_pixel_color_CL(src_buf, src_width,                 \n"
-"            src_height, src_x, src_y, ix+1, iy+1);                    \n"
-"                                                                      \n"
-"        mixy0 = dy * (pix2 - pix0) + pix0;                            \n"
-"        mixy1 = dy * (pix3 - pix1) + pix1;                            \n"
-"                                                                      \n"
-"        sum  += dx * (mixy1 - mixy0) + mixy0;                         \n"
-"    }                                                                 \n"
-"                                                                      \n"
-"    dst_buf[gidy * get_global_size(0) + gidx] =                       \n"
-"        sum / num_steps;                                              \n"
-"}                                                                     \n";
+#include "opencl/motion-blur.cl.h"
 
 static GeglClRunData *cl_data = NULL;
 
-static cl_int
+static gboolean
 cl_motion_blur (cl_mem                in_tex,
                 cl_mem                out_tex,
                 size_t                global_worksize,
@@ -152,35 +77,47 @@ cl_motion_blur (cl_mem                in_tex,
 
   if (!cl_data)
   {
-    const char *kernel_name[] = {"motion_blur_CL", NULL};
-    cl_data = gegl_cl_compile_and_build (kernel_source, kernel_name);
+    const char *kernel_name[] = {"motion_blur", NULL};
+    cl_data = gegl_cl_compile_and_build (motion_blur_cl_source, kernel_name);
   }
-
-  if (!cl_data) return 1;
+  if (!cl_data) return TRUE;
 
   global_ws[0] = roi->width;
   global_ws[1] = roi->height;
 
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0],  0, sizeof(cl_mem),   (void*)&in_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0],  1, sizeof(cl_int),   (void*)&src_rect->width);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0],  2, sizeof(cl_int),   (void*)&src_rect->height);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0],  3, sizeof(cl_int),   (void*)&src_rect->x);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0],  4, sizeof(cl_int),   (void*)&src_rect->y);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0],  5, sizeof(cl_mem),   (void*)&out_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0],  6, sizeof(cl_int),   (void*)&roi->x);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0],  7, sizeof(cl_int),   (void*)&roi->y);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0],  8, sizeof(cl_int),   (void*)&num_steps);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0],  9, sizeof(cl_float), (void*)&offset_x);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 10, sizeof(cl_float), (void*)&offset_y);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0],  0, sizeof(cl_mem),   (void*)&in_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0],  1, sizeof(cl_int),   (void*)&src_rect->width);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0],  2, sizeof(cl_int),   (void*)&src_rect->height);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0],  3, sizeof(cl_int),   (void*)&src_rect->x);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0],  4, sizeof(cl_int),   (void*)&src_rect->y);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0],  5, sizeof(cl_mem),   (void*)&out_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0],  6, sizeof(cl_int),   (void*)&roi->x);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0],  7, sizeof(cl_int),   (void*)&roi->y);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0],  8, sizeof(cl_int),   (void*)&num_steps);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0],  9, sizeof(cl_float), (void*)&offset_x);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 10, sizeof(cl_float), (void*)&offset_y);
+  CL_CHECK;
 
   cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
                                        cl_data->kernel[0], 2,
                                        NULL, global_ws, NULL,
                                        0, NULL, NULL);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  CL_CHECK;
+
+  return FALSE;
 
-  return cl_err;
+error:
+  return TRUE;
 }
 
 static gboolean
@@ -193,8 +130,8 @@ cl_process (GeglOperation       *operation,
   const Babl *in_format  = gegl_operation_get_format (operation, "input");
   const Babl *out_format = gegl_operation_get_format (operation, "output");
   gint err;
-  gint j;
   cl_int cl_err;
+  gint j;
 
   GeglOperationAreaFilter *op_area = GEGL_OPERATION_AREA_FILTER (operation);
   GeglChantO *o = GEGL_CHANT_PROPERTIES (operation);
@@ -208,18 +145,18 @@ cl_process (GeglOperation       *operation,
                 gint read = gegl_buffer_cl_iterator_add_2 (i, input, result, in_format,  GEGL_CL_BUFFER_READ,
                                                            op_area->left, op_area->right, op_area->top, op_area->bottom, GEGL_ABYSS_NONE);
   while (gegl_buffer_cl_iterator_next (i, &err))
-  {
-    if (err) return FALSE;
-    for (j=0; j < i->n; j++)
     {
-      cl_err = cl_motion_blur(i->tex[read][j], i->tex[0][j], i->size[0][j], &i->roi[0][j], &i->roi[read][j], num_steps, offset_x, offset_y);
-      if (cl_err != CL_SUCCESS)
-      {
-        g_warning("[OpenCL] Error in gegl:motion-blur: %s", gegl_cl_errstring(cl_err));
-        return FALSE;
-      }
+      if (err) return FALSE;
+      for (j=0; j < i->n; j++)
+        {
+          err = cl_motion_blur(i->tex[read][j], i->tex[0][j], i->size[0][j], &i->roi[0][j], &i->roi[read][j], num_steps, offset_x, offset_y);
+          if (err)
+            {
+              g_warning("[OpenCL] Error in gegl:motion-blur");
+              return FALSE;
+            }
+        }
     }
-  }
   return TRUE;
 }
 
diff --git a/operations/common/noise-reduction.c b/operations/common/noise-reduction.c
index b8c4337..b0f1063 100644
--- a/operations/common/noise-reduction.c
+++ b/operations/common/noise-reduction.c
@@ -152,88 +152,11 @@ static void prepare (GeglOperation *operation)
 #include "opencl/gegl-cl.h"
 #include "buffer/gegl-buffer-cl-iterator.h"
 
-static const char* kernel_source =
-"#define NEIGHBOURS 8                                                              \n"
-"#define AXES       (NEIGHBOURS/2)                                                 \n"
-"                                                                                  \n"
-"#define POW2(a) ((a)*(a))                                                         \n"
-"                                                                                  \n"
-"#define GEN_METRIC(before, center, after) POW2((center) * 2 - (before) - (after)) \n"
-"                                                                                  \n"
-"#define BAIL_CONDITION(new,original) ((new) < (original))                         \n"
-"                                                                                  \n"
-"#define SYMMETRY(a)  (NEIGHBOURS - (a) - 1)                                       \n"
-"                                                                                  \n"
-"#define O(u,v) (((u)+((v) * (src_stride))))                                       \n"
-"                                                                                  \n"
-"__kernel void noise_reduction_cl (__global       float4 *src_buf,                 \n"
-"                                  int src_stride,                                 \n"
-"                                  __global       float4 *dst_buf,                 \n"
-"                                  int dst_stride)                                 \n"
-"{                                                                                 \n"
-"    int gidx = get_global_id(0);                                                  \n"
-"    int gidy = get_global_id(1);                                                  \n"
-"                                                                                  \n"
-"    __global float4 *center_pix = src_buf + (gidy + 1) * src_stride + gidx + 1;   \n"
-"    int dst_offset = dst_stride * gidy + gidx;                                    \n"
-"                                                                                  \n"
-"    int offsets[NEIGHBOURS] = {                                                   \n"
-"        O(-1, -1), O( 0, -1), O( 1, -1),                                          \n"
-"        O(-1,  0),            O( 1,  0),                                          \n"
-"        O(-1,  1), O( 0,  1), O( 1,  1)                                           \n"
-"    };                                                                            \n"
-"                                                                                  \n"
-"    float4 sum;                                                                   \n"
-"    int4   count;                                                                 \n"
-"    float4 cur;                                                                   \n"
-"    float4 metric_reference[AXES];                                                \n"
-"                                                                                  \n"
-"    for (int axis = 0; axis < AXES; axis++)                                       \n"
-"      {                                                                           \n"
-"        float4 before_pix = *(center_pix + offsets[axis]);                        \n"
-"        float4 after_pix  = *(center_pix + offsets[SYMMETRY(axis)]);              \n"
-"        metric_reference[axis] = GEN_METRIC (before_pix, *center_pix, after_pix); \n"
-"      }                                                                           \n"
-"                                                                                  \n"
-"    cur = sum = *center_pix;                                                      \n"
-"    count = 1;                                                                    \n"
-"                                                                                  \n"
-"    for (int direction = 0; direction < NEIGHBOURS; direction++)                  \n"
-"      {                                                                           \n"
-"        float4 pix   = *(center_pix + offsets[direction]);                        \n"
-"        float4 value = (pix + cur) * (0.5f);                                      \n"
-"        int    axis;                                                              \n"
-"        int4   mask = {1, 1, 1, 0};                                               \n"
-"                                                                                  \n"
-"        for (axis = 0; axis < AXES; axis++)                                       \n"
-"          {                                                                       \n"
-"            float4 before_pix = *(center_pix + offsets[axis]);                    \n"
-"            float4 after_pix  = *(center_pix + offsets[SYMMETRY(axis)]);          \n"
-"                                                                                  \n"
-"            float4 metric_new = GEN_METRIC (before_pix,                           \n"
-"                                            value,                                \n"
-"                                            after_pix);                           \n"
-"            mask = BAIL_CONDITION (metric_new, metric_reference[axis]) & mask;    \n"
-"          }                                                                       \n"
-"        sum   += mask >0 ? value : 0;                                             \n"
-"        count += mask >0 ? 1     : 0;                                             \n"
-"      }                                                                           \n"
-"    dst_buf[dst_offset]   = (sum/convert_float4(count));                          \n"
-"    dst_buf[dst_offset].w = cur.w;                                                \n"
-"}                                                                                 \n"
-"__kernel void transfer(__global float4 * in,                                      \n"
-"              int               in_width,                                         \n"
-"              __global float4 * out)                                              \n"
-"{                                                                                 \n"
-"    int gidx = get_global_id(0);                                                  \n"
-"    int gidy = get_global_id(1);                                                  \n"
-"    int width = get_global_size(0);                                               \n"
-"    out[gidy * width + gidx] = in[gidy * in_width + gidx];                        \n"
-"}                                                                                 \n";
+#include "opencl/noise-reduction.cl.h"
 
 static GeglClRunData *cl_data = NULL;
 
-static cl_int
+static gboolean
 cl_noise_reduction (cl_mem                in_tex,
                     cl_mem                aux_tex,
                     cl_mem                out_tex,
@@ -255,24 +178,21 @@ cl_noise_reduction (cl_mem                in_tex,
   if (!cl_data)
     {
       const char *kernel_name[] ={"noise_reduction_cl","transfer", NULL};
-      cl_data = gegl_cl_compile_and_build(kernel_source, kernel_name);
+      cl_data = gegl_cl_compile_and_build(noise_reduction_cl_source, kernel_name);
     }
-  if (!cl_data)  return 0;
+  if (!cl_data)  return TRUE;
 
   temp_tex = gegl_clCreateBuffer (gegl_cl_get_context(),
                                   CL_MEM_READ_WRITE,
                                   src_roi->width * src_roi->height * stride,
                                   NULL, &cl_err);
-  if (cl_err != CL_SUCCESS) return cl_err;
-
+  CL_CHECK;
 
   cl_err = gegl_clEnqueueCopyBuffer(gegl_cl_get_command_queue(),
                                     in_tex , temp_tex , 0 , 0 ,
                                     src_roi->width * src_roi->height * stride,
                                     0, NULL, NULL);
-
-  cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
-  if (CL_SUCCESS != cl_err) return cl_err;
+  CL_CHECK;
 
   tmptex = temp_tex;
   for (i = 0;i<iterations;i++)
@@ -286,37 +206,46 @@ cl_noise_reduction (cl_mem                in_tex,
       gbl_size_tmp[0] = roi->width  + 2 * (iterations - 1 -i);
       gbl_size_tmp[1] = roi->height + 2 * (iterations - 1 -i);
 
-      cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&temp_tex);
-      cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_int), (void*)&n_src_stride);
-      cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_mem), (void*)&aux_tex);
-      cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_int), (void*)&n_src_stride);
-      if (cl_err != CL_SUCCESS) return cl_err;
+      cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&temp_tex);
+      CL_CHECK;
+      cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_int), (void*)&n_src_stride);
+      CL_CHECK;
+      cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_mem), (void*)&aux_tex);
+      CL_CHECK;
+      cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_int), (void*)&n_src_stride);
+      CL_CHECK;
 
       cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue(), cl_data->kernel[0],
                                            2, NULL, gbl_size_tmp, NULL,
                                            0, NULL, NULL);
-      cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
-      if (CL_SUCCESS != cl_err) return cl_err;
+      CL_CHECK;
     }
 
   gbl_size_tmp[0] = roi->width ;
   gbl_size_tmp[1] = roi->height;
 
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem), (void*)&aux_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_int), (void*)&n_src_stride);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 2, sizeof(cl_mem), (void*)&out_tex);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem), (void*)&aux_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_int), (void*)&n_src_stride);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 2, sizeof(cl_mem), (void*)&out_tex);
+  CL_CHECK;
 
   cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue(), cl_data->kernel[1],
                                        2, NULL, gbl_size_tmp, NULL,
                                        0, NULL, NULL);
+  CL_CHECK;
 
   cl_err = gegl_clFinish(gegl_cl_get_command_queue());
-  if (CL_SUCCESS != cl_err) return cl_err;
+  CL_CHECK;
 
-  if (tmptex) gegl_clReleaseMemObject (tmptex);
+  if (tmptex) cl_err = gegl_clReleaseMemObject (tmptex);
+  CL_CHECK;
 
-  return cl_err;
+  return FALSE;
+
+error:
+  return TRUE;
 }
 
 static gboolean
@@ -329,7 +258,6 @@ cl_process (GeglOperation       *operation,
   const Babl *out_format = gegl_operation_get_format (operation, "output");
   gint err;
   gint j;
-  cl_int cl_err;
 
   GeglOperationAreaFilter *op_area = GEGL_OPERATION_AREA_FILTER (operation);
   GeglChantO *o = GEGL_CHANT_PROPERTIES (operation);
@@ -345,16 +273,16 @@ cl_process (GeglOperation       *operation,
     if (err) return FALSE;
     for (j=0; j < i->n; j++)
       {
-        cl_err = cl_noise_reduction(i->tex[read][j],
-                                    i->tex[aux][j],
-                                    i->tex[0][j],
-                                    i->size[0][j],
-                                    &i->roi[read][j],
-                                    &i->roi[0][j],
-                                    o->iterations);
-        if (cl_err != CL_SUCCESS)
+        err = cl_noise_reduction(i->tex[read][j],
+                                 i->tex[aux][j],
+                                 i->tex[0][j],
+                                 i->size[0][j],
+                                 &i->roi[read][j],
+                                 &i->roi[0][j],
+                                 o->iterations);
+        if (err)
         {
-          g_warning("[OpenCL] Error in gegl:noise-reduction: %s", gegl_cl_errstring(cl_err));
+          g_warning("[OpenCL] Error in gegl:noise-reduction");
           return FALSE;
         }
       }
diff --git a/operations/common/oilify.c b/operations/common/oilify.c
index 81e318d..22d9074 100644
--- a/operations/common/oilify.c
+++ b/operations/common/oilify.c
@@ -278,135 +278,11 @@ prepare (GeglOperation *operation)
 #include "opencl/gegl-cl.h"
 #include "buffer/gegl-buffer-cl-iterator.h"
 
-/* two small different kernels are better than one big */
-static const char* kernel_source =
-"#define NUM_INTENSITIES 256                                                \n"
-"kernel void kernel_oilify(global float4 *in,                               \n"
-"                             global float4 *out,                           \n"
-"                             const int mask_radius,                        \n"
-"                             const int intensities,                        \n"
-"                             const float exponent)                         \n"
-"{                                                                          \n"
-"  int gidx = get_global_id(0);                                             \n"
-"  int gidy = get_global_id(1);                                             \n"
-"  int x = gidx + mask_radius;                                              \n"
-"  int y = gidy + mask_radius;                                              \n"
-"  int dst_width = get_global_size(0);                                      \n"
-"  int src_width = dst_width + mask_radius * 2;                             \n"
-"  float4 hist[NUM_INTENSITIES];                                            \n"
-"  float4 hist_max = 1.0;                                                   \n"
-"  int i, j, intensity;                                                     \n"
-"  int radius_sq = mask_radius * mask_radius;                               \n"
-"  float4 temp_pixel;                                                       \n"
-"  for (i = 0; i < intensities; i++)                                        \n"
-"    hist[i] = 0.0;                                                         \n"
-"                                                                           \n"
-"  for (i = -mask_radius; i <= mask_radius; i++)                            \n"
-"  {                                                                        \n"
-"    for (j = -mask_radius; j <= mask_radius; j++)                          \n"
-"      {                                                                    \n"
-"        if (i*i + j*j <= radius_sq)                                        \n"
-"          {                                                                \n"
-"            temp_pixel = in[x + i + (y + j) * src_width];                  \n"
-"            hist[(int)(temp_pixel.x * (intensities - 1))].x+=1;            \n"
-"            hist[(int)(temp_pixel.y * (intensities - 1))].y+=1;            \n"
-"            hist[(int)(temp_pixel.z * (intensities - 1))].z+=1;            \n"
-"            hist[(int)(temp_pixel.w * (intensities - 1))].w+=1;            \n"
-"          }                                                                \n"
-"      }                                                                    \n"
-"  }                                                                        \n"
-"                                                                           \n"
-"  for (i = 0; i < intensities; i++) {                                      \n"
-"    if(hist_max.x < hist[i].x)                                             \n"
-"      hist_max.x = hist[i].x;                                              \n"
-"    if(hist_max.y < hist[i].y)                                             \n"
-"      hist_max.y = hist[i].y;                                              \n"
-"    if(hist_max.z < hist[i].z)                                             \n"
-"      hist_max.z = hist[i].z;                                              \n"
-"    if(hist_max.w < hist[i].w)                                             \n"
-"      hist_max.w = hist[i].w;                                              \n"
-"  }                                                                        \n"
-"  float4 div = 0.0;                                                        \n"
-"  float4 sum = 0.0;                                                        \n"
-"  float4 ratio, weight;                                                    \n"
-"  for (i = 0; i < intensities; i++)                                        \n"
-"  {                                                                        \n"
-"    ratio = hist[i] / hist_max;                                            \n"
-"    weight = pow(ratio, (float4)exponent);                                 \n"
-"    sum += weight * (float4)i;                                             \n"
-"    div += weight;                                                         \n"
-"  }                                                                        \n"
-"  out[gidx + gidy * dst_width] = sum / div / (float)(intensities - 1);     \n"
-"}                                                                          \n"
-"                                                                           \n"
-"kernel void kernel_oilify_inten(global float4 *in,                         \n"
-"                             global float4 *out,                           \n"
-"                             const int mask_radius,                        \n"
-"                             const int intensities,                        \n"
-"                             const float exponent)                         \n"
-"{                                                                          \n"
-"  int gidx = get_global_id(0);                                             \n"
-"  int gidy = get_global_id(1);                                             \n"
-"  int x = gidx + mask_radius;                                              \n"
-"  int y = gidy + mask_radius;                                              \n"
-"  int dst_width = get_global_size(0);                                      \n"
-"  int src_width = dst_width + mask_radius * 2;                             \n"
-"  float4 cumulative_rgb[NUM_INTENSITIES];                                  \n"
-"  int hist_inten[NUM_INTENSITIES], inten_max;                              \n"
-"  int i, j, intensity;                                                     \n"
-"  int radius_sq = mask_radius * mask_radius;                               \n"
-"  float4 temp_pixel;                                                       \n"
-"  for (i = 0; i < intensities; i++)                                        \n"
-"  {                                                                        \n"
-"    hist_inten[i] = 0;                                                     \n"
-"    cumulative_rgb[i] = 0.0;                                               \n"
-"  }                                                                        \n"
-"  for (i = -mask_radius; i <= mask_radius; i++)                            \n"
-"  {                                                                        \n"
-"    for (j = -mask_radius; j <= mask_radius; j++)                          \n"
-"      {                                                                    \n"
-"        if (i*i + j*j <= radius_sq)                                        \n"
-"          {                                                                \n"
-"            temp_pixel = in[x + i + (y + j) * src_width];                  \n"
-"            /*Calculate intensity on the fly, GPU does it fast*/           \n"
-"            intensity = (int)((0.299 * temp_pixel.x                        \n"
-"                      +0.587 * temp_pixel.y                                \n"
-"                      +0.114 * temp_pixel.z) * (float)(intensities-1));    \n"
-"            hist_inten[intensity] += 1;                                    \n"
-"            cumulative_rgb[intensity] += temp_pixel;                       \n"
-"          }                                                                \n"
-"      }                                                                    \n"
-"  }                                                                        \n"
-"  inten_max = 1;                                                           \n"
-"                                                                           \n"
-"  /* calculated maximums */                                                \n"
-"  for (i = 0; i < intensities; i++) {                                      \n"
-"    if(hist_inten[i] > inten_max)                                          \n"
-"      inten_max = hist_inten[i];                                           \n"
-"  }                                                                        \n"
-"  float div = 0.0;                                                         \n"
-"  float ratio, weight, mult_inten;                                         \n"
-"                                                                           \n"
-"  float4 color = 0.0;                                                      \n"
-"  for (i = 0; i < intensities; i++)                                        \n"
-"  {                                                                        \n"
-"    if (hist_inten[i] > 0)                                                 \n"
-"    {                                                                      \n"
-"      ratio = (float)(hist_inten[i]) / (float)(inten_max);                 \n"
-"      weight = pow(ratio, exponent);                                       \n"
-"      mult_inten = weight / (float)(hist_inten[i]);                        \n"
-"                                                                           \n"
-"      div += weight;                                                       \n"
-"      color += mult_inten * cumulative_rgb[i];                             \n"
-"    }                                                                      \n"
-"  }                                                                        \n"
-"  out[gidx + gidy * dst_width] = color/div;                                \n"
-"}                                                                          \n";
-
+#include "opencl/oilify.cl.h"
 
 static GeglClRunData *cl_data = NULL;
 
-static cl_int
+static gboolean
 cl_oilify (cl_mem              in_tex,
            cl_mem              out_tex,
            size_t              global_worksize,
@@ -416,12 +292,6 @@ cl_oilify (cl_mem              in_tex,
            gint                exponent,
            gboolean            use_inten)
 {
-  if (!cl_data)
-    {
-      const char *kernel_name[] = {"kernel_oilify", "kernel_oilify_inten", NULL};
-      cl_data = gegl_cl_compile_and_build(kernel_source, kernel_name);
-    }
-  if (!cl_data)  return 0;
 
   const size_t gbl_size[2] = {roi->width,roi->height};
   cl_int radius      = mask_radius;
@@ -430,21 +300,35 @@ cl_oilify (cl_mem              in_tex,
   cl_int cl_err      = 0;
   gint arg = 0;
 
+  if (!cl_data)
+    {
+      const char *kernel_name[] = {"kernel_oilify", "kernel_oilify_inten", NULL};
+      cl_data = gegl_cl_compile_and_build(oilify_cl_source, kernel_name);
+    }
+  if (!cl_data)  return TRUE;
+
   /* simple hack: select suitable kernel using boolean, 0 - no intensity mode, 1 - intensity mode */
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[use_inten], arg++, sizeof(cl_mem), (void*)&in_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[use_inten], arg++, sizeof(cl_mem), (void*)&out_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[use_inten], arg++, sizeof(cl_int), (void*)&radius);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[use_inten], arg++, sizeof(cl_int), (void*)&intensities);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[use_inten], arg++, sizeof(cl_float), (void*)&exp);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[use_inten], arg++, sizeof(cl_mem),   (void*)&in_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[use_inten], arg++, sizeof(cl_mem),   (void*)&out_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[use_inten], arg++, sizeof(cl_int),   (void*)&radius);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[use_inten], arg++, sizeof(cl_int),   (void*)&intensities);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[use_inten], arg++, sizeof(cl_float), (void*)&exp);
+  CL_CHECK;
 
   cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue(),
                                        cl_data->kernel[use_inten], 2,
                                        NULL, gbl_size, NULL,
                                        0, NULL, NULL);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  CL_CHECK;
 
-  return CL_SUCCESS;
+  return FALSE;
+
+error:
+  return TRUE;
 }
 
 static gboolean
@@ -457,33 +341,32 @@ cl_process (GeglOperation       *operation,
   const Babl *out_format = gegl_operation_get_format (operation, "output");
   gint err;
   gint j;
-  cl_int cl_err;
 
-  GeglOperationAreaFilter *op_area = GEGL_OPERATION_AREA_FILTER (operation);
   GeglChantO *o = GEGL_CHANT_PROPERTIES (operation);
 
   GeglBufferClIterator *i = gegl_buffer_cl_iterator_new (output,result, out_format, GEGL_CL_BUFFER_WRITE);
                 gint read = gegl_buffer_cl_iterator_add_2 (i, input, result, in_format, GEGL_CL_BUFFER_READ,
                                                            o->mask_radius, o->mask_radius, o->mask_radius, o->mask_radius, GEGL_ABYSS_NONE);
   while (gegl_buffer_cl_iterator_next (i, &err))
-  {
-    if (err) return FALSE;
-    for (j=0; j < i->n; j++)
     {
-      cl_err = cl_oilify(i->tex[read][j],
-                         i->tex[0][j],
-                         i->size[0][j],&i->roi[0][j],
-                         o->mask_radius,
-                         o->intensities,
-                         o->exponent,
-                         o->use_inten);
-      if (cl_err != CL_SUCCESS)
-      {
-        g_warning("[OpenCL] Error in gegl:oilify: %s", gegl_cl_errstring(cl_err));
-        return FALSE;
-      }
+      if (err) return FALSE;
+      for (j=0; j < i->n; j++)
+        {
+          err = cl_oilify(i->tex[read][j],
+                          i->tex[0][j],
+                          i->size[0][j],&i->roi[0][j],
+                          o->mask_radius,
+                          o->intensities,
+                          o->exponent,
+                          o->use_inten);
+          if (err)
+            {
+              g_warning("[OpenCL] Error in gegl:oilify");
+              return FALSE;
+            }
+        }
     }
-  }
+
   return TRUE;
 }
 
diff --git a/operations/common/opacity.c b/operations/common/opacity.c
index dec9d80..3314e7f 100644
--- a/operations/common/opacity.c
+++ b/operations/common/opacity.c
@@ -214,8 +214,7 @@ cl_process (GeglOperation       *op,
       const char *kernel_name[] = {"gegl_opacity_RaGaBaA_float", "gegl_opacity_RGBA_float", NULL};
       cl_data = gegl_cl_compile_and_build (opacity_cl_source, kernel_name);
     }
-
-  if (!cl_data) return FALSE;
+  if (!cl_data) return TRUE;
 
   kernel = (GEGL_CHANT_PROPERTIES (op)->chant_data != NULL);
 
@@ -232,10 +231,10 @@ cl_process (GeglOperation       *op,
                                         0, NULL, NULL);
   CL_CHECK;
 
-  return TRUE;
+  return FALSE;
 
 error:
-  return FALSE;
+  return TRUE;
 }
 
 /* Fast path when opacity is a no-op
@@ -282,6 +281,8 @@ gegl_chant_class_init (GeglChantClass *klass)
   point_composer_class->process = process;
   point_composer_class->cl_process = cl_process;
 
+  operation_class->opencl_support = TRUE;
+
   gegl_operation_class_set_keys (operation_class,
     "name"       , "gegl:opacity",
     "categories" , "transparency",
diff --git a/operations/common/pixelize.c b/operations/common/pixelize.c
index 57c5e03..4292b8c 100644
--- a/operations/common/pixelize.c
+++ b/operations/common/pixelize.c
@@ -135,59 +135,11 @@ pixelize (gfloat* buf,
 #include "opencl/gegl-cl.h"
 #include "buffer/gegl-buffer-cl-iterator.h"
 
-static const char* kernel_source =
-"__kernel void calc_block_color(__global float4 *in,                   \n"
-"                             __global float4 *out,                    \n"
-"                             int xsize,                               \n"
-"                             int ysize,                               \n"
-"                             int roi_x,                               \n"
-"                             int roi_y,                               \n"
-"                             int line_width,                          \n"
-"                             int block_count_x )                      \n"
-"{                                                                     \n"
-"    int gidx = get_global_id(0);                                      \n"
-"    int gidy = get_global_id(1);                                      \n"
-"    int cx = roi_x / xsize + gidx;                                    \n"
-"    int cy = roi_y / ysize + gidy;                                    \n"
-"                                                                      \n"
-"    float weight   = 1.0f / (xsize * ysize);                          \n"
-"                                                                      \n"
-"    int px = cx * xsize + xsize - roi_x;                              \n"
-"    int py = cy * ysize + ysize - roi_y;                              \n"
-"                                                                      \n"
-"    int i,j;                                                          \n"
-"    float4 col = 0.0f;                                                \n"
-"    for (j = py;j < py + ysize; ++j)                                  \n"
-"    {                                                                 \n"
-"        for (i = px;i < px + xsize; ++i)                              \n"
-"        {                                                             \n"
-"            col += in[j * line_width + i];                            \n"
-"        }                                                             \n"
-"    }                                                                 \n"
-"    out[gidy * block_count_x + gidx] = col * weight;                  \n"
-"                                                                      \n"
-"}                                                                     \n"
-"                                                                      \n"
-"__kernel void kernel_pixelise (__global float4 *in,                   \n"
-"                             __global float4 *out,                    \n"
-"                             int xsize,                               \n"
-"                             int ysize,                               \n"
-"                             int roi_x,                               \n"
-"                             int roi_y,                               \n"
-"                             int block_count_x)                       \n"
-"{                                                                     \n"
-"    int gidx = get_global_id(0);                                      \n"
-"    int gidy = get_global_id(1);                                      \n"
-"                                                                      \n"
-"    int src_width  = get_global_size(0);                              \n"
-"    int cx = (gidx + roi_x) / xsize - roi_x / xsize;                  \n"
-"    int cy = (gidy + roi_y) / ysize - roi_y / ysize;                  \n"
-"    out[gidx + gidy * src_width] = in[cx + cy * block_count_x];       \n"
-"}                                                                     \n";
+#include "opencl/pixelize.cl.h"
 
 static GeglClRunData *cl_data = NULL;
 
-static cl_int
+static gboolean
 cl_pixelise (cl_mem                in_tex,
              cl_mem                aux_tex,
              cl_mem                out_tex,
@@ -205,45 +157,62 @@ cl_pixelise (cl_mem                in_tex,
   gint block_count_y = CELL_Y(roi->y+roi->height - 1, ysize)-cy0 + 1;
   cl_int line_width = roi->width + 2 * xsize;
 
-  size_t gbl_size_tmp[2]={block_count_x,block_count_y};
+  size_t gbl_size_tmp[2] = {block_count_x,block_count_y};
 
   if (!cl_data)
   {
     const char *kernel_name[] = {"calc_block_color", "kernel_pixelise", NULL};
-    cl_data = gegl_cl_compile_and_build (kernel_source, kernel_name);
+    cl_data = gegl_cl_compile_and_build (pixelize_cl_source, kernel_name);
   }
 
   if (!cl_data) return 1;
 
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem),   (void*)&in_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem),   (void*)&aux_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_int),   (void*)&xsize);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_int),   (void*)&ysize);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_int),   (void*)&roi->x);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_int),   (void*)&roi->y);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 6, sizeof(cl_int),   (void*)&line_width);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 7, sizeof(cl_int),   (void*)&block_count_x);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem),   (void*)&in_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem),   (void*)&aux_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_int),   (void*)&xsize);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_int),   (void*)&ysize);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_int),   (void*)&roi->x);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_int),   (void*)&roi->y);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 6, sizeof(cl_int),   (void*)&line_width);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 7, sizeof(cl_int),   (void*)&block_count_x);
+  CL_CHECK;
   cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
                                         cl_data->kernel[0], 2,
                                         NULL, gbl_size_tmp, NULL,
                                         0, NULL, NULL);
-  if (cl_err != CL_SUCCESS) return cl_err;
-
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem),   (void*)&aux_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_mem),   (void*)&out_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 2, sizeof(cl_int),   (void*)&xsize);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 3, sizeof(cl_int),   (void*)&ysize);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 4, sizeof(cl_int),   (void*)&roi->x);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 5, sizeof(cl_int),   (void*)&roi->y);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 6, sizeof(cl_int),   (void*)&block_count_x);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  CL_CHECK;
+
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem),   (void*)&aux_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_mem),   (void*)&out_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 2, sizeof(cl_int),   (void*)&xsize);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 3, sizeof(cl_int),   (void*)&ysize);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 4, sizeof(cl_int),   (void*)&roi->x);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 5, sizeof(cl_int),   (void*)&roi->y);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 6, sizeof(cl_int),   (void*)&block_count_x);
+  CL_CHECK;
   cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
                                         cl_data->kernel[1], 2,
                                         NULL, gbl_size, NULL,
                                         0, NULL, NULL);
-  if (cl_err != CL_SUCCESS) return cl_err;
-  return cl_err;
+  CL_CHECK;
+
+  return FALSE;
+
+error:
+  return TRUE;
 }
 
 static gboolean
@@ -255,8 +224,8 @@ cl_process (GeglOperation       *operation,
   const Babl *in_format  = gegl_operation_get_format (operation, "input");
   const Babl *out_format = gegl_operation_get_format (operation, "output");
   gint err;
-  gint j;
   cl_int cl_err;
+  gint j;
 
   GeglOperationAreaFilter *op_area = GEGL_OPERATION_AREA_FILTER (operation);
   GeglChantO *o = GEGL_CHANT_PROPERTIES (operation);
@@ -265,18 +234,18 @@ cl_process (GeglOperation       *operation,
                 gint read = gegl_buffer_cl_iterator_add_2 (i, input, roi, in_format,  GEGL_CL_BUFFER_READ, op_area->left, op_area->right, op_area->top, op_area->bottom, GEGL_ABYSS_NONE);
                 gint aux  = gegl_buffer_cl_iterator_add_2 (i, NULL,  roi, in_format,  GEGL_CL_BUFFER_AUX,  op_area->left, op_area->right, op_area->top, op_area->bottom, GEGL_ABYSS_NONE);
   while (gegl_buffer_cl_iterator_next (i, &err))
-  {
-    if (err) return FALSE;
-    for (j=0; j < i->n; j++)
     {
-      cl_err = cl_pixelise(i->tex[read][j], i->tex[aux][j], i->tex[0][j],&i->roi[read][j], &i->roi[0][j], o->size_x,o->size_y);
-      if (cl_err != CL_SUCCESS)
-      {
-        g_warning("[OpenCL] Error in gegl:pixelise: %s", gegl_cl_errstring(cl_err));
-        return FALSE;
-      }
+      if (err) return FALSE;
+      for (j=0; j < i->n; j++)
+        {
+          err = cl_pixelise(i->tex[read][j], i->tex[aux][j], i->tex[0][j],&i->roi[read][j], &i->roi[0][j], o->size_x, o->size_y);
+          if (err) /* cl_pixelise returns TRUE on failure; cl_err is never assigned in this function */
+            {
+              g_warning("[OpenCL] Error in gegl:pixelize");
+              return FALSE;
+            }
+        }
     }
-  }
   return TRUE;
 }
 
diff --git a/operations/common/snn-mean.c b/operations/common/snn-mean.c
index 4a0877e..b76d56b 100644
--- a/operations/common/snn-mean.c
+++ b/operations/common/snn-mean.c
@@ -222,127 +222,11 @@ snn_mean (GeglBuffer          *src,
 #include "opencl/gegl-cl.h"
 #include "buffer/gegl-buffer-cl-iterator.h"
 
-static const char* kernel_source =
-"float colordiff (float4 pixA,                                         \n"
-"                 float4 pixB)                                         \n"
-"{                                                                     \n"
-"    float4 pix = pixA-pixB;                                           \n"
-"    pix *= pix;                                                       \n"
-"    return pix.x+pix.y+pix.z;                                         \n"
-"}                                                                     \n"
-"                                                                      \n"
-"__kernel void snn_mean_CL (__global const   float4 *src_buf,          \n"
-"                                            int src_width,            \n"
-"                                            int src_height,           \n"
-"                           __global         float4 *dst_buf,          \n"
-"                                            int radius,               \n"
-"                                            int pairs)                \n"
-"{                                                                     \n"
-"    int gidx   =get_global_id(0);                                     \n"
-"    int gidy   =get_global_id(1);                                     \n"
-"    int offset =gidy * get_global_size(0) + gidx;                     \n"
-"                                                                      \n"
-"    __global const float4 *center_pix=                                \n"
-"        src_buf + ((radius+gidx) + (gidy+radius)* src_width);         \n"
-"    float4 accumulated=0;                                             \n"
-"                                                                      \n"
-"    int count=0;                                                      \n"
-"    if(pairs==2)                                                      \n"
-"    {                                                                 \n"
-"        for(int i=-radius;i<0;i++)                                    \n"
-"        {                                                             \n"
-"            for(int j=-radius;j<0;j++)                                \n"
-"            {                                                         \n"
-"                __global const float4 *selected_pix = center_pix;     \n"
-"                float  best_diff = 1000.0f;                           \n"
-"                                                                      \n"
-"                    int xs[4]={                                       \n"
-"                        gidx+j+radius, gidx-j+radius,                 \n"
-"                        gidx-j+radius, gidx+j+radius                  \n"
-"                    };                                                \n"
-"                    int ys[4]={                                       \n"
-"                        gidy+i+radius, gidy-i+radius,                 \n"
-"                        gidy+i+radius, gidy-i+radius};                \n"
-"                                                                      \n"
-"                    for (int k=0;k<4;k++)                             \n"
-"                    {                                                 \n"
-"                        if (xs[k] >= 0 && xs[k] < src_width &&        \n"
-"                            ys[k] >= 0 && ys[k] < src_height)         \n"
-"                        {                                             \n"
-"                            __global const float4 *tpix =             \n"
-"                                src_buf + (xs[k] + ys[k] * src_width);\n"
-"                            float diff=colordiff(*tpix, *center_pix); \n"
-"                            if (diff < best_diff)                     \n"
-"                            {                                         \n"
-"                                best_diff = diff;                     \n"
-"                                selected_pix = tpix;                  \n"
-"                            }                                         \n"
-"                        }                                             \n"
-"                    }                                                 \n"
-"                                                                      \n"
-"                accumulated += *selected_pix;                         \n"
-"                                                                      \n"
-"                ++count;                                              \n"
-"                if (i==0 && j==0)                                     \n"
-"                    break;                                            \n"
-"            }                                                         \n"
-"        }                                                             \n"
-"        dst_buf[offset] = accumulated/count;                          \n"
-"        return;                                                       \n"
-"    }                                                                 \n"
-"    else if(pairs==1)                                                 \n"
-"    {                                                                 \n"
-"        for(int i=-radius;i<=0;i++)                                   \n"
-"        {                                                             \n"
-"            for(int j=-radius;j<=radius;j++)                          \n"
-"            {                                                         \n"
-"                __global const float4 *selected_pix = center_pix;     \n"
-"                float  best_diff = 1000.0f;                           \n"
-"                                                                      \n"
-"                /* skip computations for the center pixel */          \n"
-"                if (i != 0 && j != 0)                                 \n"
-"                {                                                     \n"
-"                    int xs[4]={                                       \n"
-"                        gidx+i+radius, gidx-i+radius,                 \n"
-"                        gidx-i+radius, gidx+i+radius                  \n"
-"                    };                                                \n"
-"                    int ys[4]={                                       \n"
-"                        gidy+j+radius, gidy-j+radius,                 \n"
-"                        gidy+j+radius, gidy-j+radius                  \n"
-"                    };                                                \n"
-"                                                                      \n"
-"                    for (i=0;i<2;i++)                                 \n"
-"                    {                                                 \n"
-"                        if (xs[i] >= 0 && xs[i] < src_width &&        \n"
-"                            ys[i] >= 0 && ys[i] < src_height)         \n"
-"                        {                                             \n"
-"                            __global const float4 *tpix =             \n"
-"                                src_buf + (xs[i] + ys[i] * src_width);\n"
-"                            float diff=colordiff (*tpix, *center_pix);\n"
-"                            if (diff < best_diff)                     \n"
-"                            {                                         \n"
-"                                best_diff = diff;                     \n"
-"                                selected_pix = tpix;                  \n"
-"                            }                                         \n"
-"                        }                                             \n"
-"                    }                                                 \n"
-"                }                                                     \n"
-"                accumulated += *selected_pix;                         \n"
-"                ++count;                                              \n"
-"                if (i==0 && j==0)                                     \n"
-"                    break;                                            \n"
-"            }                                                         \n"
-"        }                                                             \n"
-"        dst_buf[offset] = accumulated/count;                          \n"
-"        return;                                                       \n"
-"    }                                                                 \n"
-"    return;                                                           \n"
-"}                                                                     \n";
-
+#include "opencl/snn-mean.cl.h"
 
 static GeglClRunData *cl_data = NULL;
 
-static cl_int
+static gboolean
 cl_snn_mean (cl_mem                in_tex,
              cl_mem                out_tex,
              const GeglRectangle  *src_rect,
@@ -355,31 +239,38 @@ cl_snn_mean (cl_mem                in_tex,
 
   if (!cl_data)
     {
-      const char *kernel_name[] = {"snn_mean_CL", NULL};
-      cl_data = gegl_cl_compile_and_build (kernel_source, kernel_name);
+      const char *kernel_name[] = {"snn_mean", NULL};
+      cl_data = gegl_cl_compile_and_build (snn_mean_cl_source, kernel_name);
     }
-
-  if (!cl_data) return 1;
+  if (!cl_data) return TRUE;
 
 
   global_ws[0] = roi->width;
   global_ws[1] = roi->height;
 
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem),   (void*)&in_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_int),   (void*)&src_rect->width);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_int),   (void*)&src_rect->height);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_mem),   (void*)&out_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_int),   (void*)&radius);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_int),   (void*)&pairs);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem),   (void*)&in_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_int),   (void*)&src_rect->width);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_int),   (void*)&src_rect->height);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_mem),   (void*)&out_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_int),   (void*)&radius);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_int),   (void*)&pairs);
+  CL_CHECK;
 
   cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
                                         cl_data->kernel[0], 2,
                                         NULL, global_ws, NULL,
                                         0, NULL, NULL);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  CL_CHECK;
 
-  return cl_err;
+  return FALSE;
+
+error:
+  return TRUE;
 }
 
 static gboolean
@@ -392,7 +283,6 @@ cl_process (GeglOperation       *operation,
   const Babl *out_format = gegl_operation_get_format (operation, "output");
   gint err;
   gint j;
-  cl_int cl_err;
 
   GeglOperationAreaFilter *op_area = GEGL_OPERATION_AREA_FILTER (operation);
   GeglChantO *o = GEGL_CHANT_PROPERTIES (operation);
@@ -404,10 +294,10 @@ cl_process (GeglOperation       *operation,
       if (err) return FALSE;
       for (j=0; j < i->n; j++)
         {
-          cl_err = cl_snn_mean(i->tex[read][j], i->tex[0][j], &i->roi[read][j], &i->roi[0][j], ceil(o->radius), o->pairs);
-          if (cl_err != CL_SUCCESS)
+          err = cl_snn_mean(i->tex[read][j], i->tex[0][j], &i->roi[read][j], &i->roi[0][j], ceil(o->radius), o->pairs);
+          if (err)
             {
-              g_warning("[OpenCL] Error in gegl:snn-mean: %s", gegl_cl_errstring(cl_err));
+              g_warning("[OpenCL] Error in gegl:snn-mean");
               return FALSE;
             }
         }
diff --git a/operations/common/vignette.c b/operations/common/vignette.c
index 59e711d..71abace 100644
--- a/operations/common/vignette.c
+++ b/operations/common/vignette.c
@@ -82,73 +82,11 @@ static float scale_to_aspect (float scale)
 
 #include "opencl/gegl-cl.h"
 
-static const char* kernel_source =
-"__kernel void vignette_cl (__global const float4 *in,           \n"
-"                           __global       float4 *out,          \n"
-"                                          float4 color,         \n"
-"                                          float  scale,         \n"
-"                                          float  cost,          \n"
-"                                          float  sint,          \n"
-"                                          int    roi_x,         \n"
-"                                          int    roi_y,         \n"
-"                                          int    midx,          \n"
-"                                          int    midy,          \n"
-"                                          int    o_shape,       \n"
-"                                          float  gamma,         \n"
-"                                          float  length,        \n"
-"                                          float  radius0,       \n"
-"                                          float  rdiff)         \n"
-"{                                                               \n"
-"  int gidx = get_global_id(0);                                  \n"
-"  int gidy = get_global_id(1);                                  \n"
-"  int gid = gidx + gidy * get_global_size(0);                   \n"
-"  float strength = 0.0f;                                        \n"
-"  float u,v,costy,sinty;                                        \n"
-"  int x,y;                                                      \n"
-"  x = gidx + roi_x;                                             \n"
-"  y = gidy + roi_y;                                             \n"
-"  sinty = sint * (y-midy) - midx;                               \n"
-"  costy = cost * (y-midy) + midy;                               \n"
-"                                                                \n"
-"  u = cost * (x-midx) - sinty;                                  \n"
-"  v = sint * (x-midx) + costy;                                  \n"
-"                                                                \n"
-"  if (length == 0.0f)                                           \n"
-"    strength = 0.0f;                                            \n"
-"  else                                                          \n"
-"    {                                                           \n"
-"      switch (o_shape)                                          \n"
-"        {                                                       \n"
-"          case 0:                                               \n"
-"          strength = hypot ((u-midx) / scale, v-midy);          \n"
-"          break;                                                \n"
-"                                                                \n"
-"          case 1:                                               \n"
-"          strength = fmax (fabs(u-midx)/scale, fabs(v-midy));   \n"
-"          break;                                                \n"
-"                                                                \n"
-"          case 2:                                               \n"
-"          strength = fabs (u-midx) / scale + fabs(v-midy);      \n"
-"          break;                                                \n"
-"        }                                                       \n"
-"      strength /= length;                                       \n"
-"      strength = (strength-radius0) / rdiff;                    \n"
-"    }                                                           \n"
-"                                                                \n"
-"  if (strength < 0.0f) strength = 0.0f;                         \n"
-"  if (strength > 1.0f) strength = 1.0f;                         \n"
-"                                                                \n"
-"  if (gamma > 0.9999f && gamma < 2.0001f)                       \n"
-"    strength *= strength;                                       \n"
-"  else if (gamma != 1.0f)                                       \n"
-"    strength = pow(strength, gamma);                            \n"
-"                                                                \n"
-"  out[gid] = in[gid]*(1.0f-strength) + color * strength;        \n"
-"}                                                               \n";
+#include "opencl/vignette.cl.h"
 
 static GeglClRunData * cl_data = NULL;
 
-static cl_int
+static gboolean
 cl_process (GeglOperation       *operation,
             cl_mem               in_tex,
             cl_mem               out_tex,
@@ -200,9 +138,9 @@ cl_process (GeglOperation       *operation,
   if (!cl_data)
     {
       const char *kernel_name[] = {"vignette_cl",NULL};
-      cl_data = gegl_cl_compile_and_build (kernel_source, kernel_name);
+      cl_data = gegl_cl_compile_and_build (vignette_cl_source, kernel_name);
     }
-  if (!cl_data) return 1;
+  if (!cl_data) return TRUE;
 
   {
   const size_t gbl_size[2] = {roi->width, roi->height};
@@ -217,31 +155,48 @@ cl_process (GeglOperation       *operation,
   f_color.s[2] = color[2];
   f_color.s[3] = color[3];
 
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0,  sizeof(cl_mem),   (void*)&in_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1,  sizeof(cl_mem),   (void*)&out_tex);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 2,  sizeof(cl_float4),(void*)&f_color);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 3,  sizeof(cl_float), (void*)&scale);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 4,  sizeof(cl_float), (void*)&cost);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 5,  sizeof(cl_float), (void*)&sint);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 6,  sizeof(cl_int),   (void*)&roi_x);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 7,  sizeof(cl_int),   (void*)&roi_y);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 8,  sizeof(cl_int),   (void*)&midx);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 9,  sizeof(cl_int),   (void*)&midy);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 10, sizeof(cl_int),   (void*)&shape);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 11, sizeof(cl_float), (void*)&gamma);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 12, sizeof(cl_float), (void*)&length);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 13, sizeof(cl_float), (void*)&radius0);
-  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 14, sizeof(cl_float), (void*)&rdiff);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0,  sizeof(cl_mem),   (void*)&in_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1,  sizeof(cl_mem),   (void*)&out_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2,  sizeof(cl_float4),(void*)&f_color);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3,  sizeof(cl_float), (void*)&scale);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 4,  sizeof(cl_float), (void*)&cost);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 5,  sizeof(cl_float), (void*)&sint);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 6,  sizeof(cl_int),   (void*)&roi_x);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 7,  sizeof(cl_int),   (void*)&roi_y);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 8,  sizeof(cl_int),   (void*)&midx);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 9,  sizeof(cl_int),   (void*)&midy);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 10, sizeof(cl_int),   (void*)&shape);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 11, sizeof(cl_float), (void*)&gamma);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 12, sizeof(cl_float), (void*)&length);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 13, sizeof(cl_float), (void*)&radius0);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 14, sizeof(cl_float), (void*)&rdiff);
+  CL_CHECK;
 
   cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
                                        cl_data->kernel[0], 2,
                                        NULL, gbl_size, NULL,
                                        0, NULL, NULL);
-  if (cl_err != CL_SUCCESS) return cl_err;
+  CL_CHECK;
   }
 
-  return  CL_SUCCESS;
+  return  FALSE;
+
+error:
+  return TRUE;
 }
 
 static gboolean



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]