[gegl] Using opencl buffers instead of Image2D
- From: Ãyvind KolÃs <ok src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gegl] Using opencl buffers instead of Image2D
- Date: Tue, 20 Mar 2012 13:52:24 +0000 (UTC)
commit 6914269cf5cd5c89083774a1b8bfd3f05b3e41c7
Author: Victor Oliveira <victormatheus gmail com>
Date: Tue Jan 31 11:13:58 2012 -0200
Using opencl buffers instead of Image2D
changing memory flags also.
gegl/buffer/gegl-buffer-cl-iterator.c | 156 +++++++++++---------------
gegl/buffer/gegl-buffer-cl-iterator.h | 2 +-
gegl/opencl/gegl-cl-color-kernel.h | 109 +++++++++---------
gegl/opencl/gegl-cl-color.c | 16 ++--
gegl/opencl/gegl-cl-color.h | 2 +-
gegl/operation/gegl-operation-point-filter.h | 2 +-
operations/common/brightness-contrast.c | 23 ++--
7 files changed, 140 insertions(+), 170 deletions(-)
---
diff --git a/gegl/buffer/gegl-buffer-cl-iterator.c b/gegl/buffer/gegl-buffer-cl-iterator.c
index 55a8159..bce2bbe 100644
--- a/gegl/buffer/gegl-buffer-cl-iterator.c
+++ b/gegl/buffer/gegl-buffer-cl-iterator.c
@@ -20,7 +20,7 @@ typedef struct GeglBufferClIterators
{
/* current region of interest */
gint n;
- size_t size [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX][2]; /* length of current data in pixels */
+ size_t size [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX]; /* length of current data in pixels */
cl_mem tex [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX];
GeglRectangle roi [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX];
@@ -42,9 +42,9 @@ typedef struct GeglBufferClIterators
GeglBuffer *buffer [GEGL_CL_BUFFER_MAX_ITERATORS];
/* buffer->format */
- cl_image_format buf_cl_format [GEGL_CL_BUFFER_MAX_ITERATORS];
+ size_t buf_cl_format_size [GEGL_CL_BUFFER_MAX_ITERATORS];
/* format */
- cl_image_format op_cl_format [GEGL_CL_BUFFER_MAX_ITERATORS];
+ size_t op_cl_format_size [GEGL_CL_BUFFER_MAX_ITERATORS];
gegl_cl_color_op conv [GEGL_CL_BUFFER_MAX_ITERATORS];
@@ -93,8 +93,8 @@ gegl_buffer_cl_iterator_add (GeglBufferClIterator *iterator,
else
i->conv[self] = gegl_cl_color_supported (buffer->format, format);
- gegl_cl_color_babl (buffer->format, &i->buf_cl_format[self], NULL);
- gegl_cl_color_babl (format, &i->op_cl_format [self], NULL);
+ gegl_cl_color_babl (buffer->format, NULL, &i->buf_cl_format_size[self]);
+ gegl_cl_color_babl (format, NULL, &i->op_cl_format_size [self]);
if (self!=0)
{
@@ -137,8 +137,6 @@ gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err)
gint no, j;
cl_int cl_err = 0;
- const size_t origin_zero[3] = {0, 0, 0};
-
if (i->is_finished)
g_error ("%s called on finished buffer iterator", G_STRFUNC);
if (i->iteration_no == 0)
@@ -185,38 +183,36 @@ gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err)
for (j=0; j < i->n; j++)
{
gpointer data;
- size_t pitch;
- const size_t region[3] = {i->roi[no][j].width, i->roi[no][j].height, 1};
/* tile-ize */
if (i->conv[no] == GEGL_CL_COLOR_NOT_SUPPORTED)
{
- data = gegl_clEnqueueMapImage(gegl_cl_get_command_queue(), i->tex_op[no][j], CL_TRUE,
- CL_MAP_READ,
- origin_zero, region, &pitch, NULL,
- 0, NULL, NULL, &cl_err);
+ data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_op[no][j], CL_TRUE,
+ CL_MAP_READ,
+ 0, i->size[no][j] * i->op_cl_format_size [no],
+ 0, NULL, NULL, &cl_err);
if (cl_err != CL_SUCCESS) CL_ERROR;
/* color conversion using BABL */
- gegl_buffer_set (i->buffer[no], &i->roi[no][j], i->format[no], data, pitch);
+ gegl_buffer_set (i->buffer[no], &i->roi[no][j], i->format[no], data, GEGL_AUTO_ROWSTRIDE);
cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_op[no][j], data,
- 0, NULL, NULL);
+ 0, NULL, NULL);
if (cl_err != CL_SUCCESS) CL_ERROR;
}
else
{
- data = gegl_clEnqueueMapImage(gegl_cl_get_command_queue(), i->tex_buf[no][j], CL_TRUE,
- CL_MAP_READ,
- origin_zero, region, &pitch, NULL,
- 0, NULL, NULL, &cl_err);
+ data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_buf[no][j], CL_TRUE,
+ CL_MAP_READ,
+ 0, i->size[no][j] * i->buf_cl_format_size [no],
+ 0, NULL, NULL, &cl_err);
if (cl_err != CL_SUCCESS) CL_ERROR;
/* color conversion has already been performed in the GPU */
- gegl_buffer_set (i->buffer[no], &i->roi[no][j], i->buffer[no]->format, data, pitch);
+ gegl_buffer_set (i->buffer[no], &i->roi[no][j], i->buffer[no]->format, data, GEGL_AUTO_ROWSTRIDE);
cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_buf[no][j], data,
- 0, NULL, NULL);
+ 0, NULL, NULL);
if (cl_err != CL_SUCCESS) CL_ERROR;
}
}
@@ -254,9 +250,7 @@ gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err)
i->roi_all[i->roi_no+j].width,
i->roi_all[i->roi_no+j].height};
i->roi [no][j] = r;
-
- i->size[no][j][0] = r.width;
- i->size[no][j][1] = r.height;
+ i->size[no][j] = r.width * r.height;
}
if (i->flags[no] == GEGL_CL_BUFFER_READ)
@@ -264,8 +258,6 @@ gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err)
for (j=0; j < i->n; j++)
{
gpointer data;
- size_t pitch;
- const size_t region[3] = {i->roi[no][j].width, i->roi[no][j].height, 1};
/* un-tile */
switch (i->conv[no])
@@ -274,23 +266,21 @@ gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err)
{
g_assert (i->tex_op[no][j] == NULL);
- i->tex_op[no][j] = gegl_clCreateImage2D (gegl_cl_get_context (),
- CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
- &i->op_cl_format [no],
- i->roi[no][j].width,
- i->roi[no][j].height,
- 0, NULL, &cl_err);
+ i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
+ CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY,
+ i->size[no][j] * i->op_cl_format_size [no],
+ NULL, &cl_err);
if (cl_err != CL_SUCCESS) CL_ERROR;
/* pre-pinned memory */
- data = gegl_clEnqueueMapImage(gegl_cl_get_command_queue(), i->tex_op[no][j], CL_TRUE,
- CL_MAP_WRITE,
- origin_zero, region, &pitch, NULL,
- 0, NULL, NULL, &cl_err);
+ data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_op[no][j], CL_TRUE,
+ CL_MAP_WRITE,
+ 0, i->size[no][j] * i->op_cl_format_size [no],
+ 0, NULL, NULL, &cl_err);
if (cl_err != CL_SUCCESS) CL_ERROR;
/* color conversion using BABL */
- gegl_buffer_get (i->buffer[no], 1.0, &i->roi[no][j], i->format[no], data, pitch);
+ gegl_buffer_get (i->buffer[no], 1.0, &i->roi[no][j], i->format[no], data, GEGL_AUTO_ROWSTRIDE);
i->tex[no][j] = i->tex_op[no][j];
@@ -301,23 +291,21 @@ gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err)
{
g_assert (i->tex_buf[no][j] == NULL);
- i->tex_buf[no][j] = gegl_clCreateImage2D (gegl_cl_get_context (),
- CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
- &i->buf_cl_format [no],
- i->roi[no][j].width,
- i->roi[no][j].height,
- 0, NULL, &cl_err);
+ i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
+ CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY,
+ i->size[no][j] * i->buf_cl_format_size [no],
+ NULL, &cl_err);
if (cl_err != CL_SUCCESS) CL_ERROR;
/* pre-pinned memory */
- data = gegl_clEnqueueMapImage(gegl_cl_get_command_queue(), i->tex_buf[no][j], CL_TRUE,
- CL_MAP_WRITE,
- origin_zero, region, &pitch, NULL,
- 0, NULL, NULL, &cl_err);
+ data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_buf[no][j], CL_TRUE,
+ CL_MAP_WRITE,
+ 0, i->size[no][j] * i->buf_cl_format_size [no],
+ 0, NULL, NULL, &cl_err);
if (cl_err != CL_SUCCESS) CL_ERROR;
/* color conversion will be performed in the GPU later */
- gegl_buffer_get (i->buffer[no], 1.0, &i->roi[no][j], i->buffer[no]->format, data, pitch);
+ gegl_buffer_get (i->buffer[no], 1.0, &i->roi[no][j], i->buffer[no]->format, data, GEGL_AUTO_ROWSTRIDE);
cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_buf[no][j], data,
0, NULL, NULL);
@@ -332,32 +320,28 @@ gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err)
{
g_assert (i->tex_buf[no][j] == NULL);
- i->tex_buf[no][j] = gegl_clCreateImage2D (gegl_cl_get_context (),
- CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
- &i->buf_cl_format [no],
- i->roi[no][j].width,
- i->roi[no][j].height,
- 0, NULL, &cl_err);
+ i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
+ CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY,
+ i->size[no][j] * i->buf_cl_format_size [no],
+ NULL, &cl_err);
if (cl_err != CL_SUCCESS) CL_ERROR;
g_assert (i->tex_op[no][j] == NULL);
- i->tex_op[no][j] = gegl_clCreateImage2D (gegl_cl_get_context (),
- CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
- &i->op_cl_format [no],
- i->roi[no][j].width,
- i->roi[no][j].height,
- 0, NULL, &cl_err);
+ i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
+ CL_MEM_READ_WRITE,
+ i->size[no][j] * i->op_cl_format_size [no],
+ NULL, &cl_err);
if (cl_err != CL_SUCCESS) CL_ERROR;
/* pre-pinned memory */
- data = gegl_clEnqueueMapImage(gegl_cl_get_command_queue(), i->tex_buf[no][j], CL_TRUE,
- CL_MAP_WRITE,
- origin_zero, region, &pitch, NULL,
- 0, NULL, NULL, &cl_err);
+ data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_buf[no][j], CL_TRUE,
+ CL_MAP_WRITE,
+ 0, i->size[no][j] * i->buf_cl_format_size [no],
+ 0, NULL, NULL, &cl_err);
if (cl_err != CL_SUCCESS) CL_ERROR;
/* color conversion will be performed in the GPU later */
- gegl_buffer_get (i->buffer[no], 1.0, &i->roi[no][j], i->buffer[no]->format, data, pitch);
+ gegl_buffer_get (i->buffer[no], 1.0, &i->roi[no][j], i->buffer[no]->format, data, GEGL_AUTO_ROWSTRIDE);
cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_buf[no][j], data,
0, NULL, NULL);
@@ -369,7 +353,7 @@ gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err)
i->buffer[no]->format, i->format[no]);
if (cl_err == FALSE) CL_ERROR;
- i->tex[no][j] = i->tex_buf[no][j];
+ i->tex[no][j] = i->tex_op[no][j];
break;
}
@@ -390,12 +374,10 @@ gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err)
{
g_assert (i->tex_op[no][j] == NULL);
- i->tex_op[no][j] = gegl_clCreateImage2D (gegl_cl_get_context (),
- CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
- &i->op_cl_format [no],
- i->roi[no][j].width,
- i->roi[no][j].height,
- 0, NULL, &cl_err);
+ i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
+ CL_MEM_ALLOC_HOST_PTR | CL_MEM_WRITE_ONLY,
+ i->size[no][j] * i->op_cl_format_size [no],
+ NULL, &cl_err);
if (cl_err != CL_SUCCESS) CL_ERROR;
i->tex[no][j] = i->tex_op[no][j];
@@ -407,12 +389,10 @@ gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err)
{
g_assert (i->tex_buf[no][j] == NULL);
- i->tex_buf[no][j] = gegl_clCreateImage2D (gegl_cl_get_context (),
- CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
- &i->buf_cl_format [no],
- i->roi[no][j].width,
- i->roi[no][j].height,
- 0, NULL, &cl_err);
+ i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
+ CL_MEM_ALLOC_HOST_PTR | CL_MEM_WRITE_ONLY,
+ i->size[no][j] * i->buf_cl_format_size [no],
+ NULL, &cl_err);
if (cl_err != CL_SUCCESS) CL_ERROR;
i->tex[no][j] = i->tex_buf[no][j];
@@ -424,21 +404,17 @@ gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err)
{
g_assert (i->tex_buf[no][j] == NULL);
- i->tex_buf[no][j] = gegl_clCreateImage2D (gegl_cl_get_context (),
- CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
- &i->buf_cl_format [no],
- i->roi[no][j].width,
- i->roi[no][j].height,
- 0, NULL, &cl_err);
+ i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
+ CL_MEM_ALLOC_HOST_PTR | CL_MEM_WRITE_ONLY,
+ i->size[no][j] * i->buf_cl_format_size [no],
+ NULL, &cl_err);
if (cl_err != CL_SUCCESS) CL_ERROR;
g_assert (i->tex_op[no][j] == NULL);
- i->tex_op[no][j] = gegl_clCreateImage2D (gegl_cl_get_context (),
- CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
- &i->op_cl_format [no],
- i->roi[no][j].width,
- i->roi[no][j].height,
- 0, NULL, &cl_err);
+ i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
+ CL_MEM_READ_WRITE,
+ i->size[no][j] * i->op_cl_format_size [no],
+ NULL, &cl_err);
if (cl_err != CL_SUCCESS) CL_ERROR;
i->tex[no][j] = i->tex_op[no][j];
diff --git a/gegl/buffer/gegl-buffer-cl-iterator.h b/gegl/buffer/gegl-buffer-cl-iterator.h
index 5558f9f..9ccbf45 100644
--- a/gegl/buffer/gegl-buffer-cl-iterator.h
+++ b/gegl/buffer/gegl-buffer-cl-iterator.h
@@ -16,7 +16,7 @@ enum
typedef struct GeglBufferClIterator
{
gint n;
- size_t size [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX][2]; /* length of current data in pixels */
+ size_t size [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX]; /* length of current data in pixels */
cl_mem tex [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX];
GeglRectangle roi [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX];
} GeglBufferClIterator;
diff --git a/gegl/opencl/gegl-cl-color-kernel.h b/gegl/opencl/gegl-cl-color-kernel.h
index b31f6f0..898be52 100644
--- a/gegl/opencl/gegl-cl-color-kernel.h
+++ b/gegl/opencl/gegl-cl-color-kernel.h
@@ -22,118 +22,115 @@ static const char* kernel_color_source =
" return value / 12.92f; \n"
"} \n"
" \n"
-"__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | \n"
-" CLK_ADDRESS_NONE | \n"
-" CLK_FILTER_NEAREST; \n"
" \n"
"/* RGBA float -> RaGaBaA float */ \n"
-"__kernel void non_premultiplied_to_premultiplied (__read_only image2d_t in, \n"
-" __write_only image2d_t out) \n"
+"__kernel void non_premultiplied_to_premultiplied (__global const float4 * in, \n"
+" __global float4 * out) \n"
"{ \n"
-" int2 gid = (int2)(get_global_id(0), get_global_id(1)); \n"
-" float4 in_v = read_imagef(in, sampler, gid); \n"
+" int gid = get_global_id(0); \n"
+" float4 in_v = in[gid]; \n"
" float4 out_v; \n"
" out_v = in_v * in_v.w; \n"
" out_v.w = in_v.w; \n"
-" write_imagef(out, gid, out_v); \n"
+" out[gid] = out_v; \n"
"} \n"
" \n"
"/* RaGaBaA float -> RGBA float */ \n"
-"__kernel void premultiplied_to_non_premultiplied (__read_only image2d_t in, \n"
-" __write_only image2d_t out) \n"
+"__kernel void premultiplied_to_non_premultiplied (__global const float4 * in, \n"
+" __global float4 * out) \n"
"{ \n"
-" int2 gid = (int2)(get_global_id(0), get_global_id(1)); \n"
-" float4 in_v = read_imagef(in, sampler, gid); \n"
+" int gid = get_global_id(0); \n"
+" float4 in_v = in[gid]; \n"
" float4 out_v; \n"
" out_v = (in_v.w > BABL_ALPHA_THRESHOLD)? in_v / in_v.w : (float4)(0.0f); \n"
" out_v.w = in_v.w; \n"
-" write_imagef(out, gid, out_v); \n"
+" out[gid] = out_v; \n"
"} \n"
" \n"
"/* RGBA float -> R'G'B'A float */ \n"
-"__kernel void rgba2rgba_gamma_2_2 (__read_only image2d_t in, \n"
-" __write_only image2d_t out) \n"
+"__kernel void rgba2rgba_gamma_2_2 (__global const float4 * in, \n"
+" __global float4 * out) \n"
"{ \n"
-" int2 gid = (int2)(get_global_id(0), get_global_id(1)); \n"
-" float4 in_v = read_imagef(in, sampler, gid); \n"
+" int gid = get_global_id(0); \n"
+" float4 in_v = in[gid]; \n"
" float4 out_v; \n"
" out_v = (float4)(linear_to_gamma_2_2(in_v.x), \n"
" linear_to_gamma_2_2(in_v.y), \n"
" linear_to_gamma_2_2(in_v.z), \n"
" in_v.w); \n"
-" write_imagef(out, gid, out_v); \n"
+" out[gid] = out_v; \n"
"} \n"
" \n"
"/* R'G'B'A float -> RGBA float */ \n"
-"__kernel void rgba_gamma_2_22rgba (__read_only image2d_t in, \n"
-" __write_only image2d_t out) \n"
+"__kernel void rgba_gamma_2_22rgba (__global const float4 * in, \n"
+" __global float4 * out) \n"
"{ \n"
-" int2 gid = (int2)(get_global_id(0), get_global_id(1)); \n"
-" float4 in_v = read_imagef(in, sampler, gid); \n"
+" int gid = get_global_id(0); \n"
+" float4 in_v = in[gid]; \n"
" float4 out_v; \n"
" out_v = (float4)(gamma_2_2_to_linear(in_v.x), \n"
" gamma_2_2_to_linear(in_v.y), \n"
" gamma_2_2_to_linear(in_v.z), \n"
" in_v.w); \n"
-" write_imagef(out, gid, out_v); \n"
+" out[gid] = out_v; \n"
"} \n"
" \n"
"/* RGBA float -> R'aG'aB'aA float */ \n"
-"__kernel void rgba2rgba_gamma_2_2_premultiplied (__read_only image2d_t in, \n"
-" __write_only image2d_t out) \n"
+"__kernel void rgba2rgba_gamma_2_2_premultiplied (__global const float4 * in, \n"
+" __global float4 * out) \n"
"{ \n"
-" int2 gid = (int2)(get_global_id(0), get_global_id(1)); \n"
-" float4 in_v = read_imagef(in, sampler, gid); \n"
+" int gid = get_global_id(0); \n"
+" float4 in_v = in[gid]; \n"
" float4 out_v; \n"
" out_v = (float4)(linear_to_gamma_2_2(in_v.x) * in_v.w, \n"
" linear_to_gamma_2_2(in_v.y) * in_v.w, \n"
" linear_to_gamma_2_2(in_v.z) * in_v.w, \n"
" in_v.w); \n"
-" write_imagef(out, gid, out_v); \n"
+" out[gid] = out_v; \n"
"} \n"
" \n"
"/* R'aG'aB'aA float -> RGBA float */ \n"
-"__kernel void rgba_gamma_2_2_premultiplied2rgba (__read_only image2d_t in, \n"
-" __write_only image2d_t out) \n"
+"__kernel void rgba_gamma_2_2_premultiplied2rgba (__global const float4 * in, \n"
+" __global float4 * out) \n"
"{ \n"
-" int2 gid = (int2)(get_global_id(0), get_global_id(1)); \n"
-" float4 in_v = read_imagef(in, sampler, gid); \n"
+" int gid = get_global_id(0); \n"
+" float4 in_v = in[gid]; \n"
" float4 out_v; \n"
" out_v = (in_v.w > BABL_ALPHA_THRESHOLD)? (float4)(linear_to_gamma_2_2(in_v.x) / in_v.w,\n"
" linear_to_gamma_2_2(in_v.y) / in_v.w,\n"
" linear_to_gamma_2_2(in_v.z) / in_v.w,\n"
" in_v.w) : \n"
" (float4)(0.0f); \n"
-" write_imagef(out, gid, out_v); \n"
+" out[gid] = out_v; \n"
"} \n"
" \n"
"/* RGBA float -> RGBA u8 */ \n"
-"__kernel void rgbaf_to_rgbau8 (__read_only image2d_t in, \n"
-" __write_only image2d_t out) \n"
+"__kernel void rgbaf_to_rgbau8 (__global const float4 * in, \n"
+" __global uchar4 * out) \n"
"{ \n"
-" int2 gid = (int2)(get_global_id(0), get_global_id(1)); \n"
-" float4 in_v = read_imagef(in, sampler, gid); \n"
-" float4 out_v = in_v; \n"
-" write_imagef(out, gid, out_v); \n"
+" int gid = get_global_id(0); \n"
+" float4 in_v = in[gid]; \n"
+" float4 out_v = in_v * 255.0f; \n"
+" out[gid] = convert_uchar4_sat_rte(out_v); \n"
"} \n"
" \n"
"/* RGBAu8 -> RGBA float */ \n"
-"__kernel void rgbau8_to_rgbaf (__read_only image2d_t in, \n"
-" __write_only image2d_t out) \n"
+"__kernel void rgbau8_to_rgbaf (__global const uchar4 * in, \n"
+" __global float4 * out) \n"
"{ \n"
-" int2 gid = (int2)(get_global_id(0), get_global_id(1)); \n"
-" float4 in_v = read_imagef(in, sampler, gid); \n"
-" float4 out_v = in_v; \n"
-" write_imagef(out, gid, out_v); \n"
+" int gid = get_global_id(0); \n"
+" float4 in_v = convert_float4(in[gid]); \n"
+" float4 out_v = in_v / 255.0f; \n"
+" out[gid] = out_v; \n"
"} \n"
" \n"
"/* RGBA float -> Y'CbCrA float */ \n"
" \n"
-"__kernel void rgba_to_ycbcra (__read_only image2d_t in, \n"
-" __write_only image2d_t out) \n"
+"__kernel void rgba_to_ycbcra (__global const float4 * in, \n"
+" __global float4 * out) \n"
"{ \n"
-" int2 gid = (int2)(get_global_id(0), get_global_id(1)); \n"
-" float4 in_v = read_imagef(in, sampler, gid); \n"
+" int gid = get_global_id(0); \n"
+" float4 in_v = in[gid]; \n"
" float4 out_v; \n"
" \n"
" float4 rgb = (float4)(linear_to_gamma_2_2(in_v.x), \n"
@@ -145,16 +142,16 @@ static const char* kernel_color_source =
" -0.168736f * rgb.x - 0.331264f * rgb.y + 0.5f * rgb.z, \n"
" 0.5f * rgb.x - 0.418688f * rgb.y - 0.081312f * rgb.z, \n"
" in_v.w); \n"
-" write_imagef(out, gid, out_v); \n"
+" out[gid] = out_v; \n"
"} \n"
" \n"
"/* Y'CbCrA float -> RGBA float */ \n"
" \n"
-"__kernel void ycbcra_to_rgba (__read_only image2d_t in, \n"
-" __write_only image2d_t out) \n"
+"__kernel void ycbcra_to_rgba (__global const float4 * in, \n"
+" __global float4 * out) \n"
"{ \n"
-" int2 gid = (int2)(get_global_id(0), get_global_id(1)); \n"
-" float4 in_v = read_imagef(in, sampler, gid); \n"
+" int gid = get_global_id(0); \n"
+" float4 in_v = in[gid]; \n"
" float4 out_v; \n"
" \n"
" float4 rgb = (float4)(1.0f * in_v.x + 0.0f * in_v.y + 1.40200f * in_v.z, \n"
@@ -166,5 +163,5 @@ static const char* kernel_color_source =
" linear_to_gamma_2_2(rgb.y), \n"
" linear_to_gamma_2_2(rgb.z), \n"
" in_v.w); \n"
-" write_imagef(out, gid, out_v); \n"
-"} \n";
+" out[gid] = out_v; \n"
+"} \n";
\ No newline at end of file
diff --git a/gegl/opencl/gegl-cl-color.c b/gegl/opencl/gegl-cl-color.c
index bb9de3c..40fc574 100644
--- a/gegl/opencl/gegl-cl-color.c
+++ b/gegl/opencl/gegl-cl-color.c
@@ -130,7 +130,7 @@ gegl_cl_color_supported (const Babl *in_format, const Babl *out_format)
#define CL_ERROR {g_printf("[OpenCL] Error in %s:%d %s - %s\n", __FILE__, __LINE__, __func__, gegl_cl_errstring(errcode)); return FALSE;}
gboolean
-gegl_cl_color_conv (cl_mem in_tex, cl_mem out_tex, const size_t size[2],
+gegl_cl_color_conv (cl_mem in_tex, cl_mem out_tex, const size_t size,
const Babl *in_format, const Babl *out_format)
{
int errcode;
@@ -140,13 +140,13 @@ gegl_cl_color_conv (cl_mem in_tex, cl_mem out_tex, const size_t size[2],
if (in_format == out_format)
{
- const size_t origin[3] = {0, 0, 0};
- const size_t region[3] = {size[0], size[1], 1};
+ size_t s;
+ gegl_cl_color_babl (in_format, NULL, &s);
/* just copy in_tex to out_tex */
- errcode = gegl_clEnqueueCopyImage (gegl_cl_get_command_queue(),
- in_tex, out_tex, origin, origin, region,
- 0, NULL, NULL);
+ errcode = gegl_clEnqueueCopyBuffer (gegl_cl_get_command_queue(),
+ in_tex, out_tex, 0, 0, size * s,
+ 0, NULL, NULL);
if (errcode != CL_SUCCESS) CL_ERROR
errcode = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
@@ -163,8 +163,8 @@ gegl_cl_color_conv (cl_mem in_tex, cl_mem out_tex, const size_t size[2],
if (errcode != CL_SUCCESS) CL_ERROR
errcode = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
- kernels_color->kernel[k], 2,
- NULL, size, NULL,
+ kernels_color->kernel[k], 1,
+ NULL, &size, NULL,
0, NULL, NULL);
if (errcode != CL_SUCCESS) CL_ERROR
diff --git a/gegl/opencl/gegl-cl-color.h b/gegl/opencl/gegl-cl-color.h
index 59b04b6..797ed6c 100644
--- a/gegl/opencl/gegl-cl-color.h
+++ b/gegl/opencl/gegl-cl-color.h
@@ -17,7 +17,7 @@ gboolean gegl_cl_color_babl (const Babl *buffer_format, cl_image_format *cl_form
gegl_cl_color_op gegl_cl_color_supported (const Babl *in_format, const Babl *out_format);
-gboolean gegl_cl_color_conv (cl_mem in_tex, cl_mem aux_tex, const size_t size[2],
+gboolean gegl_cl_color_conv (cl_mem in_tex, cl_mem aux_tex, const size_t size,
const Babl *in_format, const Babl *out_format);
#endif
diff --git a/gegl/operation/gegl-operation-point-filter.h b/gegl/operation/gegl-operation-point-filter.h
index 3e80856..f42677f 100644
--- a/gegl/operation/gegl-operation-point-filter.h
+++ b/gegl/operation/gegl-operation-point-filter.h
@@ -62,7 +62,7 @@ struct _GeglOperationPointFilterClass
cl_int (* cl_process) (GeglOperation *self,
cl_mem in_tex,
cl_mem out_tex,
- const size_t global_worksize[2],
+ size_t global_worksize,
const GeglRectangle *roi);
};
diff --git a/operations/common/brightness-contrast.c b/operations/common/brightness-contrast.c
index 3501af5..ce8b207 100644
--- a/operations/common/brightness-contrast.c
+++ b/operations/common/brightness-contrast.c
@@ -107,20 +107,17 @@ process (GeglOperation *op,
#include "opencl/gegl-cl.h"
static const char* kernel_source =
-"__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | \n"
-" CLK_ADDRESS_NONE | \n"
-" CLK_FILTER_NEAREST; \n"
-"__kernel void kernel_bc(__read_only image2d_t in, \n"
-" __write_only image2d_t out, \n"
-" float brightness, \n"
-" float contrast) \n"
+"__kernel void kernel_bc(__global const float4 *in, \n"
+" __global float4 *out, \n"
+" float brightness, \n"
+" float contrast) \n"
"{ \n"
-" int2 gid = (int2)(get_global_id(0), get_global_id(1)); \n"
-" float4 in_v = read_imagef(in, sampler, gid); \n"
+" int gid = get_global_id(0); \n"
+" float4 in_v = in[gid]; \n"
" float4 out_v; \n"
" out_v.xyz = (in_v.xyz - 0.5f) * contrast + brightness + 0.5f;\n"
" out_v.w = in_v.w; \n"
-" write_imagef(out, gid, out_v); \n"
+" out[gid] = out_v; \n"
"} \n";
static gegl_cl_run_data *cl_data = NULL;
@@ -130,7 +127,7 @@ static cl_int
cl_process (GeglOperation *op,
cl_mem in_tex,
cl_mem out_tex,
- const size_t global_worksize[2],
+ size_t global_worksize,
const GeglRectangle *roi)
{
/* Retrieve a pointer to GeglChantO structure which contains all the
@@ -159,8 +156,8 @@ cl_process (GeglOperation *op,
if (cl_err != CL_SUCCESS) return cl_err;
cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
- cl_data->kernel[0], 2,
- NULL, global_worksize, NULL,
+ cl_data->kernel[0], 1,
+ NULL, &global_worksize, NULL,
0, NULL, NULL);
if (cl_err != CL_SUCCESS) return cl_err;
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]