[gegl/openmp: 1/5] 2x2-downscale: unroll and add openmp
- From: Øyvind Kolås <ok src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gegl/openmp: 1/5] 2x2-downscale: unroll and add openmp
- Date: Wed, 4 Jun 2014 07:57:55 +0000 (UTC)
commit 827c494bd7267e1cdf39a380a49ab8d0f062ec8e
Author: Øyvind Kolås <pippin gimp org>
Date: Wed Jun 4 09:32:05 2014 +0200
2x2-downscale: unroll and add openmp
gegl/gegl-algorithms-2x2-downscale.inc | 256 +++++++++++++++++++++++++++++---
1 files changed, 237 insertions(+), 19 deletions(-)
---
diff --git a/gegl/gegl-algorithms-2x2-downscale.inc b/gegl/gegl-algorithms-2x2-downscale.inc
index 5f5ba63..d725997 100644
--- a/gegl/gegl-algorithms-2x2-downscale.inc
+++ b/gegl/gegl-algorithms-2x2-downscale.inc
@@ -1,3 +1,6 @@
+#include "gegl-init.h"
+#include "gegl-config.h"
+
void
DOWNSCALE_FUNCNAME (gint bpp,
gint src_width,
@@ -8,36 +11,251 @@ DOWNSCALE_FUNCNAME (gint bpp,
gint dst_rowstride)
{
gint y;
+ gint diag = src_rowstride + bpp;
const gint components = bpp / sizeof(DOWNSCALE_TYPE);
if (!src_data || !dst_data)
return;
- for (y = 0; y < src_height / 2; y++)
- {
- gint x;
- guchar *src = src_data;
- guchar *dst = dst_data;
+#ifdef HAVE_OPENMP
+ if (gegl_config()->use_openmp)
+ switch (components)
+ {
+ case 1:
+#pragma omp parallel for
+ for (y = 0; y < src_height / 2; y++)
+ {
+ gint x;
+ guchar *src = src_data + src_rowstride * y * 2;
+ guchar *dst = dst_data + dst_rowstride * y;
+
+ for (x = 0; x < src_width / 2; x++)
+ {
+ DOWNSCALE_TYPE * aa = ((DOWNSCALE_TYPE *)(src));
+ DOWNSCALE_TYPE * ab = ((DOWNSCALE_TYPE *)(src + bpp));
+ DOWNSCALE_TYPE * ba = ((DOWNSCALE_TYPE *)(src + src_rowstride));
+ DOWNSCALE_TYPE * bb = ((DOWNSCALE_TYPE *)(src + diag));
+
+ ((DOWNSCALE_TYPE *)dst)[0] = (aa[0] + ab[0] + ba[0] + bb[0]) / DOWNSCALE_DIVISOR;
+
+ dst += bpp;
+ src += bpp * 2;
+ }
+ }
+ break;
+ case 2:
+#pragma omp parallel for
+ for (y = 0; y < src_height / 2; y++)
+ {
+ gint x;
+ guchar *src = src_data + src_rowstride * y * 2;
+ guchar *dst = dst_data + dst_rowstride * y;
+
+ for (x = 0; x < src_width / 2; x++)
+ {
+ DOWNSCALE_TYPE * aa = ((DOWNSCALE_TYPE *)(src));
+ DOWNSCALE_TYPE * ab = ((DOWNSCALE_TYPE *)(src + bpp));
+ DOWNSCALE_TYPE * ba = ((DOWNSCALE_TYPE *)(src + src_rowstride));
+ DOWNSCALE_TYPE * bb = ((DOWNSCALE_TYPE *)(src + diag));
+
+ ((DOWNSCALE_TYPE *)dst)[0] = (aa[0] + ab[0] + ba[0] + bb[0]) / DOWNSCALE_DIVISOR;
+ ((DOWNSCALE_TYPE *)dst)[1] = (aa[1] + ab[1] + ba[1] + bb[1]) / DOWNSCALE_DIVISOR;
+
+ dst += bpp;
+ src += bpp * 2;
+ }
+ }
+ break;
+ case 3:
+#pragma omp parallel for
+ for (y = 0; y < src_height / 2; y++)
+ {
+ gint x;
+ guchar *src = src_data + src_rowstride * y * 2;
+ guchar *dst = dst_data + dst_rowstride * y;
+
+ for (x = 0; x < src_width / 2; x++)
+ {
+ DOWNSCALE_TYPE * aa = ((DOWNSCALE_TYPE *)(src));
+ DOWNSCALE_TYPE * ab = ((DOWNSCALE_TYPE *)(src + bpp));
+ DOWNSCALE_TYPE * ba = ((DOWNSCALE_TYPE *)(src + src_rowstride));
+ DOWNSCALE_TYPE * bb = ((DOWNSCALE_TYPE *)(src + diag));
+
+ ((DOWNSCALE_TYPE *)dst)[0] = (aa[0] + ab[0] + ba[0] + bb[0]) / DOWNSCALE_DIVISOR;
+ ((DOWNSCALE_TYPE *)dst)[1] = (aa[1] + ab[1] + ba[1] + bb[1]) / DOWNSCALE_DIVISOR;
+ ((DOWNSCALE_TYPE *)dst)[2] = (aa[2] + ab[2] + ba[2] + bb[2]) / DOWNSCALE_DIVISOR;
+
+ dst += bpp;
+ src += bpp * 2;
+ }
+ }
+ break;
+ case 4:
+#pragma omp parallel for
+ for (y = 0; y < src_height / 2; y++)
+ {
+ gint x;
+ guchar *src = src_data + src_rowstride * y * 2;
+ guchar *dst = dst_data + dst_rowstride * y;
+
+ for (x = 0; x < src_width / 2; x++)
+ {
+ DOWNSCALE_TYPE * aa = ((DOWNSCALE_TYPE *)(src));
+ DOWNSCALE_TYPE * ab = ((DOWNSCALE_TYPE *)(src + bpp));
+ DOWNSCALE_TYPE * ba = ((DOWNSCALE_TYPE *)(src + src_rowstride));
+ DOWNSCALE_TYPE * bb = ((DOWNSCALE_TYPE *)(src + diag));
+
+ ((DOWNSCALE_TYPE *)dst)[0] = (aa[0] + ab[0] + ba[0] + bb[0]) / DOWNSCALE_DIVISOR;
+ ((DOWNSCALE_TYPE *)dst)[1] = (aa[1] + ab[1] + ba[1] + bb[1]) / DOWNSCALE_DIVISOR;
+ ((DOWNSCALE_TYPE *)dst)[2] = (aa[2] + ab[2] + ba[2] + bb[2]) / DOWNSCALE_DIVISOR;
+ ((DOWNSCALE_TYPE *)dst)[3] = (aa[3] + ab[3] + ba[3] + bb[3]) / DOWNSCALE_DIVISOR;
+
+ dst += bpp;
+ src += bpp * 2;
+ }
+ }
+ break;
+ default:
+#pragma omp parallel for
+ for (y = 0; y < src_height / 2; y++)
+ {
+ gint x;
+ guchar *src = src_data + src_rowstride * y * 2;
+ guchar *dst = dst_data + dst_rowstride * y;
+
+ for (x = 0; x < src_width / 2; x++)
+ {
+ gint i;
+ DOWNSCALE_TYPE * aa = ((DOWNSCALE_TYPE *)(src));
+ DOWNSCALE_TYPE * ab = ((DOWNSCALE_TYPE *)(src + bpp));
+ DOWNSCALE_TYPE * ba = ((DOWNSCALE_TYPE *)(src + src_rowstride));
+ DOWNSCALE_TYPE * bb = ((DOWNSCALE_TYPE *)(src + diag));
+
+ for (i = 0; i < components; i++)
+ ((DOWNSCALE_TYPE *)dst)[i] = (aa[i] + ab[i] + ba[i] + bb[i]) / DOWNSCALE_DIVISOR;
- for (x = 0; x < src_width / 2; x++)
+ dst += bpp;
+ src += bpp * 2;
+ }
+ }
+ break;
+ }
+#endif
+else
+ switch (components)
+ {
+ case 1:
+ for (y = 0; y < src_height / 2; y++)
{
- gint i;
+ gint x;
+ guchar *src = src_data + src_rowstride * y * 2;
+ guchar *dst = dst_data + dst_rowstride * y;
- for (i = 0; i < components; i++)
+ for (x = 0; x < src_width / 2; x++)
{
- DOWNSCALE_SUM aa = ((DOWNSCALE_TYPE *)(src))[i];
- DOWNSCALE_SUM ab = ((DOWNSCALE_TYPE *)(src + bpp))[i];
- DOWNSCALE_SUM ba = ((DOWNSCALE_TYPE *)(src + src_rowstride))[i];
- DOWNSCALE_SUM bb = ((DOWNSCALE_TYPE *)(src + src_rowstride + bpp))[i];
+ DOWNSCALE_TYPE * aa = ((DOWNSCALE_TYPE *)(src));
+ DOWNSCALE_TYPE * ab = ((DOWNSCALE_TYPE *)(src + bpp));
+ DOWNSCALE_TYPE * ba = ((DOWNSCALE_TYPE *)(src + src_rowstride));
+ DOWNSCALE_TYPE * bb = ((DOWNSCALE_TYPE *)(src + diag));
- ((DOWNSCALE_TYPE *)dst)[i] = (aa + ab + ba + bb) / DOWNSCALE_DIVISOR;
+ ((DOWNSCALE_TYPE *)dst)[0] = (aa[0] + ab[0] + ba[0] + bb[0]) / DOWNSCALE_DIVISOR;
+
+ dst += bpp;
+ src += bpp * 2;
}
+ }
+ break;
+ case 2:
+ for (y = 0; y < src_height / 2; y++)
+ {
+ gint x;
+ guchar *src = src_data + src_rowstride * y * 2;
+ guchar *dst = dst_data + dst_rowstride * y;
+
+ for (x = 0; x < src_width / 2; x++)
+ {
+ DOWNSCALE_TYPE * aa = ((DOWNSCALE_TYPE *)(src));
+ DOWNSCALE_TYPE * ab = ((DOWNSCALE_TYPE *)(src + bpp));
+ DOWNSCALE_TYPE * ba = ((DOWNSCALE_TYPE *)(src + src_rowstride));
+ DOWNSCALE_TYPE * bb = ((DOWNSCALE_TYPE *)(src + diag));
+
+ ((DOWNSCALE_TYPE *)dst)[0] = (aa[0] + ab[0] + ba[0] + bb[0]) / DOWNSCALE_DIVISOR;
+ ((DOWNSCALE_TYPE *)dst)[1] = (aa[1] + ab[1] + ba[1] + bb[1]) / DOWNSCALE_DIVISOR;
- dst += bpp;
- src += bpp * 2;
+ dst += bpp;
+ src += bpp * 2;
+ }
}
+ break;
+ case 3:
+ for (y = 0; y < src_height / 2; y++)
+ {
+ gint x;
+ guchar *src = src_data + src_rowstride * y * 2;
+ guchar *dst = dst_data + dst_rowstride * y;
- dst_data += dst_rowstride;
- src_data += 2 * src_rowstride;
- }
-}
\ No newline at end of file
+ for (x = 0; x < src_width / 2; x++)
+ {
+ DOWNSCALE_TYPE * aa = ((DOWNSCALE_TYPE *)(src));
+ DOWNSCALE_TYPE * ab = ((DOWNSCALE_TYPE *)(src + bpp));
+ DOWNSCALE_TYPE * ba = ((DOWNSCALE_TYPE *)(src + src_rowstride));
+ DOWNSCALE_TYPE * bb = ((DOWNSCALE_TYPE *)(src + diag));
+
+ ((DOWNSCALE_TYPE *)dst)[0] = (aa[0] + ab[0] + ba[0] + bb[0]) / DOWNSCALE_DIVISOR;
+ ((DOWNSCALE_TYPE *)dst)[1] = (aa[1] + ab[1] + ba[1] + bb[1]) / DOWNSCALE_DIVISOR;
+ ((DOWNSCALE_TYPE *)dst)[2] = (aa[2] + ab[2] + ba[2] + bb[2]) / DOWNSCALE_DIVISOR;
+
+ dst += bpp;
+ src += bpp * 2;
+ }
+ }
+ break;
+ case 4:
+ for (y = 0; y < src_height / 2; y++)
+ {
+ gint x;
+ guchar *src = src_data + src_rowstride * y * 2;
+ guchar *dst = dst_data + dst_rowstride * y;
+
+ for (x = 0; x < src_width / 2; x++)
+ {
+ DOWNSCALE_TYPE * aa = ((DOWNSCALE_TYPE *)(src));
+ DOWNSCALE_TYPE * ab = ((DOWNSCALE_TYPE *)(src + bpp));
+ DOWNSCALE_TYPE * ba = ((DOWNSCALE_TYPE *)(src + src_rowstride));
+ DOWNSCALE_TYPE * bb = ((DOWNSCALE_TYPE *)(src + diag));
+
+ ((DOWNSCALE_TYPE *)dst)[0] = (aa[0] + ab[0] + ba[0] + bb[0]) / DOWNSCALE_DIVISOR;
+ ((DOWNSCALE_TYPE *)dst)[1] = (aa[1] + ab[1] + ba[1] + bb[1]) / DOWNSCALE_DIVISOR;
+ ((DOWNSCALE_TYPE *)dst)[2] = (aa[2] + ab[2] + ba[2] + bb[2]) / DOWNSCALE_DIVISOR;
+ ((DOWNSCALE_TYPE *)dst)[3] = (aa[3] + ab[3] + ba[3] + bb[3]) / DOWNSCALE_DIVISOR;
+
+ dst += bpp;
+ src += bpp * 2;
+ }
+ }
+ break;
+ default:
+ for (y = 0; y < src_height / 2; y++)
+ {
+ gint x;
+ guchar *src = src_data + src_rowstride * y * 2;
+ guchar *dst = dst_data + dst_rowstride * y;
+
+ for (x = 0; x < src_width / 2; x++)
+ {
+ gint i;
+ DOWNSCALE_TYPE * aa = ((DOWNSCALE_TYPE *)(src));
+ DOWNSCALE_TYPE * ab = ((DOWNSCALE_TYPE *)(src + bpp));
+ DOWNSCALE_TYPE * ba = ((DOWNSCALE_TYPE *)(src + src_rowstride));
+ DOWNSCALE_TYPE * bb = ((DOWNSCALE_TYPE *)(src + diag));
+
+ for (i = 0; i < components; i++)
+ ((DOWNSCALE_TYPE *)dst)[i] = (aa[i] + ab[i] + ba[i] + bb[i]) / DOWNSCALE_DIVISOR;
+
+ dst += bpp;
+ src += bpp * 2;
+ }
+ }
+ break;
+ }
+}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]