[gegl] buffer: add more SIMD variants of buffer resamplers
- From: Øyvind "pippin" Kolås <ok src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gegl] buffer: add more SIMD variants of buffer resamplers
- Date: Sun, 16 Jan 2022 07:01:59 +0000 (UTC)
commit 3b84a690e242d360657ec9cd3d7958d7c52902ba
Author: Øyvind Kolås <pippin gimp org>
Date: Sun Jan 16 04:24:26 2022 +0100
buffer: add more SIMD variants of buffer resamplers
The resamplers are used when scaling buffers for display and when generating mipmaps.
gegl/buffer/gegl-algorithms-2x2-downscale.inc | 2 +-
gegl/buffer/gegl-algorithms-bilinear.inc | 2 +-
gegl/buffer/gegl-algorithms-boxfilter.inc | 2 +-
gegl/buffer/gegl-algorithms-x86-64-v2.c | 4 +
gegl/buffer/gegl-algorithms-x86-64-v3.c | 4 +
gegl/buffer/gegl-algorithms.c | 1102 ++++++++++++-------------
gegl/buffer/gegl-algorithms.h | 159 +---
gegl/buffer/gegl-buffer-private.h | 36 +
gegl/buffer/gegl-buffer.c | 174 ++++
gegl/buffer/gegl-tile-handler-zoom.c | 13 +-
gegl/buffer/meson.build | 15 +
gegl/gegl-init.c | 11 +-
gegl/meson.build | 10 +-
meson.build | 8 +
14 files changed, 830 insertions(+), 712 deletions(-)
---
diff --git a/gegl/buffer/gegl-algorithms-2x2-downscale.inc b/gegl/buffer/gegl-algorithms-2x2-downscale.inc
index e2a8cc786..2946fa436 100644
--- a/gegl/buffer/gegl-algorithms-2x2-downscale.inc
+++ b/gegl/buffer/gegl-algorithms-2x2-downscale.inc
@@ -1,6 +1,6 @@
#define S(a) ((DOWNSCALE_SUM)(a))
-void
+static void
DOWNSCALE_FUNCNAME (const Babl *format,
gint src_width,
gint src_height,
diff --git a/gegl/buffer/gegl-algorithms-bilinear.inc b/gegl/buffer/gegl-algorithms-bilinear.inc
index 36486fcaf..1beb569fb 100644
--- a/gegl/buffer/gegl-algorithms-bilinear.inc
+++ b/gegl/buffer/gegl-algorithms-bilinear.inc
@@ -1,4 +1,4 @@
-void
+static void
BILINEAR_FUNCNAME (guchar *dest_buf,
const guchar *source_buf,
const GeglRectangle *dst_rect,
diff --git a/gegl/buffer/gegl-algorithms-boxfilter.inc b/gegl/buffer/gegl-algorithms-boxfilter.inc
index 1c6f46c7a..fd67dce67 100644
--- a/gegl/buffer/gegl-algorithms-boxfilter.inc
+++ b/gegl/buffer/gegl-algorithms-boxfilter.inc
@@ -1,4 +1,4 @@
-void
+static void
BOXFILTER_FUNCNAME (guchar *dest_buf,
const guchar *source_buf,
const GeglRectangle *dst_rect,
diff --git a/gegl/buffer/gegl-algorithms-x86-64-v2.c b/gegl/buffer/gegl-algorithms-x86-64-v2.c
new file mode 100644
index 000000000..393edd0e3
--- /dev/null
+++ b/gegl/buffer/gegl-algorithms-x86-64-v2.c
@@ -0,0 +1,4 @@
+
+#define GEGL_SIMD_SUFFIX(symbol) symbol##_x86_64_v2
+
+#include "gegl-algorithms.c"
diff --git a/gegl/buffer/gegl-algorithms-x86-64-v3.c b/gegl/buffer/gegl-algorithms-x86-64-v3.c
new file mode 100644
index 000000000..b992476fe
--- /dev/null
+++ b/gegl/buffer/gegl-algorithms-x86-64-v3.c
@@ -0,0 +1,4 @@
+
+#define GEGL_SIMD_SUFFIX(symbol) symbol##_x86_64_v3
+
+#include "gegl-algorithms.c"
diff --git a/gegl/buffer/gegl-algorithms.c b/gegl/buffer/gegl-algorithms.c
index 6545ee97c..5299b771a 100644
--- a/gegl/buffer/gegl-algorithms.c
+++ b/gegl/buffer/gegl-algorithms.c
@@ -31,15 +31,16 @@
#include <math.h>
-void gegl_downscale_2x2 (const Babl *format,
- gint src_width,
- gint src_height,
- guchar *src_data,
- gint src_rowstride,
- guchar *dst_data,
- gint dst_rowstride)
+void
+GEGL_SIMD_SUFFIX(gegl_downscale_2x2) (const Babl *format,
+ gint src_width,
+ gint src_height,
+ guchar *src_data,
+ gint src_rowstride,
+ guchar *dst_data,
+ gint dst_rowstride)
{
- gegl_downscale_2x2_get_fun (format)(format, src_width, src_height,
+ GEGL_SIMD_SUFFIX(gegl_downscale_2x2_get_fun) (format)(format, src_width, src_height,
src_data, src_rowstride,
dst_data, dst_rowstride);;
}
@@ -54,93 +55,10 @@ static void inline *align_16 (unsigned char *ret)
return ret;
}
-static void
-gegl_downscale_2x2_generic (const Babl *format,
- gint src_width,
- gint src_height,
- guchar *src_data,
- gint src_rowstride,
- guchar *dst_data,
- gint dst_rowstride)
-{
- const Babl *tmp_format = babl_format_with_space ("RGBA float", format);
- const Babl *from_fish = babl_fish (format, tmp_format);
- const Babl *to_fish = babl_fish (tmp_format, format);
- const gint tmp_bpp = 4 * 4;
- gint dst_width = src_width / 2;
- gint dst_height = src_height / 2;
- gint in_tmp_rowstride = src_width * tmp_bpp;
- gint out_tmp_rowstride = dst_width * tmp_bpp;
- gint do_free = 0;
-
- void *in_tmp;
- void *out_tmp;
-
- if (src_height * in_tmp_rowstride + dst_height * out_tmp_rowstride < GEGL_ALLOCA_THRESHOLD)
- {
- in_tmp = align_16 (alloca (src_height * in_tmp_rowstride + 16));
- out_tmp = align_16 (alloca (dst_height * out_tmp_rowstride + 16));
- }
- else
- {
- in_tmp = gegl_scratch_alloc (src_height * in_tmp_rowstride);
- out_tmp = gegl_scratch_alloc (dst_height * out_tmp_rowstride);
- do_free = 1;
- }
-
- babl_process_rows (from_fish,
- src_data, src_rowstride,
- in_tmp, in_tmp_rowstride,
- src_width, src_height);
- gegl_downscale_2x2_float (tmp_format, src_width, src_height,
- in_tmp, in_tmp_rowstride,
- out_tmp, out_tmp_rowstride);
- babl_process_rows (to_fish,
- out_tmp, out_tmp_rowstride,
- dst_data, dst_rowstride,
- dst_width, dst_height);
-
- if (do_free)
- {
- gegl_scratch_free (out_tmp);
- gegl_scratch_free (in_tmp);
- }
-}
-
-#define LUT_DIVISOR 16
-
-static uint16_t lut_u8_to_u16[256];
-static float lut_u8_to_u16f[256];
-static uint8_t lut_u16_to_u8[65536/LUT_DIVISOR];
-
-void _gegl_init_u8_lut (void);
-void _gegl_init_u8_lut (void)
-{
- static int lut_inited = 0;
- uint8_t u8_ramp[256];
- uint16_t u16_ramp[65536/LUT_DIVISOR];
- int i;
-
- if (lut_inited)
- return;
- for (i = 0; i < 256; i++) u8_ramp[i]=i;
- for (i = 0; i < 65536/LUT_DIVISOR; i++) u16_ramp[i]=i * LUT_DIVISOR;
- babl_process (babl_fish (babl_format ("Y' u8"), babl_format("Y u16")),
- &u8_ramp[0], &lut_u8_to_u16[0],
- 256);
- for (i = 0; i < 256; i++)
- {
- lut_u8_to_u16[i] = lut_u8_to_u16[i]/LUT_DIVISOR;
- lut_u8_to_u16f[i] = lut_u8_to_u16[i];
- }
-
- babl_process (babl_fish (babl_format ("Y u16"), babl_format("Y' u8")),
- &u16_ramp[0], &lut_u16_to_u8[0],
- 65536/LUT_DIVISOR);
-
- lut_inited = 1;
-}
+extern uint16_t gegl_lut_u8_to_u16[256];
+extern float gegl_lut_u8_to_u16f[256];
+extern uint8_t gegl_lut_u16_to_u8[65536/GEGL_ALGORITHMS_LUT_DIVISOR];
static void
@@ -239,8 +157,8 @@ gegl_boxfilter_u8_nl (guchar *dest_buf,
const gfloat m = middle_weight;
const gfloat b = bottom_weight;
-#define C(val) lut_u8_to_u16f[(val)]
-#define BOXFILTER_ROUND(val) lut_u16_to_u8[((int)((val)+0.5f))]
+#define C(val) gegl_lut_u8_to_u16f[(val)]
+#define BOXFILTER_ROUND(val) gegl_lut_u16_to_u8[((int)((val)+0.5f))]
#define BOXFILTER_ROUND_ALPHA(val) ((int)((val)+0.5f))
dst[0] = BOXFILTER_ROUND(
(C(src[0][0]) * t + C(src[3][0]) * m + C(src[6][0]) * b) * l +
@@ -482,8 +400,8 @@ gegl_bilinear_u8_nl (guchar *dest_buf,
}\
}while(0)
-#define C(val) lut_u8_to_u16f[(val)]
-#define BILINEAR_ROUND(val) lut_u16_to_u8[((int)((val)+0.5f))]
+#define C(val) gegl_lut_u8_to_u16f[(val)]
+#define BILINEAR_ROUND(val) gegl_lut_u16_to_u8[((int)((val)+0.5f))]
#define BILINEAR_ROUND_ALPHA(val) ((int)((val)+0.5f))
switch (components)
@@ -701,57 +619,57 @@ break;\
switch (components)
{
CASE(1,
- ((uint8_t *)dst)[0] = lut_u16_to_u8[ (lut_u8_to_u16[aa[0]] +
- lut_u8_to_u16[ab[0]] +
- lut_u8_to_u16[ba[0]] +
- lut_u8_to_u16[bb[0]])>>2 ];);
+ ((uint8_t *)dst)[0] = gegl_lut_u16_to_u8[ (gegl_lut_u8_to_u16[aa[0]] +
+ gegl_lut_u8_to_u16[ab[0]] +
+ gegl_lut_u8_to_u16[ba[0]] +
+ gegl_lut_u8_to_u16[bb[0]])>>2 ];);
CASE(2,
- ((uint8_t *)dst)[0] = lut_u16_to_u8[ (lut_u8_to_u16[aa[0]] +
- lut_u8_to_u16[ab[0]] +
- lut_u8_to_u16[ba[0]] +
- lut_u8_to_u16[bb[0]])>>2 ];
- ((uint8_t *)dst)[1] = lut_u16_to_u8[ (lut_u8_to_u16[aa[1]] +
- lut_u8_to_u16[ab[1]] +
- lut_u8_to_u16[ba[1]] +
- lut_u8_to_u16[bb[1]])>>2 ];);
+ ((uint8_t *)dst)[0] = gegl_lut_u16_to_u8[ (gegl_lut_u8_to_u16[aa[0]] +
+ gegl_lut_u8_to_u16[ab[0]] +
+ gegl_lut_u8_to_u16[ba[0]] +
+ gegl_lut_u8_to_u16[bb[0]])>>2 ];
+ ((uint8_t *)dst)[1] = gegl_lut_u16_to_u8[ (gegl_lut_u8_to_u16[aa[1]] +
+ gegl_lut_u8_to_u16[ab[1]] +
+ gegl_lut_u8_to_u16[ba[1]] +
+ gegl_lut_u8_to_u16[bb[1]])>>2 ];);
CASE(3,
- ((uint8_t *)dst)[0] = lut_u16_to_u8[ (lut_u8_to_u16[aa[0]] +
- lut_u8_to_u16[ab[0]] +
- lut_u8_to_u16[ba[0]] +
- lut_u8_to_u16[bb[0]])>>2 ];
- ((uint8_t *)dst)[1] = lut_u16_to_u8[ (lut_u8_to_u16[aa[1]] +
- lut_u8_to_u16[ab[1]] +
- lut_u8_to_u16[ba[1]] +
- lut_u8_to_u16[bb[1]])>>2 ];
- ((uint8_t *)dst)[2] = lut_u16_to_u8[ (lut_u8_to_u16[aa[2]] +
- lut_u8_to_u16[ab[2]] +
- lut_u8_to_u16[ba[2]] +
- lut_u8_to_u16[bb[2]])>>2 ];);
+ ((uint8_t *)dst)[0] = gegl_lut_u16_to_u8[ (gegl_lut_u8_to_u16[aa[0]] +
+ gegl_lut_u8_to_u16[ab[0]] +
+ gegl_lut_u8_to_u16[ba[0]] +
+ gegl_lut_u8_to_u16[bb[0]])>>2 ];
+ ((uint8_t *)dst)[1] = gegl_lut_u16_to_u8[ (gegl_lut_u8_to_u16[aa[1]] +
+ gegl_lut_u8_to_u16[ab[1]] +
+ gegl_lut_u8_to_u16[ba[1]] +
+ gegl_lut_u8_to_u16[bb[1]])>>2 ];
+ ((uint8_t *)dst)[2] = gegl_lut_u16_to_u8[ (gegl_lut_u8_to_u16[aa[2]] +
+ gegl_lut_u8_to_u16[ab[2]] +
+ gegl_lut_u8_to_u16[ba[2]] +
+ gegl_lut_u8_to_u16[bb[2]])>>2 ];);
CASE(4,
- ((uint8_t *)dst)[0] = lut_u16_to_u8[ (lut_u8_to_u16[aa[0]] +
- lut_u8_to_u16[ab[0]] +
- lut_u8_to_u16[ba[0]] +
- lut_u8_to_u16[bb[0]])>>2 ];
- ((uint8_t *)dst)[1] = lut_u16_to_u8[ (lut_u8_to_u16[aa[1]] +
- lut_u8_to_u16[ab[1]] +
- lut_u8_to_u16[ba[1]] +
- lut_u8_to_u16[bb[1]])>>2 ];
- ((uint8_t *)dst)[2] = lut_u16_to_u8[ (lut_u8_to_u16[aa[2]] +
- lut_u8_to_u16[ab[2]] +
- lut_u8_to_u16[ba[2]] +
- lut_u8_to_u16[bb[2]])>>2 ];
- ((uint8_t *)dst)[3] = lut_u16_to_u8[ (lut_u8_to_u16[aa[3]] +
- lut_u8_to_u16[ab[3]] +
- lut_u8_to_u16[ba[3]] +
- lut_u8_to_u16[bb[3]])>>2 ];);
+ ((uint8_t *)dst)[0] = gegl_lut_u16_to_u8[ (gegl_lut_u8_to_u16[aa[0]] +
+ gegl_lut_u8_to_u16[ab[0]] +
+ gegl_lut_u8_to_u16[ba[0]] +
+ gegl_lut_u8_to_u16[bb[0]])>>2 ];
+ ((uint8_t *)dst)[1] = gegl_lut_u16_to_u8[ (gegl_lut_u8_to_u16[aa[1]] +
+ gegl_lut_u8_to_u16[ab[1]] +
+ gegl_lut_u8_to_u16[ba[1]] +
+ gegl_lut_u8_to_u16[bb[1]])>>2 ];
+ ((uint8_t *)dst)[2] = gegl_lut_u16_to_u8[ (gegl_lut_u8_to_u16[aa[2]] +
+ gegl_lut_u8_to_u16[ab[2]] +
+ gegl_lut_u8_to_u16[ba[2]] +
+ gegl_lut_u8_to_u16[bb[2]])>>2 ];
+ ((uint8_t *)dst)[3] = gegl_lut_u16_to_u8[ (gegl_lut_u8_to_u16[aa[3]] +
+ gegl_lut_u8_to_u16[ab[3]] +
+ gegl_lut_u8_to_u16[ba[3]] +
+ gegl_lut_u8_to_u16[bb[3]])>>2 ];);
default:
CASE(0,
for (gint i = 0; i < components; i++)
((uint8_t *)dst)[i] =
- lut_u16_to_u8[ (lut_u8_to_u16[aa[i]] +
- lut_u8_to_u16[ab[i]] +
- lut_u8_to_u16[ba[i]] +
- lut_u8_to_u16[bb[i]])>>2 ];);
+ gegl_lut_u16_to_u8[ (gegl_lut_u8_to_u16[aa[i]] +
+ gegl_lut_u8_to_u16[ab[i]] +
+ gegl_lut_u8_to_u16[ba[i]] +
+ gegl_lut_u8_to_u16[bb[i]])>>2 ];);
}
}
@@ -775,33 +693,33 @@ gegl_downscale_2x2_u8_nl_alpha (const Babl *format,
switch (components)
{
CASE(2,
- ((uint8_t *)dst)[0] = lut_u16_to_u8[ (lut_u8_to_u16[aa[0]] +
- lut_u8_to_u16[ab[0]] +
- lut_u8_to_u16[ba[0]] +
- lut_u8_to_u16[bb[0]])>>2 ];
+ ((uint8_t *)dst)[0] = gegl_lut_u16_to_u8[ (gegl_lut_u8_to_u16[aa[0]] +
+ gegl_lut_u8_to_u16[ab[0]] +
+ gegl_lut_u8_to_u16[ba[0]] +
+ gegl_lut_u8_to_u16[bb[0]])>>2 ];
((uint8_t *)dst)[1] = (aa[1] + ab[1] + ba[1] + bb[1])>>2;);
CASE(4,
- ((uint8_t *)dst)[0] = lut_u16_to_u8[ (lut_u8_to_u16[aa[0]] +
- lut_u8_to_u16[ab[0]] +
- lut_u8_to_u16[ba[0]] +
- lut_u8_to_u16[bb[0]])>>2 ];
- ((uint8_t *)dst)[1] = lut_u16_to_u8[ (lut_u8_to_u16[aa[1]] +
- lut_u8_to_u16[ab[1]] +
- lut_u8_to_u16[ba[1]] +
- lut_u8_to_u16[bb[1]])>>2 ];
- ((uint8_t *)dst)[2] = lut_u16_to_u8[ (lut_u8_to_u16[aa[2]] +
- lut_u8_to_u16[ab[2]] +
- lut_u8_to_u16[ba[2]] +
- lut_u8_to_u16[bb[2]])>>2 ];
+ ((uint8_t *)dst)[0] = gegl_lut_u16_to_u8[ (gegl_lut_u8_to_u16[aa[0]] +
+ gegl_lut_u8_to_u16[ab[0]] +
+ gegl_lut_u8_to_u16[ba[0]] +
+ gegl_lut_u8_to_u16[bb[0]])>>2 ];
+ ((uint8_t *)dst)[1] = gegl_lut_u16_to_u8[ (gegl_lut_u8_to_u16[aa[1]] +
+ gegl_lut_u8_to_u16[ab[1]] +
+ gegl_lut_u8_to_u16[ba[1]] +
+ gegl_lut_u8_to_u16[bb[1]])>>2 ];
+ ((uint8_t *)dst)[2] = gegl_lut_u16_to_u8[ (gegl_lut_u8_to_u16[aa[2]] +
+ gegl_lut_u8_to_u16[ab[2]] +
+ gegl_lut_u8_to_u16[ba[2]] +
+ gegl_lut_u8_to_u16[bb[2]])>>2 ];
((uint8_t *)dst)[3] = (aa[3] + ab[3] + ba[3] + bb[3])>>2;);
default:
CASE(0,
for (gint i = 0; i < components - 1; i++)
((uint8_t *)dst)[i] =
- lut_u16_to_u8[ (lut_u8_to_u16[aa[i]] +
- lut_u8_to_u16[ab[i]] +
- lut_u8_to_u16[ba[i]] +
- lut_u8_to_u16[bb[i]])>>2 ];
+ gegl_lut_u16_to_u8[ (gegl_lut_u8_to_u16[aa[i]] +
+ gegl_lut_u8_to_u16[ab[i]] +
+ gegl_lut_u8_to_u16[ba[i]] +
+ gegl_lut_u8_to_u16[bb[i]])>>2 ];
((uint8_t *)dst)[components-1] = (aa[components-1] + ab[components-1] + ba[components-1] +
bb[components-1])>>2;);
}
#undef CASE
@@ -837,18 +755,18 @@ gegl_downscale_2x2_u8_rgba (const Babl *format,
for (x = 0; x < src_width / 2; x++)
{
- ((uint8_t *)dst)[0] = lut_u16_to_u8[ (lut_u8_to_u16[aa[0]] +
- lut_u8_to_u16[ab[0]] +
- lut_u8_to_u16[ba[0]] +
- lut_u8_to_u16[bb[0]])>>2 ];
- ((uint8_t *)dst)[1] = lut_u16_to_u8[ (lut_u8_to_u16[aa[1]] +
- lut_u8_to_u16[ab[1]] +
- lut_u8_to_u16[ba[1]] +
- lut_u8_to_u16[bb[1]])>>2 ];
- ((uint8_t *)dst)[2] = lut_u16_to_u8[ (lut_u8_to_u16[aa[2]] +
- lut_u8_to_u16[ab[2]] +
- lut_u8_to_u16[ba[2]] +
- lut_u8_to_u16[bb[2]])>>2 ];
+ ((uint8_t *)dst)[0] = gegl_lut_u16_to_u8[ (gegl_lut_u8_to_u16[aa[0]] +
+ gegl_lut_u8_to_u16[ab[0]] +
+ gegl_lut_u8_to_u16[ba[0]] +
+ gegl_lut_u8_to_u16[bb[0]])>>2 ];
+ ((uint8_t *)dst)[1] = gegl_lut_u16_to_u8[ (gegl_lut_u8_to_u16[aa[1]] +
+ gegl_lut_u8_to_u16[ab[1]] +
+ gegl_lut_u8_to_u16[ba[1]] +
+ gegl_lut_u8_to_u16[bb[1]])>>2 ];
+ ((uint8_t *)dst)[2] = gegl_lut_u16_to_u8[ (gegl_lut_u8_to_u16[aa[2]] +
+ gegl_lut_u8_to_u16[ab[2]] +
+ gegl_lut_u8_to_u16[ba[2]] +
+ gegl_lut_u8_to_u16[bb[2]])>>2 ];
((uint8_t *)dst)[3] = (aa[3] + ab[3] + ba[3] + bb[3])>>2;
dst += bpp;
@@ -890,18 +808,18 @@ gegl_downscale_2x2_u8_rgb (const Babl *format,
for (x = 0; x < src_width / 2; x++)
{
- ((uint8_t *)dst)[0] = lut_u16_to_u8[ (lut_u8_to_u16[aa[0]] +
- lut_u8_to_u16[ab[0]] +
- lut_u8_to_u16[ba[0]] +
- lut_u8_to_u16[bb[0]])>>2 ];
- ((uint8_t *)dst)[1] = lut_u16_to_u8[ (lut_u8_to_u16[aa[1]] +
- lut_u8_to_u16[ab[1]] +
- lut_u8_to_u16[ba[1]] +
- lut_u8_to_u16[bb[1]])>>2 ];
- ((uint8_t *)dst)[2] = lut_u16_to_u8[ (lut_u8_to_u16[aa[2]] +
- lut_u8_to_u16[ab[2]] +
- lut_u8_to_u16[ba[2]] +
- lut_u8_to_u16[bb[2]])>>2 ];
+ ((uint8_t *)dst)[0] = gegl_lut_u16_to_u8[ (gegl_lut_u8_to_u16[aa[0]] +
+ gegl_lut_u8_to_u16[ab[0]] +
+ gegl_lut_u8_to_u16[ba[0]] +
+ gegl_lut_u8_to_u16[bb[0]])>>2 ];
+ ((uint8_t *)dst)[1] = gegl_lut_u16_to_u8[ (gegl_lut_u8_to_u16[aa[1]] +
+ gegl_lut_u8_to_u16[ab[1]] +
+ gegl_lut_u8_to_u16[ba[1]] +
+ gegl_lut_u8_to_u16[bb[1]])>>2 ];
+ ((uint8_t *)dst)[2] = gegl_lut_u16_to_u8[ (gegl_lut_u8_to_u16[aa[2]] +
+ gegl_lut_u8_to_u16[ab[2]] +
+ gegl_lut_u8_to_u16[ba[2]] +
+ gegl_lut_u8_to_u16[bb[2]])>>2 ];
dst += bpp;
aa += bpp * 2;
ab += bpp * 2;
@@ -931,60 +849,14 @@ gegl_downscale_2x2_u8_rgb (const Babl *format,
#define gegl_downscale_2x2_u8_nl_alpha ((void) gegl_downscale_2x2_u8_nl_alpha, \
gegl_downscale_2x2_u8_nl)
-
-GeglDownscale2x2Fun gegl_downscale_2x2_get_fun (const Babl *format)
-{
- const Babl *comp_type = babl_format_get_type (format, 0);
- const Babl *model = babl_format_get_model (format);
- BablModelFlag model_flags = babl_get_model_flags (model);
-
- if ((model_flags & BABL_MODEL_FLAG_LINEAR)||
- (model_flags & BABL_MODEL_FLAG_CMYK))
- {
- if (comp_type == gegl_babl_float())
- {
- return gegl_downscale_2x2_float;
- }
- else if (comp_type == gegl_babl_u8())
- {
- return gegl_downscale_2x2_u8;
- }
- else if (comp_type == gegl_babl_u16())
- {
- return gegl_downscale_2x2_u16;
- }
- else if (comp_type == gegl_babl_u32())
- {
- return gegl_downscale_2x2_u32;
- }
- else if (comp_type == gegl_babl_double())
- {
- return gegl_downscale_2x2_double;
- }
- }
- if (comp_type == gegl_babl_u8())
- {
- if (format == gegl_babl_rgba_u8())
- return gegl_downscale_2x2_u8_rgba;
- if (format == gegl_babl_rgb_u8())
- return gegl_downscale_2x2_u8_rgb;
-
- if (babl_format_has_alpha (format))
- return gegl_downscale_2x2_u8_nl_alpha;
- else
- return gegl_downscale_2x2_u8_nl;
- }
- return gegl_downscale_2x2_generic;
-}
-
void
-gegl_downscale_2x2_nearest (const Babl *format,
- gint src_width,
- gint src_height,
- guchar *src_data,
- gint src_rowstride,
- guchar *dst_data,
- gint dst_rowstride)
+GEGL_SIMD_SUFFIX(gegl_downscale_2x2_nearest) (const Babl *format,
+ gint src_width,
+ gint src_height,
+ guchar *src_data,
+ gint src_rowstride,
+ guchar *dst_data,
+ gint dst_rowstride)
{
gint bpp = babl_format_get_bytes_per_pixel (format);
gint y;
@@ -1007,68 +879,421 @@ gegl_downscale_2x2_nearest (const Babl *format,
}
}
-static void
-gegl_resample_boxfilter_generic (guchar *dest_buf,
- const guchar *source_buf,
- const GeglRectangle *dst_rect,
- const GeglRectangle *src_rect,
- gint s_rowstride,
- gdouble scale,
- const Babl *format,
- gint bpp,
- gint d_rowstride)
-{
- const Babl *tmp_format = babl_format_with_space ("RGBA float", format);
- const Babl *from_fish = babl_fish (format, tmp_format);
- const Babl *to_fish = babl_fish (tmp_format, format);
-
- const gint tmp_bpp = 4 * 4;
- gint in_tmp_rowstride = src_rect->width * tmp_bpp;
- gint out_tmp_rowstride = dst_rect->width * tmp_bpp;
- gint do_free = 0;
-
- guchar *in_tmp, *out_tmp;
- if (src_rect->height * in_tmp_rowstride + dst_rect->height * out_tmp_rowstride < GEGL_ALLOCA_THRESHOLD)
+void
+GEGL_SIMD_SUFFIX(gegl_resample_nearest) (guchar *dst,
+ const guchar *src,
+ const GeglRectangle *dst_rect,
+ const GeglRectangle *src_rect,
+ const gint src_stride,
+ const gdouble scale,
+ const gint bpp,
+ const gint dst_stride)
+{
+ gint jj[dst_rect->width];
+ gint x, y;
+ for (x = 0; x < dst_rect->width; x++)
{
- in_tmp = align_16 (alloca (src_rect->height * in_tmp_rowstride + 16));
- out_tmp = align_16 (alloca (dst_rect->height * out_tmp_rowstride + 16));
+ const gfloat sx = (dst_rect->x + .5 + x) / scale - src_rect->x;
+ jj[x] = int_floorf (sx ) * bpp;
}
- else
+
+#define IMPL(...) do{ \
+ for (y = 0; y < dst_rect->height; y++)\
+ {\
+ const gfloat sy = (dst_rect->y + .5 + y) / scale - src_rect->y;\
+ const gint ii = int_floorf (sy);\
+ gint *ijj = &jj[0];\
+ guchar *d = &dst[y*dst_stride];\
+ const guchar *s = &src[ii * src_stride];\
+ for (x = 0; x < dst_rect->width; x++)\
+ {\
+ __VA_ARGS__;\
+ d += bpp; \
+ }\
+ }\
+ }while(0)
+
+ switch(bpp)
{
- in_tmp = gegl_scratch_alloc (src_rect->height * in_tmp_rowstride);
- out_tmp = gegl_scratch_alloc (dst_rect->height * out_tmp_rowstride);
- do_free = 1;
+ case 1:IMPL(
+ d[0] = s[*(ijj++)];
+ );
+ break;
+ case 2:IMPL(
+ uint16_t* d16 = (void*) d;
+ const uint16_t* s16 = (void*) &s[*(ijj++)];
+ d16[0] = s16[0];
+ );
+ break;
+ case 3:IMPL(
+ d[0] = s[*ijj];
+ d[1] = s[*ijj + 1];
+ d[2] = s[*(ijj++) + 2];
+ );
+ break;
+ case 5:IMPL(
+ uint32_t* d32 = (void*) d;
+ const uint32_t* s32 = (void*) &s[*(ijj++)];
+ d32[0] = s32[0];
+ d[4] = s[4];
+ );
+ break;
+ case 4:IMPL(
+ uint32_t* d32 = (void*) d;
+ const uint32_t* s32 = (void*) &s[*(ijj++)];
+ d32[0] = s32[0];
+ );
+ break;
+ case 6:IMPL(
+ uint32_t* d32 = (void*) d;
+ const uint32_t* s32 = (void*) &s[*(ijj++)];
+ d32[0] = s32[0];
+ d[4] = s[4];
+ d[5] = s[5];
+ );
+ break;
+ case 8:IMPL(
+ uint64_t* d64 = (void*) d;
+ const uint64_t* s64 = (void*) &s[*(ijj++)];
+ d64[0] = s64[0];
+ );
+ break;
+ case 12:IMPL(
+ uint32_t* d32 = (void*) d;
+ const uint32_t* s32 = (void*) &s[*(ijj++)];
+ d32[0] = s32[0];
+ d32[1] = s32[1];
+ d32[2] = s32[2];
+ );
+ break;
+ case 16:IMPL(
+ uint64_t* d64 = (void*) d;
+ const uint64_t* s64 = (void*) &s[*(ijj++)];
+ d64[0] = s64[0];
+ d64[1] = s64[1];
+ );
+ break;
+ default:
+ IMPL(
+ memcpy (&d[0], &s[*(ijj++)], bpp);
+ );
+ break;
}
+#undef IMPL
+}
- babl_process_rows (from_fish,
- source_buf, s_rowstride,
- in_tmp, in_tmp_rowstride,
- src_rect->width, src_rect->height);
-
- gegl_resample_boxfilter_float (out_tmp, in_tmp, dst_rect, src_rect,
- in_tmp_rowstride, scale, tmp_format, tmp_bpp, out_tmp_rowstride);
+#define BOXFILTER_FUNCNAME gegl_resample_boxfilter_double
+#define BOXFILTER_TYPE gdouble
+#define BOXFILTER_TEMP_TYPE gdouble
+#define BOXFILTER_ROUND(val) (val)
+#include "gegl-algorithms-boxfilter.inc"
+#undef BOXFILTER_FUNCNAME
+#undef BOXFILTER_TYPE
+#undef BOXFILTER_TEMP_TYPE
+#undef BOXFILTER_ROUND
- babl_process_rows (to_fish,
- out_tmp, out_tmp_rowstride,
- dest_buf, d_rowstride,
- dst_rect->width, dst_rect->height);
+#define BOXFILTER_FUNCNAME gegl_resample_boxfilter_float
+#define BOXFILTER_TYPE gfloat
+#define BOXFILTER_TEMP_TYPE gfloat
+#define BOXFILTER_ROUND(val) (val)
+#include "gegl-algorithms-boxfilter.inc"
+#undef BOXFILTER_FUNCNAME
+#undef BOXFILTER_TYPE
+#undef BOXFILTER_TEMP_TYPE
+#undef BOXFILTER_ROUND
- if (do_free)
- {
- gegl_scratch_free (out_tmp);
+#define BOXFILTER_FUNCNAME gegl_resample_boxfilter_u8
+#define BOXFILTER_TYPE guchar
+#define BOXFILTER_TEMP_TYPE guchar
+#define BOXFILTER_ROUND(val) ((int)((val)+0.5f))
+#include "gegl-algorithms-boxfilter.inc"
+#undef BOXFILTER_FUNCNAME
+#undef BOXFILTER_TYPE
+#undef BOXFILTER_TEMP_TYPE
+#undef BOXFILTER_ROUND
+
+#define BOXFILTER_FUNCNAME gegl_resample_boxfilter_u16
+#define BOXFILTER_TYPE guint16
+#define BOXFILTER_TEMP_TYPE guint16
+#define BOXFILTER_ROUND(val) ((int)((val)+0.5f))
+#include "gegl-algorithms-boxfilter.inc"
+#undef BOXFILTER_FUNCNAME
+#undef BOXFILTER_TYPE
+#undef BOXFILTER_TEMP_TYPE
+#undef BOXFILTER_ROUND
+
+static inline guint32 _gegl_trunc_u32(guint64 value)
+{
+ if ((guint64) value > G_MAXUINT32)
+ return G_MAXUINT32;
+ return value;
+}
+
+#define BOXFILTER_FUNCNAME gegl_resample_boxfilter_u32
+#define BOXFILTER_TYPE guint32
+#define BOXFILTER_TEMP_TYPE guint64
+#define BOXFILTER_ROUND(val) _gegl_trunc_u32((val)+0.5f)
+#include "gegl-algorithms-boxfilter.inc"
+#undef BOXFILTER_FUNCNAME
+#undef BOXFILTER_TEMP_TYPE
+#undef BOXFILTER_TYPE
+#undef BOXFILTER_ROUND
+
+#define DOWNSCALE_FUNCNAME gegl_downscale_2x2_double
+#define DOWNSCALE_TYPE gdouble
+#define DOWNSCALE_SUM gdouble
+#define DOWNSCALE_DIVISOR 4.0
+#include "gegl-algorithms-2x2-downscale.inc"
+#undef DOWNSCALE_FUNCNAME
+#undef DOWNSCALE_TYPE
+#undef DOWNSCALE_SUM
+#undef DOWNSCALE_DIVISOR
+
+#define DOWNSCALE_FUNCNAME gegl_downscale_2x2_float
+#define DOWNSCALE_TYPE gfloat
+#define DOWNSCALE_SUM gfloat
+#define DOWNSCALE_DIVISOR 4.0f
+#include "gegl-algorithms-2x2-downscale.inc"
+#undef DOWNSCALE_FUNCNAME
+#undef DOWNSCALE_TYPE
+#undef DOWNSCALE_SUM
+#undef DOWNSCALE_DIVISOR
+
+#define DOWNSCALE_FUNCNAME gegl_downscale_2x2_u32
+#define DOWNSCALE_TYPE guint32
+#define DOWNSCALE_SUM guint64
+#define DOWNSCALE_DIVISOR 4
+#include "gegl-algorithms-2x2-downscale.inc"
+#undef DOWNSCALE_FUNCNAME
+#undef DOWNSCALE_TYPE
+#undef DOWNSCALE_SUM
+#undef DOWNSCALE_DIVISOR
+
+#define DOWNSCALE_FUNCNAME gegl_downscale_2x2_u16
+#define DOWNSCALE_TYPE guint16
+#define DOWNSCALE_SUM guint
+#define DOWNSCALE_DIVISOR 4
+#include "gegl-algorithms-2x2-downscale.inc"
+#undef DOWNSCALE_FUNCNAME
+#undef DOWNSCALE_TYPE
+#undef DOWNSCALE_SUM
+#undef DOWNSCALE_DIVISOR
+
+#define DOWNSCALE_FUNCNAME gegl_downscale_2x2_u8
+#define DOWNSCALE_TYPE guint8
+#define DOWNSCALE_SUM guint
+#define DOWNSCALE_DIVISOR 4
+#include "gegl-algorithms-2x2-downscale.inc"
+#undef DOWNSCALE_FUNCNAME
+#undef DOWNSCALE_TYPE
+#undef DOWNSCALE_SUM
+#undef DOWNSCALE_DIVISOR
+
+
+#define BILINEAR_FUNCNAME gegl_resample_bilinear_double
+#define BILINEAR_TYPE gdouble
+#define BILINEAR_ROUND(val) (val)
+#include "gegl-algorithms-bilinear.inc"
+#undef BILINEAR_FUNCNAME
+#undef BILINEAR_TYPE
+#undef BILINEAR_ROUND
+
+#define BILINEAR_FUNCNAME gegl_resample_bilinear_float
+#define BILINEAR_TYPE gfloat
+#define BILINEAR_ROUND(val) (val)
+#include "gegl-algorithms-bilinear.inc"
+#undef BILINEAR_FUNCNAME
+#undef BILINEAR_TYPE
+#undef BILINEAR_ROUND
+
+#define BILINEAR_FUNCNAME gegl_resample_bilinear_u8
+#define BILINEAR_TYPE guchar
+#define BILINEAR_ROUND(val) ((int)((val)+0.5f))
+#include "gegl-algorithms-bilinear.inc"
+#undef BILINEAR_FUNCNAME
+#undef BILINEAR_TYPE
+#undef BILINEAR_ROUND
+
+#define BILINEAR_FUNCNAME gegl_resample_bilinear_u16
+#define BILINEAR_TYPE guint16
+#define BILINEAR_ROUND(val) ((int)((val)+0.5f))
+#include "gegl-algorithms-bilinear.inc"
+#undef BILINEAR_FUNCNAME
+#undef BILINEAR_TYPE
+#undef BILINEAR_ROUND
+
+#define BILINEAR_FUNCNAME gegl_resample_bilinear_u32
+#define BILINEAR_TYPE guint32
+#define BILINEAR_ROUND(val) _gegl_trunc_u32((val)+0.5f)
+#include "gegl-algorithms-bilinear.inc"
+#undef BILINEAR_FUNCNAME
+#undef BILINEAR_TYPE
+#undef BILINEAR_ROUND
+
+static void
+gegl_downscale_2x2_generic2 (const Babl *format,
+ gint src_width,
+ gint src_height,
+ guchar *src_data,
+ gint src_rowstride,
+ guchar *dst_data,
+ gint dst_rowstride)
+{
+ const Babl *tmp_format = babl_format_with_space ("RGBA float", format);
+ const Babl *from_fish = babl_fish (format, tmp_format);
+ const Babl *to_fish = babl_fish (tmp_format, format);
+ const gint tmp_bpp = 4 * 4;
+ gint dst_width = src_width / 2;
+ gint dst_height = src_height / 2;
+ gint in_tmp_rowstride = src_width * tmp_bpp;
+ gint out_tmp_rowstride = dst_width * tmp_bpp;
+ gint do_free = 0;
+
+ void *in_tmp;
+ void *out_tmp;
+
+ if (src_height * in_tmp_rowstride + dst_height * out_tmp_rowstride < GEGL_ALLOCA_THRESHOLD)
+ {
+ in_tmp = align_16 (alloca (src_height * in_tmp_rowstride + 16));
+ out_tmp = align_16 (alloca (dst_height * out_tmp_rowstride + 16));
+ }
+ else
+ {
+ in_tmp = gegl_scratch_alloc (src_height * in_tmp_rowstride);
+ out_tmp = gegl_scratch_alloc (dst_height * out_tmp_rowstride);
+ do_free = 1;
+ }
+
+ babl_process_rows (from_fish,
+ src_data, src_rowstride,
+ in_tmp, in_tmp_rowstride,
+ src_width, src_height);
+ gegl_downscale_2x2_float (tmp_format, src_width, src_height,
+ in_tmp, in_tmp_rowstride,
+ out_tmp, out_tmp_rowstride);
+ babl_process_rows (to_fish,
+ out_tmp, out_tmp_rowstride,
+ dst_data, dst_rowstride,
+ dst_width, dst_height);
+
+ if (do_free)
+ {
+ gegl_scratch_free (out_tmp);
+ gegl_scratch_free (in_tmp);
+ }
+}
+
+GeglDownscale2x2Fun GEGL_SIMD_SUFFIX(gegl_downscale_2x2_get_fun) (const Babl *format)
+{
+ const Babl *comp_type = babl_format_get_type (format, 0);
+ const Babl *model = babl_format_get_model (format);
+ BablModelFlag model_flags = babl_get_model_flags (model);
+
+ if ((model_flags & BABL_MODEL_FLAG_LINEAR)||
+ (model_flags & BABL_MODEL_FLAG_CMYK))
+ {
+ if (comp_type == gegl_babl_float())
+ {
+ return gegl_downscale_2x2_float;
+ }
+ else if (comp_type == gegl_babl_u8())
+ {
+ return gegl_downscale_2x2_u8;
+ }
+ else if (comp_type == gegl_babl_u16())
+ {
+ return gegl_downscale_2x2_u16;
+ }
+ else if (comp_type == gegl_babl_u32())
+ {
+ return gegl_downscale_2x2_u32;
+ }
+ else if (comp_type == gegl_babl_double())
+ {
+ return gegl_downscale_2x2_double;
+ }
+ }
+ if (comp_type == gegl_babl_u8())
+ {
+ if (format == gegl_babl_rgba_u8())
+ return gegl_downscale_2x2_u8_rgba;
+ if (format == gegl_babl_rgb_u8())
+ return gegl_downscale_2x2_u8_rgb;
+
+ if (babl_format_has_alpha (format))
+ return gegl_downscale_2x2_u8_nl_alpha;
+ else
+ return gegl_downscale_2x2_u8_nl;
+ }
+ return gegl_downscale_2x2_generic2;
+}
+
+
+static void
+gegl_resample_boxfilter_generic2 (guchar *dest_buf,
+ const guchar *source_buf,
+ const GeglRectangle *dst_rect,
+ const GeglRectangle *src_rect,
+ gint s_rowstride,
+ gdouble scale,
+ const Babl *format,
+ gint bpp,
+ gint d_rowstride)
+{
+ const Babl *tmp_format = babl_format_with_space ("RGBA float", format);
+ const Babl *from_fish = babl_fish (format, tmp_format);
+ const Babl *to_fish = babl_fish (tmp_format, format);
+
+ const gint tmp_bpp = 4 * 4;
+ gint in_tmp_rowstride = src_rect->width * tmp_bpp;
+ gint out_tmp_rowstride = dst_rect->width * tmp_bpp;
+ gint do_free = 0;
+
+ guchar *in_tmp, *out_tmp;
+
+ if (src_rect->height * in_tmp_rowstride + dst_rect->height * out_tmp_rowstride < GEGL_ALLOCA_THRESHOLD)
+ {
+ in_tmp = align_16 (alloca (src_rect->height * in_tmp_rowstride + 16));
+ out_tmp = align_16 (alloca (dst_rect->height * out_tmp_rowstride + 16));
+ }
+ else
+ {
+ in_tmp = gegl_scratch_alloc (src_rect->height * in_tmp_rowstride);
+ out_tmp = gegl_scratch_alloc (dst_rect->height * out_tmp_rowstride);
+ do_free = 1;
+ }
+
+ babl_process_rows (from_fish,
+ source_buf, s_rowstride,
+ in_tmp, in_tmp_rowstride,
+ src_rect->width, src_rect->height);
+
+ gegl_resample_boxfilter_float (out_tmp, in_tmp, dst_rect, src_rect,
+ in_tmp_rowstride, scale, tmp_format, tmp_bpp, out_tmp_rowstride);
+
+ babl_process_rows (to_fish,
+ out_tmp, out_tmp_rowstride,
+ dest_buf, d_rowstride,
+ dst_rect->width, dst_rect->height);
+
+ if (do_free)
+ {
+ gegl_scratch_free (out_tmp);
gegl_scratch_free (in_tmp);
}
}
-void gegl_resample_boxfilter (guchar *dest_buf,
- const guchar *source_buf,
- const GeglRectangle *dst_rect,
- const GeglRectangle *src_rect,
- gint s_rowstride,
- gdouble scale,
- const Babl *format,
- gint d_rowstride)
+
+void
+GEGL_SIMD_SUFFIX(gegl_resample_boxfilter) (guchar *dest_buf,
+ const guchar *source_buf,
+ const GeglRectangle *dst_rect,
+ const GeglRectangle *src_rect,
+ gint s_rowstride,
+ gdouble scale,
+ const Babl *format,
+ gint d_rowstride)
{
void (*func) (guchar *dest_buf,
const guchar *source_buf,
@@ -1078,7 +1303,7 @@ void gegl_resample_boxfilter (guchar *dest_buf,
gdouble scale,
const Babl *format,
gint bpp,
- gint d_rowstride) = gegl_resample_boxfilter_generic;
+ gint d_rowstride) = gegl_resample_boxfilter_generic2;
const Babl *model = babl_format_get_model (format);
@@ -1119,15 +1344,16 @@ void gegl_resample_boxfilter (guchar *dest_buf,
}
+
static void
-gegl_resample_bilinear_generic (guchar *dest_buf,
- const guchar *source_buf,
- const GeglRectangle *dst_rect,
- const GeglRectangle *src_rect,
- gint s_rowstride,
- gdouble scale,
- const Babl *format,
- gint d_rowstride)
+gegl_resample_bilinear_generic2 (guchar *dest_buf,
+ const guchar *source_buf,
+ const GeglRectangle *dst_rect,
+ const GeglRectangle *src_rect,
+ gint s_rowstride,
+ gdouble scale,
+ const Babl *format,
+ gint d_rowstride)
{
const Babl *tmp_format = babl_format_with_space ("RGBA float", format);
const Babl *from_fish = babl_fish (format, tmp_format);
@@ -1173,14 +1399,15 @@ gegl_resample_bilinear_generic (guchar *dest_buf,
}
}
-void gegl_resample_bilinear (guchar *dest_buf,
- const guchar *source_buf,
- const GeglRectangle *dst_rect,
- const GeglRectangle *src_rect,
- gint s_rowstride,
- gdouble scale,
- const Babl *format,
- gint d_rowstride)
+void
+GEGL_SIMD_SUFFIX(gegl_resample_bilinear) (guchar *dest_buf,
+ const guchar *source_buf,
+ const GeglRectangle *dst_rect,
+ const GeglRectangle *src_rect,
+ gint s_rowstride,
+ gdouble scale,
+ const Babl *format,
+ gint d_rowstride)
{
const Babl *model = babl_format_get_model (format);
const Babl *comp_type = babl_format_get_type (format, 0);
@@ -1207,8 +1434,8 @@ void gegl_resample_bilinear (guchar *dest_buf,
gegl_resample_bilinear_double (dest_buf, source_buf, dst_rect, src_rect,
s_rowstride, scale, bpp, d_rowstride);
else
- gegl_resample_bilinear_generic (dest_buf, source_buf, dst_rect, src_rect,
- s_rowstride, scale, format, d_rowstride);
+ gegl_resample_bilinear_generic2 (dest_buf, source_buf, dst_rect, src_rect,
+ s_rowstride, scale, format, d_rowstride);
}
else
{
@@ -1224,262 +1451,11 @@ void gegl_resample_bilinear (guchar *dest_buf,
}
else
{
- gegl_resample_bilinear_generic (dest_buf, source_buf,
- dst_rect, src_rect,
- s_rowstride, scale, format,
- d_rowstride);
+ gegl_resample_bilinear_generic2 (dest_buf, source_buf,
+ dst_rect, src_rect,
+ s_rowstride, scale, format,
+ d_rowstride);
}
}
}
-void
-gegl_resample_nearest (guchar *dst,
- const guchar *src,
- const GeglRectangle *dst_rect,
- const GeglRectangle *src_rect,
- const gint src_stride,
- const gdouble scale,
- const gint bpp,
- const gint dst_stride)
-{
- gint jj[dst_rect->width];
- gint x, y;
- for (x = 0; x < dst_rect->width; x++)
- {
- const gfloat sx = (dst_rect->x + .5 + x) / scale - src_rect->x;
- jj[x] = int_floorf (sx ) * bpp;
- }
-
-#define IMPL(...) do{ \
- for (y = 0; y < dst_rect->height; y++)\
- {\
- const gfloat sy = (dst_rect->y + .5 + y) / scale - src_rect->y;\
- const gint ii = int_floorf (sy);\
- gint *ijj = &jj[0];\
- guchar *d = &dst[y*dst_stride];\
- const guchar *s = &src[ii * src_stride];\
- for (x = 0; x < dst_rect->width; x++)\
- {\
- __VA_ARGS__;\
- d += bpp; \
- }\
- }\
- }while(0)
-
- switch(bpp)
- {
- case 1:IMPL(
- d[0] = s[*(ijj++)];
- );
- break;
- case 2:IMPL(
- uint16_t* d16 = (void*) d;
- const uint16_t* s16 = (void*) &s[*(ijj++)];
- d16[0] = s16[0];
- );
- break;
- case 3:IMPL(
- d[0] = s[*ijj];
- d[1] = s[*ijj + 1];
- d[2] = s[*(ijj++) + 2];
- );
- break;
- case 5:IMPL(
- uint32_t* d32 = (void*) d;
- const uint32_t* s32 = (void*) &s[*(ijj++)];
- d32[0] = s32[0];
- d[4] = s[4];
- );
- break;
- case 4:IMPL(
- uint32_t* d32 = (void*) d;
- const uint32_t* s32 = (void*) &s[*(ijj++)];
- d32[0] = s32[0];
- );
- break;
- case 6:IMPL(
- uint32_t* d32 = (void*) d;
- const uint32_t* s32 = (void*) &s[*(ijj++)];
- d32[0] = s32[0];
- d[4] = s[4];
- d[5] = s[5];
- );
- break;
- case 8:IMPL(
- uint64_t* d64 = (void*) d;
- const uint64_t* s64 = (void*) &s[*(ijj++)];
- d64[0] = s64[0];
- );
- break;
- case 12:IMPL(
- uint32_t* d32 = (void*) d;
- const uint32_t* s32 = (void*) &s[*(ijj++)];
- d32[0] = s32[0];
- d32[1] = s32[1];
- d32[2] = s32[2];
- );
- break;
- case 16:IMPL(
- uint64_t* d64 = (void*) d;
- const uint64_t* s64 = (void*) &s[*(ijj++)];
- d64[0] = s64[0];
- d64[1] = s64[1];
- );
- break;
- default:
- IMPL(
- memcpy (&d[0], &s[*(ijj++)], bpp);
- );
- break;
- }
-#undef IMPL
-}
-
-#define BOXFILTER_FUNCNAME gegl_resample_boxfilter_double
-#define BOXFILTER_TYPE gdouble
-#define BOXFILTER_TEMP_TYPE gdouble
-#define BOXFILTER_ROUND(val) (val)
-#include "gegl-algorithms-boxfilter.inc"
-#undef BOXFILTER_FUNCNAME
-#undef BOXFILTER_TYPE
-#undef BOXFILTER_TEMP_TYPE
-#undef BOXFILTER_ROUND
-
-#define BOXFILTER_FUNCNAME gegl_resample_boxfilter_float
-#define BOXFILTER_TYPE gfloat
-#define BOXFILTER_TEMP_TYPE gfloat
-#define BOXFILTER_ROUND(val) (val)
-#include "gegl-algorithms-boxfilter.inc"
-#undef BOXFILTER_FUNCNAME
-#undef BOXFILTER_TYPE
-#undef BOXFILTER_TEMP_TYPE
-#undef BOXFILTER_ROUND
-
-#define BOXFILTER_FUNCNAME gegl_resample_boxfilter_u8
-#define BOXFILTER_TYPE guchar
-#define BOXFILTER_TEMP_TYPE guchar
-#define BOXFILTER_ROUND(val) ((int)((val)+0.5f))
-#include "gegl-algorithms-boxfilter.inc"
-#undef BOXFILTER_FUNCNAME
-#undef BOXFILTER_TYPE
-#undef BOXFILTER_TEMP_TYPE
-#undef BOXFILTER_ROUND
-
-#define BOXFILTER_FUNCNAME gegl_resample_boxfilter_u16
-#define BOXFILTER_TYPE guint16
-#define BOXFILTER_TEMP_TYPE guint16
-#define BOXFILTER_ROUND(val) ((int)((val)+0.5f))
-#include "gegl-algorithms-boxfilter.inc"
-#undef BOXFILTER_FUNCNAME
-#undef BOXFILTER_TYPE
-#undef BOXFILTER_TEMP_TYPE
-#undef BOXFILTER_ROUND
-
-static inline guint32 _gegl_trunc_u32(guint64 value)
-{
- if ((guint64) value > G_MAXUINT32)
- return G_MAXUINT32;
- return value;
-}
-
-#define BOXFILTER_FUNCNAME gegl_resample_boxfilter_u32
-#define BOXFILTER_TYPE guint32
-#define BOXFILTER_TEMP_TYPE guint64
-#define BOXFILTER_ROUND(val) _gegl_trunc_u32((val)+0.5f)
-#include "gegl-algorithms-boxfilter.inc"
-#undef BOXFILTER_FUNCNAME
-#undef BOXFILTER_TEMP_TYPE
-#undef BOXFILTER_TYPE
-#undef BOXFILTER_ROUND
-
-#define DOWNSCALE_FUNCNAME gegl_downscale_2x2_double
-#define DOWNSCALE_TYPE gdouble
-#define DOWNSCALE_SUM gdouble
-#define DOWNSCALE_DIVISOR 4.0
-#include "gegl-algorithms-2x2-downscale.inc"
-#undef DOWNSCALE_FUNCNAME
-#undef DOWNSCALE_TYPE
-#undef DOWNSCALE_SUM
-#undef DOWNSCALE_DIVISOR
-
-#define DOWNSCALE_FUNCNAME gegl_downscale_2x2_float
-#define DOWNSCALE_TYPE gfloat
-#define DOWNSCALE_SUM gfloat
-#define DOWNSCALE_DIVISOR 4.0f
-#include "gegl-algorithms-2x2-downscale.inc"
-#undef DOWNSCALE_FUNCNAME
-#undef DOWNSCALE_TYPE
-#undef DOWNSCALE_SUM
-#undef DOWNSCALE_DIVISOR
-
-#define DOWNSCALE_FUNCNAME gegl_downscale_2x2_u32
-#define DOWNSCALE_TYPE guint32
-#define DOWNSCALE_SUM guint64
-#define DOWNSCALE_DIVISOR 4
-#include "gegl-algorithms-2x2-downscale.inc"
-#undef DOWNSCALE_FUNCNAME
-#undef DOWNSCALE_TYPE
-#undef DOWNSCALE_SUM
-#undef DOWNSCALE_DIVISOR
-
-#define DOWNSCALE_FUNCNAME gegl_downscale_2x2_u16
-#define DOWNSCALE_TYPE guint16
-#define DOWNSCALE_SUM guint
-#define DOWNSCALE_DIVISOR 4
-#include "gegl-algorithms-2x2-downscale.inc"
-#undef DOWNSCALE_FUNCNAME
-#undef DOWNSCALE_TYPE
-#undef DOWNSCALE_SUM
-#undef DOWNSCALE_DIVISOR
-
-#define DOWNSCALE_FUNCNAME gegl_downscale_2x2_u8
-#define DOWNSCALE_TYPE guint8
-#define DOWNSCALE_SUM guint
-#define DOWNSCALE_DIVISOR 4
-#include "gegl-algorithms-2x2-downscale.inc"
-#undef DOWNSCALE_FUNCNAME
-#undef DOWNSCALE_TYPE
-#undef DOWNSCALE_SUM
-#undef DOWNSCALE_DIVISOR
-
-
-#define BILINEAR_FUNCNAME gegl_resample_bilinear_double
-#define BILINEAR_TYPE gdouble
-#define BILINEAR_ROUND(val) (val)
-#include "gegl-algorithms-bilinear.inc"
-#undef BILINEAR_FUNCNAME
-#undef BILINEAR_TYPE
-#undef BILINEAR_ROUND
-
-#define BILINEAR_FUNCNAME gegl_resample_bilinear_float
-#define BILINEAR_TYPE gfloat
-#define BILINEAR_ROUND(val) (val)
-#include "gegl-algorithms-bilinear.inc"
-#undef BILINEAR_FUNCNAME
-#undef BILINEAR_TYPE
-#undef BILINEAR_ROUND
-
-#define BILINEAR_FUNCNAME gegl_resample_bilinear_u8
-#define BILINEAR_TYPE guchar
-#define BILINEAR_ROUND(val) ((int)((val)+0.5f))
-#include "gegl-algorithms-bilinear.inc"
-#undef BILINEAR_FUNCNAME
-#undef BILINEAR_TYPE
-#undef BILINEAR_ROUND
-
-#define BILINEAR_FUNCNAME gegl_resample_bilinear_u16
-#define BILINEAR_TYPE guint16
-#define BILINEAR_ROUND(val) ((int)((val)+0.5f))
-#include "gegl-algorithms-bilinear.inc"
-#undef BILINEAR_FUNCNAME
-#undef BILINEAR_TYPE
-#undef BILINEAR_ROUND
-
-#define BILINEAR_FUNCNAME gegl_resample_bilinear_u32
-#define BILINEAR_TYPE guint32
-#define BILINEAR_ROUND(val) _gegl_trunc_u32((val)+0.5f)
-#include "gegl-algorithms-bilinear.inc"
-#undef BILINEAR_FUNCNAME
-#undef BILINEAR_TYPE
-#undef BILINEAR_ROUND
-
diff --git a/gegl/buffer/gegl-algorithms.h b/gegl/buffer/gegl-algorithms.h
index a057f9c36..9cd4de864 100644
--- a/gegl/buffer/gegl-algorithms.h
+++ b/gegl/buffer/gegl-algorithms.h
@@ -19,12 +19,17 @@
#ifndef __GEGL_ALGORITHMS_H__
#define __GEGL_ALGORITHMS_H__
+
#include "gegl-buffer.h"
G_BEGIN_DECLS
#define GEGL_SCALE_EPSILON 1.e-6
-void gegl_downscale_2x2 (const Babl *format,
+#ifndef GEGL_SIMD_SUFFIX
+#define GEGL_SIMD_SUFFIX(symbol) symbol##_generic
+#endif
+
+void GEGL_SIMD_SUFFIX(gegl_downscale_2x2) (const Babl *format,
gint src_width,
gint src_height,
guchar *src_data,
@@ -32,23 +37,6 @@ void gegl_downscale_2x2 (const Babl *format,
guchar *dst_data,
gint dst_rowstride);
-
-void gegl_downscale_2x2_double (const Babl *format,
- gint src_width,
- gint src_height,
- guchar *src_data,
- gint src_rowstride,
- guchar *dst_data,
- gint dst_rowstride);
-
-void gegl_downscale_2x2_float (const Babl *format,
- gint src_width,
- gint src_height,
- guchar *src_data,
- gint src_rowstride,
- guchar *dst_data,
- gint dst_rowstride);
-
typedef void (*GeglDownscale2x2Fun) (const Babl *format,
gint src_width,
gint src_height,
@@ -57,31 +45,7 @@ typedef void (*GeglDownscale2x2Fun) (const Babl *format,
guchar *dst_data,
gint dst_rowstride);
-void gegl_downscale_2x2_u32 (const Babl *format,
- gint src_width,
- gint src_height,
- guchar *src_data,
- gint src_rowstride,
- guchar *dst_data,
- gint dst_rowstride);
-
-void gegl_downscale_2x2_u16 (const Babl *format,
- gint src_width,
- gint src_height,
- guchar *src_data,
- gint src_rowstride,
- guchar *dst_data,
- gint dst_rowstride);
-
-void gegl_downscale_2x2_u8 (const Babl *format,
- gint src_width,
- gint src_height,
- guchar *src_data,
- gint src_rowstride,
- guchar *dst_data,
- gint dst_rowstride);
-
-void gegl_downscale_2x2_nearest (const Babl *format,
+void GEGL_SIMD_SUFFIX(gegl_downscale_2x2_nearest) (const Babl *format,
gint src_width,
gint src_height,
guchar *src_data,
@@ -93,7 +57,7 @@ void gegl_downscale_2x2_nearest (const Babl *format,
* available for #format fall back to nearest neighbor.
* #scale is assumed to be between 0.5 and +inf.
*/
-void gegl_resample_boxfilter (guchar *dest_buf,
+void GEGL_SIMD_SUFFIX(gegl_resample_boxfilter) (guchar *dest_buf,
const guchar *source_buf,
const GeglRectangle *dst_rect,
const GeglRectangle *src_rect,
@@ -102,60 +66,10 @@ void gegl_resample_boxfilter (guchar *dest_buf,
const Babl *format,
gint d_rowstride);
-void gegl_resample_boxfilter_double (guchar *dest_buf,
- const guchar *source_buf,
- const GeglRectangle *dst_rect,
- const GeglRectangle *src_rect,
- gint s_rowstride,
- gdouble scale,
- const Babl *format,
- gint bpp,
- gint d_rowstride);
-
-void gegl_resample_boxfilter_float (guchar *dest_buf,
- const guchar *source_buf,
- const GeglRectangle *dst_rect,
- const GeglRectangle *src_rect,
- gint s_rowstride,
- gdouble scale,
- const Babl *format,
- gint bpp,
- gint d_rowstride);
-
-void gegl_resample_boxfilter_u32 (guchar *dest_buf,
- const guchar *source_buf,
- const GeglRectangle *dst_rect,
- const GeglRectangle *src_rect,
- gint s_rowstride,
- gdouble scale,
- const Babl *format,
- gint bpp,
- gint d_rowstride);
-
-void gegl_resample_boxfilter_u16 (guchar *dest_buf,
- const guchar *source_buf,
- const GeglRectangle *dst_rect,
- const GeglRectangle *src_rect,
- gint s_rowstride,
- gdouble scale,
- const Babl *format,
- gint bpp,
- gint d_rowstride);
-
-void gegl_resample_boxfilter_u8 (guchar *dest_buf,
- const guchar *source_buf,
- const GeglRectangle *dst_rect,
- const GeglRectangle *src_rect,
- gint s_rowstride,
- gdouble scale,
- const Babl *format,
- gint bpp,
- gint d_rowstride);
-
/* Attempt to resample with a 2x2 bilinear filter, if no implementation is
* available for #format fall back to nearest neighbor.
*/
-void gegl_resample_bilinear (guchar *dest_buf,
+void GEGL_SIMD_SUFFIX(gegl_resample_bilinear) (guchar *dest_buf,
const guchar *source_buf,
const GeglRectangle *dst_rect,
const GeglRectangle *src_rect,
@@ -164,52 +78,7 @@ void gegl_resample_bilinear (guchar *dest_buf,
const Babl *format,
gint d_rowstride);
-void gegl_resample_bilinear_double (guchar *dest_buf,
- const guchar *source_buf,
- const GeglRectangle *dst_rect,
- const GeglRectangle *src_rect,
- gint s_rowstride,
- gdouble scale,
- gint bpp,
- gint d_rowstride);
-
-void gegl_resample_bilinear_float (guchar *dest_buf,
- const guchar *source_buf,
- const GeglRectangle *dst_rect,
- const GeglRectangle *src_rect,
- gint s_rowstride,
- gdouble scale,
- gint bpp,
- gint d_rowstride);
-
-void gegl_resample_bilinear_u32 (guchar *dest_buf,
- const guchar *source_buf,
- const GeglRectangle *dst_rect,
- const GeglRectangle *src_rect,
- gint s_rowstride,
- gdouble scale,
- gint bpp,
- gint d_rowstride);
-
-void gegl_resample_bilinear_u16 (guchar *dest_buf,
- const guchar *source_buf,
- const GeglRectangle *dst_rect,
- const GeglRectangle *src_rect,
- gint s_rowstride,
- gdouble scale,
- gint bpp,
- gint d_rowstride);
-
-void gegl_resample_bilinear_u8 (guchar *dest_buf,
- const guchar *source_buf,
- const GeglRectangle *dst_rect,
- const GeglRectangle *src_rect,
- gint s_rowstride,
- gdouble scale,
- gint bpp,
- gint d_rowstride);
-
-void gegl_resample_nearest (guchar *dst,
+void GEGL_SIMD_SUFFIX(gegl_resample_nearest) (guchar *dst,
const guchar *src,
const GeglRectangle *dst_rect,
const GeglRectangle *src_rect,
@@ -218,8 +87,14 @@ void gegl_resample_nearest (guchar *dst,
gint bpp,
gint dst_stride);
-GeglDownscale2x2Fun gegl_downscale_2x2_get_fun (const Babl *format);
+GeglDownscale2x2Fun GEGL_SIMD_SUFFIX(gegl_downscale_2x2_get_fun) (const Babl *format);
+
+#ifdef ARCH_X86_64
+GeglDownscale2x2Fun gegl_downscale_2x2_get_fun_x86_64_v2 (const Babl *format);
+GeglDownscale2x2Fun gegl_downscale_2x2_get_fun_x86_64_v3 (const Babl *format);
+#endif
+#define GEGL_ALGORITHMS_LUT_DIVISOR 16
G_END_DECLS
diff --git a/gegl/buffer/gegl-buffer-private.h b/gegl/buffer/gegl-buffer-private.h
index 62e4716d2..d15330dba 100644
--- a/gegl/buffer/gegl-buffer-private.h
+++ b/gegl/buffer/gegl-buffer-private.h
@@ -217,6 +217,42 @@ extern void (*gegl_buffer_ext_flush) (GeglBuffer *buffer, const GeglRectangle *r
extern void (*gegl_buffer_ext_invalidate) (GeglBuffer *buffer, const GeglRectangle *rect);
+extern void (*gegl_resample_bilinear) (guchar *dest_buf,
+ const guchar *source_buf,
+ const GeglRectangle *dst_rect,
+ const GeglRectangle *src_rect,
+ gint s_rowstride,
+ gdouble scale,
+ const Babl *format,
+ gint d_rowstride);
+
+extern void (*gegl_resample_boxfilter)(guchar *dest_buf,
+ const guchar *source_buf,
+ const GeglRectangle *dst_rect,
+ const GeglRectangle *src_rect,
+ gint s_rowstride,
+ gdouble scale,
+ const Babl *format,
+ gint d_rowstride);
+
+extern void (*gegl_resample_nearest)(guchar *dest_buf,
+ const guchar *source_buf,
+ const GeglRectangle *dst_rect,
+ const GeglRectangle *src_rect,
+ gint s_rowstride,
+ gdouble scale,
+ const gint bpp,
+ gint d_rowstride);
+
+extern void (*gegl_downscale_2x2) (const Babl *format,
+ gint src_width,
+ gint src_height,
+ guchar *src_data,
+ gint src_rowstride,
+ guchar *dst_data,
+ gint dst_rowstride);
+
+
#ifndef __GEGL_TILE_H__
#define gegl_tile_get_data(tile) ((tile)->data)
#endif
diff --git a/gegl/buffer/gegl-buffer.c b/gegl/buffer/gegl-buffer.c
index ed1a1b5be..6b5980ee1 100644
--- a/gegl/buffer/gegl-buffer.c
+++ b/gegl/buffer/gegl-buffer.c
@@ -44,6 +44,7 @@
#include "gegl-tile-backend-swap.h"
#include "gegl-tile-backend-ram.h"
#include "gegl-buffer-formats.h"
+#include "gegl-algorithms.h"
#ifdef GEGL_ENABLE_DEBUG
#define DEBUG_ALLOCATIONS (gegl_debug_flags & GEGL_DEBUG_BUFFER_ALLOC)
@@ -1303,3 +1304,176 @@ gegl_buffer_get_tile (GeglBuffer *buffer,
void (*gegl_tile_handler_cache_ext_flush) (void *cache, const GeglRectangle *rect)=NULL;
void (*gegl_buffer_ext_flush) (GeglBuffer *buffer, const GeglRectangle *rect)=NULL;
void (*gegl_buffer_ext_invalidate) (GeglBuffer *buffer, const GeglRectangle *rect)=NULL;
+
+void (*gegl_resample_bilinear) (guchar *dest_buf,
+ const guchar *source_buf,
+ const GeglRectangle *dst_rect,
+ const GeglRectangle *src_rect,
+ gint s_rowstride,
+ gdouble scale,
+ const Babl *format,
+ gint d_rowstride) =
+ gegl_resample_bilinear_generic;
+
+
+void (*gegl_resample_boxfilter) (guchar *dest_buf,
+ const guchar *source_buf,
+ const GeglRectangle *dst_rect,
+ const GeglRectangle *src_rect,
+ gint s_rowstride,
+ gdouble scale,
+ const Babl *format,
+ gint d_rowstride) =
+ gegl_resample_boxfilter_generic;
+
+
+void (*gegl_resample_nearest) (guchar *dest_buf,
+ const guchar *source_buf,
+ const GeglRectangle *dst_rect,
+ const GeglRectangle *src_rect,
+ gint s_rowstride,
+ gdouble scale,
+ const gint bpp,
+ gint d_rowstride) =
+ gegl_resample_nearest_generic;
+
+void (*gegl_downscale_2x2) (const Babl *format,
+ gint src_width,
+ gint src_height,
+ guchar *src_data,
+ gint src_rowstride,
+ guchar *dst_data,
+ gint dst_rowstride) =
+ gegl_downscale_2x2_generic;
+
+#ifdef ARCH_X86_64
+
+void gegl_resample_bilinear_x86_64_v2 (guchar *dest_buf,
+ const guchar *source_buf,
+ const GeglRectangle *dst_rect,
+ const GeglRectangle *src_rect,
+ gint s_rowstride,
+ gdouble scale,
+ const Babl *format,
+ gint d_rowstride);
+
+
+void gegl_resample_boxfilter_x86_64_v2 (guchar *dest_buf,
+ const guchar *source_buf,
+ const GeglRectangle *dst_rect,
+ const GeglRectangle *src_rect,
+ gint s_rowstride,
+ gdouble scale,
+ const Babl *format,
+ gint d_rowstride);
+
+
+void gegl_resample_nearest_x86_64_v2 (guchar *dest_buf,
+ const guchar *source_buf,
+ const GeglRectangle *dst_rect,
+ const GeglRectangle *src_rect,
+ gint s_rowstride,
+ gdouble scale,
+ const gint bpp,
+ gint d_rowstride);
+
+void gegl_downscale_2x2_x86_64_v2 (const Babl *format,
+ gint src_width,
+ gint src_height,
+ guchar *src_data,
+ gint src_rowstride,
+ guchar *dst_data,
+ gint dst_rowstride);
+
+
+
+void gegl_resample_bilinear_x86_64_v3 (guchar *dest_buf,
+ const guchar *source_buf,
+ const GeglRectangle *dst_rect,
+ const GeglRectangle *src_rect,
+ gint s_rowstride,
+ gdouble scale,
+ const Babl *format,
+ gint d_rowstride);
+
+
+void gegl_resample_boxfilter_x86_64_v3 (guchar *dest_buf,
+ const guchar *source_buf,
+ const GeglRectangle *dst_rect,
+ const GeglRectangle *src_rect,
+ gint s_rowstride,
+ gdouble scale,
+ const Babl *format,
+ gint d_rowstride);
+
+
+void gegl_resample_nearest_x86_64_v3 (guchar *dest_buf,
+ const guchar *source_buf,
+ const GeglRectangle *dst_rect,
+ const GeglRectangle *src_rect,
+ gint s_rowstride,
+ gdouble scale,
+ const gint bpp,
+ gint d_rowstride);
+
+void gegl_downscale_2x2_x86_64_v3 (const Babl *format,
+ gint src_width,
+ gint src_height,
+ guchar *src_data,
+ gint src_rowstride,
+ guchar *dst_data,
+ gint dst_rowstride);
+
+#endif
+
+guint16 gegl_lut_u8_to_u16[256];
+gfloat gegl_lut_u8_to_u16f[256];
+guint8 gegl_lut_u16_to_u8[65536/GEGL_ALGORITHMS_LUT_DIVISOR];
+
+
+void _gegl_init_buffer (int x86_64_version);
+void _gegl_init_buffer (int x86_64_version)
+{
+ static int inited = 0;
+ guint8 u8_ramp[256];
+ guint16 u16_ramp[65536/GEGL_ALGORITHMS_LUT_DIVISOR];
+ int i;
+
+ if (inited)
+ return;
+ inited = 1;
+
+ for (i = 0; i < 256; i++) u8_ramp[i]=i;
+ for (i = 0; i < 65536/GEGL_ALGORITHMS_LUT_DIVISOR; i++) u16_ramp[i]=i * GEGL_ALGORITHMS_LUT_DIVISOR;
+ babl_process (babl_fish (babl_format ("Y' u8"), babl_format("Y u16")),
+ &u8_ramp[0], &gegl_lut_u8_to_u16[0],
+ 256);
+ for (i = 0; i < 256; i++)
+ {
+ gegl_lut_u8_to_u16[i] = gegl_lut_u8_to_u16[i]/GEGL_ALGORITHMS_LUT_DIVISOR;
+ gegl_lut_u8_to_u16f[i] = gegl_lut_u8_to_u16[i];
+ }
+
+ babl_process (babl_fish (babl_format ("Y u16"), babl_format("Y' u8")),
+ &u16_ramp[0], &gegl_lut_u16_to_u8[0],
+ 65536/GEGL_ALGORITHMS_LUT_DIVISOR);
+#ifdef ARCH_X86_64
+ switch (x86_64_version)
+ {
+ case 0:
+ case 1: break;
+ case 2:
+ gegl_resample_bilinear = gegl_resample_bilinear_x86_64_v2;
+ gegl_resample_boxfilter = gegl_resample_boxfilter_x86_64_v2;
+ gegl_resample_nearest = gegl_resample_nearest_x86_64_v2;
+ gegl_downscale_2x2 = gegl_downscale_2x2_x86_64_v2;
+ break;
+ case 3:
+ gegl_resample_bilinear = gegl_resample_bilinear_x86_64_v3;
+ gegl_resample_boxfilter = gegl_resample_boxfilter_x86_64_v3;
+ gegl_resample_nearest = gegl_resample_nearest_x86_64_v3;
+ gegl_downscale_2x2 = gegl_downscale_2x2_x86_64_v3;
+ break;
+ }
+#endif
+}
diff --git a/gegl/buffer/gegl-tile-handler-zoom.c b/gegl/buffer/gegl-tile-handler-zoom.c
index 4674f1323..c5d48a9d7 100644
--- a/gegl/buffer/gegl-tile-handler-zoom.c
+++ b/gegl/buffer/gegl-tile-handler-zoom.c
@@ -32,6 +32,7 @@
#include "gegl-tile-storage.h"
#include "gegl-buffer-private.h"
#include "gegl-algorithms.h"
+#include "gegl-cpuaccel.h"
G_DEFINE_TYPE (GeglTileHandlerZoom, gegl_tile_handler_zoom,
@@ -61,7 +62,17 @@ downscale (GeglTileHandlerZoom *zoom,
if (src)
{
if (!zoom->downscale_2x2)
- zoom->downscale_2x2 = gegl_downscale_2x2_get_fun (format);
+ {
+#ifdef ARCH_X86_64
+ GeglCpuAccelFlags cpu_accel = gegl_cpu_accel_get_support ();
+ if (cpu_accel & GEGL_CPU_ACCEL_X86_64_V3)
+ zoom->downscale_2x2 = gegl_downscale_2x2_get_fun_x86_64_v3 (format);
+ else if (cpu_accel & GEGL_CPU_ACCEL_X86_64_V2)
+ zoom->downscale_2x2 = gegl_downscale_2x2_get_fun_x86_64_v2 (format);
+ else
+#endif
+ zoom->downscale_2x2 = gegl_downscale_2x2_get_fun_generic (format);
+ }
zoom->downscale_2x2 (format,
width, height,
diff --git a/gegl/buffer/meson.build b/gegl/buffer/meson.build
index e997d71eb..7097743fc 100644
--- a/gegl/buffer/meson.build
+++ b/gegl/buffer/meson.build
@@ -1,3 +1,18 @@
+if host_cpu_family == 'x86_64'
+
+ lib_gegl_x86_64_v2 = static_library('gegl-x86-64-v2', 'gegl-algorithms-x86-64-v2.c',
+ include_directories:[geglInclude, rootInclude],
+ dependencies:[glib, babl],
+ c_args: [gegl_cflags ] + x86_64_v2_flags
+ )
+
+ lib_gegl_x86_64_v3 = static_library('gegl-x86-64-v3', 'gegl-algorithms-x86-64-v3.c',
+ include_directories:[geglInclude, rootInclude],
+ dependencies:[glib, babl],
+ c_args: [gegl_cflags ] + x86_64_v3_flags
+ )
+endif
+
gegl_sources += files(
'gegl-algorithms.c',
'gegl-buffer-access.c',
diff --git a/gegl/gegl-init.c b/gegl/gegl-init.c
index d9af0842b..0744209a4 100644
--- a/gegl/gegl-init.c
+++ b/gegl/gegl-init.c
@@ -83,6 +83,7 @@ guint gegl_debug_flags = 0;
#include "graph/gegl-node-private.h"
#include "gegl-random-private.h"
#include "gegl-parallel-private.h"
+#include "gegl-cpuaccel.h"
static gboolean gegl_post_parse_hook (GOptionContext *context,
GOptionGroup *group,
@@ -164,7 +165,7 @@ gboolean gegl_is_main_thread (void)
return g_thread_self () == main_thread;
}
-void _gegl_init_u8_lut (void);
+void _gegl_init_buffer (int x86_64_version);
void
gegl_init (gint *argc,
@@ -540,7 +541,13 @@ gegl_post_parse_hook (GOptionContext *context,
gegl_config_parse_env (config);
babl_init ();
- _gegl_init_u8_lut ();
+
+ GeglCpuAccelFlags cpu_accel = gegl_cpu_accel_get_support ();
+ int x86_64_version = 0;
+ if (cpu_accel & GEGL_CPU_ACCEL_X86_64_V2) x86_64_version = 2;
+ if (cpu_accel & GEGL_CPU_ACCEL_X86_64_V3) x86_64_version = 3;
+
+ _gegl_init_buffer (x86_64_version);
#ifdef GEGL_ENABLE_DEBUG
{
diff --git a/gegl/meson.build b/gegl/meson.build
index 86c7a6fa1..1781db293 100644
--- a/gegl/meson.build
+++ b/gegl/meson.build
@@ -1,4 +1,3 @@
-
gegl_library_build_dir = meson.current_build_dir()
geglInclude = include_directories(
@@ -96,6 +95,13 @@ opencl_dep = declare_dependency(
link_with : [gegl_sources],
)
+
+if host_cpu_family == 'x86_64'
+ x86_64_extra = [lib_gegl_x86_64_v2, lib_gegl_x86_64_v3]
+else
+ x86_64_extra = []
+endif
+
gegl_lib = library(api_name,
gegl_sources,
include_directories: [rootInclude, geglInclude],
@@ -107,6 +113,8 @@ gegl_lib = library(api_name,
gmodule,
],
c_args: gegl_cflags,
+
+ link_with: x86_64_extra,
link_args: gegl_ldflags,
install: true,
version: so_version,
diff --git a/meson.build b/meson.build
index 9aea7da45..9044dd429 100644
--- a/meson.build
+++ b/meson.build
@@ -98,6 +98,8 @@ dep_ver += {
}
+
+
################################################################################
# Project infos
@@ -203,6 +205,12 @@ cflags_cpp = cflags_common + cflags_cpp
add_project_arguments(cc.get_supported_arguments(cflags_c), language: 'c')
add_project_arguments(cpp.get_supported_arguments(cflags_cpp), language: 'cpp')
+
+if host_cpu_family == 'x86_64'
+ x86_64_v2_flags = cc.get_supported_arguments(['-march=x86-64','-msse2', '-msse2','-msse4.1','-msse4.2','-mpopcnt','-mssse3'])
+ x86_64_v3_flags = x86_64_v2_flags + cc.get_supported_arguments(['-mavx','-mavx2','-mf16c','-mfma','-mmovbe', '-mbmi', '-mbmi2'])
+endif
+
################################################################################
# Build Utilities
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]