[gegl] buffer: simplify multipass SIMD variant build
- From: Øyvind "pippin" Kolås <ok src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gegl] buffer: simplify multipass SIMD variant build
- Date: Tue, 25 Jan 2022 00:57:10 +0000 (UTC)
commit 5a680fb9730bb0d22f9fc8f8ef31c1cde22ddaab
Author: Øyvind Kolås <pippin gimp org>
Date: Tue Jan 25 01:30:46 2022 +0100
buffer: simplify multipass SIMD variant build
gegl/buffer/gegl-algorithms-arm-neon.c | 4 -
gegl/buffer/gegl-algorithms-x86-64-v2.c | 4 -
gegl/buffer/gegl-algorithms-x86-64-v3.c | 4 -
gegl/buffer/gegl-algorithms.h | 13 +++
gegl/buffer/gegl-buffer.c | 179 +++++++++-----------------------
gegl/buffer/gegl-variants.inc | 9 ++
gegl/buffer/meson.build | 6 +-
meson.build | 9 +-
8 files changed, 80 insertions(+), 148 deletions(-)
---
diff --git a/gegl/buffer/gegl-algorithms.h b/gegl/buffer/gegl-algorithms.h
index 9cd4de864..9108dee53 100644
--- a/gegl/buffer/gegl-algorithms.h
+++ b/gegl/buffer/gegl-algorithms.h
@@ -25,9 +25,22 @@ G_BEGIN_DECLS
#define GEGL_SCALE_EPSILON 1.e-6
+#ifdef SIMD_X86_64_V2
+#define GEGL_SIMD_SUFFIX(symbol) symbol##_x86_64_v2
+#endif
+#ifdef SIMD_X86_64_V3
+#define GEGL_SIMD_SUFFIX(symbol) symbol##_x86_64_v3
+#endif
+#ifdef SIMD_ARM_NEON
+#define GEGL_SIMD_SUFFIX(symbol) symbol##_arm_neon
+#endif
#ifndef GEGL_SIMD_SUFFIX
#define GEGL_SIMD_SUFFIX(symbol) symbol##_generic
+#ifndef SIMD_GENERIC
+#define SIMD_GENERIC
#endif
+#endif
+
void GEGL_SIMD_SUFFIX(gegl_downscale_2x2) (const Babl *format,
gint src_width,
diff --git a/gegl/buffer/gegl-buffer.c b/gegl/buffer/gegl-buffer.c
index 3bd9586be..c4d91b3e1 100644
--- a/gegl/buffer/gegl-buffer.c
+++ b/gegl/buffer/gegl-buffer.c
@@ -1305,8 +1305,8 @@ void (*gegl_tile_handler_cache_ext_flush) (void *cache, const GeglRectangle *rec
void (*gegl_buffer_ext_flush) (GeglBuffer *buffer, const GeglRectangle *rect)=NULL;
void (*gegl_buffer_ext_invalidate) (GeglBuffer *buffer, const GeglRectangle *rect)=NULL;
-void (*gegl_resample_bilinear) (guchar *dest_buf,
- const guchar *source_buf,
+void (*gegl_resample_bilinear) (guchar *dest_buf,
+ const guchar *source_buf,
const GeglRectangle *dst_rect,
const GeglRectangle *src_rect,
gint s_rowstride,
@@ -1316,8 +1316,8 @@ void (*gegl_resample_bilinear) (guchar *dest_buf,
gegl_resample_bilinear_generic;
-void (*gegl_resample_boxfilter) (guchar *dest_buf,
- const guchar *source_buf,
+void (*gegl_resample_boxfilter) (guchar *dest_buf,
+ const guchar *source_buf,
const GeglRectangle *dst_rect,
const GeglRectangle *src_rect,
gint s_rowstride,
@@ -1327,8 +1327,8 @@ void (*gegl_resample_boxfilter) (guchar *dest_buf,
gegl_resample_boxfilter_generic;
-void (*gegl_resample_nearest) (guchar *dest_buf,
- const guchar *source_buf,
+void (*gegl_resample_nearest) (guchar *dest_buf,
+ const guchar *source_buf,
const GeglRectangle *dst_rect,
const GeglRectangle *src_rect,
gint s_rowstride,
@@ -1338,134 +1338,51 @@ void (*gegl_resample_nearest) (guchar *dest_buf,
gegl_resample_nearest_generic;
void (*gegl_downscale_2x2) (const Babl *format,
- gint src_width,
- gint src_height,
- guchar *src_data,
- gint src_rowstride,
- guchar *dst_data,
- gint dst_rowstride) =
+ gint src_width,
+ gint src_height,
+ guchar *src_data,
+ gint src_rowstride,
+ guchar *dst_data,
+ gint dst_rowstride) =
gegl_downscale_2x2_generic;
-#ifdef ARCH_X86_64
-
-void gegl_resample_bilinear_x86_64_v2 (guchar *dest_buf,
- const guchar *source_buf,
- const GeglRectangle *dst_rect,
- const GeglRectangle *src_rect,
- gint s_rowstride,
- gdouble scale,
- const Babl *format,
- gint d_rowstride);
-
-
-void gegl_resample_boxfilter_x86_64_v2 (guchar *dest_buf,
- const guchar *source_buf,
- const GeglRectangle *dst_rect,
- const GeglRectangle *src_rect,
- gint s_rowstride,
- gdouble scale,
- const Babl *format,
- gint d_rowstride);
-
-
-void gegl_resample_nearest_x86_64_v2 (guchar *dest_buf,
- const guchar *source_buf,
- const GeglRectangle *dst_rect,
- const GeglRectangle *src_rect,
- gint s_rowstride,
- gdouble scale,
- const gint bpp,
- gint d_rowstride);
-
-void gegl_downscale_2x2_x86_64_v2 (const Babl *format,
- gint src_width,
- gint src_height,
- guchar *src_data,
- gint src_rowstride,
- guchar *dst_data,
- gint dst_rowstride);
-
-
-
-void gegl_resample_bilinear_x86_64_v3 (guchar *dest_buf,
- const guchar *source_buf,
- const GeglRectangle *dst_rect,
- const GeglRectangle *src_rect,
- gint s_rowstride,
- gdouble scale,
- const Babl *format,
- gint d_rowstride);
-
-
-void gegl_resample_boxfilter_x86_64_v3 (guchar *dest_buf,
- const guchar *source_buf,
- const GeglRectangle *dst_rect,
- const GeglRectangle *src_rect,
- gint s_rowstride,
- gdouble scale,
- const Babl *format,
- gint d_rowstride);
-
-
-void gegl_resample_nearest_x86_64_v3 (guchar *dest_buf,
- const guchar *source_buf,
- const GeglRectangle *dst_rect,
- const GeglRectangle *src_rect,
- gint s_rowstride,
- gdouble scale,
- const gint bpp,
- gint d_rowstride);
-
-void gegl_downscale_2x2_x86_64_v3 (const Babl *format,
- gint src_width,
- gint src_height,
- guchar *src_data,
- gint src_rowstride,
- guchar *dst_data,
- gint dst_rowstride);
-
-#endif
-#ifdef ARCH_ARM
-
-void gegl_resample_bilinear_arm_neon (guchar *dest_buf,
- const guchar *source_buf,
- const GeglRectangle *dst_rect,
- const GeglRectangle *src_rect,
- gint s_rowstride,
- gdouble scale,
- const Babl *format,
- gint d_rowstride);
-
-
-void gegl_resample_boxfilter_arm_neon (guchar *dest_buf,
- const guchar *source_buf,
- const GeglRectangle *dst_rect,
- const GeglRectangle *src_rect,
- gint s_rowstride,
- gdouble scale,
- const Babl *format,
- gint d_rowstride);
-
-
-void gegl_resample_nearest_arm_neon (guchar *dest_buf,
- const guchar *source_buf,
- const GeglRectangle *dst_rect,
- const GeglRectangle *src_rect,
- gint s_rowstride,
- gdouble scale,
- const gint bpp,
- gint d_rowstride);
-
-void gegl_downscale_2x2_arm_neon (const Babl *format,
- gint src_width,
- gint src_height,
- guchar *src_data,
- gint src_rowstride,
- guchar *dst_data,
- gint dst_rowstride);
-
-#endif
+#define GEGL_VARIANTS(variant) \
+void gegl_resample_nearest_##variant (guchar *dest_buf, \
+ const guchar *source_buf, \
+ const GeglRectangle *dst_rect, \
+ const GeglRectangle *src_rect, \
+ gint s_rowstride, \
+ gdouble scale, \
+ const gint bpp, \
+ gint d_rowstride); \
+void gegl_resample_bilinear_##variant (guchar *dest_buf, \
+ const guchar *source_buf, \
+ const GeglRectangle *dst_rect, \
+ const GeglRectangle *src_rect, \
+ gint s_rowstride, \
+ gdouble scale, \
+ const Babl *format, \
+ gint d_rowstride); \
+void gegl_resample_boxfilter_##variant (guchar *dest_buf, \
+ const guchar *source_buf, \
+ const GeglRectangle *dst_rect, \
+ const GeglRectangle *src_rect, \
+ gint s_rowstride, \
+ gdouble scale, \
+ const Babl *format, \
+ gint d_rowstride); \
+void gegl_downscale_2x2_##variant (const Babl *format, \
+ gint src_width, \
+ gint src_height, \
+ guchar *src_data, \
+ gint src_rowstride,\
+ guchar *dst_data, \
+ gint dst_rowstride);
+
+#include "gegl-variants.inc"
+//GEGL_VARIANTS(generic)
+#undef GEGL_VARIANTS
guint16 gegl_lut_u8_to_u16[256];
gfloat gegl_lut_u8_to_u16f[256];
diff --git a/gegl/buffer/gegl-variants.inc b/gegl/buffer/gegl-variants.inc
new file mode 100644
index 000000000..eb22191fc
--- /dev/null
+++ b/gegl/buffer/gegl-variants.inc
@@ -0,0 +1,9 @@
+
+#if ARCH_X86_64
+GEGL_VARIANTS(x86_64_v2)
+GEGL_VARIANTS(x86_64_v3)
+#endif
+#if ARCH_ARM
+GEGL_VARIANTS(arm_neon)
+#endif
+
diff --git a/gegl/buffer/meson.build b/gegl/buffer/meson.build
index 1e125bac8..42cd6f455 100644
--- a/gegl/buffer/meson.build
+++ b/gegl/buffer/meson.build
@@ -1,18 +1,18 @@
if host_cpu_family == 'x86_64'
- lib_gegl_x86_64_v2 = static_library('gegl-x86-64-v2', 'gegl-algorithms-x86-64-v2.c',
+ lib_gegl_x86_64_v2 = static_library('gegl-x86-64-v2', 'gegl-algorithms.c',
include_directories:[geglInclude, rootInclude],
dependencies:[glib, babl],
c_args: [gegl_cflags ] + x86_64_v2_flags
)
- lib_gegl_x86_64_v3 = static_library('gegl-x86-64-v3', 'gegl-algorithms-x86-64-v3.c',
+ lib_gegl_x86_64_v3 = static_library('gegl-x86-64-v3', 'gegl-algorithms.c',
include_directories:[geglInclude, rootInclude],
dependencies:[glib, babl],
c_args: [gegl_cflags ] + x86_64_v3_flags
)
elif host_cpu_family == 'arm'
- lib_gegl_arm_neon = static_library('gegl-arm-neon', 'gegl-algorithms-arm-neon.c',
+ lib_gegl_arm_neon = static_library('gegl-arm-neon', 'gegl-algorithms.c',
include_directories:[geglInclude, rootInclude],
dependencies:[glib, babl],
c_args: [gegl_cflags ] + arm_neon_flags
diff --git a/meson.build b/meson.build
index e3864efb7..c94c9a9ef 100644
--- a/meson.build
+++ b/meson.build
@@ -211,10 +211,15 @@ cflags_common += cc.get_supported_arguments(['-ftree-vectorize'])
if host_cpu_family == 'x86_64'
x86_64_v2_flags = cc.get_supported_arguments(['-march=x86-64','-msse2',
'-msse2','-msse4.1','-msse4.2','-mpopcnt','-mssse3'])
x86_64_v3_flags = x86_64_v2_flags +
cc.get_supported_arguments(['-mavx','-mavx2','-mf16c','-mfma','-mmovbe', '-mbmi', '-mbmi2'])
+
+ x86_64_v2_flags += ['-DSIMD_X86_64_V2']
+ x86_64_v3_flags += ['-DSIMD_X86_64_V3']
+
elif host_cpu_family == 'arm'
- arm_neon_flags = cc.get_supported_arguments(['-mfpu=neon'])
+ arm_neon_flags = cc.get_supported_arguments(['-mfpu=neon-vfpv4'])
+ arm_neon_flags += ['-DSIMD_ARM_NEON']
elif host_cpu_family == 'aarch64'
- cflags_common += cc.get_supported_arguments(['-mfpu=neon'])
+ cflags_common += cc.get_supported_arguments(['-mfpu=neon-vfpv4'])
endif
cflags_c = cflags_common + cflags_c
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]