[gegl] build, gegl, operations: extend SIMD dispatch to handle ARM NEON
- From: Øyvind "pippin" Kolås <ok src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gegl] build, gegl, operations: extend SIMD dispatch to handle ARM NEON
- Date: Fri, 21 Jan 2022 00:48:58 +0000 (UTC)
commit feb357214af81ff2d500cf08a4ee6273e5e21675
Author: Øyvind Kolås <pippin gimp org>
Date: Fri Jan 21 01:24:48 2022 +0100
build,gegl,operations: extend SIMD dispatch to handle ARM NEON
gegl/buffer/gegl-algorithms-arm-neon.c | 4 +++
gegl/buffer/gegl-buffer.c | 56 ++++++++++++++++++++++++++++++++--
gegl/buffer/meson.build | 7 +++++
gegl/gegl-cpuaccel.c | 36 ++++++++++++++++++++++
gegl/gegl-cpuaccel.h | 3 ++
gegl/gegl-init.c | 5 +++
gegl/meson.build | 8 +++--
gegl/module/geglmoduledb.c | 29 ++++++++++++++++--
meson.build | 9 ++++--
operations/common-cxx/meson.build | 22 +++++++++++++
operations/common-gpl3+/meson.build | 22 +++++++++++++
operations/common/meson.build | 13 ++++++++
operations/generated/meson.build | 14 +++++++++
operations/transform/meson.build | 14 +++++++++
14 files changed, 232 insertions(+), 10 deletions(-)
---
diff --git a/gegl/buffer/gegl-algorithms-arm-neon.c b/gegl/buffer/gegl-algorithms-arm-neon.c
new file mode 100644
index 000000000..015987bc8
--- /dev/null
+++ b/gegl/buffer/gegl-algorithms-arm-neon.c
@@ -0,0 +1,4 @@
+
+#define GEGL_SIMD_SUFFIX(symbol) symbol##_arm_neon /* pastes _arm_neon onto the given symbol name */
+
+#include "gegl-algorithms.c" /* compiles gegl-algorithms.c as part of this file, with GEGL_SIMD_SUFFIX already defined */
diff --git a/gegl/buffer/gegl-buffer.c b/gegl/buffer/gegl-buffer.c
index 6b5980ee1..3bd9586be 100644
--- a/gegl/buffer/gegl-buffer.c
+++ b/gegl/buffer/gegl-buffer.c
@@ -1426,13 +1426,54 @@ void gegl_downscale_2x2_x86_64_v3 (const Babl *format,
#endif
+#ifdef ARCH_ARM /* prototypes of the _arm_neon variants that _gegl_init_buffer() assigns to the gegl_resample_* and gegl_downscale_2x2 pointers */
+
+void gegl_resample_bilinear_arm_neon (guchar *dest_buf,
+ const guchar *source_buf,
+ const GeglRectangle *dst_rect,
+ const GeglRectangle *src_rect,
+ gint s_rowstride,
+ gdouble scale,
+ const Babl *format,
+ gint d_rowstride);
+
+
+void gegl_resample_boxfilter_arm_neon (guchar *dest_buf,
+ const guchar *source_buf,
+ const GeglRectangle *dst_rect,
+ const GeglRectangle *src_rect,
+ gint s_rowstride,
+ gdouble scale,
+ const Babl *format,
+ gint d_rowstride);
+
+
+void gegl_resample_nearest_arm_neon (guchar *dest_buf,
+ const guchar *source_buf,
+ const GeglRectangle *dst_rect,
+ const GeglRectangle *src_rect,
+ gint s_rowstride,
+ gdouble scale,
+ const gint bpp,
+ gint d_rowstride);
+
+void gegl_downscale_2x2_arm_neon (const Babl *format,
+ gint src_width,
+ gint src_height,
+ guchar *src_data,
+ gint src_rowstride,
+ guchar *dst_data,
+ gint dst_rowstride);
+
+#endif /* ARCH_ARM */
+
guint16 gegl_lut_u8_to_u16[256];
gfloat gegl_lut_u8_to_u16f[256];
guint8 gegl_lut_u16_to_u8[65536/GEGL_ALGORITHMS_LUT_DIVISOR];
-void _gegl_init_buffer (int x86_64_version);
-void _gegl_init_buffer (int x86_64_version)
+void _gegl_init_buffer (int variant);
+void _gegl_init_buffer (int variant)
{
static int inited = 0;
guint8 u8_ramp[256];
@@ -1457,8 +1498,17 @@ void _gegl_init_buffer (int x86_64_version)
babl_process (babl_fish (babl_format ("Y u16"), babl_format("Y' u8")),
&u16_ramp[0], &gegl_lut_u16_to_u8[0],
65536/GEGL_ALGORITHMS_LUT_DIVISOR);
+#ifdef ARCH_ARM
+ if (variant)
+ {
+ gegl_resample_bilinear = gegl_resample_bilinear_arm_neon;
+ gegl_resample_boxfilter = gegl_resample_boxfilter_arm_neon;
+ gegl_resample_nearest = gegl_resample_nearest_arm_neon;
+ gegl_downscale_2x2 = gegl_downscale_2x2_arm_neon;
+ }
+#endif
#ifdef ARCH_X86_64
- switch (x86_64_version)
+ switch (variant)
{
case 0:
case 1: break;
diff --git a/gegl/buffer/meson.build b/gegl/buffer/meson.build
index 7097743fc..1e125bac8 100644
--- a/gegl/buffer/meson.build
+++ b/gegl/buffer/meson.build
@@ -11,6 +11,13 @@ if host_cpu_family == 'x86_64'
dependencies:[glib, babl],
c_args: [gegl_cflags ] + x86_64_v3_flags
)
+elif host_cpu_family == 'arm'
+ lib_gegl_arm_neon = static_library('gegl-arm-neon', 'gegl-algorithms-arm-neon.c',
+ include_directories:[geglInclude, rootInclude],
+ dependencies:[glib, babl],
+ c_args: [gegl_cflags ] + arm_neon_flags
+ )
+
endif
gegl_sources += files(
diff --git a/gegl/gegl-cpuaccel.c b/gegl/gegl-cpuaccel.c
index cf0395270..21353b912 100644
--- a/gegl/gegl-cpuaccel.c
+++ b/gegl/gegl-cpuaccel.c
@@ -546,6 +546,42 @@ arch_accel (void)
#endif /* ARCH_PPC && USE_ALTIVEC */
+#if defined(ARCH_ARM)
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <elf.h>
+
+#define HAVE_ACCEL 1
+
+static guint32
+arch_accel (void)
+{
+ /* TODO : add or hardcode the other ways it can be on arm, where
+ * this info comes from the system and not from running cpu
+ * instructions
+ * Current approach: scan /proc/self/auxv for AT_HWCAP records and test one bit of their value; returns GEGL_CPU_ACCEL_ARM_NEON or 0. */
+ int has_neon = 0; /* stays 0 if the file cannot be opened or no matching record is found */
+ int fd = open ("/proc/self/auxv", O_RDONLY); /* read-only; a failed open is not reported, it just yields 0 */
+ Elf32_auxv_t auxv; /* NOTE(review): 32-bit record layout; confirm ARCH_ARM is never defined for 64-bit builds */
+ if (fd >= 0)
+ {
+ while (read (fd, &auxv, sizeof (Elf32_auxv_t)) == sizeof (Elf32_auxv_t)) /* one whole record per iteration; a short read, EOF or error ends the scan */
+ {
+ if (auxv.a_type == AT_HWCAP)
+ {
+ if (auxv.a_un.a_val & 4096) /* 4096 == 1 << 12; presumably HWCAP_NEON from <asm/hwcap.h> -- confirm */
+ has_neon = 1;
+ }
+ } /* no break on a match: the remaining records are still read */
+ close (fd);
+ }
+ return has_neon?GEGL_CPU_ACCEL_ARM_NEON:0; /* the NEON flag when the bit was seen, otherwise 0 */
+}
+
+#endif /* ARCH_ARM */
+
static GeglCpuAccelFlags
cpu_accel (void)
{
diff --git a/gegl/gegl-cpuaccel.h b/gegl/gegl-cpuaccel.h
index 9e3dad36a..cf615719a 100644
--- a/gegl/gegl-cpuaccel.h
+++ b/gegl/gegl-cpuaccel.h
@@ -67,6 +67,9 @@ typedef enum
/* powerpc accelerations */
GEGL_CPU_ACCEL_PPC_ALTIVEC = 0x00000010,
+
+ /* arm accelerations */
+ GEGL_CPU_ACCEL_ARM_NEON = 0x00000020,
} GeglCpuAccelFlags;
diff --git a/gegl/gegl-init.c b/gegl/gegl-init.c
index 0744209a4..9381d9959 100644
--- a/gegl/gegl-init.c
+++ b/gegl/gegl-init.c
@@ -542,12 +542,17 @@ gegl_post_parse_hook (GOptionContext *context,
babl_init ();
+#if ARCH_ARM
+ GeglCpuAccelFlags cpu_accel = gegl_cpu_accel_get_support ();
+ _gegl_init_buffer ((cpu_accel & GEGL_CPU_ACCEL_ARM_NEON) != 0);
+#else
GeglCpuAccelFlags cpu_accel = gegl_cpu_accel_get_support ();
int x86_64_version = 0;
if (cpu_accel & GEGL_CPU_ACCEL_X86_64_V2) x86_64_version = 2;
if (cpu_accel & GEGL_CPU_ACCEL_X86_64_V3) x86_64_version = 3;
_gegl_init_buffer (x86_64_version);
+#endif
#ifdef GEGL_ENABLE_DEBUG
{
diff --git a/gegl/meson.build b/gegl/meson.build
index 48456757b..54977f5ed 100644
--- a/gegl/meson.build
+++ b/gegl/meson.build
@@ -96,9 +96,11 @@ opencl_dep = declare_dependency(
if host_cpu_family == 'x86_64'
- x86_64_extra = [lib_gegl_x86_64_v2, lib_gegl_x86_64_v3]
+ simd_extra = [lib_gegl_x86_64_v2, lib_gegl_x86_64_v3]
+elif host_cpu_family == 'arm'
+ simd_extra = [lib_gegl_arm_neon]
else
- x86_64_extra = []
+ simd_extra = []
endif
gegl_lib = library(api_name,
@@ -114,7 +116,7 @@ gegl_lib = library(api_name,
],
c_args: gegl_cflags,
- link_with: x86_64_extra,
+ link_with: simd_extra,
link_args: gegl_ldflags,
install: true,
version: so_version,
diff --git a/gegl/module/geglmoduledb.c b/gegl/module/geglmoduledb.c
index c3b628035..f8848c13c 100644
--- a/gegl/module/geglmoduledb.c
+++ b/gegl/module/geglmoduledb.c
@@ -25,6 +25,15 @@
#include "gegl-cpuaccel.h"
#include "gegl-config.h"
+
+#ifdef ARCH_X86_64
+#define ARCH_SIMD
+#endif
+#ifdef ARCH_ARM
+#define ARCH_SIMD
+#endif
+
+
enum
{
ADD,
@@ -228,7 +237,7 @@ gegl_module_db_get_load_inhibit (GeglModuleDB *db)
return db->load_inhibit;
}
-#ifdef ARCH_X86_64
+#ifdef ARCH_SIMD
static gboolean
gegl_str_has_one_of_suffixes (const char *str,
@@ -245,6 +254,8 @@ gegl_str_has_one_of_suffixes (const char *str,
static void
gegl_module_db_remove_duplicates (GeglModuleDB *db)
{
+#ifdef ARCH_X86_64
+
#ifdef __APPLE__ /* G_MODULE_SUFFIX is defined to .so instead of .dylib */
char *suffix_list[] = {"-x86_64-v2.dylib","-x86_64-v3.dylib", NULL};
#else
@@ -257,6 +268,20 @@ gegl_module_db_remove_duplicates (GeglModuleDB *db)
if (cpu_accel & GEGL_CPU_ACCEL_X86_64_V3) preferred = 1;
else if (cpu_accel & GEGL_CPU_ACCEL_X86_64_V2) preferred = 0;
+#endif
+#ifdef ARCH_ARM
+#ifdef __APPLE__ /* G_MODULE_SUFFIX is defined to .so instead of .dylib */
+ char *suffix_list[] = {"-arm-neon.dylib", NULL};
+#else
+ char *suffix_list[] = {"-arm-neon.so", NULL};
+#endif
+
+ GList *suffix_entries = NULL;
+ int preferred = -1;
+
+ GeglCpuAccelFlags cpu_accel = gegl_cpu_accel_get_support ();
+ if (cpu_accel & GEGL_CPU_ACCEL_ARM_NEON) preferred = 0;
+#endif
for (GList *l = db->to_load; l; l = l->next)
{
@@ -337,7 +362,7 @@ gegl_module_db_load (GeglModuleDB *db,
G_FILE_TEST_EXISTS,
gegl_module_db_module_search,
db);
-#if ARCH_X86_64
+#ifdef ARCH_SIMD
gegl_module_db_remove_duplicates (db);
#endif
while (db->to_load)
diff --git a/meson.build b/meson.build
index 9044dd429..a9366bc03 100644
--- a/meson.build
+++ b/meson.build
@@ -158,6 +158,9 @@ elif host_cpu_family == 'ppc64'
have_ppc = true
config.set10('ARCH_PPC', true)
config.set10('ARCH_PPC64', true)
+elif host_cpu_family == 'arm'
+ have_arm = true
+ config.set10('ARCH_ARM', true)
endif
# Only try to run compiled programs if native compile or cross-compile
@@ -207,8 +210,10 @@ add_project_arguments(cpp.get_supported_arguments(cflags_cpp), language: 'cpp')
if host_cpu_family == 'x86_64'
- x86_64_v2_flags = cc.get_supported_arguments(['-march=x86-64','-msse2', '-msse2','-msse4.1','-msse4.2','-mpopcnt','-mssse3'])
- x86_64_v3_flags = x86_64_v2_flags + cc.get_supported_arguments(['-mavx','-mavx2','-mf16c','-mfma','-mmovbe', '-mbmi', '-mbmi2'])
+ x86_64_v2_flags = cc.get_supported_arguments(['-ftree-vectorize','-march=x86-64','-msse2', '-msse2','-msse4.1','-msse4.2','-mpopcnt','-mssse3'])
+ x86_64_v3_flags = x86_64_v2_flags + cc.get_supported_arguments(['-ftree-vectorize','-mavx','-mavx2','-mf16c','-mfma','-mmovbe', '-mbmi', '-mbmi2'])
+elif host_cpu_family == 'arm'
+ arm_neon_flags = cc.get_supported_arguments(['-ftree-vectorize','-mfpu=neon'])
endif
################################################################################
diff --git a/operations/common-cxx/meson.build b/operations/common-cxx/meson.build
index ebced92c2..2d6c18b58 100644
--- a/operations/common-cxx/meson.build
+++ b/operations/common-cxx/meson.build
@@ -81,4 +81,26 @@ if host_cpu_family == 'x86_64'
)
gegl_operations += gegl_common_cxx_x86_64_v3
+elif host_cpu_family == 'arm'
+
+ gegl_common_cxx_arm_neon = shared_library('gegl-common-cxx-arm-neon',
+ gegl_common_cxx_sources, opencl_headers,
+ include_directories: [ rootInclude, geglInclude, ],
+ dependencies: [
+ babl,
+ glib,
+ json_glib,
+ math,
+ ],
+ link_with: [
+ gegl_lib,
+ ],
+ c_args: [ '-DGEGL_OP_BUNDLE' ] + arm_neon_flags,
+ cpp_args: [ '-DGEGL_OP_BUNDLE' ] + arm_neon_flags,
+ name_prefix: '',
+ install: true,
+ install_dir: get_option('libdir') / api_name,
+ )
+ gegl_operations += gegl_common_cxx_arm_neon
+
endif
diff --git a/operations/common-gpl3+/meson.build b/operations/common-gpl3+/meson.build
index 3607b6e51..0513fb44d 100644
--- a/operations/common-gpl3+/meson.build
+++ b/operations/common-gpl3+/meson.build
@@ -125,4 +125,26 @@ if host_cpu_family == 'x86_64'
)
gegl_operations += gegl_common_gpl3_x86_64_v3
+elif host_cpu_family == 'arm'
+
+ gegl_common_gpl3_arm_neon = shared_library('gegl-common-gpl3-arm-neon',
+ gegl_common_gpl3_sources,
+ opencl_headers,
+ include_directories: [ rootInclude, geglInclude, ],
+ dependencies: [
+ babl,
+ glib,
+ json_glib,
+ math,
+ ],
+ link_with: [
+ gegl_lib,
+ ],
+ c_args: [ '-DGEGL_OP_BUNDLE' ] + arm_neon_flags,
+ name_prefix: '',
+ install: true,
+ install_dir: get_option('libdir') / api_name,
+ )
+ gegl_operations += gegl_common_gpl3_arm_neon
+
endif
diff --git a/operations/common/meson.build b/operations/common/meson.build
index bc594a96d..120af48de 100644
--- a/operations/common/meson.build
+++ b/operations/common/meson.build
@@ -180,4 +180,17 @@ if host_cpu_family == 'x86_64'
)
gegl_operations += gegl_common_x86_64_v3
+elif host_cpu_family == 'arm'
+
+ gegl_common_arm_neon = shared_library('gegl-common-arm-neon',
+ gegl_common_sources, opencl_headers,
+ include_directories: [ rootInclude, geglInclude, ],
+ dependencies: [ babl, glib, json_glib, math, ],
+ link_with: [ gegl_lib, ],
+ c_args: [ '-DGEGL_OP_BUNDLE' ] + arm_neon_flags,
+ name_prefix: '',
+ install: true,
+ install_dir: get_option('libdir') / api_name,
+ )
+ gegl_operations += gegl_common_arm_neon
endif
diff --git a/operations/generated/meson.build b/operations/generated/meson.build
index a7a400339..b2d0e82d2 100644
--- a/operations/generated/meson.build
+++ b/operations/generated/meson.build
@@ -83,4 +83,18 @@ if host_cpu_family == 'x86_64'
)
gegl_operations += gegl_generated_x86_64_v3
+elif host_cpu_family == 'arm'
+
+ gegl_generated_arm_neon = shared_library('gegl-generated-arm-neon',
+ gegl_generated_sources, opencl_headers,
+ include_directories: [ rootInclude, geglInclude, ],
+ dependencies: [ babl, glib, json_glib, math, ],
+ link_with: [ gegl_lib, ],
+ c_args: [ '-DGEGL_OP_BUNDLE' ] + arm_neon_flags,
+ name_prefix: '',
+ install: true,
+ install_dir: get_option('libdir') / api_name,
+ )
+ gegl_operations += gegl_generated_arm_neon
+
endif
diff --git a/operations/transform/meson.build b/operations/transform/meson.build
index 352ce6564..110b28962 100644
--- a/operations/transform/meson.build
+++ b/operations/transform/meson.build
@@ -60,4 +60,18 @@ if host_cpu_family == 'x86_64'
)
gegl_operations += gegl_transformops_x86_64_v3
+elif host_cpu_family == 'arm'
+
+ gegl_transformops_arm_neon = shared_library('gegl-transformops-arm-neon',
+ gegl_transformops_sources, opencl_headers,
+ include_directories: [ rootInclude, geglInclude, ],
+ dependencies: [ babl, glib, json_glib, math, ],
+ link_with: [ gegl_lib, ],
+ c_args: arm_neon_flags,
+ name_prefix: '',
+ install: true,
+ install_dir: get_option('libdir') / api_name,
+ )
+ gegl_operations += gegl_transformops_arm_neon
+
endif
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]