[babl] configure.ac, meson, cpu-accel: add AVX2 detection
- From: Ell <ell src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [babl] configure.ac, meson, cpu-accel: add AVX2 detection
- Date: Wed, 24 Jul 2019 20:41:56 +0000 (UTC)
commit 385f0b545727262f58d3cfcf5523f69ace0e0166
Author: Ell <ell_se yahoo com>
Date: Wed Jul 24 23:21:01 2019 +0300
configure.ac, meson, cpu-accel: add AVX2 detection
Detect AVX2 support during configuration and runtime, in
preparation for the next commit.
babl/babl-cpuaccel.c | 17 ++++++++++++++++-
babl/babl-cpuaccel.h | 1 +
configure.ac | 24 ++++++++++++++++++++++++
meson.build | 10 ++++++++++
meson_options.txt | 1 +
5 files changed, 52 insertions(+), 1 deletion(-)
---
diff --git a/babl/babl-cpuaccel.c b/babl/babl-cpuaccel.c
index 534fa89..ef26fa5 100644
--- a/babl/babl-cpuaccel.c
+++ b/babl/babl-cpuaccel.c
@@ -121,11 +121,15 @@ enum
ARCH_X86_INTEL_FEATURE_SSE4_2 = 1 << 20,
ARCH_X86_INTEL_FEATURE_AVX = 1 << 28,
ARCH_X86_INTEL_FEATURE_F16C = 1 << 29,
+
+ /* extended features */
+ ARCH_X86_INTEL_FEATURE_AVX2 = 1 << 5
};
#if !defined(ARCH_X86_64) && (defined(PIC) || defined(__PIC__))
#define cpuid(op,eax,ebx,ecx,edx) \
__asm__ ("movl %%ebx, %%esi\n\t" \
+ "xor %%ecx, %%ecx\n\t" \
"cpuid\n\t" \
"xchgl %%ebx,%%esi" \
: "=a" (eax), \
@@ -135,7 +139,8 @@ enum
: "0" (op))
#else
#define cpuid(op,eax,ebx,ecx,edx) \
- __asm__ ("cpuid" \
+ __asm__ ("xor %%ecx, %%ecx\n\t" \
+ "cpuid" \
: "=a" (eax), \
"=b" (ebx), \
"=c" (ecx), \
@@ -253,6 +258,16 @@ arch_accel_intel (void)
if (ecx & ARCH_X86_INTEL_FEATURE_F16C)
caps |= BABL_CPU_ACCEL_X86_F16C;
+
+ cpuid (0, eax, ebx, ecx, edx);
+
+ if (eax >= 7)
+ {
+ cpuid (7, eax, ebx, ecx, edx);
+
+ if (ebx & ARCH_X86_INTEL_FEATURE_AVX2)
+ caps |= BABL_CPU_ACCEL_X86_AVX2;
+ }
#endif /* USE_SSE */
}
#endif /* USE_MMX */
diff --git a/babl/babl-cpuaccel.h b/babl/babl-cpuaccel.h
index 738bc59..b8a6855 100644
--- a/babl/babl-cpuaccel.h
+++ b/babl/babl-cpuaccel.h
@@ -35,6 +35,7 @@ typedef enum
/* BABL_CPU_ACCEL_X86_SSE4_2 = 0x00200000, */
/* BABL_CPU_ACCEL_X86_AVX = 0x00080000, */
BABL_CPU_ACCEL_X86_F16C = 0x00040000,
+ BABL_CPU_ACCEL_X86_AVX2 = 0x00020000,
/* powerpc accelerations */
BABL_CPU_ACCEL_PPC_ALTIVEC = 0x04000000,
diff --git a/configure.ac b/configure.ac
index 7f53331..bb29428 100644
--- a/configure.ac
+++ b/configure.ac
@@ -353,6 +353,10 @@ AC_ARG_ENABLE(sse4_1,
[ --enable-sse4_1 enable SSE4_1 support (default=auto)],,
enable_sse4_1=$enable_sse)
+AC_ARG_ENABLE(avx2,
+ [ --enable-avx2 enable AVX2 support (default=auto)],,
+ enable_avx2=$enable_sse)
+
AC_ARG_ENABLE(f16c,
[ --enable-f16c enable hardware half-float support (default=auto)],,
enable_f16c=$enable_sse)
@@ -363,6 +367,7 @@ if test "x$enable_mmx" = xyes; then
SSE2_EXTRA_CFLAGS=
SSE3_EXTRA_CFLAGS=
SSE4_1_EXTRA_CFLAGS=
+ AVX2_EXTRA_CFLAGS=
F16C_EXTRA_CFLAGS=
AC_MSG_CHECKING(whether we can compile MMX code)
@@ -448,6 +453,24 @@ if test "x$enable_mmx" = xyes; then
AC_MSG_RESULT(no)
AC_MSG_WARN([The assembler does not support the SSE4_1 command set.])
)
+
+ if test "x$enable_avx2" = xyes; then
+ BABL_DETECT_CFLAGS(avx2_flag, '-mavx2')
+ AVX2_EXTRA_CFLAGS="$SSE4_1_EXTRA_CFLAGS $avx2_flag"
+
+ AC_MSG_CHECKING(whether we can compile AVX2 code)
+
+ CFLAGS="$CFLAGS $avx2_flag"
+
+ AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[asm ("vpgatherdd %ymm0,(%rax,%ymm1,4),%ymm2");])],
+ AC_DEFINE(USE_AVX2, 1, [Define to 1 if AVX2 assembly is available.])
+ AC_MSG_RESULT(yes)
+ ,
+ enable_avx2=no
+ AC_MSG_RESULT(no)
+ AC_MSG_WARN([The assembler does not support the AVX2 command set.])
+ )
+ fi
fi
fi
fi
@@ -486,6 +509,7 @@ if test "x$enable_mmx" = xyes; then
AC_SUBST(SSE2_EXTRA_CFLAGS)
AC_SUBST(SSE3_EXTRA_CFLAGS)
AC_SUBST(SSE4_1_EXTRA_CFLAGS)
+ AC_SUBST(AVX2_EXTRA_CFLAGS)
AC_SUBST(F16C_EXTRA_CFLAGS)
fi
diff --git a/meson.build b/meson.build
index b17db52..c72688e 100644
--- a/meson.build
+++ b/meson.build
@@ -216,6 +216,16 @@ if cc.has_argument('-mmmx') and get_option('enable-mmx')
conf.set('USE_SSE4_1', 1, description:
'Define to 1 if sse4.1 assembly is available.')
endif
+
+ # avx2 assembly
+ if cc.has_argument('-mavx2') and get_option('enable-avx2')
+ if cc.compiles('asm ("vpgatherdd %ymm0,(%rax,%ymm1,4),%ymm2");')
+ message('avx2 assembly available')
+ avx2_cflags = '-mavx2'
+ conf.set('USE_AVX2', 1, description:
+ 'Define to 1 if avx2 assembly is available.')
+ endif
+ endif
endif
endif
endif
diff --git a/meson_options.txt b/meson_options.txt
index ab08ce9..f4a7ced 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -3,6 +3,7 @@ option('enable-sse', type: 'boolean', value: true, description: 'enable SSE s
option('enable-sse2', type: 'boolean', value: true, description: 'enable SSE2 support')
option('enable-sse3', type: 'boolean', value: true, description: 'enable SSE3 support')
option('enable-sse4_1', type: 'boolean', value: true, description: 'enable SSE4.1 support')
+option('enable-avx2', type: 'boolean', value: true, description: 'enable AVX2 support')
option('enable-f16c', type: 'boolean', value: true, description: 'enable hardware half-float support')
option('with-docs', type: 'boolean', value: true, description: 'build website')
option('with-lcms', type: 'boolean', value: true, description: 'build with lcms')
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]