[babl] babl: sync cpuaccel with GEGL
- From: Øyvind "pippin" Kolås <ok src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [babl] babl: sync cpuaccel with GEGL
- Date: Fri, 21 Jan 2022 23:06:49 +0000 (UTC)
commit 1c151bd73fb3cd28719addfcafa3f92569fd0baa
Author: Øyvind Kolås <pippin gimp org>
Date: Sat Jan 22 00:02:43 2022 +0100
babl: sync cpuaccel with GEGL
babl/babl-cpuaccel.c | 94 ++++++++++++++++++++++++++++++++++++++++++++--------
babl/babl-cpuaccel.h | 48 ++++++++++++++++++++++-----
2 files changed, 120 insertions(+), 22 deletions(-)
---
diff --git a/babl/babl-cpuaccel.c b/babl/babl-cpuaccel.c
index 6c1f1cc11..7d613d94f 100644
--- a/babl/babl-cpuaccel.c
+++ b/babl/babl-cpuaccel.c
@@ -14,11 +14,10 @@
* You should have received a copy of the GNU Lesser General
* Public License along with this library; if not, see
* <https://www.gnu.org/licenses/>.
+ *
+ * (c) Manish Singh, Aaron Holtzman, Jan Heller, Ell, Øyvind Kolås
*/
-/*
- * x86 bits Copyright (C) Manish Singh <yosh gimp org>
- */
/*
* PPC CPU acceleration detection was taken from DirectFB but seems to be
@@ -78,7 +77,6 @@ babl_cpu_accel_set_use (gboolean use)
#define HAVE_ACCEL 1
-
typedef enum
{
ARCH_X86_VENDOR_NONE,
@@ -117,15 +115,26 @@ enum
{
ARCH_X86_INTEL_FEATURE_PNI = 1 << 0,
ARCH_X86_INTEL_FEATURE_SSSE3 = 1 << 9,
+ ARCH_X86_INTEL_FEATURE_FMA = 1 << 12,
ARCH_X86_INTEL_FEATURE_SSE4_1 = 1 << 19,
ARCH_X86_INTEL_FEATURE_SSE4_2 = 1 << 20,
+ ARCH_X86_INTEL_FEATURE_MOVBE = 1 << 22,
+ ARCH_X86_INTEL_FEATURE_POPCNT = 1 << 23,
+ ARCH_X86_INTEL_FEATURE_XSAVE = 1 << 26,
+ ARCH_X86_INTEL_FEATURE_OSXSAVE = 1 << 27,
ARCH_X86_INTEL_FEATURE_AVX = 1 << 28,
ARCH_X86_INTEL_FEATURE_F16C = 1 << 29,
- /* extended features */
- ARCH_X86_INTEL_FEATURE_AVX2 = 1 << 5
+ // extended features
+
+ ARCH_X86_INTEL_FEATURE_BMI1 = 1 << 3,
+ ARCH_X86_INTEL_FEATURE_BMI2 = 1 << 8,
+ ARCH_X86_INTEL_FEATURE_AVX2 = 1 << 5,
};
+
+/* x86 asm bit Copyright (C) Manish Singh <yosh gimp org>
+ */
#if !defined(ARCH_X86_64) && (defined(PIC) || defined(__PIC__))
#define cpuid(op,eax,ebx,ecx,edx) \
__asm__ ("movl %%ebx, %%esi\n\t" \
@@ -256,19 +265,43 @@ arch_accel_intel (void)
if (ecx & ARCH_X86_INTEL_FEATURE_SSE4_1)
caps |= BABL_CPU_ACCEL_X86_SSE4_1;
+ if (ecx & ARCH_X86_INTEL_FEATURE_SSE4_2)
+ caps |= BABL_CPU_ACCEL_X86_SSE4_2;
+
+ if (ecx & ARCH_X86_INTEL_FEATURE_AVX)
+ caps |= BABL_CPU_ACCEL_X86_AVX;
+
+ if (ecx & ARCH_X86_INTEL_FEATURE_POPCNT)
+ caps |= BABL_CPU_ACCEL_X86_POPCNT;
+
+ if (ecx & ARCH_X86_INTEL_FEATURE_XSAVE)
+ caps |= BABL_CPU_ACCEL_X86_XSAVE;
+
+ if (ecx & ARCH_X86_INTEL_FEATURE_OSXSAVE)
+ caps |= BABL_CPU_ACCEL_X86_OSXSAVE;
+
+ if (ecx & ARCH_X86_INTEL_FEATURE_FMA)
+ caps |= BABL_CPU_ACCEL_X86_FMA;
+
if (ecx & ARCH_X86_INTEL_FEATURE_F16C)
caps |= BABL_CPU_ACCEL_X86_F16C;
- cpuid (0, eax, ebx, ecx, edx);
+ if (ecx & ARCH_X86_INTEL_FEATURE_MOVBE)
+ caps |= BABL_CPU_ACCEL_X86_MOVBE;
+ cpuid (0, eax, ebx, ecx, edx);
if (eax >= 7)
- {
- cpuid (7, eax, ebx, ecx, edx);
-
- if (ebx & ARCH_X86_INTEL_FEATURE_AVX2)
- caps |= BABL_CPU_ACCEL_X86_AVX2;
- }
+ {
+ cpuid (7, eax, ebx, ecx, edx);
+ if (ebx & ARCH_X86_INTEL_FEATURE_AVX2)
+ caps |= BABL_CPU_ACCEL_X86_AVX2;
+ if (ebx & ARCH_X86_INTEL_FEATURE_BMI1)
+ caps |= BABL_CPU_ACCEL_X86_BMI1;
+ if (ebx & ARCH_X86_INTEL_FEATURE_BMI2)
+ caps |= BABL_CPU_ACCEL_X86_BMI2;
+ }
#endif /* USE_SSE */
+
}
#endif /* USE_MMX */
@@ -517,6 +550,41 @@ arch_accel (void)
#endif /* ARCH_PPC && USE_ALTIVEC */
+#if defined(ARCH_ARM)
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <elf.h>
+
+#define HAVE_ACCEL 1
+
+static guint32
+arch_accel (void)
+{
+  /* Detect NEON from the AT_HWCAP entry of /proc/self/auxv; 4096 is
+   * presumably HWCAP_NEON (1 << 12) - TODO confirm against <asm/hwcap.h>.
+   * TODO : add or hardcode the other ways it can be on arm, where this
+   * info comes from the system and not from running cpu instructions */
+  int has_neon = 0;
+  int fd = open ("/proc/self/auxv", O_RDONLY);
+  Elf32_auxv_t auxv;
+  if (fd >= 0)
+    {
+      while (read (fd, &auxv, sizeof (Elf32_auxv_t)) == sizeof (Elf32_auxv_t))
+        {
+          if (auxv.a_type == AT_HWCAP)
+            {
+              if (auxv.a_un.a_val & 4096)
+                has_neon = 1;
+            }
+        }
+      close (fd);
+    }
+  return has_neon?BABL_CPU_ACCEL_ARM_NEON:0;
+}
+
+#endif /* ARCH_ARM */
static BablCpuAccelFlags
cpu_accel (void)
diff --git a/babl/babl-cpuaccel.h b/babl/babl-cpuaccel.h
index b8a685510..133d13844 100644
--- a/babl/babl-cpuaccel.h
+++ b/babl/babl-cpuaccel.h
@@ -24,25 +24,55 @@ typedef enum
BABL_CPU_ACCEL_NONE = 0x0,
/* x86 accelerations */
- BABL_CPU_ACCEL_X86_MMX = 0x01000000,
+ BABL_CPU_ACCEL_X86_MMX = 0x80000000,
BABL_CPU_ACCEL_X86_3DNOW = 0x40000000,
BABL_CPU_ACCEL_X86_MMXEXT = 0x20000000,
BABL_CPU_ACCEL_X86_SSE = 0x10000000,
BABL_CPU_ACCEL_X86_SSE2 = 0x08000000,
- BABL_CPU_ACCEL_X86_SSE3 = 0x02000000,
- BABL_CPU_ACCEL_X86_SSSE3 = 0x00800000,
- BABL_CPU_ACCEL_X86_SSE4_1 = 0x00400000,
- /* BABL_CPU_ACCEL_X86_SSE4_2 = 0x00200000, */
- /* BABL_CPU_ACCEL_X86_AVX = 0x00080000, */
+ BABL_CPU_ACCEL_X86_SSE3 = 0x04000000,
+ BABL_CPU_ACCEL_X86_SSSE3 = 0x02000000,
+ BABL_CPU_ACCEL_X86_SSE4_1 = 0x01000000,
+ BABL_CPU_ACCEL_X86_SSE4_2 = 0x00800000,
+ BABL_CPU_ACCEL_X86_AVX = 0x00400000,
+ BABL_CPU_ACCEL_X86_POPCNT = 0x00200000,
+ BABL_CPU_ACCEL_X86_FMA = 0x00100000,
+ BABL_CPU_ACCEL_X86_MOVBE = 0x00080000,
BABL_CPU_ACCEL_X86_F16C = 0x00040000,
- BABL_CPU_ACCEL_X86_AVX2 = 0x00020000,
+ BABL_CPU_ACCEL_X86_XSAVE = 0x00020000,
+ BABL_CPU_ACCEL_X86_OSXSAVE = 0x00010000,
+ BABL_CPU_ACCEL_X86_BMI1 = 0x00008000,
+ BABL_CPU_ACCEL_X86_BMI2 = 0x00004000,
+ BABL_CPU_ACCEL_X86_AVX2 = 0x00002000,
+
+ BABL_CPU_ACCEL_X86_64_V2 =
+ (BABL_CPU_ACCEL_X86_POPCNT|
+ BABL_CPU_ACCEL_X86_SSE4_1|
+ BABL_CPU_ACCEL_X86_SSE4_2|
+ BABL_CPU_ACCEL_X86_SSSE3),
+
+ BABL_CPU_ACCEL_X86_64_V3 =
+ (BABL_CPU_ACCEL_X86_64_V2|
+ BABL_CPU_ACCEL_X86_BMI1|
+ BABL_CPU_ACCEL_X86_BMI2|
+ BABL_CPU_ACCEL_X86_AVX|
+ BABL_CPU_ACCEL_X86_FMA|
+ BABL_CPU_ACCEL_X86_F16C|
+ BABL_CPU_ACCEL_X86_AVX2|
+ BABL_CPU_ACCEL_X86_OSXSAVE|
+ BABL_CPU_ACCEL_X86_MOVBE),
/* powerpc accelerations */
- BABL_CPU_ACCEL_PPC_ALTIVEC = 0x04000000,
- BABL_CPU_ACCEL_X86_64 = 0x00100000
+ BABL_CPU_ACCEL_PPC_ALTIVEC = 0x00000010,
+
+ /* arm accelerations */
+ BABL_CPU_ACCEL_ARM_NEON = 0x00000020,
+
+ /* x86_64 arch */
+ BABL_CPU_ACCEL_X86_64 = 0x00000040
} BablCpuAccelFlags;
+
BablCpuAccelFlags babl_cpu_accel_get_support (void);
void babl_cpu_accel_set_use (unsigned int use);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]