[babl] multipass build, with SIMD variants for x86_64 and arm
- From: Øyvind "pippin" Kolås <ok src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [babl] multipass build, with SIMD variants for x86_64 and arm
- Date: Sun, 23 Jan 2022 00:12:40 +0000 (UTC)
commit ef3a19a4d96c6701939a37eed1d27fc3d33873ec
Author: Øyvind Kolås <pippin gimp org>
Date: Sun Jan 23 01:10:09 2022 +0100
multipass build, with SIMD variants for x86_64 and arm
Some extensions have their own separate builds, as do the
RGB color space conversions, the TRC computations and the base
set of conversions.
babl/babl-classes.h | 2 +-
babl/babl-core.c | 181 ++++++++++++
babl/babl-extension.c | 22 +-
babl/babl-internal.h | 12 +-
babl/babl-introspect.c | 2 +-
babl/babl-matrix.h | 18 +-
babl/babl-space.c | 644 ------------------------------------------
babl/babl-trc.c | 708 -----------------------------------------------
babl/babl-trc.h | 107 -------
babl/babl.c | 106 +++++++
babl/base/babl-base.h | 18 +-
babl/base/meson.build | 18 +-
babl/meson.build | 5 +-
extensions/CIE.c | 2 +
extensions/HSL.c | 1 -
extensions/cairo.c | 78 +++---
extensions/double.c | 4 +
extensions/fast-float.c | 6 +-
extensions/float.c | 152 ++++------
extensions/gegl-fixups.c | 4 +
extensions/gggl-lies.c | 181 ++++++------
extensions/gggl.c | 220 +++++++--------
extensions/grey.c | 20 +-
extensions/half.c | 4 +
extensions/meson.build | 48 ++++
extensions/simple.c | 134 ++++-----
extensions/sse-half.c | 1 -
extensions/sse2-int8.c | 1 -
extensions/sse4-int8.c | 1 -
extensions/two-table.c | 1 -
extensions/u16.c | 12 +-
extensions/u32.c | 2 +
extensions/ycbcr.c | 2 +
meson.build | 14 +-
34 files changed, 796 insertions(+), 1935 deletions(-)
---
diff --git a/babl/babl-classes.h b/babl/babl-classes.h
index c25172ae5..cf0344739 100644
--- a/babl/babl-classes.h
+++ b/babl/babl-classes.h
@@ -59,7 +59,7 @@ enum {
#include "babl-type.h"
#include "babl-sampling.h"
-#include "babl-trc.h"
+#include "base/babl-trc.h"
#include "babl-space.h"
#include "babl-component.h"
#include "babl-model.h"
diff --git a/babl/babl-core.c b/babl/babl-core.c
index d78b5e582..2bd36c7ee 100644
--- a/babl/babl-core.c
+++ b/babl/babl-core.c
@@ -162,3 +162,184 @@ babl_core_init (void)
NULL
);
}
+
+
+/////////////////// temporary here
+///////////////////
+
+const Babl *
+babl_trc_lut (const char *name,
+ int n,
+ float *entries)
+{
+ return babl_trc_new (name, BABL_TRC_LUT, 0, n, entries);
+}
+
+
+const Babl *
+babl_trc_formula_srgb (double g,
+ double a,
+ double b,
+ double c,
+ double d,
+ double e,
+ double f)
+{
+ char name[128];
+ int i;
+ float params[7]={g, a, b, c, d, e, f};
+
+ if (fabs (g - 2.400) < 0.01 &&
+ fabs (a - 0.947) < 0.01 &&
+ fabs (b - 0.052) < 0.01 &&
+ fabs (c - 0.077) < 0.01 &&
+ fabs (d - 0.040) < 0.01 &&
+ fabs (e - 0.000) < 0.01 &&
+ fabs (f - 0.000) < 0.01
+ )
+ return babl_trc ("sRGB");
+
+ snprintf (name, sizeof (name), "%.6f %.6f %.4f %.4f %.4f %.4f %.4f", g, a, b, c, d, e, f);
+ for (i = 0; name[i]; i++)
+ if (name[i] == ',') name[i] = '.';
+ while (name[strlen(name)-1]=='0')
+ name[strlen(name)-1]='\0';
+ return babl_trc_new (name, BABL_TRC_FORMULA_SRGB, g, 0, params);
+}
+
+const Babl *
+babl_trc_formula_cie (double g,
+ double a,
+ double b,
+ double c)
+{
+ char name[128];
+ int i;
+ float params[4]={g, a, b, c};
+
+ snprintf (name, sizeof (name), "%.6f %.6f %.4f %.4f", g, a, b, c);
+ for (i = 0; name[i]; i++)
+ if (name[i] == ',') name[i] = '.';
+ while (name[strlen(name)-1]=='0')
+ name[strlen(name)-1]='\0';
+ return babl_trc_new (name, BABL_TRC_FORMULA_CIE, g, 0, params);
+}
+
+
+const Babl *
+babl_trc_gamma (double gamma)
+{
+ char name[32];
+ int i;
+ if (fabs (gamma - 1.0) < 0.01)
+ return babl_trc_new ("linear", BABL_TRC_LINEAR, 1.0, 0, NULL);
+
+ snprintf (name, sizeof (name), "%.6f", gamma);
+ for (i = 0; name[i]; i++)
+ if (name[i] == ',') name[i] = '.';
+ while (name[strlen(name)-1]=='0')
+ name[strlen(name)-1]='\0';
+ return babl_trc_new (name, BABL_TRC_FORMULA_GAMMA, gamma, 0, NULL);
+}
+
+void
+babl_trc_class_init (void)
+{
+ babl_trc_new ("sRGB", BABL_TRC_SRGB, 2.2, 0, NULL);
+ babl_trc_gamma (2.2);
+ babl_trc_gamma (1.8);
+ babl_trc_gamma (1.0);
+ babl_trc_new ("linear", BABL_TRC_LINEAR, 1.0, 0, NULL);
+}
+
+#if 0
+float
+babl_trc_from_linear (const Babl *trc_,
+ float value)
+{
+ return babl_trc_from_linear (trc_, value);
+}
+
+float
+babl_trc_to_linear (const Babl *trc_,
+ float value)
+{
+ return babl_trc_to_linear (trc_, value);
+}
+#endif
+
+static int
+babl_lut_match_gamma (float *lut,
+ int lut_size,
+ float gamma)
+{
+ int match = 1;
+ int i;
+ if (lut_size > 1024)
+ {
+ for (i = 0; match && i < lut_size; i++)
+ {
+ if (fabs (lut[i] - pow ((i / (lut_size-1.0)), gamma)) > 0.0001)
+ match = 0;
+ }
+ }
+ else
+ {
+ for (i = 0; match && i < lut_size; i++)
+ {
+ if (fabs (lut[i] - pow ((i / (lut_size-1.0)), gamma)) > 0.001)
+ match = 0;
+ }
+ }
+ return match;
+}
+
+const Babl *
+babl_trc_lut_find (float *lut,
+ int lut_size)
+{
+ int i;
+ int match = 1;
+
+ /* look for linear match */
+ for (i = 0; match && i < lut_size; i++)
+ if (fabs (lut[i] - i / (lut_size-1.0)) > 0.015)
+ match = 0;
+ if (match)
+ return babl_trc_gamma (1.0);
+
+ /* look for sRGB match: */
+ match = 1;
+ if (lut_size > 1024)
+ {
+ for (i = 0; match && i < lut_size; i++)
+ {
+ if (fabs (lut[i] - gamma_2_2_to_linear (i / (lut_size-1.0))) > 0.0001)
+ match = 0;
+ }
+ }
+ else
+ {
+ for (i = 0; match && i < lut_size; i++)
+ {
+ if (fabs (lut[i] - gamma_2_2_to_linear (i / (lut_size-1.0))) > 0.001)
+ match = 0;
+ }
+ }
+ if (match)
+ return babl_trc ("sRGB");
+
+ if (babl_lut_match_gamma (lut, lut_size, 2.2))
+ return babl_trc_gamma(2.2);
+
+ if (babl_lut_match_gamma (lut, lut_size, 1.8))
+ return babl_trc_gamma(1.8);
+
+ return NULL;
+}
+
+const Babl * babl_trc (const char *name)
+{
+ return babl_trc_lookup_by_name (name);
+}
+
diff --git a/babl/babl-extension.c b/babl/babl-extension.c
index ed0a64b01..554c764a3 100644
--- a/babl/babl-extension.c
+++ b/babl/babl-extension.c
@@ -32,29 +32,9 @@
#include "babl-db.h"
#include "babl-base.h"
-#include "babl-cpuaccel.h"
#include <string.h>
#include <stdarg.h>
-void (*babl_base_init) (void) = babl_base_init_generic;
-#ifdef ARCH_X86_64
-void babl_base_init_x86_64_v2 (void);
-void babl_base_init_x86_64_v3 (void);
-#endif
-
-static void base_init (void)
-{
-#ifdef ARCH_X86_64
- BablCpuAccelFlags accel = babl_cpu_accel_get_support ();
- if (accel & BABL_CPU_ACCEL_X86_64_V3)
- babl_base_init_x86_64_v3 ();
- else if (accel & BABL_CPU_ACCEL_X86_64_V2)
- babl_base_init_x86_64_v2 ();
- else
-#endif
- babl_base_init_generic ();
-}
-
static Babl *babl_extension_current_extender = NULL;
@@ -130,7 +110,7 @@ babl_extension_base (void)
babl_free (babl);
else
{
- base_init ();
+ babl_base_init ();
}
babl = ret;
}
diff --git a/babl/babl-internal.h b/babl/babl-internal.h
index 8b8ebd64d..ec6008b6d 100644
--- a/babl/babl-internal.h
+++ b/babl/babl-internal.h
@@ -363,13 +363,16 @@ void babl_store_db (void);
int _babl_max_path_len (void);
-const Babl *
-babl_trc_new (const char *name,
+extern const Babl *
+(*babl_trc_new) (const char *name,
BablTRCType type,
double gamma,
int n_lut,
float *lut);
+extern const Babl *
+(*babl_trc_lookup_by_name) (const char *name);
+
void babl_space_to_xyz (const Babl *space, const double *rgb, double *xyz);
void babl_space_from_xyz (const Babl *space, const double *xyz, double *rgb);
@@ -384,7 +387,7 @@ const char *
babl_conversion_create_name (Babl *source, Babl *destination, int type,
int allow_collision);
-void _babl_space_add_universal_rgb (const Babl *space);
+extern void (*_babl_space_add_universal_rgb) (const Babl *space);
const Babl *
babl_trc_formula_srgb (double gamma, double a, double b, double c, double d, double e, double f);
const Babl *
@@ -468,4 +471,7 @@ char *babl_space_to_icc (const Babl *space,
Babl *
_babl_space_for_lcms (const char *icc_data, int icc_length); // XXX pass profile for dedup?
+void
+babl_trc_class_init (void);
+
#endif
diff --git a/babl/babl-introspect.c b/babl/babl-introspect.c
index 6230f9210..00168cc4d 100644
--- a/babl/babl-introspect.c
+++ b/babl/babl-introspect.c
@@ -68,7 +68,7 @@ babl_introspect (Babl *babl)
babl_conversion_class_for_each (each_introspect, NULL);
babl_log ("");
babl_log ("trcs:");
- babl_trc_class_for_each (each_introspect, NULL);
+ //babl_trc_class_for_each (each_introspect, NULL);
babl_log ("");
babl_log ("spaces:");
babl_space_class_for_each (each_introspect, NULL);
diff --git a/babl/babl-matrix.h b/babl/babl-matrix.h
index 2107b31c3..714debc56 100644
--- a/babl/babl-matrix.h
+++ b/babl/babl-matrix.h
@@ -9,7 +9,7 @@ static inline void babl_matrix_mul_matrix (const double *matA_,
const double *matB_,
double *out)
{
- int i, j;
+ unsigned int i, j;
double matA[9];
double matB[9];
double t1, t2, t3;
@@ -36,7 +36,7 @@ static inline void babl_matrix_mul_matrixf (const float *matA_,
const float *matB_,
float *out)
{
- int i, j;
+ unsigned int i, j;
float matA[9];
float matB[9];
float t1, t2, t3;
@@ -60,7 +60,7 @@ static inline void babl_matrix_mul_matrixf (const float *matA_,
static inline void babl_matrix_to_float (const double *in, float *out)
{
- int i;
+ unsigned int i;
for (i = 0; i < 9; i ++)
out[i] = in[i];
}
@@ -141,9 +141,9 @@ static inline void babl_matrix_mul_vectorff (const float *mat, const float *v_in
}
static inline void babl_matrix_mul_vectorff_buf3 (const float *mat, const float *v_in, float *v_out,
- int samples)
+ unsigned int samples)
{
- int i;
+ unsigned int i;
const float m_0_0 = m(mat, 0, 0);
const float m_0_1 = m(mat, 0, 1);
const float m_0_2 = m(mat, 0, 2);
@@ -166,7 +166,7 @@ static inline void babl_matrix_mul_vectorff_buf3 (const float *mat, const float
}
static inline void babl_matrix_mul_vectorff_buf4 (const float *mat, const float *v_in, float *v_out,
- int samples)
+ unsigned int samples)
{
const float m_0_0 = m(mat, 0, 0);
const float m_0_1 = m(mat, 0, 1);
@@ -177,7 +177,7 @@ static inline void babl_matrix_mul_vectorff_buf4 (const float *mat, const float
const float m_2_0 = m(mat, 2, 0);
const float m_2_1 = m(mat, 2, 1);
const float m_2_2 = m(mat, 2, 2);
- int i;
+ unsigned int i;
for (i = 0; i < samples; i ++)
{
float a = v_in[0], b = v_in[1], c = v_in[2];
@@ -192,9 +192,9 @@ static inline void babl_matrix_mul_vectorff_buf4 (const float *mat, const float
}
static inline void babl_matrix_mul_vector_buf4 (const double *mat, const double *v_in, double *v_out,
- int samples)
+ unsigned int samples)
{
- int i;
+ unsigned int i;
const double m_0_0 = m(mat, 0, 0);
const double m_0_1 = m(mat, 0, 1);
const double m_0_2 = m(mat, 0, 2);
diff --git a/babl/babl-space.c b/babl/babl-space.c
index 2cb1b27a4..092335426 100644
--- a/babl/babl-space.c
+++ b/babl/babl-space.c
@@ -675,650 +675,6 @@ babl_space_get_rgbtoxyz (const Babl *space)
return space->space.RGBtoXYZ;
}
-///////////////////
-
-
-static void
-prep_conversion (const Babl *babl)
-{
- Babl *conversion = (void*) babl;
- const Babl *source_space = babl_conversion_get_source_space (conversion);
- float *matrixf;
- int i;
- float *lut_red;
- float *lut_green;
- float *lut_blue;
-
- double matrix[9];
- babl_matrix_mul_matrix (
- (conversion->conversion.destination)->format.space->space.XYZtoRGB,
- (conversion->conversion.source)->format.space->space.RGBtoXYZ,
- matrix);
-
- matrixf = babl_calloc (sizeof (float), 9 + 256 * 3); // we leak this matrix , which is a singleton
- babl_matrix_to_float (matrix, matrixf);
- conversion->conversion.data = matrixf;
-
- lut_red = matrixf + 9;
- lut_green = lut_red + 256;
- lut_blue = lut_green + 256;
- for (i = 0; i < 256; i++)
- {
- lut_red[i] = babl_trc_to_linear (source_space->space.trc[0], i/255.0);
- lut_green[i] = babl_trc_to_linear (source_space->space.trc[1], i/255.0);
- lut_blue[i] = babl_trc_to_linear (source_space->space.trc[2], i/255.0);
- }
-}
-
-#define TRC_IN(rgba_in, rgba_out) do{ int i;\
- for (i = 0; i < samples; i++) \
- { \
- rgba_out[i*4+3] = rgba_in[i*4+3]; \
- } \
- if ((source_space->space.trc[0] == source_space->space.trc[1]) && \
- (source_space->space.trc[1] == source_space->space.trc[2])) \
- { \
- const Babl *trc = (void*)source_space->space.trc[0]; \
- babl_trc_to_linear_buf(trc, rgba_in, rgba_out, 4, 4, 3, samples); \
- } \
- else \
- { \
- int c; \
- for (c = 0; c < 3; c ++) \
- { \
- const Babl *trc = (void*)source_space->space.trc[c]; \
- babl_trc_to_linear_buf(trc, rgba_in + c, rgba_out + c, 4, 4, 1, samples); \
- } \
- } \
-}while(0)
-
-#define TRC_OUT(rgba_in, rgba_out) do{\
- { \
- int c; \
- if ((destination_space->space.trc[0] == destination_space->space.trc[1]) && \
- (destination_space->space.trc[1] == destination_space->space.trc[2])) \
- { \
- const Babl *trc = (void*)destination_space->space.trc[0]; \
- babl_trc_from_linear_buf(trc, rgba_in, rgba_out, 4, 4, 3, samples); \
- } \
- else \
- { \
- for (c = 0; c < 3; c ++) \
- { \
- const Babl *trc = (void*)destination_space->space.trc[c]; \
- babl_trc_from_linear_buf(trc, rgba_in + c, rgba_out + c, 4, 4, 1, samples); \
- } \
- } \
- }\
-} while(0)
-
-
-
-
-static inline void
-universal_nonlinear_rgba_converter (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
- long samples,
- void *data)
-{
- const Babl *source_space = babl_conversion_get_source_space (conversion);
- const Babl *destination_space = babl_conversion_get_destination_space (conversion);
-
- float * matrixf = data;
- float *rgba_in = (void*)src_char;
- float *rgba_out = (void*)dst_char;
-
- TRC_IN(rgba_in, rgba_out);
-
- babl_matrix_mul_vectorff_buf4 (matrixf, rgba_out, rgba_out, samples);
-
- TRC_OUT(rgba_out, rgba_out);
-}
-
-static inline void
-universal_nonlinear_rgb_linear_converter (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
- long samples,
- void *data)
-{
- const Babl *source_space = babl_conversion_get_source_space (conversion);
- float * matrixf = data;
- float *rgba_in = (void*)src_char;
- float *rgba_out = (void*)dst_char;
-
- TRC_IN(rgba_in, rgba_out);
-
- babl_matrix_mul_vectorff_buf4 (matrixf, rgba_out, rgba_out, samples);
-}
-
-static inline void
-universal_linear_rgb_nonlinear_converter (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
- long samples,
- void *data)
-{
- const Babl *destination_space = conversion->conversion.destination->format.space;
- float * matrixf = data;
- float *rgba_in = (void*)src_char;
- float *rgba_out = (void*)dst_char;
-
- babl_matrix_mul_vectorff_buf4 (matrixf, rgba_in, rgba_out, samples);
-
- TRC_OUT(rgba_out, rgba_out);
-}
-
-static inline void
-universal_nonlinear_rgba_u8_converter (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
- long samples,
- void *data)
-{
- const Babl *destination_space = conversion->conversion.destination->format.space;
-
- float * matrixf = data;
- float * in_trc_lut_red = matrixf + 9;
- float * in_trc_lut_green = in_trc_lut_red + 256;
- float * in_trc_lut_blue = in_trc_lut_green + 256;
- int i;
- uint8_t *rgba_in_u8 = (void*)src_char;
- uint8_t *rgba_out_u8 = (void*)dst_char;
-
- float *rgb = babl_malloc (sizeof(float) * 4 * samples);
-
- for (i = 0; i < samples; i++)
- {
- rgb[i*4+0]=in_trc_lut_red[rgba_in_u8[i*4+0]];
- rgb[i*4+1]=in_trc_lut_green[rgba_in_u8[i*4+1]];
- rgb[i*4+2]=in_trc_lut_blue[rgba_in_u8[i*4+2]];
- rgba_out_u8[i*4+3] = rgba_in_u8[i*4+3];
- }
-
- babl_matrix_mul_vectorff_buf4 (matrixf, rgb, rgb, samples);
-
- {
- const Babl *from_trc_red = (void*)destination_space->space.trc[0];
- const Babl *from_trc_green = (void*)destination_space->space.trc[1];
- const Babl *from_trc_blue = (void*)destination_space->space.trc[2];
- for (i = 0; i < samples * 4; i+=4)
- {
- rgba_out_u8[i+0] = babl_trc_from_linear (from_trc_red, rgb[i+0]) * 255.5f;
- rgba_out_u8[i+1] = babl_trc_from_linear (from_trc_green, rgb[i+1]) * 255.5f;
- rgba_out_u8[i+2] = babl_trc_from_linear (from_trc_blue, rgb[i+2]) * 255.5f;
- }
- }
- babl_free (rgb);
-}
-
-
-static inline void
-universal_rgba_converter (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
- long samples,
- void *data)
-{
- float *matrixf = data;
- float *rgba_in = (void*)src_char;
- float *rgba_out = (void*)dst_char;
-
- babl_matrix_mul_vectorff_buf4 (matrixf, rgba_in, rgba_out, samples);
-}
-
-static inline void
-universal_rgb_converter (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
- long samples,
- void *data)
-{
- float *matrixf = data;
- float *rgb_in = (void*)src_char;
- float *rgb_out = (void*)dst_char;
-
- babl_matrix_mul_vectorff_buf3 (matrixf, rgb_in, rgb_out, samples);
-}
-
-
-static inline void
-universal_ya_converter (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
- long samples,
- void *data)
-{
- memcpy (dst_char, src_char, samples * 4 * 2);
-}
-
-static inline void
-universal_y_converter (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
- long samples,
- void *data)
-{
- memcpy (dst_char, src_char, samples * 4);
-}
-
-
-static inline void
-universal_nonlinear_rgb_u8_converter (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
- long samples,
- void *data)
-{
- const Babl *destination_space = conversion->conversion.destination->format.space;
-
- float * matrixf = data;
- float * in_trc_lut_red = matrixf + 9;
- float * in_trc_lut_green = in_trc_lut_red + 256;
- float * in_trc_lut_blue = in_trc_lut_green + 256;
- int i;
- uint8_t *rgb_in_u8 = (void*)src_char;
- uint8_t *rgb_out_u8 = (void*)dst_char;
-
- float *rgba_out = babl_malloc (sizeof(float) * 4 * samples);
-
- for (i = 0; i < samples; i++)
- {
- rgba_out[i*4+0]=in_trc_lut_red[rgb_in_u8[i*3+0]];
- rgba_out[i*4+1]=in_trc_lut_green[rgb_in_u8[i*3+1]];
- rgba_out[i*4+2]=in_trc_lut_blue[rgb_in_u8[i*3+2]];
- rgba_out[i*4+3]=rgb_in_u8[i*3+2] * 255.5f;
- }
-
- babl_matrix_mul_vectorff_buf4 (matrixf, rgba_out, rgba_out, samples);
-
- {
- int c;
- TRC_OUT(rgba_out, rgba_out);
-
- for (i = 0; i < samples; i++)
- for (c = 0; c < 3; c ++)
- rgb_out_u8[i*3+c] = rgba_out[i*4+c] * 255.5f;
- }
-
- babl_free (rgba_out);
-}
-
-
-#if defined(USE_SSE2)
-
-#define m(matr, j, i) matr[j*3+i]
-
-#include <emmintrin.h>
-
-static inline void babl_matrix_mul_vectorff_buf4_sse2 (const float *mat,
- const float *v_in,
- float *v_out,
- int samples)
-{
- const __v4sf m___0 = {m(mat, 0, 0), m(mat, 1, 0), m(mat, 2, 0), 0};
- const __v4sf m___1 = {m(mat, 0, 1), m(mat, 1, 1), m(mat, 2, 1), 0};
- const __v4sf m___2 = {m(mat, 0, 2), m(mat, 1, 2), m(mat, 2, 2), 1};
- int i;
- for (i = 0; i < samples; i ++)
- {
- __v4sf a, b, c = _mm_load_ps(&v_in[0]);
- a = (__v4sf) _mm_shuffle_epi32((__m128i)c, _MM_SHUFFLE(0,0,0,0));
- b = (__v4sf) _mm_shuffle_epi32((__m128i)c, _MM_SHUFFLE(1,1,1,1));
- c = (__v4sf) _mm_shuffle_epi32((__m128i)c, _MM_SHUFFLE(3,2,2,2));
- _mm_store_ps (v_out, m___0 * a + m___1 * b + m___2 * c);
- v_out += 4;
- v_in += 4;
- }
- _mm_empty ();
-}
-
-#undef m
-
-
-static inline void
-universal_nonlinear_rgba_converter_sse2 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
- long samples,
- void *data)
-{
- const Babl *source_space = babl_conversion_get_source_space (conversion);
- const Babl *destination_space = babl_conversion_get_destination_space (conversion);
- float * matrixf = data;
- float *rgba_in = (void*)src_char;
- float *rgba_out = (void*)dst_char;
-
- TRC_IN(rgba_in, rgba_out);
-
- babl_matrix_mul_vectorff_buf4_sse2 (matrixf, rgba_out, rgba_out, samples);
-
- TRC_OUT(rgba_out, rgba_out);
-}
-
-
-static inline void
-universal_rgba_converter_sse2 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
- long samples,
- void *data)
-{
- float *matrixf = data;
- float *rgba_in = (void*)src_char;
- float *rgba_out = (void*)dst_char;
-
- babl_matrix_mul_vectorff_buf4_sse2 (matrixf, rgba_in, rgba_out, samples);
-}
-
-static inline void
-universal_nonlinear_rgba_u8_converter_sse2 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
- long samples,
- void *data)
-{
- const Babl *destination_space = conversion->conversion.destination->format.space;
-
- float * matrixf = data;
- float * in_trc_lut_red = matrixf + 9;
- float * in_trc_lut_green = in_trc_lut_red + 256;
- float * in_trc_lut_blue = in_trc_lut_green + 256;
- int i;
- uint8_t *rgba_in_u8 = (void*)src_char;
- uint8_t *rgba_out_u8 = (void*)dst_char;
-
- float *rgba_out = babl_malloc (sizeof(float) * 4 * samples);
-
- for (i = 0; i < samples * 4; i+= 4)
- {
- rgba_out[i+0]=in_trc_lut_red[rgba_in_u8[i+0]];
- rgba_out[i+1]=in_trc_lut_green[rgba_in_u8[i+1]];
- rgba_out[i+2]=in_trc_lut_blue[rgba_in_u8[i+2]];
- rgba_out_u8[i+3] = rgba_in_u8[i+3];
- }
-
- babl_matrix_mul_vectorff_buf4_sse2 (matrixf, rgba_out, rgba_out, samples);
-
- {
- int c;
- TRC_OUT(rgba_out, rgba_out);
-
- for (i = 0; i < samples * 4; i+= 4)
- for (c = 0; c < 3; c ++)
- rgba_out_u8[i+c] = rgba_out[i+c] * 255.5f;
- }
-
- babl_free (rgba_out);
-}
-
-static inline void
-universal_nonlinear_rgb_u8_converter_sse2 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
- long samples,
- void *data)
-{
- const Babl *destination_space = conversion->conversion.destination->format.space;
-
- float * matrixf = data;
- float * in_trc_lut_red = matrixf + 9;
- float * in_trc_lut_green = in_trc_lut_red + 256;
- float * in_trc_lut_blue = in_trc_lut_green + 256;
- int i;
- uint8_t *rgb_in_u8 = (void*)src_char;
- uint8_t *rgb_out_u8 = (void*)dst_char;
-
- float *rgba_out = babl_malloc (sizeof(float) * 4 * samples);
-
- for (i = 0; i < samples; i++)
- {
- rgba_out[i*4+0]=in_trc_lut_red[rgb_in_u8[i*3+0]];
- rgba_out[i*4+1]=in_trc_lut_green[rgb_in_u8[i*3+1]];
- rgba_out[i*4+2]=in_trc_lut_blue[rgb_in_u8[i*3+2]];
- }
-
- babl_matrix_mul_vectorff_buf4_sse2 (matrixf, rgba_out, rgba_out, samples);
-
- {
- int c;
- TRC_OUT(rgba_out, rgba_out);
-
- for (i = 0; i < samples; i++)
- for (c = 0; c < 3; c ++)
- rgb_out_u8[i*3+c] = rgba_out[i*4+c] * 255.5f;
- }
-
- babl_free (rgba_out);
-}
-
-
-static inline void
-universal_nonlinear_rgb_linear_converter_sse2 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
- long samples,
- void *data)
-{
- const Babl *source_space = babl_conversion_get_source_space (conversion);
- float * matrixf = data;
- float *rgba_in = (void*)src_char;
- float *rgba_out = (void*)dst_char;
-
- TRC_IN(rgba_in, rgba_out);
-
- babl_matrix_mul_vectorff_buf4_sse2 (matrixf, rgba_out, rgba_out, samples);
-}
-
-
-static inline void
-universal_linear_rgb_nonlinear_converter_sse2 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
- long samples,
- void *data)
-{
- const Babl *destination_space = conversion->conversion.destination->format.space;
- float * matrixf = data;
- float *rgba_in = (void*)src_char;
- float *rgba_out = (void*)dst_char;
-
- babl_matrix_mul_vectorff_buf4_sse2 (matrixf, rgba_in, rgba_out, samples);
-
- TRC_OUT(rgba_out, rgba_out);
-}
-#endif
-
-
-static int
-add_rgb_adapter (Babl *babl,
- void *space)
-{
- if (babl != space)
- {
-
-#if defined(USE_SSE2)
- if ((babl_cpu_accel_get_support () & BABL_CPU_ACCEL_X86_SSE) &&
- (babl_cpu_accel_get_support () & BABL_CPU_ACCEL_X86_SSE2))
- {
-
-
- prep_conversion(babl_conversion_new(
- babl_format_with_space("RGBA float", space),
- babl_format_with_space("RGBA float", babl),
- "linear", universal_rgba_converter_sse2,
- NULL));
- prep_conversion(babl_conversion_new(
- babl_format_with_space("RGBA float", babl),
- babl_format_with_space("RGBA float", space),
- "linear", universal_rgba_converter_sse2,
- NULL));
- prep_conversion(babl_conversion_new(
- babl_format_with_space("R'G'B'A float", space),
- babl_format_with_space("R'G'B'A float", babl),
- "linear", universal_nonlinear_rgba_converter_sse2,
- NULL));
- prep_conversion(babl_conversion_new(
- babl_format_with_space("R'G'B'A float", babl),
- babl_format_with_space("R'G'B'A float", space),
- "linear", universal_nonlinear_rgba_converter_sse2,
- NULL));
-
- prep_conversion(babl_conversion_new(
- babl_format_with_space("R'G'B'A float", space),
- babl_format_with_space("RGBA float", babl),
- "linear", universal_nonlinear_rgb_linear_converter_sse2,
- NULL));
- prep_conversion(babl_conversion_new(
- babl_format_with_space("R'G'B'A float", babl),
- babl_format_with_space("RGBA float", space),
- "linear", universal_nonlinear_rgb_linear_converter_sse2,
- NULL));
-
- prep_conversion(babl_conversion_new(
- babl_format_with_space("RGBA float", babl),
- babl_format_with_space("R'G'B'A float", space),
- "linear", universal_linear_rgb_nonlinear_converter_sse2,
- NULL));
- prep_conversion(babl_conversion_new(
- babl_format_with_space("RGBA float", space),
- babl_format_with_space("R'G'B'A float", babl),
- "linear", universal_linear_rgb_nonlinear_converter_sse2,
- NULL));
-
- prep_conversion(babl_conversion_new(
- babl_format_with_space("R'G'B'A u8", space),
- babl_format_with_space("R'G'B'A u8", babl),
- "linear", universal_nonlinear_rgba_u8_converter_sse2,
- NULL));
- prep_conversion(babl_conversion_new(
- babl_format_with_space("R'G'B'A u8", babl),
- babl_format_with_space("R'G'B'A u8", space),
- "linear", universal_nonlinear_rgba_u8_converter_sse2,
- NULL));
-
- prep_conversion(babl_conversion_new(
- babl_format_with_space("R'G'B' u8", space),
- babl_format_with_space("R'G'B' u8", babl),
- "linear", universal_nonlinear_rgb_u8_converter_sse2,
- NULL));
- prep_conversion(babl_conversion_new(
- babl_format_with_space("R'G'B' u8", babl),
- babl_format_with_space("R'G'B' u8", space),
- "linear", universal_nonlinear_rgb_u8_converter_sse2,
- NULL));
- }
- //else
-#endif
- {
- prep_conversion(babl_conversion_new(
- babl_format_with_space("RGBA float", space),
- babl_format_with_space("RGBA float", babl),
- "linear", universal_rgba_converter,
- NULL));
- prep_conversion(babl_conversion_new(
- babl_format_with_space("RGBA float", babl),
- babl_format_with_space("RGBA float", space),
- "linear", universal_rgba_converter,
- NULL));
-
- prep_conversion(babl_conversion_new(
- babl_format_with_space("R'G'B'A float", space),
- babl_format_with_space("R'G'B'A float", babl),
- "linear", universal_nonlinear_rgba_converter,
- NULL));
- prep_conversion(babl_conversion_new(
- babl_format_with_space("R'G'B'A float", babl),
- babl_format_with_space("R'G'B'A float", space),
- "linear", universal_nonlinear_rgba_converter,
- NULL));
-
- prep_conversion(babl_conversion_new(
- babl_format_with_space("R'G'B'A float", space),
- babl_format_with_space("RGBA float", babl),
- "linear", universal_nonlinear_rgb_linear_converter,
- NULL));
- prep_conversion(babl_conversion_new(
- babl_format_with_space("R'G'B'A float", babl),
- babl_format_with_space("RGBA float", space),
- "linear", universal_nonlinear_rgb_linear_converter,
- NULL));
-
- prep_conversion(babl_conversion_new(
- babl_format_with_space("R'G'B'A u8", space),
- babl_format_with_space("R'G'B'A u8", babl),
- "linear", universal_nonlinear_rgba_u8_converter,
- NULL));
- prep_conversion(babl_conversion_new(
- babl_format_with_space("R'G'B'A u8", babl),
- babl_format_with_space("R'G'B'A u8", space),
- "linear", universal_nonlinear_rgba_u8_converter,
- NULL));
-
- prep_conversion(babl_conversion_new(
- babl_format_with_space("R'G'B' u8", space),
- babl_format_with_space("R'G'B' u8", babl),
- "linear", universal_nonlinear_rgb_u8_converter,
- NULL));
- prep_conversion(babl_conversion_new(
- babl_format_with_space("R'G'B' u8", babl),
- babl_format_with_space("R'G'B' u8", space),
- "linear", universal_nonlinear_rgb_u8_converter,
- NULL));
-
- prep_conversion(babl_conversion_new(
- babl_format_with_space("RGBA float", babl),
- babl_format_with_space("R'G'B'A float", space),
- "linear", universal_linear_rgb_nonlinear_converter,
- NULL));
- prep_conversion(babl_conversion_new(
- babl_format_with_space("RGBA float", space),
- babl_format_with_space("R'G'B'A float", babl),
- "linear", universal_linear_rgb_nonlinear_converter,
- NULL));
- }
-
- prep_conversion(babl_conversion_new(
- babl_format_with_space("RGB float", space),
- babl_format_with_space("RGB float", babl),
- "linear", universal_rgb_converter,
- NULL));
- prep_conversion(babl_conversion_new(
- babl_format_with_space("RGB float", babl),
- babl_format_with_space("RGB float", space),
- "linear", universal_rgb_converter,
- NULL));
-
- prep_conversion(babl_conversion_new(
- babl_format_with_space("Y float", space),
- babl_format_with_space("Y float", babl),
- "linear", universal_y_converter,
- NULL));
- prep_conversion(babl_conversion_new(
- babl_format_with_space("YaA float", babl),
- babl_format_with_space("YaA float", space),
- "linear", universal_ya_converter,
- NULL));
- prep_conversion(babl_conversion_new(
- babl_format_with_space("YA float", babl),
- babl_format_with_space("YA float", space),
- "linear", universal_ya_converter,
- NULL));
- }
- return 0;
-}
-
-/* The first time a new Babl space is used - for creation of a fish, is when
- * this function is called, it adds conversions hooks that provides its formats
- * with conversions internally as well as for conversions to and from other RGB
- * spaces.
- */
-void
-_babl_space_add_universal_rgb (const Babl *space)
-{
- babl_space_class_for_each (add_rgb_adapter, (void*)space);
-}
const Babl *
diff --git a/babl/babl.c b/babl/babl.c
index fd903238d..515fa09b0 100644
--- a/babl/babl.c
+++ b/babl/babl.c
@@ -18,6 +18,7 @@
#include "config.h"
#include "babl-internal.h"
+#include "babl-base.h"
static int ref_count = 0;
@@ -125,10 +126,13 @@ babl_dir_list (void)
return ret;
}
+
+static void simd_init (void);
void
babl_init (void)
{
babl_cpu_accel_set_use (1);
+ simd_init ();
if (ref_count++ == 0)
{
@@ -190,3 +194,105 @@ babl_model_is (const Babl *babl,
return babl && ((babl)==babl_model_with_space(model, babl));
}
+
+#include "babl-cpuaccel.h"
+void (*babl_base_init) (void) = babl_base_init_generic;
+
+const Babl * babl_trc_lookup_by_name_generic (const char *name);
+
+
+const Babl *
+babl_trc_new_generic (const char *name,
+ BablTRCType type,
+ double gamma,
+ int n_lut,
+ float *lut);
+
+void _babl_space_add_universal_rgb_generic (const Babl *space);
+void (*_babl_space_add_universal_rgb) (const Babl *space) =
+ _babl_space_add_universal_rgb_generic;
+
+const Babl *
+(*babl_trc_lookup_by_name) (const char *name) = babl_trc_lookup_by_name_generic;
+const Babl *
+(*babl_trc_new) (const char *name,
+ BablTRCType type,
+ double gamma,
+ int n_lut,
+ float *lut) = babl_trc_new_generic;
+
+#ifdef ARCH_X86_64
+void babl_base_init_x86_64_v2 (void);
+void babl_base_init_x86_64_v3 (void);
+void _babl_space_add_universal_rgb_x86_64_v2 (const Babl *space);
+void _babl_space_add_universal_rgb_x86_64_v3 (const Babl *space);
+
+const Babl *
+babl_trc_lookup_by_name_x86_64_v2 (const char *name);
+const Babl *
+babl_trc_lookup_by_name_x86_64_v3 (const char *name);
+
+const Babl *
+babl_trc_new_x86_64_v2 (const char *name,
+ BablTRCType type,
+ double gamma,
+ int n_lut,
+ float *lut);
+const Babl *
+babl_trc_new_x86_64_v3 (const char *name,
+ BablTRCType type,
+ double gamma,
+ int n_lut,
+ float *lut);
+
+#endif
+#ifdef ARCH_ARM
+void babl_base_init_arm_neon (void);
+void _babl_space_add_universal_rgb_arm_neon (const Babl *space);
+
+const Babl *
+babl_trc_lookup_by_name_arm_neon (const char *name);
+
+const Babl *
+babl_trc_new_arm_neon (const char *name,
+ BablTRCType type,
+ double gamma,
+ int n_lut,
+ float *lut);
+
+#endif
+
+static void simd_init (void)
+{
+#ifdef ARCH_X86_64
+ BablCpuAccelFlags accel = babl_cpu_accel_get_support ();
+ if ((accel & BABL_CPU_ACCEL_X86_64_V3) == BABL_CPU_ACCEL_X86_64_V3)
+ {
+ babl_base_init = babl_base_init_x86_64_v2; /// !!
+ // this is correct,
+ // it performs better
+ // as observed in benchmarking
+ babl_trc_new = babl_trc_new_x86_64_v2;
+ babl_trc_lookup_by_name = babl_trc_lookup_by_name_x86_64_v2;
+ _babl_space_add_universal_rgb = _babl_space_add_universal_rgb_x86_64_v3;
+ }
+ else if ((accel & BABL_CPU_ACCEL_X86_64_V2) == BABL_CPU_ACCEL_X86_64_V2)
+ {
+ babl_base_init = babl_base_init_x86_64_v2;
+ babl_trc_new = babl_trc_new_x86_64_v2;
+ babl_trc_lookup_by_name = babl_trc_lookup_by_name_x86_64_v2;
+ _babl_space_add_universal_rgb = _babl_space_add_universal_rgb_x86_64_v2;
+ }
+#endif
+#ifdef ARCH_ARM
+ BablCpuAccelFlags accel = babl_cpu_accel_get_support ();
+ if ((accel & BABL_CPU_ACCEL_ARM_NEON) == BABL_CPU_ACCEL_ARM_NEON)
+ {
+ babl_base_init = babl_base_init_arm_neon;
+ babl_trc_new = babl_trc_new_arm_neon;
+ babl_trc_lookup_by_name = babl_trc_lookup_by_name_arm_neon;
+ _babl_space_add_universal_rgb = _babl_space_add_universal_rgb_arm_neon;
+ }
+#endif
+}
+
diff --git a/babl/base/babl-base.h b/babl/base/babl-base.h
index 67c4a539e..291697b81 100644
--- a/babl/base/babl-base.h
+++ b/babl/base/babl-base.h
@@ -19,6 +19,9 @@
#ifndef _BABL_BASE_H
#define _BABL_BASE_H
+#ifdef ARM_NEON
+#define BABL_SIMD_SUFFIX(symbol) symbol##_arm_neon
+#else
#ifdef X86_64_V2
#define BABL_SIMD_SUFFIX(symbol) symbol##_x86_64_v2
#else
@@ -28,22 +31,9 @@
#define BABL_SIMD_SUFFIX(symbol) symbol##_generic
#endif
#endif
+#endif
extern void (*babl_base_init) (void);
-extern void (*babl_base_destroy) (void);
-extern void (*babl_formats_init) (void);
-
-extern void (*babl_base_type_half) (void);
-extern void (*babl_base_type_float) (void);
-extern void (*babl_base_type_u8) (void);
-extern void (*babl_base_type_u16) (void);
-extern void (*babl_base_type_u15) (void);
-extern void (*babl_base_type_u32) (void);
-
-extern void (*babl_base_model_rgb) (void);
-extern void (*babl_base_model_cmyk) (void);
-extern void (*babl_base_model_gray) (void);
-extern void (*babl_base_model_ycbcr) (void);
void BABL_SIMD_SUFFIX(babl_base_init) (void);
void BABL_SIMD_SUFFIX(babl_base_destroy) (void);
diff --git a/babl/base/meson.build b/babl/base/meson.build
index 41287c0a3..886e1ae1a 100644
--- a/babl/base/meson.build
+++ b/babl/base/meson.build
@@ -14,6 +14,8 @@ babl_base_sources = [
'type-u16.c',
'type-u32.c',
'type-u8.c',
+ 'babl-trc.c',
+ 'babl-rgb-converter.c',
]
babl_base = static_library('babl_base',
@@ -29,14 +31,26 @@ if host_cpu_family == 'x86_64'
babl_base_sources,
include_directories: [rootInclude, bablInclude],
dependencies: [math, lcms],
- c_args: common_c_flags + x86_64_v2_flags + '-DX86_64_V2'
+ c_args: common_c_flags + x86_64_v2_flags
)
babl_base_x86_64_v3 = static_library('babl_base-x86-64-v3',
babl_base_sources,
include_directories: [rootInclude, bablInclude],
dependencies: [math, lcms],
- c_args: common_c_flags + x86_64_v3_flags + '-DX86_64_V3'
+ c_args: common_c_flags + x86_64_v3_flags
+ )
+
+endif
+
+
+if host_cpu_family == 'arm'
+
+ babl_base_arm_neon = static_library('babl_base-arm-neon',
+ babl_base_sources,
+ include_directories: [rootInclude, bablInclude],
+ dependencies: [math, lcms],
+ c_args: common_c_flags + arm_neon_flags
)
endif
diff --git a/babl/meson.build b/babl/meson.build
index b9b2a0529..d432dca6d 100644
--- a/babl/meson.build
+++ b/babl/meson.build
@@ -89,7 +89,6 @@ babl_sources = files(
'babl-sampling.c',
'babl-sanity.c',
'babl-space.c',
- 'babl-trc.c',
'babl-type.c',
'babl-util.c',
'babl-version.c',
@@ -123,8 +122,8 @@ babl_includes = [rootInclude, bablBaseInclude]
if host_cpu_family == 'x86_64'
simd_extra = [babl_base_x86_64_v2, babl_base_x86_64_v3]
-#elif host_cpu_family == 'arm'
-# simd_extra = [babl_base_arm_neon]
+elif host_cpu_family == 'arm'
+ simd_extra = [babl_base_arm_neon]
else
simd_extra = []
endif
diff --git a/extensions/CIE.c b/extensions/CIE.c
index 1607b2706..ecf024184 100644
--- a/extensions/CIE.c
+++ b/extensions/CIE.c
@@ -70,10 +70,12 @@ static void conversions (void);
static void formats (void);
int init (void);
+#include "babl-verify-cpu.inc"
int
init (void)
{
+ BABL_VERIFY_CPU();
types ();
components ();
models ();
diff --git a/extensions/HSL.c b/extensions/HSL.c
index bf48f34c9..73b2f03fe 100644
--- a/extensions/HSL.c
+++ b/extensions/HSL.c
@@ -63,7 +63,6 @@ hue2cpn (double p,
int init (void);
-
int
init (void)
{
diff --git a/extensions/cairo.c b/extensions/cairo.c
index 08ccf6780..3af4a9bd3 100644
--- a/extensions/cairo.c
+++ b/extensions/cairo.c
@@ -28,8 +28,8 @@ int init (void);
static void
conv_rgba8_cairo24_le (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -49,8 +49,8 @@ conv_rgba8_cairo24_le (const Babl *conversion,
static void
conv_rgb8_cairo24_le (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -71,8 +71,8 @@ conv_rgb8_cairo24_le (const Babl *conversion,
#if 0
static void
conv_rgbA8_cairo32_le (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -93,8 +93,8 @@ conv_rgbA8_cairo32_le (const Babl *conversion,
static void
conv_rgbA8_cairo32_le (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -115,8 +115,8 @@ conv_rgbA8_cairo32_le (const Babl *conversion,
static void
conv_cairo32_rgbA8_le (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -136,8 +136,8 @@ conv_cairo32_rgbA8_le (const Babl *conversion,
static void
conv_cairo32_rgba8_le (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -184,8 +184,8 @@ conv_cairo32_rgba8_le (const Babl *conversion,
static void
conv_cairo32_rgbAF_le (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst_char,
long samples)
{
long n = samples;
@@ -207,8 +207,8 @@ conv_cairo32_rgbAF_le (const Babl *conversion,
static void
conv_cairo32_rgbaF_le (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst_char,
long samples)
{
long n = samples;
@@ -236,8 +236,8 @@ conv_cairo32_rgbaF_le (const Babl *conversion,
static void
conv_cairo24_cairo32_le (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -253,8 +253,8 @@ conv_cairo24_cairo32_le (const Babl *conversion,
static void
conv_rgba8_cairo32_le (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -295,8 +295,8 @@ conv_rgba8_cairo32_le (const Babl *conversion,
static void
conv_rgb8_cairo32_le (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -318,8 +318,8 @@ conv_rgb8_cairo32_le (const Babl *conversion,
static void
conv_yA8_cairo32_le (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -342,8 +342,8 @@ conv_yA8_cairo32_le (const Babl *conversion,
static void
conv_yA16_cairo32_le (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -362,8 +362,8 @@ conv_yA16_cairo32_le (const Babl *conversion,
static void
conv_y8_cairo32_le (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -379,8 +379,8 @@ conv_y8_cairo32_le (const Babl *conversion,
static void
conv_y16_cairo32_le (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -400,8 +400,8 @@ conv_y16_cairo32_le (const Babl *conversion,
static void
conv_rgbA_gamma_float_cairo32_le (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
float *fsrc = (float *) src;
@@ -424,8 +424,8 @@ conv_rgbA_gamma_float_cairo32_le (const Babl *conversion,
static void
conv_rgbafloat_cairo32_le (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
const Babl *space = babl_conversion_get_destination_space (conversion);
@@ -473,8 +473,8 @@ conv_rgbafloat_cairo32_le (const Babl *conversion,
static void
conv_yafloat_cairo32_le (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
const Babl *space = babl_conversion_get_destination_space (conversion);
@@ -517,8 +517,8 @@ conv_yafloat_cairo32_le (const Babl *conversion,
static void
conv_yafloat_nl_cairo32_le (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
float *fsrc = (float *) src;
@@ -556,6 +556,7 @@ conv_yafloat_nl_cairo32_le (const Babl *conversion,
}
}
+#include "babl-verify-cpu.inc"
int
init (void)
@@ -563,6 +564,7 @@ init (void)
int testint = 23;
char *testchar = (char*) &testint;
int littleendian = (testchar[0] == 23);
+ BABL_VERIFY_CPU();
if (littleendian)
{
diff --git a/extensions/double.c b/extensions/double.c
index fe29cd9d5..21fc58115 100644
--- a/extensions/double.c
+++ b/extensions/double.c
@@ -222,10 +222,13 @@ conv_rgbD_linear_rgbaD_linear (const Babl *conversion,
babl_conversion_new (src, dst, "linear", conv_ ## src ## _ ## dst, NULL)
int init (void);
+#include "babl-verify-cpu.inc"
int
init (void)
{
+ BABL_VERIFY_CPU();
+ {
const Babl *rgbaD_linear = babl_format_new (
babl_model ("RGBA"),
babl_type ("double"),
@@ -288,6 +291,7 @@ init (void)
o (rgbaD_linear, rgbD_linear);
o (rgbaD_gamma, rgbD_gamma);
+ }
return 0;
}
diff --git a/extensions/fast-float.c b/extensions/fast-float.c
index 87300468a..451e20d28 100644
--- a/extensions/fast-float.c
+++ b/extensions/fast-float.c
@@ -589,11 +589,14 @@ conv_rgbF_gamma_rgbF_linear (const Babl *conversion,
#define o(src, dst) \
babl_conversion_new (src, dst, "linear", conv_ ## src ## _ ## dst, NULL)
-int init (void);
+#include "babl-verify-cpu.inc"
+int init (void);
int
init (void)
{
+ BABL_VERIFY_CPU();
+ {
const Babl *yaF_linear = babl_format_new (
babl_model ("YA"),
babl_type ("float"),
@@ -713,6 +716,7 @@ init (void)
o (rgbF_linear, rgbF_gamma);
o (rgbF_gamma, rgbF_linear);
o (yaF_linear, rgbA8_gamma);
+ }
return 0;
}
diff --git a/extensions/float.c b/extensions/float.c
index cd3442156..74b5a8a86 100644
--- a/extensions/float.c
+++ b/extensions/float.c
@@ -31,8 +31,8 @@ static const Babl *trc_srgb = NULL;
static void
conv_yaF_linear_yAF_linear (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
float *fsrc = (float *) src;
@@ -52,8 +52,8 @@ conv_yaF_linear_yAF_linear (const Babl *conversion,
static void
conv_yAF_linear_yaF_linear (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
float *fsrc = (float *) src;
@@ -73,8 +73,8 @@ conv_yAF_linear_yaF_linear (const Babl *conversion,
static void
conv_yaF_linear_yAF_nonlinear (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
const Babl *space = babl_conversion_get_destination_space (conversion);
@@ -96,8 +96,8 @@ conv_yaF_linear_yAF_nonlinear (const Babl *conversion,
static void
conv_rgbaF_linear_rgbAF_nonlinear (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
const Babl *space = babl_conversion_get_destination_space (conversion);
@@ -121,8 +121,8 @@ conv_rgbaF_linear_rgbAF_nonlinear (const Babl *conversion,
static void
conv_rgbaF_linear_rgbAF_perceptual (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
float *fsrc = (float *) src;
@@ -144,8 +144,8 @@ conv_rgbaF_linear_rgbAF_perceptual (const Babl *conversion,
static void
conv_rgbAF_linear_rgbAF_nonlinear (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
const Babl *space = babl_conversion_get_destination_space (conversion);
@@ -180,8 +180,8 @@ conv_rgbAF_linear_rgbAF_nonlinear (const Babl *conversion,
static void
conv_yAF_linear_yAF_nonlinear (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
const Babl *space = babl_conversion_get_destination_space (conversion);
@@ -215,8 +215,8 @@ conv_yAF_linear_yAF_nonlinear (const Babl *conversion,
static void
conv_rgbAF_linear_rgbAF_perceptual (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
@@ -249,8 +249,8 @@ conv_rgbAF_linear_rgbAF_perceptual (const Babl *conversion,
static void
conv_yaF_linear_yaF_nonlinear (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
const Babl *space = babl_conversion_get_destination_space (conversion);
@@ -258,19 +258,13 @@ conv_yaF_linear_yaF_nonlinear (const Babl *conversion,
float *fsrc = (float *) src;
float *fdst = (float *) dst;
- int n = samples;
-
- while (n--)
- {
- *fdst++ = babl_trc_from_linear (trc[0], *fsrc++);
- *fdst++ = *fsrc++;
- }
+ babl_trc_from_linear_buf (trc[0], fsrc, fdst, 2, 2, 1, samples);
}
static void
conv_rgbaF_linear_rgbaF_nonlinear (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
const Babl *space = babl_conversion_get_destination_space (conversion);
@@ -291,46 +285,33 @@ conv_rgbaF_linear_rgbaF_nonlinear (const Babl *conversion,
static void
conv_rgbaF_linear_rgbaF_perceptual (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
float *fsrc = (float *) src;
float *fdst = (float *) dst;
- int n = samples;
-
- while (n--)
- {
- *fdst++ = babl_trc_from_linear (trc_srgb, *fsrc++);
- *fdst++ = babl_trc_from_linear (trc_srgb, *fsrc++);
- *fdst++ = babl_trc_from_linear (trc_srgb, *fsrc++);
- *fdst++ = *fsrc++;
- }
+ babl_trc_from_linear_buf (trc_srgb, fsrc, fdst, 4, 4, 3, samples);
}
static void
conv_yF_linear_yF_nonlinear (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
const Babl *space = babl_conversion_get_destination_space (conversion);
const Babl **trc = (void*)space->space.trc;
float *fsrc = (float *) src;
float *fdst = (float *) dst;
- int n = samples;
-
- while (n--)
- {
- *fdst++ = babl_trc_from_linear (trc[0], *fsrc++);
- }
+ babl_trc_from_linear_buf (trc[0], fsrc, fdst, 1, 1, 1, samples);
}
static void
conv_rgbF_linear_rgbF_nonlinear (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
const Babl *space = babl_conversion_get_destination_space (conversion);
@@ -349,26 +330,19 @@ conv_rgbF_linear_rgbF_nonlinear (const Babl *conversion,
static void
conv_rgbF_linear_rgbF_perceptual (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
float *fsrc = (float *) src;
float *fdst = (float *) dst;
- int n = samples;
-
- while (n--)
- {
- *fdst++ = babl_trc_from_linear (trc_srgb, *fsrc++);
- *fdst++ = babl_trc_from_linear (trc_srgb, *fsrc++);
- *fdst++ = babl_trc_from_linear (trc_srgb, *fsrc++);
- }
+ babl_trc_from_linear_buf (trc_srgb, fsrc, fdst, 3, 3, 3, samples);
}
static void
conv_rgbaF_nonlinear_rgbaF_linear (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
const Babl *space = babl_conversion_get_destination_space (conversion);
@@ -389,48 +363,34 @@ conv_rgbaF_nonlinear_rgbaF_linear (const Babl *conversion,
static void
conv_yaF_nonlinear_yaF_linear (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
const Babl *space = babl_conversion_get_destination_space (conversion);
const Babl **trc = (void*)space->space.trc;
float *fsrc = (float *) src;
float *fdst = (float *) dst;
- int n = samples;
-
- while (n--)
- {
- *fdst++ = babl_trc_to_linear (trc[0], *fsrc++);
- *fdst++ = *fsrc++;
- }
+ babl_trc_to_linear_buf (trc[0], fsrc, fdst, 2, 2, 1, samples);
}
static void
conv_rgbaF_perceptual_rgbaF_linear (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
float *fsrc = (float *) src;
float *fdst = (float *) dst;
- int n = samples;
-
- while (n--)
- {
- *fdst++ = babl_trc_to_linear (trc_srgb, *fsrc++);
- *fdst++ = babl_trc_to_linear (trc_srgb, *fsrc++);
- *fdst++ = babl_trc_to_linear (trc_srgb, *fsrc++);
- *fdst++ = *fsrc++;
- }
+ babl_trc_to_linear_buf (trc_srgb, fsrc, fdst, 4, 4, 3, samples);
}
static void
conv_rgbF_nonlinear_rgbF_linear (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
const Babl *space = babl_conversion_get_destination_space (conversion);
@@ -450,38 +410,27 @@ conv_rgbF_nonlinear_rgbF_linear (const Babl *conversion,
static void
conv_yF_nonlinear_yF_linear (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
const Babl *space = babl_conversion_get_destination_space (conversion);
const Babl **trc = (void*)space->space.trc;
float *fsrc = (float *) src;
float *fdst = (float *) dst;
- int n = samples;
- while (n--)
- {
- *fdst++ = babl_trc_to_linear (trc[0], *fsrc++);
- }
+ babl_trc_to_linear_buf (trc[0], fsrc, fdst, 1, 1, 1, samples);
}
static void
conv_rgbF_perceptual_rgbF_linear (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
float *fsrc = (float *) src;
float *fdst = (float *) dst;
- int n = samples;
-
- while (n--)
- {
- *fdst++ = babl_trc_to_linear (trc_srgb, *fsrc++);
- *fdst++ = babl_trc_to_linear (trc_srgb, *fsrc++);
- *fdst++ = babl_trc_to_linear (trc_srgb, *fsrc++);
- }
+ babl_trc_to_linear_buf (trc_srgb, fsrc, fdst, 3, 3, 3, samples);
}
@@ -489,10 +438,13 @@ conv_rgbF_perceptual_rgbF_linear (const Babl *conversion,
babl_conversion_new (src, dst, "linear", conv_ ## src ## _ ## dst, NULL)
int init (void);
+#include "babl-verify-cpu.inc"
int
init (void)
{
+ BABL_VERIFY_CPU();
+ {
const Babl *yaF_linear = babl_format_new (
babl_model ("YA"),
babl_type ("float"),
@@ -621,7 +573,7 @@ init (void)
o (rgbaF_perceptual, rgbaF_linear);
o (rgbF_linear, rgbF_perceptual);
o (rgbF_perceptual, rgbF_linear);
-
+ }
return 0;
}
diff --git a/extensions/gegl-fixups.c b/extensions/gegl-fixups.c
index 45888ce65..881b78540 100644
--- a/extensions/gegl-fixups.c
+++ b/extensions/gegl-fixups.c
@@ -524,10 +524,13 @@ conv_rgba8_rgb8 (const Babl *conversion,
#define conv_gamma_rgbAF_gamma_rgbaF conv_rgbAF_rgbaF
int init (void);
+#include "babl-verify-cpu.inc"
int
init (void)
{
+ BABL_VERIFY_CPU();
+ {
const Babl *rgbaF = babl_format_new (
babl_model ("RGBA"),
babl_type ("float"),
@@ -621,5 +624,6 @@ init (void)
o (rgba8, rgb8);
o (ga8, rgbaF);
+ }
return 0;
}
diff --git a/extensions/gggl-lies.c b/extensions/gggl-lies.c
index 09c4a90f3..4a2e9cd6d 100644
--- a/extensions/gggl-lies.c
+++ b/extensions/gggl-lies.c
@@ -50,8 +50,8 @@
static void
conv_F_8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -78,8 +78,8 @@ conv_F_8 (const Babl *conversion,
static void
conv_F_16 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -108,8 +108,8 @@ conv_F_16 (const Babl *conversion,
static void
conv_8_F (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -124,8 +124,8 @@ conv_8_F (const Babl *conversion,
static void
conv_16_F (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -140,8 +140,8 @@ conv_16_F (const Babl *conversion,
static void
conv_F_D (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -156,8 +156,8 @@ conv_F_D (const Babl *conversion,
static void
conv_D_F (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -172,8 +172,8 @@ conv_D_F (const Babl *conversion,
static void
conv_16_8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -200,8 +200,8 @@ conv_16_8 (const Babl *conversion,
static void
conv_8_16 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -217,8 +217,8 @@ conv_8_16 (const Babl *conversion,
/*********/
static void
conv_rgbaF_rgba8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_F_8 (conversion, src, dst, samples * 4);
@@ -228,8 +228,8 @@ conv_rgbaF_rgba8 (const Babl *conversion,
static void
conv_rgbF_rgb8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_F_8 (conversion, src, dst, samples * 3);
@@ -237,8 +237,8 @@ conv_rgbF_rgb8 (const Babl *conversion,
static void
conv_gaF_ga8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_F_8 (conversion, src, dst, samples * 2);
@@ -250,8 +250,8 @@ conv_gaF_ga8 (const Babl *conversion,
static void
conv_rgbaF_rgba16 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_F_16 (conversion, src, dst, samples * 4);
@@ -259,8 +259,8 @@ conv_rgbaF_rgba16 (const Babl *conversion,
static void
conv_rgbaF_rgbaD (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_F_D (conversion, src, dst, samples * 4);
@@ -268,8 +268,8 @@ conv_rgbaF_rgbaD (const Babl *conversion,
static void
conv_rgbaD_rgbaF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_D_F (conversion, src, dst, samples * 4);
@@ -277,8 +277,8 @@ conv_rgbaD_rgbaF (const Babl *conversion,
static void
conv_rgbF_rgb16 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_F_16 (conversion, src, dst, samples * 3);
@@ -286,8 +286,8 @@ conv_rgbF_rgb16 (const Babl *conversion,
static void
conv_gaF_ga16 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_F_16 (conversion, src, dst, samples * 2);
@@ -301,8 +301,8 @@ conv_gaF_ga16 (const Babl *conversion,
static void
conv_rgba8_rgbaF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_8_F (conversion, src, dst, samples * 4);
@@ -310,8 +310,8 @@ conv_rgba8_rgbaF (const Babl *conversion,
static void
conv_rgb8_rgbF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_8_F (conversion, src, dst, samples * 3);
@@ -319,8 +319,8 @@ conv_rgb8_rgbF (const Babl *conversion,
static void
conv_ga8_gaF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_8_F (conversion, src, dst, samples * 2);
@@ -332,8 +332,8 @@ conv_ga8_gaF (const Babl *conversion,
static void
conv_rgba16_rgbaF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_16_F (conversion, src, dst, samples * 4);
@@ -341,8 +341,8 @@ conv_rgba16_rgbaF (const Babl *conversion,
static void
conv_rgb16_rgbF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_16_F (conversion, src, dst, samples * 3);
@@ -350,8 +350,8 @@ conv_rgb16_rgbF (const Babl *conversion,
static void
conv_ga16_gaF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_16_F (conversion, src, dst, samples * 2);
@@ -363,8 +363,8 @@ conv_ga16_gaF (const Babl *conversion,
static void
conv_rgba16_rgba8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_16_8 (conversion, src, dst, samples * 4);
@@ -372,8 +372,8 @@ conv_rgba16_rgba8 (const Babl *conversion,
static void
conv_rgb16_rgb8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_16_8 (conversion, src, dst, samples * 3);
@@ -381,8 +381,8 @@ conv_rgb16_rgb8 (const Babl *conversion,
static void
conv_ga16_ga8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_16_8 (conversion, src, dst, samples * 2);
@@ -394,8 +394,8 @@ conv_ga16_ga8 (const Babl *conversion,
static void
conv_rgba8_rgba16 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_8_16 (conversion, src, dst, samples * 4);
@@ -403,8 +403,8 @@ conv_rgba8_rgba16 (const Babl *conversion,
static void
conv_rgb8_rgb16 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_8_16 (conversion, src, dst, samples * 3);
@@ -412,8 +412,8 @@ conv_rgb8_rgb16 (const Babl *conversion,
static void
conv_ga8_ga16 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_8_16 (conversion, src, dst, samples * 2);
@@ -427,8 +427,8 @@ conv_ga8_ga16 (const Babl *conversion,
static void
conv_gaF_gAF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -448,8 +448,8 @@ conv_gaF_gAF (const Babl *conversion,
static void
conv_gAF_gaF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -474,8 +474,8 @@ conv_gAF_gaF (const Babl *conversion,
static void
conv_rgbaF_rgbF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -498,8 +498,8 @@ conv_rgbaF_rgbF (const Babl *conversion,
static void
conv_gF_rgbaF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -522,8 +522,8 @@ conv_gF_rgbaF (const Babl *conversion,
static void
conv_rgbF_rgbaF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -546,8 +546,8 @@ conv_rgbF_rgbaF (const Babl *conversion,
static void
conv_gaF_gF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -563,8 +563,8 @@ conv_gaF_gF (const Babl *conversion,
static void
conv_gF_gaF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -587,8 +587,8 @@ conv_gF_gaF (const Babl *conversion,
static void
conv_gF_rgbF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -608,8 +608,8 @@ conv_gF_rgbF (const Babl *conversion,
static void
conv_gaF_rgbaF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -636,8 +636,8 @@ conv_gaF_rgbaF (const Babl *conversion,
static void
conv_rgbaF_rgbA8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -661,8 +661,8 @@ conv_rgbaF_rgbA8 (const Babl *conversion,
static void
conv_rgbaF_rgb8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -683,8 +683,8 @@ conv_rgbaF_rgb8 (const Babl *conversion,
static void
conv_rgbaF_rgb16 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -705,8 +705,8 @@ conv_rgbaF_rgb16 (const Babl *conversion,
static void
conv_rgba8_rgbA8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -736,8 +736,8 @@ conv_rgba8_rgbA8 (const Babl *conversion,
static void
conv_rgbA8_rgba8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -768,8 +768,8 @@ conv_rgbA8_rgba8 (const Babl *conversion,
static void
conv_rgb8_rgba8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -790,8 +790,8 @@ conv_rgb8_rgba8 (const Babl *conversion,
static void
conv_rgba8_rgb8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -807,10 +807,13 @@ conv_rgba8_rgb8 (const Babl *conversion,
}
int init (void);
+#include "babl-verify-cpu.inc"
int
init (void)
{
+ BABL_VERIFY_CPU();
+ {
const Babl *rgbaF = babl_format_new (
babl_model ("RGBA"),
babl_type ("float"),
@@ -1012,6 +1015,6 @@ init (void)
o (rgb8, rgbA8);
o (rgba8, rgb8);
o (rgbaF, rgbA8);
-
+ }
return 0;
}
diff --git a/extensions/gggl.c b/extensions/gggl.c
index 34068f132..06843ea21 100644
--- a/extensions/gggl.c
+++ b/extensions/gggl.c
@@ -30,6 +30,7 @@
#include "babl.h"
#include "extensions/util.h"
+#include "babl-verify-cpu.inc"
/*
* Implemented according to information read from:
@@ -51,8 +52,8 @@
static void
conv_F_8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -73,8 +74,8 @@ conv_F_8 (const Babl *conversion,
static void
conv_F_16 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -101,8 +102,8 @@ conv_F_16 (const Babl *conversion,
static void
conv_8_F (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -117,8 +118,8 @@ conv_8_F (const Babl *conversion,
static void
conv_16_F (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -133,8 +134,8 @@ conv_16_F (const Babl *conversion,
static void
conv_rgbaF_rgb8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -161,8 +162,8 @@ conv_rgbaF_rgb8 (const Babl *conversion,
static void
conv_F_D (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -177,8 +178,8 @@ conv_F_D (const Babl *conversion,
static void
conv_D_F (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -193,8 +194,8 @@ conv_D_F (const Babl *conversion,
static void
conv_16_8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -221,8 +222,8 @@ conv_16_8 (const Babl *conversion,
static inline void
conv_8_16 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -238,8 +239,8 @@ conv_8_16 (const Babl *conversion,
/*********/
static void
conv_rgbaF_rgba8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_F_8 (conversion, src, dst, samples * 4);
@@ -247,8 +248,8 @@ conv_rgbaF_rgba8 (const Babl *conversion,
static void
conv_rgbF_rgb8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_F_8 (conversion, src, dst, samples * 3);
@@ -256,8 +257,8 @@ conv_rgbF_rgb8 (const Babl *conversion,
static void
conv_gaF_ga8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_F_8 (conversion, src, dst, samples * 2);
@@ -269,8 +270,8 @@ conv_gaF_ga8 (const Babl *conversion,
static void
conv_rgbaF_rgba16 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_F_16 (conversion, src, dst, samples * 4);
@@ -278,8 +279,8 @@ conv_rgbaF_rgba16 (const Babl *conversion,
static void
conv_rgbF_rgb16 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_F_16 (conversion, src, dst, samples * 3);
@@ -287,8 +288,8 @@ conv_rgbF_rgb16 (const Babl *conversion,
static void
conv_gaF_ga16 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_F_16 (conversion, src, dst, samples * 2);
@@ -300,8 +301,8 @@ conv_gaF_ga16 (const Babl *conversion,
static void
conv_rgba8_rgbaF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_8_F (conversion, src, dst, samples * 4);
@@ -310,8 +311,8 @@ conv_rgba8_rgbaF (const Babl *conversion,
static void
conv_rgb8_rgbF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_8_F (conversion, src, dst, samples * 3);
@@ -319,8 +320,8 @@ conv_rgb8_rgbF (const Babl *conversion,
static void
conv_ga8_gaF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_8_F (conversion, src, dst, samples * 2);
@@ -332,8 +333,8 @@ conv_ga8_gaF (const Babl *conversion,
static void
conv_rgbaF_rgbaD (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_F_D (conversion, src, dst, samples * 4);
@@ -341,8 +342,8 @@ conv_rgbaF_rgbaD (const Babl *conversion,
static void
conv_rgbaD_rgbaF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_D_F (conversion, src, dst, samples * 4);
@@ -350,8 +351,8 @@ conv_rgbaD_rgbaF (const Babl *conversion,
static void
conv_rgba16_rgbaF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_16_F (conversion, src, dst, samples * 4);
@@ -359,8 +360,8 @@ conv_rgba16_rgbaF (const Babl *conversion,
static void
conv_rgb16_rgbF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_16_F (conversion, src, dst, samples * 3);
@@ -368,8 +369,8 @@ conv_rgb16_rgbF (const Babl *conversion,
static void
conv_ga16_gaF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_16_F (conversion, src, dst, samples * 2);
@@ -381,8 +382,8 @@ conv_ga16_gaF (const Babl *conversion,
static void
conv_rgba16_rgba8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_16_8 (conversion, src, dst, samples * 4);
@@ -390,8 +391,8 @@ conv_rgba16_rgba8 (const Babl *conversion,
static void
conv_rgb16_rgb8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_16_8 (conversion, src, dst, samples * 3);
@@ -399,8 +400,8 @@ conv_rgb16_rgb8 (const Babl *conversion,
static void
conv_ga16_ga8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_16_8 (conversion, src, dst, samples * 2);
@@ -412,8 +413,8 @@ conv_ga16_ga8 (const Babl *conversion,
static void
conv_rgba8_rgba16 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_8_16 (conversion, src, dst, samples * 4);
@@ -421,8 +422,8 @@ conv_rgba8_rgba16 (const Babl *conversion,
static void
conv_rgb8_rgb16 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_8_16 (conversion, src, dst, samples * 3);
@@ -430,8 +431,8 @@ conv_rgb8_rgb16 (const Babl *conversion,
static void
conv_ga8_ga16 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
conv_8_16 (conversion, src, dst, samples * 2);
@@ -445,8 +446,8 @@ conv_ga8_ga16 (const Babl *conversion,
static void
conv_gaF_gAF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -466,8 +467,8 @@ conv_gaF_gAF (const Babl *conversion,
static void
conv_gAF_gaF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -492,8 +493,8 @@ conv_gAF_gaF (const Babl *conversion,
static void
conv_rgbaF_rgbF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -515,8 +516,8 @@ conv_rgbaF_rgbF (const Babl *conversion,
static void
conv_rgbF_rgbaF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -537,8 +538,8 @@ conv_rgbF_rgbaF (const Babl *conversion,
static void
conv_gaF_gF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -554,8 +555,8 @@ conv_gaF_gF (const Babl *conversion,
static void
conv_gF_gaF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -578,8 +579,8 @@ conv_gF_gaF (const Babl *conversion,
static void
conv_gF_rgbF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -599,8 +600,8 @@ conv_gF_rgbF (const Babl *conversion,
static void
conv_g8_rgb8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -617,8 +618,8 @@ conv_g8_rgb8 (const Babl *conversion,
#define conv_g8_rgbA8 conv_g8_rgba8
static void
conv_g8_rgba8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -636,8 +637,8 @@ conv_g8_rgba8 (const Babl *conversion,
static void
conv_gaF_rgbaF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -665,8 +666,8 @@ conv_gaF_rgbaF (const Babl *conversion,
static void
conv_rgbaF_rgbA8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -690,8 +691,8 @@ conv_rgbaF_rgbA8 (const Babl *conversion,
static void
conv_rgbaF_rgb16 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -717,8 +718,8 @@ conv_rgbaF_rgb16 (const Babl *conversion,
static void
conv_rgbA16_rgbaF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -748,8 +749,8 @@ conv_rgbA16_rgbaF (const Babl *conversion,
static void
conv_gF_rgbaF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -772,8 +773,8 @@ conv_gF_rgbaF (const Babl *conversion,
/*
static void
- conv_rgb8_rgbaF (unsigned char *src,
- unsigned char *dst,
+ conv_rgb8_rgbaF (unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
int samples)
{
long n=samples;
@@ -791,8 +792,8 @@ conv_gF_rgbaF (const Babl *conversion,
}
static void
- conv_g8_rgbaF (unsigned char *src,
- unsigned char *dst,
+ conv_g8_rgbaF (unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
int samples)
{
long n=samples;
@@ -810,8 +811,8 @@ conv_gF_rgbaF (const Babl *conversion,
}
static void
- conv_rgb16_rgbaF (unsigned char *src,
- unsigned char *dst,
+ conv_rgb16_rgbaF (unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
int samples)
{
long n=samples;
@@ -830,8 +831,8 @@ conv_gF_rgbaF (const Babl *conversion,
}
static void
- conv_gF_rgbaF (unsigned char *src,
- unsigned char *dst,
+ conv_gF_rgbaF (unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
int samples)
{
long n=samples;
@@ -851,8 +852,8 @@ conv_gF_rgbaF (const Babl *conversion,
*/
static void
conv_rgba8_rgbA8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -882,8 +883,8 @@ conv_rgba8_rgbA8 (const Babl *conversion,
static void
conv_rgbA8_rgba8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -917,8 +918,8 @@ conv_rgbA8_rgba8 (const Babl *conversion,
static void
conv_rgb8_rgba8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples-1;
@@ -938,8 +939,8 @@ conv_rgb8_rgba8 (const Babl *conversion,
static void
conv_rgba8_rgb8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -956,8 +957,8 @@ conv_rgba8_rgb8 (const Babl *conversion,
static void
conv_rgbA8_rgb8 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
long n = samples;
@@ -1014,8 +1015,8 @@ conv_rgbA8_rgb8 (const Babl *conversion,
static void
conv_yuvaF_rgbaF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
float *src_f = (float *) src;
@@ -1048,8 +1049,8 @@ conv_yuvaF_rgbaF (const Babl *conversion,
static void
conv_yuvF_rgbF (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
float *src_f = (float *) src;
@@ -1083,6 +1084,8 @@ int init (void);
int
init (void)
{
+ BABL_VERIFY_CPU();
+ {
const Babl *rgbaD = babl_format_new (
babl_model ("R'G'B'A"),
babl_type ("double"),
@@ -1298,5 +1301,6 @@ init (void)
o (rgbaF, rgbaD);
o (rgbaD, rgbaF);
+ }
return 0;
}
diff --git a/extensions/grey.c b/extensions/grey.c
index 3f208426c..416e3dccf 100644
--- a/extensions/grey.c
+++ b/extensions/grey.c
@@ -26,8 +26,8 @@
static void
conv_rgbaF_linear_y8_linear (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
const Babl *space = babl_conversion_get_source_space (conversion);
@@ -55,8 +55,8 @@ conv_rgbaF_linear_y8_linear (const Babl *conversion,
static void
conv_rgbaF_linear_yF_linear (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
const Babl *space = babl_conversion_get_source_space (conversion);
@@ -82,8 +82,8 @@ conv_rgbaF_linear_yF_linear (const Babl *conversion,
static void
conv_rgbaF_linear_yaF_linear (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
const Babl *space = babl_conversion_get_source_space (conversion);
@@ -109,8 +109,8 @@ conv_rgbaF_linear_yaF_linear (const Babl *conversion,
static void
conv_yaF_linear_rgbaF_linear (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
{
float *s = (float *) src;
@@ -130,10 +130,13 @@ conv_yaF_linear_rgbaF_linear (const Babl *conversion,
int init (void);
+#include "babl-verify-cpu.inc"
int
init (void)
{
+ BABL_VERIFY_CPU();
+ {
babl_conversion_new (babl_format ("RGBA float"),
babl_format ("Y u8"),
"linear",
@@ -171,5 +174,6 @@ init (void)
conv_yaF_linear_rgbaF_linear,
NULL);
+ }
return 0;
}
diff --git a/extensions/half.c b/extensions/half.c
index f308e03b5..789caf660 100644
--- a/extensions/half.c
+++ b/extensions/half.c
@@ -412,10 +412,13 @@ conv2_rgbaF_rgbaHalf (const Babl *conversion,
#define conv_yAHalf_yAF conv_yaHalf_yaF
int init (void);
+#include "babl-verify-cpu.inc"
int
init (void)
{
+ BABL_VERIFY_CPU();
+ {
int i;
const Babl *rgbaF_linear = babl_format_new (
babl_model ("RGBA"),
@@ -617,5 +620,6 @@ init (void)
CONV2(yaF, yaHalf);
CONV2(yF, yHalf);
+ }
return 0;
}
diff --git a/extensions/meson.build b/extensions/meson.build
index df75a2369..fe7dcc020 100644
--- a/extensions/meson.build
+++ b/extensions/meson.build
@@ -21,6 +21,20 @@ if platform_win32
babl_ext_link_args += no_undefined
endif
+autosimd_extensions = [
+ ['u16', no_cflags],
+ ['u32', no_cflags],
+ ['cairo', no_cflags],
+ ['grey', no_cflags],
+ ['gggl', no_cflags],
+ ['gggl-lies', no_cflags],
+ ['gegl-fixups', no_cflags],
+ ['CIE', sse2_cflags],
+ ['float', no_cflags],
+ ['double', no_cflags],
+ ['simple', no_cflags],
+ ['ycbcr', no_cflags],
+]
extensions = [
['u16', no_cflags],
@@ -68,3 +82,37 @@ foreach ext : extensions
install_dir: babl_libdir / lib_name,
)
endforeach
+
+if host_cpu_family == 'x86_64'
+
+ foreach ext : autosimd_extensions
+ shared_library(
+ 'x86-64-v2-' + ext[0],
+ ext[0] + '.c',
+ c_args: [ext[1]] + x86_64_v2_flags,
+ include_directories: babl_ext_inc,
+ link_with: babl,
+ link_args: babl_ext_link_args,
+ dependencies: babl_ext_dep,
+ name_prefix: '',
+ install: true,
+ install_dir: babl_libdir / lib_name,
+ )
+ endforeach
+
+ foreach ext : autosimd_extensions
+ shared_library(
+ 'x86-64-v3-' + ext[0],
+ ext[0] + '.c',
+ c_args: [ext[1]] + x86_64_v3_flags,
+ include_directories: babl_ext_inc,
+ link_with: babl,
+ link_args: babl_ext_link_args,
+ dependencies: babl_ext_dep,
+ name_prefix: '',
+ install: true,
+ install_dir: babl_libdir / lib_name,
+ )
+ endforeach
+
+endif
diff --git a/extensions/simple.c b/extensions/simple.c
index 627247aed..de5999c3f 100644
--- a/extensions/simple.c
+++ b/extensions/simple.c
@@ -7,8 +7,8 @@ int init (void);
static inline void
float_to_u8_x1 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst,
long samples)
{
float *src = (float *)src_char;
@@ -24,8 +24,8 @@ float_to_u8_x1 (const Babl *conversion,
static inline void
float_to_u8_x4 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst,
long samples)
{
float_to_u8_x1 (conversion, src_char, dst, samples * 4);
@@ -33,8 +33,8 @@ float_to_u8_x4 (const Babl *conversion,
static inline void
float_to_u8_x3 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst,
long samples)
{
float_to_u8_x1 (conversion, src_char, dst, samples * 3);
@@ -42,8 +42,8 @@ float_to_u8_x3 (const Babl *conversion,
static inline void
float_to_u8_x2 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst,
long samples)
{
float_to_u8_x1 (conversion, src_char, dst, samples * 2);
@@ -53,8 +53,8 @@ float_to_u8_x2 (const Babl *conversion,
static inline void
float_pre_to_u8_pre (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst,
long samples)
{
float *src = (float *)src_char;
@@ -86,8 +86,8 @@ float_pre_to_u8_pre (const Babl *conversion,
static inline void
float_to_u16_x1 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
float *src = (float *)src_char;
@@ -103,24 +103,24 @@ float_to_u16_x1 (const Babl *conversion,
}
static inline void
float_to_u16_x2 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
float_to_u16_x1 (conversion, src_char, dst_char, samples * 2);
}
static inline void
float_to_u16_x3 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
float_to_u16_x1 (conversion, src_char, dst_char, samples * 3);
}
static inline void
float_to_u16_x4 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
float_to_u16_x1 (conversion, src_char, dst_char, samples * 4);
@@ -128,8 +128,8 @@ float_to_u16_x4 (const Babl *conversion,
static inline void
float_pre_to_u16_pre (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
float *src = (float *)src_char;
@@ -154,8 +154,8 @@ float_pre_to_u16_pre (const Babl *conversion,
static inline void
float_pre_to_u32_pre (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
float *src = (float *)src_char;
@@ -181,8 +181,8 @@ float_pre_to_u32_pre (const Babl *conversion,
static inline void
float_to_u32_x1 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
float *src = (float *)src_char;
@@ -200,24 +200,24 @@ float_to_u32_x1 (const Babl *conversion,
}
static void
float_to_u32_x2 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
float_to_u32_x1 (conversion, src_char, dst_char, samples * 2);
}
static void
float_to_u32_x3 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
float_to_u32_x1 (conversion, src_char, dst_char, samples * 3);
}
static void
float_to_u32_x4 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
float_to_u32_x1 (conversion, src_char, dst_char, samples * 4);
@@ -226,8 +226,8 @@ float_to_u32_x4 (const Babl *conversion,
static inline void
u32_to_float (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
uint32_t *src = (uint32_t *)src_char;
@@ -243,8 +243,8 @@ u32_to_float (const Babl *conversion,
static void
u32_to_float_x4 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
u32_to_float (conversion, src_char, dst_char, samples * 4);
@@ -252,8 +252,8 @@ u32_to_float_x4 (const Babl *conversion,
static void
u32_to_float_x3 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
u32_to_float (conversion, src_char, dst_char, samples * 3);
@@ -262,8 +262,8 @@ u32_to_float_x3 (const Babl *conversion,
static void
u32_to_float_x2 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
u32_to_float (conversion, src_char, dst_char, samples * 2);
@@ -272,8 +272,8 @@ u32_to_float_x2 (const Babl *conversion,
static inline void
u16_to_float (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
uint16_t *src = (uint16_t *)src_char;
@@ -289,8 +289,8 @@ u16_to_float (const Babl *conversion,
static void
u16_to_float_x4 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
u16_to_float (conversion, src_char, dst_char, samples * 4);
@@ -298,8 +298,8 @@ u16_to_float_x4 (const Babl *conversion,
static void
u16_to_float_x3 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
u16_to_float (conversion, src_char, dst_char, samples * 3);
@@ -308,8 +308,8 @@ u16_to_float_x3 (const Babl *conversion,
static void
u16_to_float_x2 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
u16_to_float (conversion, src_char, dst_char, samples * 2);
@@ -317,8 +317,8 @@ u16_to_float_x2 (const Babl *conversion,
static inline void
yau16_rgbaf (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
uint16_t *src = (uint16_t *)src_char;
@@ -338,8 +338,8 @@ yau16_rgbaf (const Babl *conversion,
static inline void
u8_to_float (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
uint8_t *src = (uint8_t *)src_char;
@@ -355,8 +355,8 @@ u8_to_float (const Babl *conversion,
static void
u8_to_float_x4 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
u8_to_float (conversion, src_char, dst_char, samples * 4);
@@ -364,8 +364,8 @@ u8_to_float_x4 (const Babl *conversion,
static void
u8_to_float_x3 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
u8_to_float (conversion, src_char, dst_char, samples * 3);
@@ -374,8 +374,8 @@ u8_to_float_x3 (const Babl *conversion,
static void
u8_to_float_x2 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
u8_to_float (conversion, src_char, dst_char, samples * 2);
@@ -383,8 +383,8 @@ u8_to_float_x2 (const Babl *conversion,
static inline void
yau8_rgbaf (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
uint8_t *src = (uint8_t *)src_char;
@@ -404,8 +404,8 @@ yau8_rgbaf (const Babl *conversion,
static inline void
yu8_yau8 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
uint8_t *src = (uint8_t *)src_char;
@@ -423,8 +423,8 @@ yu8_yau8 (const Babl *conversion,
static inline void
yau8_yu8 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
uint8_t *src = (uint8_t *)src_char;
@@ -442,8 +442,8 @@ yau8_yu8 (const Babl *conversion,
static inline void
yu16_yau16 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
uint16_t *src = (uint16_t *)src_char;
@@ -460,8 +460,8 @@ yu16_yau16 (const Babl *conversion,
static inline void
yau16_yu16 (const Babl *conversion,
- unsigned char *src_char,
- unsigned char *dst_char,
+ unsigned char *__restrict__ src_char,
+ unsigned char *__restrict__ dst_char,
long samples)
{
uint16_t *src = (uint16_t *)src_char;
@@ -476,9 +476,11 @@ yau16_yu16 (const Babl *conversion,
}
+#include "babl-verify-cpu.inc"
int
init (void)
{
+ BABL_VERIFY_CPU();
/* float and u8 */
babl_conversion_new (babl_format ("R'G'B'A float"),
babl_format ("R'G'B'A u8"),
diff --git a/extensions/sse-half.c b/extensions/sse-half.c
index cee397555..653d68ffd 100644
--- a/extensions/sse-half.c
+++ b/extensions/sse-half.c
@@ -313,7 +313,6 @@ init (void)
}
#endif /* defined(USE_SSE4_1) && defined(USE_F16C) && defined(ARCH_X86_64) */
-
return 0;
}
diff --git a/extensions/sse2-int8.c b/extensions/sse2-int8.c
index 6da1b5b7e..e337fd17f 100644
--- a/extensions/sse2-int8.c
+++ b/extensions/sse2-int8.c
@@ -328,7 +328,6 @@ init (void)
}
#endif
-
return 0;
}
diff --git a/extensions/sse4-int8.c b/extensions/sse4-int8.c
index d505fe511..6c61bde6c 100644
--- a/extensions/sse4-int8.c
+++ b/extensions/sse4-int8.c
@@ -222,7 +222,6 @@ init (void)
}
#endif
-
return 0;
}
diff --git a/extensions/two-table.c b/extensions/two-table.c
index 8becfee70..05c4f64b3 100644
--- a/extensions/two-table.c
+++ b/extensions/two-table.c
@@ -249,6 +249,5 @@ init (void)
"linear",
conv_yafloat_linear_yau8_gamma,
NULL);
-
return 0;
}
diff --git a/extensions/u16.c b/extensions/u16.c
index 87d2907e2..0fe479ff6 100644
--- a/extensions/u16.c
+++ b/extensions/u16.c
@@ -27,8 +27,8 @@
static void
conv_rgbu16_rgbau16 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
@@ -48,8 +48,8 @@ conv_rgbu16_rgbau16 (const Babl *conversion,
static void
conv_yu16_yau16 (const Babl *conversion,
- unsigned char *src,
- unsigned char *dst,
+ unsigned char *__restrict__ src,
+ unsigned char *__restrict__ dst,
long samples)
@@ -67,8 +67,11 @@ conv_yu16_yau16 (const Babl *conversion,
int init (void);
+#include "babl-verify-cpu.inc"
int
init (void)
+{
+ BABL_VERIFY_CPU();
{
babl_conversion_new (
babl_format ("R'G'B' u16"),
@@ -111,5 +114,6 @@ init (void)
"linear",
conv_yu16_yau16,
NULL);
+}
return 0;
}
diff --git a/extensions/u32.c b/extensions/u32.c
index f9d563d0b..02964ba52 100644
--- a/extensions/u32.c
+++ b/extensions/u32.c
@@ -190,9 +190,11 @@ conv_yu32_yau32 (const Babl *conversion,
int init (void);
+#include "babl-verify-cpu.inc"
int
init (void)
{
+ BABL_VERIFY_CPU();
babl_conversion_new (
babl_format ("R'G'B'A u32"),
babl_format ("R'G'B'A u16"),
diff --git a/extensions/ycbcr.c b/extensions/ycbcr.c
index fabc44d68..1e779d7d3 100644
--- a/extensions/ycbcr.c
+++ b/extensions/ycbcr.c
@@ -32,9 +32,11 @@ static void formats (void);
int init (void);
+#include "babl-verify-cpu.inc"
int
init (void)
{
+ BABL_VERIFY_CPU();
components ();
models ();
conversions ();
diff --git a/meson.build b/meson.build
index bfa5dcfe9..10997d5d7 100644
--- a/meson.build
+++ b/meson.build
@@ -95,7 +95,10 @@ elif host_cpu_family == 'ppc64'
conf.set10('ARCH_PPC64', true)
elif host_cpu_family == 'arm'
have_arm = true
- config.set10('ARCH_ARM', true)
+ conf.set10('ARCH_ARM', true)
+elif host_cpu_family == 'aarch64'
+ have_aarch64 = true
+ conf.set10('ARCH_AARCH64', true)
endif
@@ -189,10 +192,15 @@ endif
if host_cpu_family == 'x86_64'
x86_64_v2_flags = cc.get_supported_arguments(['-march=x86-64','-msse2',
'-msse2','-msse4.1','-msse4.2','-mpopcnt','-mssse3'])
x86_64_v3_flags = x86_64_v2_flags +
cc.get_supported_arguments(['-mavx','-mavx2','-mf16c','-mfma','-mmovbe', '-mbmi', '-mbmi2'])
+
+ x86_64_v2_flags += '-DX86_64_V2'
+ x86_64_v3_flags += '-DX86_64_V3'
+
elif host_cpu_family == 'arm'
- arm_neon_flags = cc.get_supported_arguments(['-mfpu=neon'])
+ arm_neon_flags = cc.get_supported_arguments(['-mfpu=neon-vfpv4'])
+ arm_neon_flags += '-DARM_NEON'
elif host_cpu_family == 'aarch64'
- common_c_flags += cc.get_supported_arguments(['-mfpu=neon'])
+ common_c_flags += cc.get_supported_arguments(['-mfpu=neon-fp-armv8'])
endif
################################################################################
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]