[gtk/update-ci-to-f34] Confine -mf16c to a single source file

From: Matthias Clasen <matthiasc src gnome org>
To: commits-list gnome org
Cc:
Subject: [gtk/update-ci-to-f34] Confine -mf16c to a single source file
Date: Wed, 5 May 2021 23:01:16 +0000 (UTC)
commit 930ff499ee25d8606c6ca0b00084dea3ac0d1fd9
Author: Matthias Clasen <mclasen redhat com>
Date:   Wed May 5 18:58:23 2021 -0400

    Confine -mf16c to a single source file
    
    We can't use -mf16c for any code that may get run outside
    the __builtin_cpu_supports() check, because the compiler is
    then free to emit instructions that the running CPU may not
    support. Meson doesn't allow per-file cflags, so we have to
    split the F16C code off into its own static library.

 gsk/meson.build       | 17 ++++++++++++++++-
 gsk/ngl/fp16.c        | 39 +++++--------------------------------
 gsk/ngl/fp16i.c       | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++
 gsk/ngl/fp16private.h |  6 ++++++
 meson.build           |  3 ++-
 5 files changed, 82 insertions(+), 36 deletions(-)
---
diff --git a/gsk/meson.build b/gsk/meson.build
index f351941c22..7b82108286 100644
--- a/gsk/meson.build
+++ b/gsk/meson.build
@@ -88,6 +88,10 @@ gsk_private_sources = files([
   'ngl/fp16.c',
 ])
 
+gsk_f16c_sources = files([
+  'ngl/fp16i.c',
+])
+
 gsk_public_headers = files([
   'gskcairorenderer.h',
   'gskenums.h',
@@ -209,6 +213,17 @@ gsk_deps = [
   libgdk_dep,
 ]
 
+libgsk_f16c = static_library('gsk_f16c',
+  sources: gsk_f16c_sources,
+  dependencies: gsk_deps,
+  include_directories: [ confinc, ],
+  c_args: [
+    '-DGTK_COMPILATION',
+    '-DG_LOG_DOMAIN="Gsk"',
+    '-DG_LOG_STRUCTURED=1',
+  ] + common_cflags + f16c_cflags,
+)
+
 libgsk = static_library('gsk',
   sources: [
     gsk_public_sources,
@@ -223,7 +238,7 @@ libgsk = static_library('gsk',
     '-DG_LOG_DOMAIN="Gsk"',
     '-DG_LOG_STRUCTURED=1',
   ] + common_cflags,
-  link_with: libgdk,
+  link_with: [ libgdk, libgsk_f16c]
 )
 
 # We don't have link_with: to internal static libs here on purpose, just
diff --git a/gsk/ngl/fp16.c b/gsk/ngl/fp16.c
index 1e11faafd8..100d13e997 100644
--- a/gsk/ngl/fp16.c
+++ b/gsk/ngl/fp16.c
@@ -18,14 +18,10 @@
  * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
-#include <config.h>
+#include "config.h"
 
 #include "fp16private.h"
 
-#ifdef HAVE_F16C
-#include <immintrin.h>
-#endif
-
 static inline guint
 as_uint (const float x)
 {
@@ -80,33 +76,6 @@ half_to_float4_c (const guint16 h[4],
 
 #ifdef HAVE_F16C
 
-#if defined(_MSC_VER) && !defined(__clang__)
-#define CAST_M128I_P(a) (__m128i const *) a
-#else
-#define CAST_M128I_P(a) (__m128i_u const *) a
-#endif
-
-static void
-float_to_half4_f16c (const float f[4],
-                     guint16     h[4])
-{
-  __m128 s = _mm_loadu_ps (f);
-  __m128i i = _mm_cvtps_ph (s, 0);
-  _mm_storel_epi64 ((__m128i*)h, i);
-}
-
-static void
-half_to_float4_f16c (const guint16 h[4],
-                     float         f[4])
-{
-  __m128i i = _mm_loadl_epi64 (CAST_M128I_P (h));
-  __m128 s = _mm_cvtph_ps (i);
-
-  _mm_store_ps (f, s);
-}
-
-#undef CAST_M128I_P
-
 #if defined(_MSC_VER) && !defined(__clang__)
 /* based on info from https://walbourn.github.io/directxmath-f16c-and-fma/ */
 static gboolean
@@ -154,6 +123,7 @@ half_to_float4 (const guint16 h[4], float f[4])
 }
 
 #else
+
 void float_to_half4 (const float f[4], guint16 h[4]) __attribute__((ifunc ("resolve_float_to_half4")));
 void half_to_float4 (const guint16 h[4], float f[4]) __attribute__((ifunc ("resolve_half_to_float4")));
 
@@ -176,9 +146,10 @@ resolve_half_to_float4 (void)
   else
     return half_to_float4_c;
 }
+
 #endif
 
-#else
+#else /* ! HAVE_F16C */
 
 #if defined(__APPLE__) || (defined(_MSC_VER) && !defined(__clang__))
 // turns out aliases don't work on Darwin nor Visual Studio
@@ -204,4 +175,4 @@ void half_to_float4 (const guint16 h[4], float f[4]) __attribute__((alias ("half
 
 #endif
 
-#endif  /* GTK_HAS_F16C */
+#endif  /* HAVE_F16C */
diff --git a/gsk/ngl/fp16i.c b/gsk/ngl/fp16i.c
new file mode 100644
index 0000000000..74c5827ff8
--- /dev/null
+++ b/gsk/ngl/fp16i.c
@@ -0,0 +1,53 @@
+/* fp16i.c
+ *
+ * Copyright 2021 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#include "config.h"
+
+#include "fp16private.h"
+
+#ifdef HAVE_F16C
+#include <immintrin.h>
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#define CAST_M128I_P(a) (__m128i const *) a
+#else
+#define CAST_M128I_P(a) (__m128i_u const *) a
+#endif
+
+void
+float_to_half4_f16c (const float f[4],
+                     guint16     h[4])
+{
+  __m128 s = _mm_loadu_ps (f);
+  __m128i i = _mm_cvtps_ph (s, 0);
+  _mm_storel_epi64 ((__m128i*)h, i);
+}
+
+void
+half_to_float4_f16c (const guint16 h[4],
+                     float         f[4])
+{
+  __m128i i = _mm_loadl_epi64 (CAST_M128I_P (h));
+  __m128 s = _mm_cvtph_ps (i);
+
+  _mm_store_ps (f, s);
+}
+
+#endif  /* HAVE_F16C */
diff --git a/gsk/ngl/fp16private.h b/gsk/ngl/fp16private.h
index d76f18a04f..a2c53d6c2d 100644
--- a/gsk/ngl/fp16private.h
+++ b/gsk/ngl/fp16private.h
@@ -35,6 +35,12 @@ void float_to_half4 (const float f[4],
 void half_to_float4 (const guint16 h[4],
                      float         f[4]);
 
+void float_to_half4_f16c (const float f[4],
+                          guint16     h[4]);
+
+void half_to_float4_f16c (const guint16 h[4],
+                          float         f[4]);
+
 G_END_DECLS
 
 #endif
diff --git a/meson.build b/meson.build
index d8660cbe85..9ae976d40a 100644
--- a/meson.build
+++ b/meson.build
@@ -728,7 +728,8 @@ int main () {
   if cc.compiles(f16c_prog, args: test_f16c_cflags, name: 'F16C intrinsics')
     cdata.set('HAVE_F16C', 1)
     f16c_cflags = test_f16c_cflags
-    common_cflags += test_f16c_cflags
+  else
+    f16c_cflags = []
   endif
 endif
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]