[babl] CIE: Use a faster cbrtf implementation

From: Debarshi Ray <debarshir src gnome org>
To: commits-list gnome org
Cc:
Subject: [babl] CIE: Use a faster cbrtf implementation
Date: Thu, 21 Dec 2017 22:36:31 +0000 (UTC)
commit c28478383c1a628d42866803d785e1f23baa33be
Author: Debarshi Ray <debarshir gnome org>
Date:   Thu Dec 21 10:14:53 2017 +0100

    CIE: Use a faster cbrtf implementation
    
    This is the approximate cube root implementation for IEEE floats from
    Hacker's Delight. The elimination of all conditional branches probably
    makes it a better candidate for future SIMD accelerated code paths.
    
    On an Intel i7 Haswell, it now takes 0.27s to convert a 15 megapixel
    buffer from "RGBA float" to "CIE Lab alpha float" instead of the
    earlier 0.35s. A "Y float" to "CIE L float" conversion takes 0.085s
    instead of 0.102s.
    
    Original code: http://www.hackersdelight.org/hdcodetxt/acbrt.c.txt
    Permissions: http://www.hackersdelight.org/permissions.htm
    
    https://bugzilla.gnome.org/show_bug.cgi?id=791837

 extensions/CIE.c |   61 ++++++++++++-----------------------------------------
 1 files changed, 14 insertions(+), 47 deletions(-)
---
diff --git a/extensions/CIE.c b/extensions/CIE.c
index bd9e836..b6fa513 100644
--- a/extensions/CIE.c
+++ b/extensions/CIE.c
@@ -565,61 +565,28 @@ lchaba_to_rgba (const Babl *conversion,char *src,
 
 /******** begin floating point RGB/CIE color space conversions ********/
 
-/* origin: FreeBSD /usr/src/lib/msun/src/s_cbrtf.c */
-/*
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian cygnus com.
- * Debugged and optimized by Bruce D. Evans.
- */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
+/* origin: http://www.hackersdelight.org/hdcodetxt/acbrt.c.txt
+ * permissions: http://www.hackersdelight.org/permissions.htm
  */
 /* _cbrtf(x)
  * Return cube root of x
  */
 
-#include <math.h>
 #include <stdint.h>
 
-static const unsigned
-B1 = 709958130, /* B1 = (127-127.0/3-0.03306235651)*2**23 */
-B2 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */
-
-static inline float _cbrtf(float x)
+static inline float
+_cbrtf (float x)
 {
-       float r,T;
-       union {float f; uint32_t i;} u = {x};
-       uint32_t hx = u.i & 0x7fffffff;
-
-       if (hx >= 0x7f800000)  /* cbrt(NaN,INF) is itself */
-               return x + x;
-
-       /* rough cbrt to 5 bits */
-       if (hx < 0x00800000) {  /* zero or subnormal? */
-               if (hx == 0)
-                       return x;  /* cbrt(+-0) is itself */
-               u.f = x*0x1p24f;
-               hx = u.i & 0x7fffffff;
-               hx = hx/3 + B2;
-       } else
-               hx = hx/3 + B1;
-       u.i &= 0x80000000;
-       u.i |= hx;
-
-       T = u.f;
-       r = T*T*T;
-       T = T*((float)x+x+r)/(x+r+r);
-
-       r = T*T*T;
-       T = T*((float)x+x+r)/(x+r+r);
-
-       return T;
+  union { float f; uint32_t i; } u = { x };
+
+  u.i = u.i / 4 + u.i / 16;
+  u.i = u.i + u.i / 16;
+  u.i = u.i + u.i / 256;
+  u.i = 0x2a5137a0 + u.i;
+  u.f = 0.33333333f * (2.0f * u.f + x / (u.f * u.f));
+  u.f = 0.33333333f * (2.0f * u.f + x / (u.f * u.f));
+
+  return u.f;
 }
 
 static inline float
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]