[babl] CIE: Use a faster cbrtf implementation
- From: Debarshi Ray <debarshir src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [babl] CIE: Use a faster cbrtf implementation
- Date: Thu, 21 Dec 2017 22:36:31 +0000 (UTC)
commit c28478383c1a628d42866803d785e1f23baa33be
Author: Debarshi Ray <debarshir gnome org>
Date: Thu Dec 21 10:14:53 2017 +0100
CIE: Use a faster cbrtf implementation
This is the approximate cube root of an IEEE float implementation from
Hacker's Delight. The elimination of all conditional branches probably
makes it a better candidate for future SIMD accelerated code paths.
On an Intel i7 Haswell, it now takes 0.27s to convert a 15 megapixel
buffer from "RGBA float" to "CIE Lab alpha float" instead of the
earlier 0.35s. A "Y float" to "CIE L float" conversion takes 0.085s
instead of 0.102s.
Original code: http://www.hackersdelight.org/hdcodetxt/acbrt.c.txt
Permissions: http://www.hackersdelight.org/permissions.htm
https://bugzilla.gnome.org/show_bug.cgi?id=791837
extensions/CIE.c | 61 ++++++++++++-----------------------------------------
1 files changed, 14 insertions(+), 47 deletions(-)
---
diff --git a/extensions/CIE.c b/extensions/CIE.c
index bd9e836..b6fa513 100644
--- a/extensions/CIE.c
+++ b/extensions/CIE.c
@@ -565,61 +565,28 @@ lchaba_to_rgba (const Babl *conversion,char *src,
/******** begin floating point RGB/CIE color space conversions ********/
-/* origin: FreeBSD /usr/src/lib/msun/src/s_cbrtf.c */
-/*
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian cygnus com.
- * Debugged and optimized by Bruce D. Evans.
- */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
+/* origin: http://www.hackersdelight.org/hdcodetxt/acbrt.c.txt
+ * permissions: http://www.hackersdelight.org/permissions.htm
*/
/* _cbrtf(x)
* Return cube root of x
*/
-#include <math.h>
#include <stdint.h>
-static const unsigned
-B1 = 709958130, /* B1 = (127-127.0/3-0.03306235651)*2**23 */
-B2 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */
-
-static inline float _cbrtf(float x)
+static inline float
+_cbrtf (float x)
{
- float r,T;
- union {float f; uint32_t i;} u = {x};
- uint32_t hx = u.i & 0x7fffffff;
-
- if (hx >= 0x7f800000) /* cbrt(NaN,INF) is itself */
- return x + x;
-
- /* rough cbrt to 5 bits */
- if (hx < 0x00800000) { /* zero or subnormal? */
- if (hx == 0)
- return x; /* cbrt(+-0) is itself */
- u.f = x*0x1p24f;
- hx = u.i & 0x7fffffff;
- hx = hx/3 + B2;
- } else
- hx = hx/3 + B1;
- u.i &= 0x80000000;
- u.i |= hx;
-
- T = u.f;
- r = T*T*T;
- T = T*((float)x+x+r)/(x+r+r);
-
- r = T*T*T;
- T = T*((float)x+x+r)/(x+r+r);
-
- return T;
+ union { float f; uint32_t i; } u = { x };
+
+ u.i = u.i / 4 + u.i / 16;
+ u.i = u.i + u.i / 16;
+ u.i = u.i + u.i / 256;
+ u.i = 0x2a5137a0 + u.i;
+ u.f = 0.33333333f * (2.0f * u.f + x / (u.f * u.f));
+ u.f = 0.33333333f * (2.0f * u.f + x / (u.f * u.f));
+
+ return u.f;
}
static inline float
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]