[babl] extensions: make float-half extension use exact LUTs
- From: Øyvind Kolås <ok src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [babl] extensions: make float-half extension use exact LUTs
- Date: Wed, 6 Sep 2017 22:42:53 +0000 (UTC)
commit b18f38d8129360973c360db8a53b48c56dc73408
Author: Øyvind Kolås <pippin gimp org>
Date: Thu Sep 7 00:29:59 2017 +0200
extensions: make float-half extension use exact LUTs
Conversion from half to float now uses a table initialized on load, and
conversion from float to half uses new tables and a faster approach from qcms / mozilla / webkit.
extensions/float-half.c | 161 ++++++++++++++++++++++++++++++++---------------
1 files changed, 109 insertions(+), 52 deletions(-)
---
diff --git a/extensions/float-half.c b/extensions/float-half.c
index 08b7dfb..b471a02 100644
--- a/extensions/float-half.c
+++ b/extensions/float-half.c
@@ -75,7 +75,7 @@
#include "babl.h"
#include "extensions/util.h"
-static void halfp2singles(void *target, const void *source, long numel)
+static void halfp2singles_fun(void *target, const void *source, long numel)
{
uint16_t *hp = (uint16_t *) source; // Type pun input as an unsigned 16-bit int
uint32_t *xp = (uint32_t *) target; // Type pun output as an unsigned 32-bit int
@@ -122,59 +122,107 @@ static void halfp2singles(void *target, const void *source, long numel)
}
}
+static float half_float_table[65536];
+
+static void halfp2singles(void *target, const void *source, long numel)
+{
+ /* Convert numel half floats at source into floats at target by looking
+  * each 16-bit pattern up in half_float_table. */
+ const uint16_t *src = (const uint16_t *) source;
+ float *dst = (float *) target;
+ long i; /* same type as numel, so the index cannot overflow before reaching it */
+ for (i = 0; i < numel; i++)
+ dst[i] = half_float_table[src[i]];
+}
+
+/* table-based approach from qcms/blink/webkit */
+
+static const unsigned short half_float_base_table[512] = { /* indexed by the float's sign+exponent (top 9 bits): half sign/exponent bits */
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,1,2,4,8,16,32,64,128,256,
+512,1024,2048,3072,4096,5120,6144,7168,8192,9216,10240,11264,12288,13312,14336,15360,
+16384,17408,18432,19456,20480,21504,22528,23552,24576,25600,26624,27648,28672,29696,30720,31744,
+31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,
+31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,
+31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,
+31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,
+31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,
+31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,
+31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,31744,
+32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,
+32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,
+32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,
+32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,
+32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,
+32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,
+32768,32768,32768,32768,32768,32768,32768,32769,32770,32772,32776,32784,32800,32832,32896,33024,
+33280,33792,34816,35840,36864,37888,38912,39936,40960,41984,43008,44032,45056,46080,47104,48128,
+49152,50176,51200,52224,53248,54272,55296,56320,57344,58368,59392,60416,61440,62464,63488,64512,
+64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,
+64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,
+64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,
+64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,
+64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,
+64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,
+64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512,64512
+};
+
+static const unsigned char half_float_shift_table[512] = { /* right shift for the 23-bit float mantissa: 24 drops it all, 13 keeps the top 10 bits */
+24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,
+24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,
+24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,
+24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,
+24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,
+24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,
+24,24,24,24,24,24,24,23,22,21,20,19,18,17,16,15,
+14,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
+13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,24,
+24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,
+24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,
+24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,
+24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,
+24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,
+24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,
+24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,13,
+24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,
+24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,
+24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,
+24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,
+24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,
+24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,
+24,24,24,24,24,24,24,23,22,21,20,19,18,17,16,15,
+14,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
+13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,24,
+24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,
+24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,
+24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,
+24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,
+24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,
+24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,
+24,24,24,24,24,24,24,24,24,24,24,24,24,24,24,13
+};
+
+static inline unsigned short float_to_half_float(float f)
+{
+ // See Blink::Source/platform/graphics/gpu/WebGLImageConversion.cpp::convertFloatToHalfFloat() and http://crbug.com/491784
+ union {
+ float f;
+ uint32_t u;
+ } u = {f}; /* read the float's bit pattern through a union rather than a pointer cast */
+ unsigned temp = u.u;
+ unsigned signexp = (temp >> 23) & 0x1ff; /* sign bit plus 8 exponent bits: index into both 512-entry tables */
+ return half_float_base_table[signexp] + ((temp & 0x007fffff) >> half_float_shift_table[signexp]); /* mantissa is shifted down, i.e. truncated, not rounded */
+}
+
static void singles2halfp(void *target, const void *source, long numel)
{
- uint16_t *hp = (uint16_t *) target; // Type pun output as an unsigned 16-bit int
- uint32_t *xp = (uint32_t *) source; // Type pun input as an unsigned 32-bit int
- uint16_t hs, he, hm;
- uint32_t x, xs, xe, xm;
- int hes;
-
- if( source == NULL || target == NULL ) { // Nothing to convert (e.g., imag part of pure real)
- return;
- }
- while( numel-- ) {
- x = *xp++;
- if( (x & 0x7FFFFFFFu) == 0 ) { // Signed zero
- *hp++ = (uint16_t) (x >> 16); // Return the signed zero
- } else { // Not zero
- xs = x & 0x80000000u; // Pick off sign bit
- xe = x & 0x7F800000u; // Pick off exponent bits
- xm = x & 0x007FFFFFu; // Pick off mantissa bits
- if( xe == 0 ) { // Denormal will underflow, return a signed zero
- *hp++ = (uint16_t) (xs >> 16);
- } else if( xe == 0x7F800000u ) { // Inf or NaN (all the exponent bits are set)
- if( xm == 0 ) { // If mantissa is zero ...
- *hp++ = (uint16_t) ((xs >> 16) | 0x7C00u); // Signed Inf
- } else {
- *hp++ = (uint16_t) 0xFE00u; // NaN, only 1st mantissa bit set
- }
- } else { // Normalized number
- hs = (uint16_t) (xs >> 16); // Sign bit
- hes = ((int)(xe >> 23)) - 127 + 15; // Exponent unbias the single, then bias the halfp
- if( hes >= 0x1F ) { // Overflow
- *hp++ = (uint16_t) ((xs >> 16) | 0x7C00u); // Signed Inf
- } else if( hes <= 0 ) { // Underflow
- if( (14 - hes) > 24 ) { // Mantissa shifted all the way off & no rounding possibility
- hm = (uint16_t) 0u; // Set mantissa to zero
- } else {
- xm |= 0x00800000u; // Add the hidden leading bit
- hm = (uint16_t) (xm >> (14 - hes)); // Mantissa
- if( (xm >> (13 - hes)) & 0x00000001u ) // Check for rounding
- hm += (uint16_t) 1u; // Round, might overflow into exp bit, but this is OK
- }
- *hp++ = (hs | hm); // Combine sign bit and mantissa bits, biased exponent is zero
- } else {
- he = (uint16_t) (hes << 10); // Exponent
- hm = (uint16_t) (xm >> 13); // Mantissa
- if( xm & 0x00001000u ) // Check for rounding
- *hp++ = (hs | he | hm) + (uint16_t) 1u; // Round, might overflow to inf, this is OK
- else
- *hp++ = (hs | he | hm); // No rounding
- }
- }
- }
- }
+ const float *src = source; /* numel floats in, numel 16-bit half floats out */
+ uint16_t *dst = target; /* must be 16-bit: a uint8_t pointer would keep only the low byte of each half, at 1-byte stride */
+ long i; /* same type as numel, so the index cannot overflow before reaching it */
+ for (i = 0; i < numel; i++)
+ dst[i] = float_to_half_float (src[i]);
}
static inline long
@@ -232,6 +280,7 @@ int init (void);
int
init (void)
{
+ int i;
const Babl *rgbaF_linear = babl_format_new (
babl_model ("RGBA"),
babl_type ("float"),
@@ -337,6 +386,14 @@ init (void)
babl_component ("Y'"),
NULL);
+ for (i = 0; i < 65536; i++)
+ {
+ uint16_t buf[2] = {i, i};
+ float fbuf[2];
+ halfp2singles_fun(fbuf, buf, 1);
+ half_float_table[i] = fbuf[0];
+ }
+
#define CONV(src, dst) \
{ \
babl_conversion_new (src ## _linear, dst ## _linear, "linear", conv_ ## src ## _ ## dst, NULL); \
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]