[babl] babl: also do auto LUTs for 3-3 bpp and 3-4bpp paths
- From: Øyvind "pippin" Kolås <ok src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [babl] babl: also do auto LUTs for 3-3 bpp and 3-4bpp paths
- Date: Tue, 25 Jan 2022 05:46:45 +0000 (UTC)
commit 3895c7d240edf918bed0558b0ae2f0058fce2a71
Author: Øyvind Kolås <pippin gimp org>
Date: Mon Jan 24 11:08:28 2022 +0100
babl: also do auto LUTs for 3-3 bpp and 3-4bpp paths
babl/babl-fish-path.c | 144 ++++++++++++++++++++++++++++++++++++++----------
babl/babl-fish.h | 2 +-
tools/babl-benchmark.c | 4 +-
tools/babl-lut-verify.c | 59 ++++++++++++++++++++
4 files changed, 178 insertions(+), 31 deletions(-)
---
diff --git a/babl/babl-fish-path.c b/babl/babl-fish-path.c
index 4d5679e1a..f93e010ae 100644
--- a/babl/babl-fish-path.c
+++ b/babl/babl-fish-path.c
@@ -660,6 +660,8 @@ babl_fish_path2 (const Babl *source,
(source->format.model->flags & BABL_MODEL_FLAG_ASSOCIATED)==0 &&
(
(babl->fish_path.source_bpp == 4 && babl->fish_path.dest_bpp == 4)
+ ||(babl->fish_path.source_bpp == 3 && babl->fish_path.dest_bpp == 4)
+ ||(babl->fish_path.source_bpp == 3 && babl->fish_path.dest_bpp == 3)
// XXX 16bit code paths not enabled yet.
//
//|| (babl->fish_path.source_bpp == 8 && babl->fish_path.dest_bpp == 4)
@@ -756,17 +758,21 @@ babl_gc_fishes (void)
// is responsibility of higher layers
}
+#define BABL_LIKELY(x) __builtin_expect(!!(x), 1)
+#define BABL_UNLIKELY(x) __builtin_expect(!!(x), 0)
+
static int babl_fish_lut_process_maybe (const Babl *babl,
const char *source,
const char *destination,
long n,
void *data)
{
+ int source_bpp = babl->fish_path.source_bpp;
+ int dest_bpp = babl->fish_path.dest_bpp;
uint32_t *lut = (uint32_t*)babl->fish_path.u8_lut;
- ((Babl*)babl)->fish.pixels += n;
-
+ BABL(babl)->fish.pixels += n;
- if (!lut && babl->fish.pixels > 256 * 128)
+ if (BABL_UNLIKELY(!lut && babl->fish.pixels >= 128 * 256))
{
#if 0
fprintf (stderr, "building LUT for %s to %s\n",
@@ -774,44 +780,128 @@ static int babl_fish_lut_process_maybe (const Babl *babl,
babl_get_name (babl->conversion.destination));
#endif
lut = malloc (256 * 256 * 256 * 4);
+ if (source_bpp ==4)
+ {
+ for (int o = 0; o < 256 * 256 * 256; o++)
+ lut[o] = o;
+ process_conversion_path (babl->fish_path.conversion_list,
+ lut, 4,
+ lut, 4,
+ 256*256*256);
+ for (int o = 0; o < 256 * 256 * 256; o++)
+ lut[o] = lut[o] & 0x00ffffff;
+ }
+ else if (source_bpp == 3 && dest_bpp == 3)
{
- for (int o = 0; o < 256 * 256 * 256; o++)
- lut[o] = o;
- process_conversion_path (babl->fish_path.conversion_list,
- lut,
- babl->fish_path.source_bpp,
- lut,
- babl->fish_path.dest_bpp,
- 256*256*256);
+ uint8_t *temp_lut = malloc (256 * 256 * 256 * 3);
+ uint8_t *temp_lut2 = malloc (256 * 256 * 256 * 3);
+ int o = 0;
+ for (int r = 0; r < 256; r++)
+ for (int g = 0; g < 256; g++)
+ for (int b = 0; b < 256; b++, o++)
+ {
+ temp_lut[o*3+0]=r;
+ temp_lut[o*3+1]=g;
+ temp_lut[o*3+2]=b;
+ }
+ process_conversion_path (babl->fish_path.conversion_list,
+ temp_lut, 3,
+ temp_lut2, 3,
+ 256*256*256);
+ babl_process (babl_fish (babl_format ("R'G'B' u8"), babl_format ("R'G'B'A u8")),
+ temp_lut2, lut, 256*256*256);
+ for (int o = 0; o < 256 * 256 * 256; o++)
+ lut[o] = lut[o] & 0x00ffffff;
+ free (temp_lut);
+ free (temp_lut2);
}
+ else if (source_bpp == 3 && dest_bpp == 4)
+ {
+ uint8_t *temp_lut = malloc (256 * 256 * 256 * 3);
+ int o = 0;
+ for (int r = 0; r < 256; r++)
+ for (int g = 0; g < 256; g++)
+ for (int b = 0; b < 256; b++, o++)
+ {
+ temp_lut[o*3+0]=r;
+ temp_lut[o*3+1]=g;
+ temp_lut[o*3+2]=b;
+ }
+ process_conversion_path (babl->fish_path.conversion_list,
+ temp_lut, 3,
+ lut, 4,
+ 256*256*256);
+ for (int o = 0; o < 256 * 256 * 256; o++)
+ lut[o] = lut[o] & 0x00ffffff;
+ free (temp_lut);
+ }
+
if (babl->fish_path.u8_lut == NULL)
{
- (((Babl*)babl)->fish_path.u8_lut) = (uint8_t*)lut;
+ (BABL(babl)->fish_path.u8_lut) = lut;
// XXX need memory barrier?
- if ((((Babl*)babl)->fish_path.u8_lut) != (uint8_t*)lut)
+ if ((BABL(babl)->fish_path.u8_lut) != lut)
{
free (lut);
- lut = (uint32_t*)babl->fish_path.u8_lut;
+ lut = babl->fish_path.u8_lut;
}
}
else
{
free (lut);
- lut = (uint32_t*)babl->fish_path.u8_lut;
+ lut = babl->fish_path.u8_lut;
}
}
if (lut)
{
- uint32_t *src = (uint32_t*)source;
- uint32_t *dst = (uint32_t*)destination;
- lut = (uint32_t*)babl->fish_path.u8_lut;
- while (n--)
+ if (source_bpp == 4 && dest_bpp == 4)
{
- uint32_t col = *src++;
- *dst++ = lut[col & 0xffffff] | (col & 0xff000000);
+ uint32_t *src = (uint32_t*)source;
+ uint32_t *dst = (uint32_t*)destination;
+ lut = (uint32_t*)babl->fish_path.u8_lut;
+ BABL(babl)->fish_path.last_lut_use = babl_ticks ();
+ while (n--)
+ {
+ uint32_t col = *src++;
+ *dst = col & 0xff000000;
+ *dst |= lut[col & 0xffffff];
+ dst++;
+ }
+ return 1;
+ }
+ else if (source_bpp == 3 && dest_bpp == 3)
+ {
+ uint8_t *src = (uint8_t*)source;
+ uint8_t *dst = (uint8_t*)destination;
+ lut = (uint32_t*)babl->fish_path.u8_lut;
+ BABL(babl)->fish_path.last_lut_use = babl_ticks ();
+ while (n--)
+ {
+ uint32_t col = src[0]*256*256+src[1]*256+src[2];
+ uint32_t val = lut[col];
+ dst[2]=(val >> 16) & 0xff;
+ dst[1]=(val >> 8) & 0xff;
+ dst[0]=val & 0xff;
+ dst+=3;
+ src+=3;
+ }
+ return 1;
+ }
+ else if (source_bpp == 3 && dest_bpp == 4)
+ {
+ uint8_t *src = (uint8_t*)source;
+ uint32_t *dst = (uint32_t*)destination;
+ lut = (uint32_t*)babl->fish_path.u8_lut;
+ BABL(babl)->fish_path.last_lut_use = babl_ticks ();
+ while (n--)
+ {
+ uint32_t col = src[0]*256*256+src[1]*256+src[2];
+ *dst = lut[col];
+ dst++;
+ src+=3;
+ }
+ return 1;
}
- BABL(babl)->fish_path.last_lut_use = babl_ticks ();
- return 1;
}
return 0;
}
@@ -826,17 +916,15 @@ babl_fish_path_process (const Babl *babl,
if (babl->fish_path.is_u8_color_conv)
{
if (babl_fish_lut_process_maybe (babl,
- source,
- destination,
- n,
+ source, destination, n,
data))
- return;
+ return;
}
else
{
static long conv_counter = 0;
conv_counter+=n;
- if (conv_counter > 1000 * 1000 * 10) // possibly run gc every 10 megapixels
+ if (conv_counter > 1000 * 1000 * 10) // run gc every 10 megapixels
{
babl_gc_fishes ();
conv_counter = 0;
diff --git a/babl/babl-fish.h b/babl/babl-fish.h
index dfe07a3d7..0ad9101b1 100644
--- a/babl/babl-fish.h
+++ b/babl/babl-fish.h
@@ -70,7 +70,7 @@ typedef struct
int source_bpp;
int dest_bpp;
unsigned int is_u8_color_conv:1; // keep track of count, and make
- uint8_t *u8_lut;
+ uint32_t *u8_lut;
long last_lut_use;
BablList *conversion_list;
} BablFishPath;
diff --git a/tools/babl-benchmark.c b/tools/babl-benchmark.c
index f2a90f32c..895cfc2d6 100644
--- a/tools/babl-benchmark.c
+++ b/tools/babl-benchmark.c
@@ -25,8 +25,8 @@
#define random rand
#endif
-int ITERATIONS = 20;
-#define N_PIXELS (512*256) // a too small batch makes the test set live
+int ITERATIONS = 5;
+#define N_PIXELS (1024*1024) // a too small batch makes the test set live
// in l2 cache skewing results
// we could also add a cache purger..
diff --git a/tools/babl-lut-verify.c b/tools/babl-lut-verify.c
index 5f7968252..65a6d00f6 100644
--- a/tools/babl-lut-verify.c
+++ b/tools/babl-lut-verify.c
@@ -61,6 +61,57 @@ test_u8_premul (void)
}
+static double
+test_rgb (void)
+{
+ uint8_t *src = malloc (PIXELS*4);
+ uint8_t *dst = malloc (PIXELS*4);
+ uint8_t *dst2 = malloc (PIXELS*4);
+ double error = 0.0;
+
+ for (int i = 0; i < PIXELS; i++)
+ for (int c = 0; c < 4; c++)
+ src[i*4+c] = random();
+
+ babl_process (
+ babl_fish (
+ babl_format_with_space ("R'G'B' u8", babl_space("Apple")),
+ babl_format_with_space ("R'G'B' u8", babl_space("ProPhoto"))),
+ src, dst, PIXELS);
+ babl_process (
+ babl_fish (
+ babl_format_with_space ("R'G'B' u8", babl_space("Apple")),
+ babl_format_with_space ("R'G'B' u8", babl_space("ProPhoto"))),
+ src, dst2, PIXELS);
+ babl_process (
+ babl_fish (
+ babl_format_with_space ("R'G'B' u8", babl_space("Apple")),
+ babl_format_with_space ("R'G'B' u8", babl_space("ProPhoto"))),
+ src, dst2, PIXELS);
+ babl_process (
+ babl_fish (
+ babl_format_with_space ("R'G'B' u8", babl_space("Apple")),
+ babl_format_with_space ("R'G'B' u8", babl_space("ProPhoto"))),
+ src, dst2, PIXELS);
+
+ for (int i = 0; i < PIXELS; i++)
+ {
+ error += sqrt ((dst[i*3+0] - dst2[i*3+0])*
+ (dst[i*3+0] - dst2[i*3+0])+
+ (dst[i*3+1] - dst2[i*3+1])*
+ (dst[i*3+1] - dst2[i*3+1])+
+ (dst[i*3+2] - dst2[i*3+2])*
+ (dst[i*3+2] - dst2[i*3+2]));
+ }
+
+ free (src);
+ free (dst);
+ free (dst2);
+
+ return error;
+}
+
+
static double
test_u8 (void)
{
@@ -391,6 +442,14 @@ int main (int argc, char **argv)
else
fprintf (stdout, "OK\n");
+ fprintf (stdout, "R'G'B u8 ");
+ error = test_rgb ();
+ if (error != 0.0)
+ fprintf (stdout, "%.20f\n", error/(PIXELS*4));
+ else
+ fprintf (stdout, "OK\n");
+
+
fprintf (stdout, "u8 premul ");
error = test_u8_premul ();
if (error != 0.0)
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]