[evolution-ews] Update LZX decompressor



commit e0aea9c9679429b5e55e8a1c3cc16349593c5075
Author: David Woodhouse <David Woodhouse intel com>
Date:   Tue May 14 15:33:15 2013 +0100

    Update LZX decompressor
    
    Merge with upstream libmspack.
    
    This fixes a number of bugs which were found when submitting the changes for
    LZX DELTA support to upstream. Especially with the larger window sizes.
    
    This hasn't been merged into upstream yet, but it should be there shortly.

 src/addressbook/lzx/ews-oal-decompress.c |   43 ++--
 src/addressbook/lzx/lzx.h                |   41 +++-
 src/addressbook/lzx/lzxd.c               |  318 ++++++++++++++++++++----------
 src/addressbook/lzx/readbits.h           |    1 +
 src/addressbook/lzx/readhuff.h           |    9 +-
 5 files changed, 268 insertions(+), 144 deletions(-)
---
diff --git a/src/addressbook/lzx/ews-oal-decompress.c b/src/addressbook/lzx/ews-oal-decompress.c
index 1d902a6..c07d383 100644
--- a/src/addressbook/lzx/ews-oal-decompress.c
+++ b/src/addressbook/lzx/ews-oal-decompress.c
@@ -224,11 +224,16 @@ oal_decompress_v4_full_detail_file (const gchar *filename,
                        else if (window_bits > 25)
                                window_bits = 25;
 
-                       lzs = lzxd_init (input, output, NULL, 0, window_bits,
-                                        0, 16, lzx_b->ucomp_size);
+                       lzs = lzxd_init (input, output, window_bits,
+                                        0, 16, lzx_b->ucomp_size, 1);
+                       if (!lzs) {
+                               g_set_error_literal (&err, g_quark_from_string ("lzx"), 1, "decompression 
failed (lzxd_init)");
+                               ret = FALSE;
+                               goto exit;
+                       }
 
                        if (lzxd_decompress (lzs, lzs->length) != LZX_ERR_OK) {
-                               g_set_error_literal (&err, g_quark_from_string ("lzx"), 1, "decompression 
failed");
+                               g_set_error_literal (&err, g_quark_from_string ("lzx"), 1, "decompression 
failed (lzxd_decompress)");
                                ret = FALSE;
                                goto exit;
                        }
@@ -355,8 +360,6 @@ oal_apply_binpatch (const gchar *filename, const gchar *orig_filename,
        FILE *input = NULL, *output = NULL, *orig_input = NULL;
        gboolean ret = TRUE;
        GError *err = NULL;
-       unsigned char *ref_data = NULL;
-       int ref_data_len = 0;
 
        input = fopen (filename, "rb");
        if (!input) {
@@ -403,19 +406,6 @@ oal_apply_binpatch (const gchar *filename, const gchar *orig_filename,
                /* note the file offset */
                offset = ftell (input);
                
-               if (lzx_b->source_size > ref_data_len) {
-                       free(ref_data);
-                       ref_data_len = lzx_b->source_size;
-                       ref_data = malloc(ref_data_len);
-               }
-
-               printf("read %x\n", lzx_b->source_size);
-               if (fread(ref_data, 1, lzx_b->source_size, orig_input) != lzx_b->source_size) {
-                       perror("Failed to read original source data\n");
-                       ret = FALSE;
-                       goto exit;
-               }
-
                /* The window size should be the smallest power of two between 2^17 and 2^25 that is
                   greater than or equal to the sum of the size of the reference data rounded up to
                   a multiple of 32768 and the size of the subject data. Since we have no reference
@@ -429,11 +419,20 @@ oal_apply_binpatch (const gchar *filename, const gchar *orig_filename,
                else if (window_bits > 25)
                        window_bits = 25;
 
-               lzs = lzxd_init (input, output, ref_data, lzx_b->source_size,
-                                window_bits, 0, 16, lzx_b->ucomp_size);
-
+               lzs = lzxd_init (input, output,
+                                window_bits, 0, 16, lzx_b->ucomp_size, 1);
+               if (!lzs) {
+                       g_set_error_literal (&err, g_quark_from_string ("lzx"), 1, "decompression failed 
(lzxd_init)");
+                       ret = FALSE;
+                       goto exit;
+               }
+               if (lzxd_set_reference_data(lzs, orig_input, lzx_b->source_size)) {
+                       g_set_error_literal (&err, g_quark_from_string ("lzx"), 1, "decompression failed 
(lzxd_set_reference_data)");
+                       ret = FALSE;
+                       goto exit;
+               }
                if (lzxd_decompress (lzs, lzs->length) != LZX_ERR_OK) {
-                       g_set_error_literal (&err, g_quark_from_string ("lzx"), 1, "decompression failed");
+                       g_set_error_literal (&err, g_quark_from_string ("lzx"), 1, "decompression failed 
(lzxd_decompress)");
                        ret = FALSE;
                        goto exit;
                }
diff --git a/src/addressbook/lzx/lzx.h b/src/addressbook/lzx/lzx.h
index e489f25..ceb1264 100644
--- a/src/addressbook/lzx/lzx.h
+++ b/src/addressbook/lzx/lzx.h
@@ -1,5 +1,5 @@
 /* This file is part of libmspack.
- * (C) 2003-2004 Stuart Caie.
+ * (C) 2003-2013 Stuart Caie.
  *
  * The LZX method was created by Jonathan Forbes and Tomi Poutanen, adapted
  * by Microsoft Corporation.
@@ -39,7 +39,7 @@
 /* LZX huffman defines: tweak tablebits as desired */
 #define LZX_PRETREE_MAXSYMBOLS  (LZX_PRETREE_NUM_ELEMENTS)
 #define LZX_PRETREE_TABLEBITS   (6)
-#define LZX_MAINTREE_MAXSYMBOLS (LZX_NUM_CHARS + 50*8)
+#define LZX_MAINTREE_MAXSYMBOLS (LZX_NUM_CHARS + 290*8)
 #define LZX_MAINTREE_TABLEBITS  (12)
 #define LZX_LENGTH_MAXSYMBOLS   (LZX_NUM_SECONDARY_LENGTHS+1)
 #define LZX_LENGTH_TABLEBITS    (12)
@@ -85,6 +85,8 @@ struct lzxd_stream {
 
   unsigned char *window;          /* decoding window                         */
   unsigned int   window_size;     /* window size                             */
+  unsigned int   ref_data_size;   /* LZX DELTA reference data size           */
+  unsigned int   num_offsets;     /* number of match_offset entries in table */
   unsigned int   window_posn;     /* decompression offset within window      */
   unsigned int   frame_posn;      /* current frame offset within in window   */
   unsigned int   frame;           /* the number of 32kb frames processed     */
@@ -100,8 +102,8 @@ struct lzxd_stream {
   unsigned char  intel_started;   /* has intel E8 decoding started?          */
   unsigned char  block_type;      /* type of the current block               */
   unsigned char  header_read;     /* have we started decoding at all yet?    */
-  unsigned char  posn_slots;      /* how many posn slots in stream?          */
   unsigned char  input_end;       /* have we reached the end of input?       */
+  unsigned char  is_delta;        /* does stream follow LZX DELTA spec?      */
 
   int error;
 
@@ -124,6 +126,7 @@ struct lzxd_stream {
                                (LZX_LENGTH_MAXSYMBOLS * 2)];
   unsigned short ALIGNED_table [(1 << LZX_ALIGNED_TABLEBITS) +
                                (LZX_ALIGNED_MAXSYMBOLS * 2)];
+  unsigned char LENGTH_empty;
 
   /* this is used purely for doing the intel E8 transform */
   unsigned char  e8_buf[LZX_FRAME_SIZE];
@@ -140,12 +143,14 @@ struct lzxd_stream {
  * @param input              an input stream with the LZX data.
  * @param output             an output stream to write the decoded data to.
  * @param window_bits        the size of the decoding window, which must be
- *                           between 15 and 21 inclusive.
+ *                           between 15 and 21 inclusive for regular LZX
+ *                           data, or between 17 and 25 inclusive for
+ *                           LZX DELTA data.
  * @param reset_interval     the interval at which the LZX bitstream is
  *                           reset, in multiples of LZX frames (32678
  *                           bytes), e.g. a value of 2 indicates the input
  *                           stream resets after every 65536 output bytes.
- *                           A value of 0 indicates that the bistream never
+ *                           A value of 0 indicates that the bitstream never
  *                           resets, such as in CAB LZX streams.
  * @param input_buffer_size  the number of bytes to use as an input
  *                           bitstream buffer.
@@ -161,26 +166,44 @@ struct lzxd_stream {
  *                           lzxd_set_output_length() once it is
  *                           known. If never set, 4 of the final 6 bytes
  *                           of the output stream may be incorrect.
+ * @param is_delta           should be zero for all regular LZX data,
+ *                           non-zero for LZX DELTA encoded data.
  * @return a pointer to an initialised lzxd_stream structure, or NULL if
  * there was not enough memory or parameters to the function were wrong.
  */
 extern struct lzxd_stream *lzxd_init(FILE *input,
                                     FILE *output,
-                                    unsigned char *ref_data, int ref_data_len,
                                     int window_bits,
                                     int reset_interval,
                                     int input_buffer_size,
-                                    off_t output_length);
+                                    off_t output_length,
+                                     char is_delta);
 
 /* see description of output_length in lzxd_init() */
 extern void lzxd_set_output_length(struct lzxd_stream *lzx,
                                   off_t output_length);
 
 /**
+ * Reads LZX DELTA reference data into the window and allows
+ * lzxd_decompress() to reference it.
+ *
+ * Call this before the first call to lzxd_decompress().
+
+ * @param lzx    the LZX stream to apply this reference data to
+ * @param input  an input file handle to read reference data
+ * @param length the length of the reference data. Cannot be longer
+ *               than the LZX window size.
+ * @return an error code, or LZX_ERR_OK if successful
+ */
+extern int lzxd_set_reference_data(struct lzxd_stream *lzx,
+                                   FILE *input,
+                                   unsigned int length);
+
+/**
  * Decompresses entire or partial LZX streams.
  *
  * The number of bytes of data that should be decompressed is given as the
- * out_bytes parameter.�If more bytes are decoded than are needed, they
+ * out_bytes parameter. If more bytes are decoded than are needed, they
  * will be kept over for a later invocation.
  *
  * The output bytes will be passed to the write() function given in
@@ -207,7 +230,7 @@ extern int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes);
 
 /**
  * Frees all state associated with an LZX data stream. This will call
- * free().
+ * free() using the system pointer given in lzxd_init().
  *
  * @param lzx LZX decompression state to free.
  */
diff --git a/src/addressbook/lzx/lzxd.c b/src/addressbook/lzx/lzxd.c
index 63f109e..f6bd4c9 100644
--- a/src/addressbook/lzx/lzxd.c
+++ b/src/addressbook/lzx/lzxd.c
@@ -1,5 +1,5 @@
 /* This file is part of libmspack.
- * (C) 2003-2004 Stuart Caie.
+ * (C) 2003-2013 Stuart Caie.
  *
  * The LZX method was created by Jonathan Forbes and Tomi Poutanen, adapted
  * by Microsoft Corporation.
@@ -69,6 +69,10 @@
  * The maximum window size has increased from 2MB to 32MB. This also
  * increases the maximum number of position slots, etc.
  *
+ * If the match length is 257 (the maximum possible), this signals
+ * a further length decoding step, that allows for matches up to
+ * 33024 bytes long.
+ *
  * The format now allows for "reference data", supplied by the caller.
  * If match offsets go further back than the number of bytes
  * decompressed so far, that is them accessing the reference data.
@@ -103,6 +107,22 @@
        return lzx->error = LZX_ERR_DECRUNCH;                   \
     }
 
+#define BUILD_TABLE_MAYBE_EMPTY(tbl) do {                              \
+    lzx->tbl##_empty = 0;                                              \
+    if (make_decode_table(MAXSYMBOLS(tbl), TABLEBITS(tbl),             \
+                          &HUFF_LEN(tbl,0), &HUFF_TABLE(tbl,0)))       \
+    {                                                                  \
+       for (i = 0; i < MAXSYMBOLS(tbl); i++) {                         \
+           if (HUFF_LEN(tbl, i) > 0) {                                 \
+               D(("failed to build %s table", #tbl))                   \
+               return lzx->error = LZX_ERR_DECRUNCH;           \
+           }                                                           \
+       }                                                               \
+       /* empty tree - allow it, but don't decode symbols with it */   \
+       lzx->tbl##_empty = 1;                                           \
+    }                                                                  \
+} while (0)
+
 /* READ_LENGTHS(tablename, first, last) reads in code lengths for symbols
  * first to last in the given table. The code lengths are stored in their
  * own special LZX way.
@@ -172,27 +192,70 @@ static int lzxd_read_lens(struct lzxd_stream *lzx, unsigned char *lens,
  * a small 'position slot' number and a small offset from that slot are
  * encoded instead of one large offset.
  *
+ * The number of slots is decided by how many are needed to encode the
+ * largest offset for a given window size. This is easy when the gap between
+ * slots is less than 128Kb, it's a linear relationship. But when extra_bits
+ * reaches its limit of 17 (because LZX can only ensure reading 17 bits of
+ * data at a time), we can only jump 128Kb at a time and have to start
+ * using more and more position slots as each window size doubles.
+ *
  * position_base[] is an index to the position slot bases
  *
  * extra_bits[] states how many bits of offset-from-base data is needed.
  *
- * They are generated like so:
- * for (i = 0;        i < 4;  i++)  extra_bits[i] = 0;
- * for (i = 4, j = 0; i < 36; i+=2) extra_bits[i] = extra_bits[i+1] = j++;
- * for (i = 36;       i < 51; i++)  extra_bits[i] = 17;
- * for (i = 0, j = 0; i < 51; j += 1 << extra_bits[i++]) position_base[i] = j;
+ * They are calculated as follows:
+ * extra_bits[i] = 0 where i < 4
+ * extra_bits[i] = floor(i/2)-1 where i >= 4 && i < 36
+ * extra_bits[i] = 17 where i >= 36
+ * position_base[0] = 0
+ * position_base[i] = position_base[i-1] + (1 << extra_bits[i-1])
  */
-static const unsigned int position_base[51] = {
-  0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256,
-  384, 512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288,
-  16384, 24576, 32768, 49152, 65536, 98304, 131072, 196608, 262144,
-  393216, 524288, 655360, 786432, 917504, 1048576, 1179648, 1310720,
-  1441792, 1572864, 1703936, 1835008, 1966080, 2097152
+static const unsigned int position_slots[11] = {
+    30, 32, 34, 36, 38, 42, 50, 66, 98, 162, 290
 };
-static const unsigned char extra_bits[51] = {
-  0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
-  9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
-  17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17
+static const unsigned char extra_bits[36] = {
+    0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
+    9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16
+};
+static const unsigned int position_base[290] = {
+    0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512,
+    768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768,
+    49152, 65536, 98304, 131072, 196608, 262144, 393216, 524288, 655360,
+    786432, 917504, 1048576, 1179648, 1310720, 1441792, 1572864, 1703936,
+    1835008, 1966080, 2097152, 2228224, 2359296, 2490368, 2621440, 2752512,
+    2883584, 3014656, 3145728, 3276800, 3407872, 3538944, 3670016, 3801088,
+    3932160, 4063232, 4194304, 4325376, 4456448, 4587520, 4718592, 4849664,
+    4980736, 5111808, 5242880, 5373952, 5505024, 5636096, 5767168, 5898240,
+    6029312, 6160384, 6291456, 6422528, 6553600, 6684672, 6815744, 6946816,
+    7077888, 7208960, 7340032, 7471104, 7602176, 7733248, 7864320, 7995392,
+    8126464, 8257536, 8388608, 8519680, 8650752, 8781824, 8912896, 9043968,
+    9175040, 9306112, 9437184, 9568256, 9699328, 9830400, 9961472, 10092544,
+    10223616, 10354688, 10485760, 10616832, 10747904, 10878976, 11010048,
+    11141120, 11272192, 11403264, 11534336, 11665408, 11796480, 11927552,
+    12058624, 12189696, 12320768, 12451840, 12582912, 12713984, 12845056,
+    12976128, 13107200, 13238272, 13369344, 13500416, 13631488, 13762560,
+    13893632, 14024704, 14155776, 14286848, 14417920, 14548992, 14680064,
+    14811136, 14942208, 15073280, 15204352, 15335424, 15466496, 15597568,
+    15728640, 15859712, 15990784, 16121856, 16252928, 16384000, 16515072,
+    16646144, 16777216, 16908288, 17039360, 17170432, 17301504, 17432576,
+    17563648, 17694720, 17825792, 17956864, 18087936, 18219008, 18350080,
+    18481152, 18612224, 18743296, 18874368, 19005440, 19136512, 19267584,
+    19398656, 19529728, 19660800, 19791872, 19922944, 20054016, 20185088,
+    20316160, 20447232, 20578304, 20709376, 20840448, 20971520, 21102592,
+    21233664, 21364736, 21495808, 21626880, 21757952, 21889024, 22020096,
+    22151168, 22282240, 22413312, 22544384, 22675456, 22806528, 22937600,
+    23068672, 23199744, 23330816, 23461888, 23592960, 23724032, 23855104,
+    23986176, 24117248, 24248320, 24379392, 24510464, 24641536, 24772608,
+    24903680, 25034752, 25165824, 25296896, 25427968, 25559040, 25690112,
+    25821184, 25952256, 26083328, 26214400, 26345472, 26476544, 26607616,
+    26738688, 26869760, 27000832, 27131904, 27262976, 27394048, 27525120,
+    27656192, 27787264, 27918336, 28049408, 28180480, 28311552, 28442624,
+    28573696, 28704768, 28835840, 28966912, 29097984, 29229056, 29360128,
+    29491200, 29622272, 29753344, 29884416, 30015488, 30146560, 30277632,
+    30408704, 30539776, 30670848, 30801920, 30932992, 31064064, 31195136,
+    31326208, 31457280, 31588352, 31719424, 31850496, 31981568, 32112640,
+    32243712, 32374784, 32505856, 32636928, 32768000, 32899072, 33030144,
+    33161216, 33292288, 33423360
 };
 
 static void lzxd_reset_state(struct lzxd_stream *lzx) {
@@ -211,39 +274,39 @@ static void lzxd_reset_state(struct lzxd_stream *lzx) {
 }
 
 /*-------- main LZX code --------*/
-static int pos_slots[9] = {34, 36, 38, 42, 50, 66, 98, 162, 290};
 
 struct lzxd_stream *lzxd_init(FILE *input,
                              FILE *output,
-                             unsigned char *ref_data, int ref_data_len,
                              int window_bits,
                              int reset_interval,
                              int input_buffer_size,
-                             off_t output_length)
+                             off_t output_length,
+                             char is_delta)
 {
   unsigned int window_size = 1 << window_bits;
   struct lzxd_stream *lzx;
 
-  /* LZX supports window sizes of 2^17 (128Kb) through 2^25 (32Mb) */
-  if (window_bits < 17 || window_bits > 26) return NULL;
-
-  if (ref_data_len > window_size) return NULL;
+  /* LZX DELTA window sizes are between 2^17 (128KiB) and 2^25 (32MiB),
+   * regular LZX windows are between 2^15 (32KiB) and 2^21 (2MiB)
+   */
+  if (is_delta) {
+      if (window_bits < 17 || window_bits > 25) return NULL;
+  }
+  else {
+      if (window_bits < 15 || window_bits > 21) return NULL;
+  }
 
   input_buffer_size = (input_buffer_size + 1) & -2;
   if (!input_buffer_size) return NULL;
 
   /* allocate decompression state */
-  if (!(lzx = malloc(sizeof(struct lzxd_stream)))) {
+  if (!(lzx = (struct lzxd_stream *) malloc(sizeof(struct lzxd_stream)))) {
     return NULL;
   }
 
   /* allocate decompression window and input buffer */
-  lzx->window = malloc((size_t) window_size);
-
-  if (ref_data_len)
-         memcpy(lzx->window + window_size - ref_data_len, ref_data, ref_data_len);
-
-  lzx->inbuf  = malloc((size_t) input_buffer_size);
+  lzx->window = (unsigned char *) malloc((size_t) window_size);
+  lzx->inbuf  = (unsigned char *) malloc((size_t) input_buffer_size);
   if (!lzx->window || !lzx->inbuf) {
     free(lzx->window);
     free(lzx->inbuf);
@@ -259,6 +322,7 @@ struct lzxd_stream *lzxd_init(FILE *input,
 
   lzx->inbuf_size      = input_buffer_size;
   lzx->window_size     = 1 << window_bits;
+  lzx->ref_data_size   = 0;
   lzx->window_posn     = 0;
   lzx->frame_posn      = 0;
   lzx->frame           = 0;
@@ -267,10 +331,8 @@ struct lzxd_stream *lzxd_init(FILE *input,
   lzx->intel_curpos    = 0;
   lzx->intel_started   = 0;
   lzx->error           = LZX_ERR_OK;
-
-  /* window bits:    17  18  19  20  21 22 23 24  25
-   * position slots: 34  36  38  42  50 66 98 162 290 */
-  lzx->posn_slots      = pos_slots[window_bits-17];
+  lzx->num_offsets     = position_slots[window_bits - 15] << 3;
+  lzx->is_delta        = is_delta;
 
   lzx->o_ptr = lzx->o_end = &lzx->e8_buf[0];
   lzxd_reset_state(lzx);
@@ -278,6 +340,40 @@ struct lzxd_stream *lzxd_init(FILE *input,
   return lzx;
 }
 
+int lzxd_set_reference_data(struct lzxd_stream *lzx,
+                           FILE *input,
+                           unsigned int length)
+{
+    if (!lzx) return LZX_ERR_ARGS;
+
+    if (!lzx->is_delta) {
+        D(("only LZX DELTA streams support reference data"))
+        return LZX_ERR_ARGS;
+    }
+    if (lzx->offset) {
+       D(("too late to set reference data after decoding starts"))
+       return LZX_ERR_ARGS;
+    }
+    if (length > lzx->window_size) {
+       D(("reference length (%u) is longer than the window", length))
+       return LZX_ERR_ARGS;
+    }
+    if (length > 0 && (!input)) {
+        D(("length > 0 but no input"))
+        return LZX_ERR_ARGS;
+    }
+
+    lzx->ref_data_size = length;
+    if (length > 0) {
+        /* copy reference data */
+        unsigned char *pos = &lzx->window[lzx->window_size - length];
+       int bytes = fread(pos, 1, length, input);
+       if (bytes < length) return LZX_ERR_READ;
+    }
+    lzx->ref_data_size = length;
+    return LZX_ERR_OK;
+}
+
 void lzxd_set_output_length(struct lzxd_stream *lzx, off_t out_bytes) {
   if (lzx) lzx->length = out_bytes;
 }
@@ -323,8 +419,6 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
   end_frame = (unsigned int)((lzx->offset + out_bytes) / LZX_FRAME_SIZE) + 1;
 
   while (lzx->frame < end_frame) {
-    int chunk_size;
-
     /* have we reached the reset interval? (if there is one?) */
     if (lzx->reset_interval && ((lzx->frame % lzx->reset_interval) == 0)) {
       if (lzx->block_remaining) {
@@ -339,8 +433,11 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
       R2 = lzx->R2;
     }
 
-    /* LZXD format has the chunk_size. not present in lzx format */
-    READ_BITS (chunk_size, 16);
+    /* LZX DELTA format has chunk_size, not present in LZX format */
+    if (lzx->is_delta) {
+      ENSURE_BITS(16);
+      REMOVE_BITS(16);
+    }
 
     /* read header if necessary */
     if (!lzx->header_read) {
@@ -376,7 +473,7 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
        READ_BITS(lzx->block_type, 3);
        READ_BITS(i, 16); READ_BITS(j, 8);
        lzx->block_remaining = lzx->block_length = (i << 8) | j;
-       D(("new block type %d len %u", lzx->block_type, lzx->block_length))
+       /*D(("new block t%d len %u", lzx->block_type, lzx->block_length))*/
 
        /* read individual block headers */
        switch (lzx->block_type) {
@@ -388,13 +485,13 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
        case LZX_BLOCKTYPE_VERBATIM:
          /* read lengths of and build main huffman decoding tree */
          READ_LENGTHS(MAINTREE, 0, 256);
-         READ_LENGTHS(MAINTREE, 256, LZX_NUM_CHARS + (lzx->posn_slots << 3));
+         READ_LENGTHS(MAINTREE, 256, LZX_NUM_CHARS + lzx->num_offsets);
          BUILD_TABLE(MAINTREE);
          /* if the literal 0xE8 is anywhere in the block... */
          if (lzx->MAINTREE_len[0xE8] != 0) lzx->intel_started = 1;
          /* read lengths of and build lengths huffman decoding tree */
          READ_LENGTHS(LENGTH, 0, LZX_NUM_SECONDARY_LENGTHS);
-         BUILD_TABLE(LENGTH);
+         BUILD_TABLE_MAYBE_EMPTY(LENGTH);
          break;
 
        case LZX_BLOCKTYPE_UNCOMPRESSED:
@@ -417,7 +514,7 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
          break;
 
        default:
-         D(("bad block type \n"))
+         D(("bad block type"))
          return lzx->error = LZX_ERR_DECRUNCH;
        }
       }
@@ -442,19 +539,21 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
            this_run--;
          }
          else {
-           int bit = 0, extra_len = 0;
-
            /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */
            main_element -= LZX_NUM_CHARS;
 
            /* get match length */
            match_length = main_element & LZX_NUM_PRIMARY_LENGTHS;
            if (match_length == LZX_NUM_PRIMARY_LENGTHS) {
+             if (lzx->LENGTH_empty) {
+                D(("LENGTH symbol needed but tree is empty"))
+                return lzx->error = LZX_ERR_DECRUNCH;
+              }
              READ_HUFFSYM(LENGTH, length_footer);
              match_length += length_footer;
            }
            match_length += LZX_MIN_MATCH;
-         
+
            /* get match offset */
            switch ((match_offset = (main_element >> 3))) {
            case 0: match_offset = R0;                                  break;
@@ -462,53 +561,53 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
            case 2: match_offset = R2; R2=R0;        R0 = match_offset; break;
            case 3: match_offset = 1;  R2=R1; R1=R0; R0 = match_offset; break;
            default:
-             extra = extra_bits[match_offset];
+             extra = (match_offset >= 36) ? 17 : extra_bits[match_offset];
              READ_BITS(verbatim_bits, extra);
              match_offset = position_base[match_offset] - 2 + verbatim_bits;
              R2 = R1; R1 = R0; R0 = match_offset;
            }
 
-           /* check for extra len */
-           if (match_length == 257) {
-               READ_BITS (bit, 1);
-               if (bit) {
-                       bit = 0;
-                       READ_BITS (bit, 1);
-                       if (bit) {
-                               bit = 0;
-                               READ_BITS (bit, 1);
-                               if (bit) {
-                                       /* 111 */
-                                       READ_BITS (extra_len, 15);
-                               } else {
-                                       /* 110 */       
-                                       READ_BITS (extra_len, 12);
-                                       extra_len += 256 + 1024;
-                               }
-                       } else {
-                               /* 10 */
-                               READ_BITS (extra_len, 10);
-                               extra_len += 256;
-                       }
-               } else {
-                       /* 0 */
-                       READ_BITS (extra_len, 8);
+           /* LZX DELTA uses max match length to signal even longer match */
+           if (match_length == LZX_MAX_MATCH && lzx->is_delta) {
+               int extra_len = 0;
+               ENSURE_BITS(3); /* 4 entry huffman tree */
+               if (PEEK_BITS(1) == 0) {
+                   REMOVE_BITS(1); /* '0' -> 8 extra length bits */
+                   READ_BITS(extra_len, 8);
+               }
+               else if (PEEK_BITS(2) == 2) {
+                   REMOVE_BITS(2); /* '10' -> 10 extra length bits + 0x100 */
+                   READ_BITS(extra_len, 10);
+                   extra_len += 0x100;
+               }
+               else if (PEEK_BITS(3) == 6) {
+                   REMOVE_BITS(3); /* '110' -> 12 extra length bits + 0x500 */
+                   READ_BITS(extra_len, 12);
+                   extra_len += 0x500;
+               }
+               else {
+                   REMOVE_BITS(3); /* '111' -> 15 extra length bits */
+                   READ_BITS(extra_len, 15);
                }
+               match_length += extra_len;
            }
 
-           match_length += extra_len;
-
            if ((window_posn + match_length) > lzx->window_size) {
-             D(("match ran over window wrap %lu %d ", ftell (lzx->input), match_length))
-             lzx->o_ptr = &lzx->window[lzx->frame_posn];
+             D(("match ran over window wrap"))
              return lzx->error = LZX_ERR_DECRUNCH;
-           } 
-
+           }
+           
            /* copy match */
            rundest = &window[window_posn];
            i = match_length;
            /* does match offset wrap the window? */
            if (match_offset > window_posn) {
+             if (match_offset > lzx->offset &&
+                 (match_offset - window_posn) > lzx->ref_data_size)
+             {
+               D(("match offset beyond LZX stream"))
+               return lzx->error = LZX_ERR_DECRUNCH;
+             }
              /* j = length from match offset to end of window */
              j = match_offset - window_posn;
              if (j > (int) lzx->window_size) {
@@ -527,10 +626,10 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
              runsrc = rundest - match_offset;
              while (i-- > 0) *rundest++ = *runsrc++;
            }
+
            this_run    -= match_length;
            window_posn += match_length;
          }
-
        } /* while (this_run > 0) */
        break;
 
@@ -543,14 +642,16 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
            this_run--;
          }
          else {
-           int bit = 0, extra_len = 0;
-           
            /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */
            main_element -= LZX_NUM_CHARS;
 
            /* get match length */
            match_length = main_element & LZX_NUM_PRIMARY_LENGTHS;
            if (match_length == LZX_NUM_PRIMARY_LENGTHS) {
+              if (lzx->LENGTH_empty) {
+                D(("LENGTH symbol needed but tree is empty"))
+                return lzx->error = LZX_ERR_DECRUNCH;
+              } 
              READ_HUFFSYM(LENGTH, length_footer);
              match_length += length_footer;
            }
@@ -562,7 +663,7 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
            case 1: match_offset = R1; R1 = R0; R0 = match_offset; break;
            case 2: match_offset = R2; R2 = R0; R0 = match_offset; break;
            default:
-             extra = extra_bits[match_offset];
+             extra = (match_offset >= 36) ? 17 : extra_bits[match_offset];
              match_offset = position_base[match_offset] - 2;
              if (extra > 3) {
                /* verbatim and aligned bits */
@@ -590,31 +691,27 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
              R2 = R1; R1 = R0; R0 = match_offset;
            }
 
-           /* check for extra len */
-           if (match_length == 257) {
-               READ_BITS (bit, 1);
-               if (bit) {
-                       bit = 0;
-                       READ_BITS (bit, 1);
-                       if (bit) {
-                               bit = 0;
-                               READ_BITS (bit, 1);
-                               if (bit) {
-                                       /* 111 */
-                                       READ_BITS (extra_len, 15);
-                               } else {
-                                       /* 110 */       
-                                       READ_BITS (extra_len, 12);
-                                       extra_len += 256 + 1024;
-                               }
-                       } else {
-                               /* 10 */
-                               READ_BITS (extra_len, 10);
-                               extra_len += 256;
-                       }
-               } else {
-                       /* 0 */
-                       READ_BITS (extra_len, 8);
+           /* LZX DELTA uses max match length to signal even longer match */
+           if (match_length == LZX_MAX_MATCH && lzx->is_delta) {
+               int extra_len = 0;
+               ENSURE_BITS(3); /* 4 entry huffman tree */
+               if (PEEK_BITS(1) == 0) {
+                   REMOVE_BITS(1); /* '0' -> 8 extra length bits */
+                   READ_BITS(extra_len, 8);
+               }
+               else if (PEEK_BITS(2) == 2) {
+                   REMOVE_BITS(2); /* '10' -> 10 extra length bits + 0x100 */
+                   READ_BITS(extra_len, 10);
+                   extra_len += 0x100;
+               }
+               else if (PEEK_BITS(3) == 6) {
+                   REMOVE_BITS(3); /* '110' -> 12 extra length bits + 0x500 */
+                   READ_BITS(extra_len, 12);
+                   extra_len += 0x500;
+               }
+               else {
+                   REMOVE_BITS(3); /* '111' -> 15 extra length bits */
+                   READ_BITS(extra_len, 15);
                }
                match_length += extra_len;
            }
@@ -623,13 +720,18 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
              D(("match ran over window wrap"))
              return lzx->error = LZX_ERR_DECRUNCH;
            }
-           
 
            /* copy match */
            rundest = &window[window_posn];
            i = match_length;
            /* does match offset wrap the window? */
            if (match_offset > window_posn) {
+             if (match_offset > lzx->offset &&
+                 (match_offset - window_posn) > lzx->ref_data_size)
+             {
+               D(("match offset beyond LZX stream"))
+               return lzx->error = LZX_ERR_DECRUNCH;
+             }
              /* j = length from match offset to end of window */
              j = match_offset - window_posn;
              if (j > (int) lzx->window_size) {
@@ -702,7 +804,7 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) {
 
     /* check that we've used all of the previous frame first */
     if (lzx->o_ptr != lzx->o_end) {
-      D(("%d avail bytes, new %d frame", (int)(lzx->o_end-lzx->o_ptr), frame_size))
+      D(("%ld avail bytes, new %d frame", lzx->o_end-lzx->o_ptr, frame_size))
       return lzx->error = LZX_ERR_DECRUNCH;
     }
 
diff --git a/src/addressbook/lzx/readbits.h b/src/addressbook/lzx/readbits.h
index 0f78142..568d2bd 100644
--- a/src/addressbook/lzx/readbits.h
+++ b/src/addressbook/lzx/readbits.h
@@ -183,6 +183,7 @@ static int read_input(BITS_TYPE *p) {
      * so fake 2 more bytes at the end of input */
     if (read == 0) {
        if (p->input_end) {
+           D(("out of input bytes"))
            return p->error = LZX_ERR_READ;
        }
        else {
diff --git a/src/addressbook/lzx/readhuff.h b/src/addressbook/lzx/readhuff.h
index 4861530..bb15c0a 100644
--- a/src/addressbook/lzx/readhuff.h
+++ b/src/addressbook/lzx/readhuff.h
@@ -29,6 +29,9 @@
 # define HUFF_MAXBITS 16
 #endif
 
+/* Decodes the next huffman symbol from the input bitstream into var.
+ * Do not use this macro on a table unless build_decode_table() succeeded.
+ */
 #define READ_HUFFSYM(tbl, var) do {                    \
     ENSURE_BITS(HUFF_MAXBITS);                         \
     sym = HUFF_TABLE(tbl, PEEK_BITS(TABLEBITS(tbl)));  \
@@ -165,10 +168,6 @@ static int make_decode_table(unsigned int nsyms, unsigned int nbits,
     }
 
     /* full table? */
-    if (pos == table_mask) return 0;
-
-    /* either erroneous table, or all elements are 0 - let's find out. */
-    for (sym = 0; sym < nsyms; sym++) if (length[sym]) return 1;
-    return 0;
+    return (pos == table_mask) ? 0 : 1;
 }
 #endif


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]