summaryrefslogtreecommitdiff
path: root/extmod/uzlib/tinflate.c
diff options
context:
space:
mode:
authorPaul Sokolovsky <pfalcon@users.sourceforge.net>2018-11-30 23:36:49 +0300
committerDamien George <damien.p.george@gmail.com>2019-01-27 10:59:30 +1100
commitb1cca8fbe0f554cd8e497bb30f04e71a611004c9 (patch)
tree73f2fe4aa7362cd901f58030d5b9ed01eaa2b3a9 /extmod/uzlib/tinflate.c
parent3e25d611ef3185b68558a20057d50b0d18dc67a0 (diff)
extmod/uzlib: Update uzlib to v2.9.2.
Major changes include robust parsing of erroneous compressed streams and updated API.
Diffstat (limited to 'extmod/uzlib/tinflate.c')
-rw-r--r--extmod/uzlib/tinflate.c172
1 files changed, 139 insertions, 33 deletions
diff --git a/extmod/uzlib/tinflate.c b/extmod/uzlib/tinflate.c
index 21558af5b..b93bc1fa3 100644
--- a/extmod/uzlib/tinflate.c
+++ b/extmod/uzlib/tinflate.c
@@ -1,11 +1,11 @@
/*
- * tinflate - tiny inflate
+ * uzlib - tiny deflate/inflate library (deflate, gzip, zlib)
*
* Copyright (c) 2003 by Joergen Ibsen / Jibz
* All Rights Reserved
* http://www.ibsensoftware.com/
*
- * Copyright (c) 2014-2016 by Paul Sokolovsky
+ * Copyright (c) 2014-2018 by Paul Sokolovsky
*
* This software is provided 'as-is', without any express
* or implied warranty. In no event will the authors be
@@ -35,6 +35,15 @@
#include <assert.h>
#include "tinf.h"
+#define UZLIB_DUMP_ARRAY(heading, arr, size) \
+ { \
+ printf("%s", heading); \
+ for (int i = 0; i < size; ++i) { \
+ printf(" %d", (arr)[i]); \
+ } \
+ printf("\n"); \
+ }
+
uint32_t tinf_get_le_uint32(TINF_DATA *d);
uint32_t tinf_get_be_uint32(TINF_DATA *d);
@@ -149,6 +158,13 @@ static void tinf_build_tree(TINF_TREE *t, const unsigned char *lengths, unsigned
/* scan symbol lengths, and sum code length counts */
for (i = 0; i < num; ++i) t->table[lengths[i]]++;
+ #if UZLIB_CONF_DEBUG_LOG >= 2
+ UZLIB_DUMP_ARRAY("codelen counts:", t->table, TINF_ARRAY_SIZE(t->table));
+ #endif
+
+ /* In the lengths array, 0 means unused code. So, t->table[0] now contains
+ number of unused codes. But table's purpose is to contain # of codes of
+ particular length, and there're 0 codes of length 0. */
t->table[0] = 0;
/* compute offset table for distribution sort */
@@ -158,6 +174,10 @@ static void tinf_build_tree(TINF_TREE *t, const unsigned char *lengths, unsigned
sum += t->table[i];
}
+ #if UZLIB_CONF_DEBUG_LOG >= 2
+ UZLIB_DUMP_ARRAY("codelen offsets:", offs, TINF_ARRAY_SIZE(offs));
+ #endif
+
/* create code->symbol translation table (symbols sorted by code) */
for (i = 0; i < num; ++i)
{
@@ -171,10 +191,28 @@ static void tinf_build_tree(TINF_TREE *t, const unsigned char *lengths, unsigned
unsigned char uzlib_get_byte(TINF_DATA *d)
{
- if (d->source) {
+ /* If end of source buffer is not reached, return next byte from source
+ buffer. */
+ if (d->source < d->source_limit) {
return *d->source++;
}
- return d->readSource(d);
+
+ /* Otherwise if there's callback and we haven't seen EOF yet, try to
+ read next byte using it. (Note: the callback can also update ->source
+ and ->source_limit). */
+ if (d->readSource && !d->eof) {
+ int val = d->readSource(d);
+ if (val >= 0) {
+ return (unsigned char)val;
+ }
+ }
+
+ /* Otherwise, we hit EOF (either from ->readSource() or from exhaustion
+ of the buffer), and it will be "sticky", i.e. further calls to this
+ function will end up here too. */
+ d->eof = true;
+
+ return 0;
}
uint32_t tinf_get_le_uint32(TINF_DATA *d)
@@ -182,7 +220,7 @@ uint32_t tinf_get_le_uint32(TINF_DATA *d)
uint32_t val = 0;
int i;
for (i = 4; i--;) {
- val = val >> 8 | uzlib_get_byte(d) << 24;
+ val = val >> 8 | ((uint32_t)uzlib_get_byte(d)) << 24;
}
return val;
}
@@ -245,21 +283,31 @@ static int tinf_decode_symbol(TINF_DATA *d, TINF_TREE *t)
cur = 2*cur + tinf_getbit(d);
- ++len;
+ if (++len == TINF_ARRAY_SIZE(t->table)) {
+ return TINF_DATA_ERROR;
+ }
sum += t->table[len];
cur -= t->table[len];
} while (cur >= 0);
- return t->trans[sum + cur];
+ sum += cur;
+ #if UZLIB_CONF_PARANOID_CHECKS
+ if (sum < 0 || sum >= TINF_ARRAY_SIZE(t->trans)) {
+ return TINF_DATA_ERROR;
+ }
+ #endif
+
+ return t->trans[sum];
}
/* given a data stream, decode dynamic trees from it */
-static void tinf_decode_trees(TINF_DATA *d, TINF_TREE *lt, TINF_TREE *dt)
+static int tinf_decode_trees(TINF_DATA *d, TINF_TREE *lt, TINF_TREE *dt)
{
+ /* code lengths for 288 literal/len symbols and 32 dist symbols */
unsigned char lengths[288+32];
- unsigned int hlit, hdist, hclen;
+ unsigned int hlit, hdist, hclen, hlimit;
unsigned int i, num, length;
/* get 5 bits HLIT (257-286) */
@@ -286,53 +334,75 @@ static void tinf_decode_trees(TINF_DATA *d, TINF_TREE *lt, TINF_TREE *dt)
tinf_build_tree(lt, lengths, 19);
/* decode code lengths for the dynamic trees */
- for (num = 0; num < hlit + hdist; )
+ hlimit = hlit + hdist;
+ for (num = 0; num < hlimit; )
{
int sym = tinf_decode_symbol(d, lt);
+ unsigned char fill_value = 0;
+ int lbits, lbase = 3;
+
+ /* error decoding */
+ if (sym < 0) return sym;
switch (sym)
{
case 16:
/* copy previous code length 3-6 times (read 2 bits) */
- {
- unsigned char prev = lengths[num - 1];
- for (length = tinf_read_bits(d, 2, 3); length; --length)
- {
- lengths[num++] = prev;
- }
- }
+ if (num == 0) return TINF_DATA_ERROR;
+ fill_value = lengths[num - 1];
+ lbits = 2;
break;
case 17:
/* repeat code length 0 for 3-10 times (read 3 bits) */
- for (length = tinf_read_bits(d, 3, 3); length; --length)
- {
- lengths[num++] = 0;
- }
+ lbits = 3;
break;
case 18:
/* repeat code length 0 for 11-138 times (read 7 bits) */
- for (length = tinf_read_bits(d, 7, 11); length; --length)
- {
- lengths[num++] = 0;
- }
+ lbits = 7;
+ lbase = 11;
break;
default:
/* values 0-15 represent the actual code lengths */
lengths[num++] = sym;
- break;
+ /* continue the for loop */
+ continue;
}
+
+ /* special code length 16-18 are handled here */
+ length = tinf_read_bits(d, lbits, lbase);
+ if (num + length > hlimit) return TINF_DATA_ERROR;
+ for (; length; --length)
+ {
+ lengths[num++] = fill_value;
+ }
+ }
+
+ #if UZLIB_CONF_DEBUG_LOG >= 2
+ printf("lit code lengths (%d):", hlit);
+ UZLIB_DUMP_ARRAY("", lengths, hlit);
+ printf("dist code lengths (%d):", hdist);
+ UZLIB_DUMP_ARRAY("", lengths + hlit, hdist);
+ #endif
+
+ #if UZLIB_CONF_PARANOID_CHECKS
+ /* Check that there's "end of block" symbol */
+ if (lengths[256] == 0) {
+ return TINF_DATA_ERROR;
}
+ #endif
/* build dynamic trees */
tinf_build_tree(lt, lengths, hlit);
tinf_build_tree(dt, lengths + hlit, hdist);
+
+ return TINF_OK;
}
/* ----------------------------- *
* -- block inflate functions -- *
* ----------------------------- */
-/* given a stream and two trees, inflate a block of data */
+/* given a stream and two trees, inflate next byte of output */
static int tinf_inflate_block_data(TINF_DATA *d, TINF_TREE *lt, TINF_TREE *dt)
{
if (d->curlen == 0) {
@@ -341,6 +411,10 @@ static int tinf_inflate_block_data(TINF_DATA *d, TINF_TREE *lt, TINF_TREE *dt)
int sym = tinf_decode_symbol(d, lt);
//printf("huff sym: %02x\n", sym);
+ if (d->eof) {
+ return TINF_DATA_ERROR;
+ }
+
/* literal byte */
if (sym < 256) {
TINF_PUT(d, sym);
@@ -354,21 +428,45 @@ static int tinf_inflate_block_data(TINF_DATA *d, TINF_TREE *lt, TINF_TREE *dt)
/* substring from sliding dictionary */
sym -= 257;
+ if (sym >= 29) {
+ return TINF_DATA_ERROR;
+ }
+
/* possibly get more bits from length code */
d->curlen = tinf_read_bits(d, length_bits[sym], length_base[sym]);
dist = tinf_decode_symbol(d, dt);
+ if (dist >= 30) {
+ return TINF_DATA_ERROR;
+ }
+
/* possibly get more bits from distance code */
offs = tinf_read_bits(d, dist_bits[dist], dist_base[dist]);
+
+ /* calculate and validate actual LZ offset to use */
if (d->dict_ring) {
if (offs > d->dict_size) {
return TINF_DICT_ERROR;
}
+ /* Note: unlike full-dest-in-memory case below, we don't
+ try to catch offset which points to not yet filled
+ part of the dictionary here. Doing so would require
+ keeping another variable to track "filled in" size
+ of the dictionary. Appearance of such an offset cannot
+ lead to accessing memory outside of the dictionary
+ buffer, and clients which don't want to leak unrelated
+ information, should explicitly initialize dictionary
+ buffer passed to uzlib. */
+
d->lzOff = d->dict_idx - offs;
if (d->lzOff < 0) {
d->lzOff += d->dict_size;
}
} else {
+ /* catch trying to point before the start of dest buffer */
+ if (offs > d->dest - d->destStart) {
+ return TINF_DATA_ERROR;
+ }
d->lzOff = -offs;
}
}
@@ -387,7 +485,7 @@ static int tinf_inflate_block_data(TINF_DATA *d, TINF_TREE *lt, TINF_TREE *dt)
return TINF_OK;
}
-/* inflate an uncompressed block of data */
+/* inflate next byte from uncompressed block of data */
static int tinf_inflate_uncompressed_block(TINF_DATA *d)
{
if (d->curlen == 0) {
@@ -440,6 +538,7 @@ void uzlib_init(void)
/* initialize decompression structure */
void uzlib_uncompress_init(TINF_DATA *d, void *dict, unsigned int dictLen)
{
+ d->eof = 0;
d->bitcount = 0;
d->bfinal = 0;
d->btype = -1;
@@ -449,7 +548,7 @@ void uzlib_uncompress_init(TINF_DATA *d, void *dict, unsigned int dictLen)
d->curlen = 0;
}
-/* inflate next byte of compressed stream */
+/* inflate next output bytes from compressed stream */
int uzlib_uncompress(TINF_DATA *d)
{
do {
@@ -463,14 +562,19 @@ next_blk:
/* read block type (2 bits) */
d->btype = tinf_read_bits(d, 2, 0);
- //printf("Started new block: type=%d final=%d\n", d->btype, d->bfinal);
+ #if UZLIB_CONF_DEBUG_LOG >= 1
+ printf("Started new block: type=%d final=%d\n", d->btype, d->bfinal);
+ #endif
if (d->btype == 1) {
/* build fixed huffman trees */
tinf_build_fixed_trees(&d->ltree, &d->dtree);
} else if (d->btype == 2) {
/* decode trees from stream */
- tinf_decode_trees(d, &d->ltree, &d->dtree);
+ res = tinf_decode_trees(d, &d->ltree, &d->dtree);
+ if (res != TINF_OK) {
+ return res;
+ }
}
}
@@ -483,7 +587,7 @@ next_blk:
break;
case 1:
case 2:
- /* decompress block with fixed/dyanamic huffman trees */
+ /* decompress block with fixed/dynamic huffman trees */
/* trees were decoded previously, so it's the same routine for both */
res = tinf_inflate_block_data(d, &d->ltree, &d->dtree);
break;
@@ -501,11 +605,13 @@ next_blk:
return res;
}
- } while (--d->destSize);
+ } while (d->dest < d->dest_limit);
return TINF_OK;
}
+/* inflate next output bytes from compressed stream, updating
+ checksum, and at the end of stream, verify it */
int uzlib_uncompress_chksum(TINF_DATA *d)
{
int res;