diff options
author | Paul Sokolovsky <pfalcon@users.sourceforge.net> | 2018-11-30 23:36:49 +0300 |
---|---|---|
committer | Damien George <damien.p.george@gmail.com> | 2019-01-27 10:59:30 +1100 |
commit | b1cca8fbe0f554cd8e497bb30f04e71a611004c9 (patch) | |
tree | 73f2fe4aa7362cd901f58030d5b9ed01eaa2b3a9 /extmod/uzlib/tinflate.c | |
parent | 3e25d611ef3185b68558a20057d50b0d18dc67a0 (diff) |
extmod/uzlib: Update uzlib to v2.9.2.
Major changes include robust parsing of erroneous compressed streams and
updated API.
Diffstat (limited to 'extmod/uzlib/tinflate.c')
-rw-r--r-- | extmod/uzlib/tinflate.c | 172 |
1 files changed, 139 insertions, 33 deletions
diff --git a/extmod/uzlib/tinflate.c b/extmod/uzlib/tinflate.c index 21558af5b..b93bc1fa3 100644 --- a/extmod/uzlib/tinflate.c +++ b/extmod/uzlib/tinflate.c @@ -1,11 +1,11 @@ /* - * tinflate - tiny inflate + * uzlib - tiny deflate/inflate library (deflate, gzip, zlib) * * Copyright (c) 2003 by Joergen Ibsen / Jibz * All Rights Reserved * http://www.ibsensoftware.com/ * - * Copyright (c) 2014-2016 by Paul Sokolovsky + * Copyright (c) 2014-2018 by Paul Sokolovsky * * This software is provided 'as-is', without any express * or implied warranty. In no event will the authors be @@ -35,6 +35,15 @@ #include <assert.h> #include "tinf.h" +#define UZLIB_DUMP_ARRAY(heading, arr, size) \ + { \ + printf("%s", heading); \ + for (int i = 0; i < size; ++i) { \ + printf(" %d", (arr)[i]); \ + } \ + printf("\n"); \ + } + uint32_t tinf_get_le_uint32(TINF_DATA *d); uint32_t tinf_get_be_uint32(TINF_DATA *d); @@ -149,6 +158,13 @@ static void tinf_build_tree(TINF_TREE *t, const unsigned char *lengths, unsigned /* scan symbol lengths, and sum code length counts */ for (i = 0; i < num; ++i) t->table[lengths[i]]++; + #if UZLIB_CONF_DEBUG_LOG >= 2 + UZLIB_DUMP_ARRAY("codelen counts:", t->table, TINF_ARRAY_SIZE(t->table)); + #endif + + /* In the lengths array, 0 means unused code. So, t->table[0] now contains + number of unused codes. But table's purpose is to contain # of codes of + particular length, and there're 0 codes of length 0. */ t->table[0] = 0; /* compute offset table for distribution sort */ @@ -158,6 +174,10 @@ static void tinf_build_tree(TINF_TREE *t, const unsigned char *lengths, unsigned sum += t->table[i]; } + #if UZLIB_CONF_DEBUG_LOG >= 2 + UZLIB_DUMP_ARRAY("codelen offsets:", offs, TINF_ARRAY_SIZE(offs)); + #endif + /* create code->symbol translation table (symbols sorted by code) */ for (i = 0; i < num; ++i) { @@ -171,10 +191,28 @@ static void tinf_build_tree(TINF_TREE *t, const unsigned char *lengths, unsigned unsigned char uzlib_get_byte(TINF_DATA *d) { - if (d->source) { + /* If end of source buffer is not reached, return next byte from source + buffer. */ + if (d->source < d->source_limit) { return *d->source++; } - return d->readSource(d); + + /* Otherwise if there's callback and we haven't seen EOF yet, try to + read next byte using it. (Note: the callback can also update ->source + and ->source_limit). */ + if (d->readSource && !d->eof) { + int val = d->readSource(d); + if (val >= 0) { + return (unsigned char)val; + } + } + + /* Otherwise, we hit EOF (either from ->readSource() or from exhaustion + of the buffer), and it will be "sticky", i.e. further calls to this + function will end up here too. */ + d->eof = true; + + return 0; } uint32_t tinf_get_le_uint32(TINF_DATA *d) @@ -182,7 +220,7 @@ uint32_t tinf_get_le_uint32(TINF_DATA *d) uint32_t val = 0; int i; for (i = 4; i--;) { - val = val >> 8 | uzlib_get_byte(d) << 24; + val = val >> 8 | ((uint32_t)uzlib_get_byte(d)) << 24; } return val; } @@ -245,21 +283,31 @@ static int tinf_decode_symbol(TINF_DATA *d, TINF_TREE *t) cur = 2*cur + tinf_getbit(d); - ++len; + if (++len == TINF_ARRAY_SIZE(t->table)) { + return TINF_DATA_ERROR; + } sum += t->table[len]; cur -= t->table[len]; } while (cur >= 0); - return t->trans[sum + cur]; + sum += cur; + #if UZLIB_CONF_PARANOID_CHECKS + if (sum < 0 || sum >= TINF_ARRAY_SIZE(t->trans)) { + return TINF_DATA_ERROR; + } + #endif + + return t->trans[sum]; } /* given a data stream, decode dynamic trees from it */ -static void tinf_decode_trees(TINF_DATA *d, TINF_TREE *lt, TINF_TREE *dt) +static int tinf_decode_trees(TINF_DATA *d, TINF_TREE *lt, TINF_TREE *dt) { + /* code lengths for 288 literal/len symbols and 32 dist symbols */ unsigned char lengths[288+32]; - unsigned int hlit, hdist, hclen; + unsigned int hlit, hdist, hclen, hlimit; unsigned int i, num, length; /* get 5 bits HLIT (257-286) */ @@ -286,53 +334,75 @@ static void tinf_decode_trees(TINF_DATA *d, TINF_TREE *lt, TINF_TREE *dt) tinf_build_tree(lt, lengths, 19); /* decode code lengths for the dynamic trees */ - for (num = 0; num < hlit + hdist; ) + hlimit = hlit + hdist; + for (num = 0; num < hlimit; ) { int sym = tinf_decode_symbol(d, lt); + unsigned char fill_value = 0; + int lbits, lbase = 3; + + /* error decoding */ + if (sym < 0) return sym; switch (sym) { case 16: /* copy previous code length 3-6 times (read 2 bits) */ - { - unsigned char prev = lengths[num - 1]; - for (length = tinf_read_bits(d, 2, 3); length; --length) - { - lengths[num++] = prev; - } - } + if (num == 0) return TINF_DATA_ERROR; + fill_value = lengths[num - 1]; + lbits = 2; break; case 17: /* repeat code length 0 for 3-10 times (read 3 bits) */ - for (length = tinf_read_bits(d, 3, 3); length; --length) - { - lengths[num++] = 0; - } + lbits = 3; break; case 18: /* repeat code length 0 for 11-138 times (read 7 bits) */ - for (length = tinf_read_bits(d, 7, 11); length; --length) - { - lengths[num++] = 0; - } + lbits = 7; + lbase = 11; break; default: /* values 0-15 represent the actual code lengths */ lengths[num++] = sym; - break; + /* continue the for loop */ + continue; } + + /* special code length 16-18 are handled here */ + length = tinf_read_bits(d, lbits, lbase); + if (num + length > hlimit) return TINF_DATA_ERROR; + for (; length; --length) + { + lengths[num++] = fill_value; + } + } + + #if UZLIB_CONF_DEBUG_LOG >= 2 + printf("lit code lengths (%d):", hlit); + UZLIB_DUMP_ARRAY("", lengths, hlit); + printf("dist code lengths (%d):", hdist); + UZLIB_DUMP_ARRAY("", lengths + hlit, hdist); + #endif + + #if UZLIB_CONF_PARANOID_CHECKS + /* Check that there's "end of block" symbol */ + if (lengths[256] == 0) { + return TINF_DATA_ERROR; } + #endif /* build dynamic trees */ tinf_build_tree(lt, lengths, hlit); tinf_build_tree(dt, lengths + hlit, hdist); + + return TINF_OK; } /* ----------------------------- * * -- block inflate functions -- * * ----------------------------- */ -/* given a stream and two trees, inflate a block of data */ +/* given a stream and two trees, inflate next byte of output */ static int tinf_inflate_block_data(TINF_DATA *d, TINF_TREE *lt, TINF_TREE *dt) { if (d->curlen == 0) { @@ -341,6 +411,10 @@ static int tinf_inflate_block_data(TINF_DATA *d, TINF_TREE *lt, TINF_TREE *dt) int sym = tinf_decode_symbol(d, lt); //printf("huff sym: %02x\n", sym); + if (d->eof) { + return TINF_DATA_ERROR; + } + /* literal byte */ if (sym < 256) { TINF_PUT(d, sym); @@ -354,21 +428,45 @@ static int tinf_inflate_block_data(TINF_DATA *d, TINF_TREE *lt, TINF_TREE *dt) /* substring from sliding dictionary */ sym -= 257; + if (sym >= 29) { + return TINF_DATA_ERROR; + } + /* possibly get more bits from length code */ d->curlen = tinf_read_bits(d, length_bits[sym], length_base[sym]); dist = tinf_decode_symbol(d, dt); + if (dist >= 30) { + return TINF_DATA_ERROR; + } + /* possibly get more bits from distance code */ offs = tinf_read_bits(d, dist_bits[dist], dist_base[dist]); + + /* calculate and validate actual LZ offset to use */ if (d->dict_ring) { if (offs > d->dict_size) { return TINF_DICT_ERROR; } + /* Note: unlike full-dest-in-memory case below, we don't + try to catch offset which points to not yet filled + part of the dictionary here. Doing so would require + keeping another variable to track "filled in" size + of the dictionary. Appearance of such an offset cannot + lead to accessing memory outside of the dictionary + buffer, and clients which don't want to leak unrelated + information, should explicitly initialize dictionary + buffer passed to uzlib. */ + d->lzOff = d->dict_idx - offs; if (d->lzOff < 0) { d->lzOff += d->dict_size; } } else { + /* catch trying to point before the start of dest buffer */ + if (offs > d->dest - d->destStart) { + return TINF_DATA_ERROR; + } d->lzOff = -offs; } } @@ -387,7 +485,7 @@ static int tinf_inflate_block_data(TINF_DATA *d, TINF_TREE *lt, TINF_TREE *dt) return TINF_OK; } -/* inflate an uncompressed block of data */ +/* inflate next byte from uncompressed block of data */ static int tinf_inflate_uncompressed_block(TINF_DATA *d) { if (d->curlen == 0) { @@ -440,6 +538,7 @@ void uzlib_init(void) /* initialize decompression structure */ void uzlib_uncompress_init(TINF_DATA *d, void *dict, unsigned int dictLen) { + d->eof = 0; d->bitcount = 0; d->bfinal = 0; d->btype = -1; @@ -449,7 +548,7 @@ void uzlib_uncompress_init(TINF_DATA *d, void *dict, unsigned int dictLen) d->curlen = 0; } -/* inflate next byte of compressed stream */ +/* inflate next output bytes from compressed stream */ int uzlib_uncompress(TINF_DATA *d) { do { @@ -463,14 +562,19 @@ next_blk: /* read block type (2 bits) */ d->btype = tinf_read_bits(d, 2, 0); - //printf("Started new block: type=%d final=%d\n", d->btype, d->bfinal); + #if UZLIB_CONF_DEBUG_LOG >= 1 + printf("Started new block: type=%d final=%d\n", d->btype, d->bfinal); + #endif if (d->btype == 1) { /* build fixed huffman trees */ tinf_build_fixed_trees(&d->ltree, &d->dtree); } else if (d->btype == 2) { /* decode trees from stream */ - tinf_decode_trees(d, &d->ltree, &d->dtree); + res = tinf_decode_trees(d, &d->ltree, &d->dtree); + if (res != TINF_OK) { + return res; + } } } @@ -483,7 +587,7 @@ next_blk: break; case 1: case 2: - /* decompress block with fixed/dyanamic huffman trees */ + /* decompress block with fixed/dynamic huffman trees */ /* trees were decoded previously, so it's the same routine for both */ res = tinf_inflate_block_data(d, &d->ltree, &d->dtree); break; @@ -501,11 +605,13 @@ next_blk: return res; } - } while (--d->destSize); + } while (d->dest < d->dest_limit); return TINF_OK; } +/* inflate next output bytes from compressed stream, updating + checksum, and at the end of stream, verify it */ int uzlib_uncompress_chksum(TINF_DATA *d) { int res; |