summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/library/deflate.rst93
-rw-r--r--extmod/moddeflate.c31
-rw-r--r--lib/uzlib/header.c4
3 files changed, 74 insertions, 54 deletions
diff --git a/docs/library/deflate.rst b/docs/library/deflate.rst
index 9752af592..fad577870 100644
--- a/docs/library/deflate.rst
+++ b/docs/library/deflate.rst
@@ -41,9 +41,15 @@ Classes
to 1024 bytes. Valid values are ``5`` to ``15`` inclusive (corresponding to
window sizes of 32 to 32k bytes).
- If *wbits* is set to ``0`` (the default), then a window size of 256 bytes
- will be used (corresponding to *wbits* set to ``8``), except when
- :ref:`decompressing a zlib stream <deflate_wbits_zlib>`.
+ If *wbits* is set to ``0`` (the default), then for compression a window size
+ of 256 bytes will be used (as if *wbits* was set to 8). For decompression, it
+ depends on the format:
+
+ * ``RAW`` will use 256 bytes (corresponding to *wbits* set to 8).
+ * ``ZLIB`` (or ``AUTO`` with zlib detected) will use the value from the zlib
+ header.
+ * ``GZIP`` (or ``AUTO`` with gzip detected) will use 32 kilobytes
+ (corresponding to *wbits* set to 15).
See the :ref:`window size <deflate_wbits>` notes below for more information
about the window size, zlib, and gzip streams.
@@ -134,44 +140,43 @@ Deflate window size
-------------------
The window size limits how far back in the stream the (de)compressor can
-reference. Increasing the window size will improve compression, but will
-require more memory.
-
-However, just because a given window size is used for compression, this does not
-mean that the stream will require the same size window for decompression, as
-the stream may not reference data as far back as the window allows (for example,
-if the length of the input is smaller than the window size).
-
-If the decompressor uses a smaller window size than necessary for the input data
-stream, it will fail mid-way through decompression with :exc:`OSError`.
-
-.. _deflate_wbits_zlib:
-
-The zlib format includes a header which specifies the window size used to
-compress the data (which due to the above, may be larger than the size required
-for the decompressor).
-
-If this header value is lower than the specified *wbits* value, then the header
-value will be used instead in order to reduce the memory allocation size. If
-the *wbits* parameter is zero (the default), then the header value will only be
-used if it is less than the maximum value of ``15`` (which is default value
-used by most compressors [#f1]_).
-
-In other words, if the source zlib stream has been compressed with a custom window
-size (i.e. less than ``15``), then using the default *wbits* parameter of zero
-will decompress any such stream.
-
-The gzip file format does not include the window size in the header.
-Additionally, most compressor libraries (including CPython's implementation
-of :class:`gzip.GzipFile`) will default to the maximum possible window size.
-This makes it difficult to decompress most gzip streams on MicroPython unless
-your board has a lot of free RAM.
-
-If you control the source of the compressed data, then prefer to use the zlib
-format, with a window size that is suitable for your target device.
-
-.. rubric:: Footnotes
-
-.. [#f1] The assumption here is that if the header value is the default used by
- most compressors, then nothing is known about the likely required window
- size and we should ignore it.
+reference. Increasing the window size will improve compression, but will require
+more memory and make the compressor slower.
+
+If an input stream was compressed with a given window size, then `DeflateIO`
+using a smaller window size will fail mid-way during decompression with
+:exc:`OSError`, but only if a back-reference actually refers back further
+than the decompressor's window size. This means it may be possible to decompress
+with a smaller window size. For example, this would trivially be the case if the
+original uncompressed data is shorter than the window size.
+
+Decompression
+~~~~~~~~~~~~~
+
+The zlib format includes a header which specifies the window size that was used
+to compress the data. This indicates the maximum window size required to
+decompress this stream. If this header value is less than the specified *wbits*
+value (or if *wbits* is unset), then the header value will be used.
+
+The gzip format does not include the window size in the header, and assumes that
+all gzip compressors (e.g. the ``gzip`` utility, or CPython's implementation of
+:class:`gzip.GzipFile`) use the maximum window size of 32 kiB. For this reason,
+if the *wbits* parameter is not set, the decompressor will use a 32 kiB window
+size (corresponding to *wbits* set to 15). This means that to be able to
+decompress an arbitrary gzip stream, you must have at least this much RAM
+available. If you control the source data, consider instead using the zlib
+format with a smaller window size.
+
+The raw format has no header and therefore does not include any information
+about the window size. If *wbits* is not set, then it will default to a window
+size of 256 bytes, which may not be large enough for a given stream. Therefore
+it is recommended that you always explicitly set *wbits* when using the raw
+format.
+
+Compression
+~~~~~~~~~~~
+
+For compression, MicroPython will default to a window size of 256 bytes for all
+formats. This provides a reasonable amount of compression with minimal memory
+usage and fast compression time, and will generate output that will work with
+any decompressor.
diff --git a/extmod/moddeflate.c b/extmod/moddeflate.c
index 1d8a8acf7..560ee3f0a 100644
--- a/extmod/moddeflate.c
+++ b/extmod/moddeflate.c
@@ -54,6 +54,8 @@ typedef enum {
DEFLATEIO_FORMAT_MAX = DEFLATEIO_FORMAT_GZIP,
} deflateio_format_t;
+// This is used when the wbits is unset in the DeflateIO constructor. Default
+// to the smallest window size (faster compression, less RAM usage, etc).
const int DEFLATEIO_DEFAULT_WBITS = 8;
typedef struct {
@@ -114,24 +116,32 @@ STATIC bool deflateio_init_read(mp_obj_deflateio_t *self) {
// Don't modify self->window_bits as it may also be used for write.
int wbits = self->window_bits;
- // Parse the header if we're in NONE/ZLIB/GZIP modes.
- if (self->format != DEFLATEIO_FORMAT_RAW) {
- int header_wbits = wbits;
+ if (self->format == DEFLATEIO_FORMAT_RAW) {
+ if (wbits == 0) {
+ // The docs recommend always setting wbits explicitly when using
+ // RAW, but we still allow a default.
+ wbits = DEFLATEIO_DEFAULT_WBITS;
+ }
+ } else {
+ // Parse the header if we're in NONE/ZLIB/GZIP modes.
+ int header_wbits;
int header_type = uzlib_parse_zlib_gzip_header(&self->read->decomp, &header_wbits);
+ if (header_type < 0) {
+ // Stream header was invalid.
+ return false;
+ }
if ((self->format == DEFLATEIO_FORMAT_ZLIB && header_type != UZLIB_HEADER_ZLIB) || (self->format == DEFLATEIO_FORMAT_GZIP && header_type != UZLIB_HEADER_GZIP)) {
+ // Not what we expected.
return false;
}
- if (wbits == 0 && header_wbits < 15) {
- // If the header specified something lower than the default, then
- // use that instead.
+ // header_wbits will either be 15 (gzip) or 8-15 (zlib).
+ if (wbits == 0 || header_wbits < wbits) {
+ // If the header specified something lower, then use that instead.
+ // No point doing a bigger allocation than we need to.
wbits = header_wbits;
}
}
- if (wbits == 0) {
- wbits = DEFLATEIO_DEFAULT_WBITS;
- }
-
size_t window_len = 1 << wbits;
self->read->window = m_new(uint8_t, window_len);
@@ -163,6 +173,7 @@ STATIC bool deflateio_init_write(mp_obj_deflateio_t *self) {
int wbits = self->window_bits;
if (wbits == 0) {
+ // Same default wbits for all formats.
wbits = DEFLATEIO_DEFAULT_WBITS;
}
size_t window_len = 1 << wbits;
diff --git a/lib/uzlib/header.c b/lib/uzlib/header.c
index edd2b08ab..9c48d9139 100644
--- a/lib/uzlib/header.c
+++ b/lib/uzlib/header.c
@@ -108,6 +108,10 @@ int uzlib_parse_zlib_gzip_header(uzlib_uncomp_t *d, int *wbits)
d->checksum_type = UZLIB_CHKSUM_CRC;
d->checksum = ~0;
+ /* gzip does not include the window size in the header, as it is expected that a
+ compressor will use wbits=15 (32kiB).*/
+ *wbits = 15;
+
return UZLIB_HEADER_GZIP;
} else {
/* check checksum */