1 files changed, 75 insertions, 0 deletions
diff --git a/py/qstr.c b/py/qstr.c
index c3d78bfda..2b1cea405 100644
--- a/py/qstr.c
+++ b/py/qstr.c
@@ -310,3 +310,78 @@ void qstr_dump_data(void) {
     QSTR_EXIT();
 }
 #endif
+
+#if MICROPY_ROM_TEXT_COMPRESSION
+
+#ifdef NO_QSTR
+
+// NO_QSTR is defined while QSTR extraction is running.  At that stage
+// "genhdr/compressed.data.h" has not been generated yet, so nothing is included.
+
+#else
+// Emit compressed_string_data: while the generated header is included, MP_COMPRESSED_DATA(x)
+// expands to its definition and MP_MATCH_COMPRESSED(a, b) expands to nothing.
+#define MP_COMPRESSED_DATA(x) STATIC const char *compressed_string_data = x;
+#define MP_MATCH_COMPRESSED(a, b)
+#include "genhdr/compressed.data.h"
+#undef MP_COMPRESSED_DATA
+#undef MP_MATCH_COMPRESSED
+
+#endif // NO_QSTR
+
+// This implements the "common word" compression scheme (see makecompresseddata.py) where the most
+// common 128 words in error messages are replaced by their index into the list of common words.
+
+// Returns a pointer to the first char of the n'th word (counting from 0) in
+// compressed_string_data.  Words are stored back to back and the final char of each one
+// has its high bit set, e.g. aaaa<0x80|a>bbbbbb<0x80|b>....  n is not range-checked.
+STATIC const byte *find_uncompressed_string(uint8_t n) {
+    const byte *c = (const byte *)compressed_string_data;
+    for (; n > 0; --n) {
+        // Advance to the high-bit-flagged final char of this word, then step past it.
+        while ((*c & 0x80) == 0) {
+            ++c;
+        }
+        ++c;
+    }
+    return c;
+}
+
+// Expands the compressed string src_chr into dst as a null-terminated string.
+// dst is not bounds-checked: it must be large enough (use MP_MAX_UNCOMPRESSED_TEXT_LEN+1).
+void mp_decompress_rom_string(byte *dst, const mp_rom_error_text_t src_chr) {
+    // Skip the leading 0xff marker byte.
+    const byte *src = (const byte *)src_chr + 1;
+    // Kind of the previously emitted item, used to decide where spaces go around
+    // common words: 0 = nothing yet, 1 = common word, 2 = regular char.
+    int prev = 0;
+    for (; *src != 0; ++src) {
+        if (*src >= 0x80) {
+            // High bit set: the low 7 bits select a common word.
+            // It is preceded by a space unless it is the very first item.
+            if (prev != 0) {
+                *dst++ = ' ';
+            }
+            prev = 1;
+
+            const byte *word = find_uncompressed_string(*src & 0x7f);
+            // Copy up to the final char, which is flagged by its high bit; strip the flag.
+            while ((*word & 0x80) == 0) {
+                *dst++ = *word++;
+            }
+            *dst++ = (*word & 0x7f);
+        } else {
+            // Regular char: copied as-is, with a space first if it follows a common word.
+            if (prev == 1) {
+                *dst++ = ' ';
+            }
+            prev = 2;
+
+            *dst++ = *src;
+        }
+    }
+    // Terminate the output string.
+    *dst = 0;
+}
+
+#endif // MICROPY_ROM_TEXT_COMPRESSION