summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jeff Epler <jepler@gmail.com>, 2021-03-27 19:19:51 -0500
committer: Damien George <damien@micropython.org>, 2021-04-06 13:36:42 +1000
commit: 172fb5230a3943eeb6fbbb4de1dc56b16e2a7637 (patch)
tree: 1a5021f7ec290da5ae9b593a8e69b346cf54813a
parent: d35f12f5caf2f6eef08670b61dbe2a3a1b387042 (diff)
extmod/re1.5: Check and report byte overflow errors in _compilecode.
The generated regex code is limited in the range of the jumps and counts it can encode. This commit checks every case that can overflow that range given the right kind of input regex, and returns an error in such a case. The change assumes that a result which overflows an int8_t does not also overflow a platform int.

Closes: #7078
Signed-off-by: Jeff Epler <jepler@gmail.com>
-rw-r--r-- extmod/re1.5/compilecode.c 31
-rw-r--r-- tests/extmod/ure_limit.py 34
-rw-r--r-- tests/extmod/ure_limit.py.exp 7
3 files changed, 61 insertions, 11 deletions
diff --git a/extmod/re1.5/compilecode.c b/extmod/re1.5/compilecode.c
index c4d12af87..add4f6ac2 100644
--- a/extmod/re1.5/compilecode.c
+++ b/extmod/re1.5/compilecode.c
@@ -8,11 +8,20 @@
((code ? memmove(code + at + num, code + at, pc - at) : 0), pc += num)
#define REL(at, to) (to - at - 2)
#define EMIT(at, byte) (code ? (code[at] = byte) : (at))
+#define EMIT_CHECKED(at, byte) (_emit_checked(at, code, byte, &err))
#define PC (prog->bytelen)
+static void _emit_checked(int at, char *code, int val, bool *err) { // Store val at code[at] (only when code is non-NULL) and flag *err if val does not fit in an int8_t.
+ *err |= val != (int8_t)val; // sticky flag: OR-ed in, so an earlier overflow is never cleared by a later in-range value
+ if (code) { // the caller passes code == NULL when sizecode is set; the range check above still runs in that case
+ code[at] = val;
+ }
+}
+
static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
{
char *code = sizecode ? NULL : prog->insts;
+ bool err = false;
int start = PC;
int term = PC;
int alt_label = 0;
@@ -64,7 +73,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
}
EMIT(PC++, *re);
}
- EMIT(term + 1, cnt);
+ EMIT_CHECKED(term + 1, cnt);
break;
}
case '(': {
@@ -75,7 +84,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
if (capture) {
sub = ++prog->sub;
EMIT(PC++, Save);
- EMIT(PC++, 2 * sub);
+ EMIT_CHECKED(PC++, 2 * sub);
prog->len++;
} else {
re += 2;
@@ -86,7 +95,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
if (capture) {
EMIT(PC++, Save);
- EMIT(PC++, 2 * sub + 1);
+ EMIT_CHECKED(PC++, 2 * sub + 1);
prog->len++;
}
@@ -101,7 +110,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
} else {
EMIT(term, Split);
}
- EMIT(term + 1, REL(term, PC));
+ EMIT_CHECKED(term + 1, REL(term, PC));
prog->len++;
term = PC;
break;
@@ -109,7 +118,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
if (PC == term) return NULL; // nothing to repeat
INSERT_CODE(term, 2, PC);
EMIT(PC, Jmp);
- EMIT(PC + 1, REL(PC, term));
+ EMIT_CHECKED(PC + 1, REL(PC, term));
PC += 2;
if (re[1] == '?') {
EMIT(term, RSplit);
@@ -117,7 +126,7 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
} else {
EMIT(term, Split);
}
- EMIT(term + 1, REL(term, PC));
+ EMIT_CHECKED(term + 1, REL(term, PC));
prog->len += 2;
term = PC;
break;
@@ -129,20 +138,20 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
} else {
EMIT(PC, RSplit);
}
- EMIT(PC + 1, REL(PC, term));
+ EMIT_CHECKED(PC + 1, REL(PC, term));
PC += 2;
prog->len++;
term = PC;
break;
case '|':
if (alt_label) {
- EMIT(alt_label, REL(alt_label, PC) + 1);
+ EMIT_CHECKED(alt_label, REL(alt_label, PC) + 1);
}
INSERT_CODE(start, 2, PC);
EMIT(PC++, Jmp);
alt_label = PC++;
EMIT(start, Split);
- EMIT(start + 1, REL(start, PC));
+ EMIT_CHECKED(start + 1, REL(start, PC));
prog->len += 2;
term = PC;
break;
@@ -160,9 +169,9 @@ static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
}
if (alt_label) {
- EMIT(alt_label, REL(alt_label, PC) + 1);
+ EMIT_CHECKED(alt_label, REL(alt_label, PC) + 1);
}
- return re;
+ return err ? NULL : re;
}
int re1_5_sizecode(const char *re)
diff --git a/tests/extmod/ure_limit.py b/tests/extmod/ure_limit.py
new file mode 100644
index 000000000..99c6a818e
--- /dev/null
+++ b/tests/extmod/ure_limit.py
@@ -0,0 +1,34 @@
+# Test overflow in ure.compile output code.
+
+try:
+ import ure as re
+except ImportError:
+ print("SKIP")
+ raise SystemExit
+
+
+def test_re(r):  # Try to compile pattern r; print "Error" if compilation raises, print nothing otherwise.
+ try:
+ re.compile(r)
+ except:  # NOTE(review): bare except also catches SystemExit/KeyboardInterrupt; presumably used so the test does not depend on the exception type -- confirm
+ print("Error")
+
+
+# too many chars in []
+test_re("[" + "a" * 256 + "]")
+
+# too many groups
+test_re("(a)" * 256)
+
+# jump too big for ?
+test_re("(" + "a" * 62 + ")?")
+
+# jump too big for *
+test_re("(" + "a" * 60 + ".)*")
+test_re("(" + "a" * 60 + "..)*")
+
+# jump too big for +
+test_re("(" + "a" * 62 + ")+")
+
+# jump too big for |
+test_re("b" * 63 + "|a")
diff --git a/tests/extmod/ure_limit.py.exp b/tests/extmod/ure_limit.py.exp
new file mode 100644
index 000000000..8353be536
--- /dev/null
+++ b/tests/extmod/ure_limit.py.exp
@@ -0,0 +1,7 @@
+Error
+Error
+Error
+Error
+Error
+Error
+Error