diff options
author | Damien George <damien@micropython.org> | 2021-07-12 16:33:19 +1000 |
---|---|---|
committer | Damien George <damien@micropython.org> | 2021-07-12 16:36:26 +1000 |
commit | d1d172f536247df4716b76b105a875477ea75d65 (patch) | |
tree | 9a3ce8a5f35dd3e7799cdd833bf63710659767c9 /extmod/re1.5/compilecode.c | |
parent | e2f0b181f94c7f0843102b02912680d916b0b7d0 (diff) |
lib/re1.5: Move re1.5 code from extmod to lib.
It's third-party code, and not necessarily tied to extmod.
Signed-off-by: Damien George <damien@micropython.org>
Diffstat (limited to 'extmod/re1.5/compilecode.c')
-rw-r--r-- | extmod/re1.5/compilecode.c | 229 |
1 files changed, 0 insertions, 229 deletions
diff --git a/extmod/re1.5/compilecode.c b/extmod/re1.5/compilecode.c deleted file mode 100644 index add4f6ac2..000000000 --- a/extmod/re1.5/compilecode.c +++ /dev/null @@ -1,229 +0,0 @@ -// Copyright 2014 Paul Sokolovsky. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#include "re1.5.h" - -#define INSERT_CODE(at, num, pc) \ - ((code ? memmove(code + at + num, code + at, pc - at) : 0), pc += num) -#define REL(at, to) (to - at - 2) -#define EMIT(at, byte) (code ? (code[at] = byte) : (at)) -#define EMIT_CHECKED(at, byte) (_emit_checked(at, code, byte, &err)) -#define PC (prog->bytelen) - -static void _emit_checked(int at, char *code, int val, bool *err) { - *err |= val != (int8_t)val; - if (code) { - code[at] = val; - } -} - -static const char *_compilecode(const char *re, ByteProg *prog, int sizecode) -{ - char *code = sizecode ? NULL : prog->insts; - bool err = false; - int start = PC; - int term = PC; - int alt_label = 0; - - for (; *re && *re != ')'; re++) { - switch (*re) { - case '\\': - re++; - if (!*re) return NULL; // Trailing backslash - if ((*re | 0x20) == 'd' || (*re | 0x20) == 's' || (*re | 0x20) == 'w') { - term = PC; - EMIT(PC++, NamedClass); - EMIT(PC++, *re); - prog->len++; - break; - } - MP_FALLTHROUGH - default: - term = PC; - EMIT(PC++, Char); - EMIT(PC++, *re); - prog->len++; - break; - case '.': - term = PC; - EMIT(PC++, Any); - prog->len++; - break; - case '[': { - int cnt; - term = PC; - re++; - if (*re == '^') { - EMIT(PC++, ClassNot); - re++; - } else { - EMIT(PC++, Class); - } - PC++; // Skip # of pair byte - prog->len++; - for (cnt = 0; *re != ']'; re++, cnt++) { - if (*re == '\\') { - ++re; - } - if (!*re) return NULL; - EMIT(PC++, *re); - if (re[1] == '-' && re[2] != ']') { - re += 2; - } - EMIT(PC++, *re); - } - EMIT_CHECKED(term + 1, cnt); - break; - } - case '(': { - term = PC; - int sub = 0; - int capture = re[1] != '?' || re[2] != ':'; - - if (capture) { - sub = ++prog->sub; - EMIT(PC++, Save); - EMIT_CHECKED(PC++, 2 * sub); - prog->len++; - } else { - re += 2; - } - - re = _compilecode(re + 1, prog, sizecode); - if (re == NULL || *re != ')') return NULL; // error, or no matching paren - - if (capture) { - EMIT(PC++, Save); - EMIT_CHECKED(PC++, 2 * sub + 1); - prog->len++; - } - - break; - } - case '?': - if (PC == term) return NULL; // nothing to repeat - INSERT_CODE(term, 2, PC); - if (re[1] == '?') { - EMIT(term, RSplit); - re++; - } else { - EMIT(term, Split); - } - EMIT_CHECKED(term + 1, REL(term, PC)); - prog->len++; - term = PC; - break; - case '*': - if (PC == term) return NULL; // nothing to repeat - INSERT_CODE(term, 2, PC); - EMIT(PC, Jmp); - EMIT_CHECKED(PC + 1, REL(PC, term)); - PC += 2; - if (re[1] == '?') { - EMIT(term, RSplit); - re++; - } else { - EMIT(term, Split); - } - EMIT_CHECKED(term + 1, REL(term, PC)); - prog->len += 2; - term = PC; - break; - case '+': - if (PC == term) return NULL; // nothing to repeat - if (re[1] == '?') { - EMIT(PC, Split); - re++; - } else { - EMIT(PC, RSplit); - } - EMIT_CHECKED(PC + 1, REL(PC, term)); - PC += 2; - prog->len++; - term = PC; - break; - case '|': - if (alt_label) { - EMIT_CHECKED(alt_label, REL(alt_label, PC) + 1); - } - INSERT_CODE(start, 2, PC); - EMIT(PC++, Jmp); - alt_label = PC++; - EMIT(start, Split); - EMIT_CHECKED(start + 1, REL(start, PC)); - prog->len += 2; - term = PC; - break; - case '^': - EMIT(PC++, Bol); - prog->len++; - term = PC; - break; - case '$': - EMIT(PC++, Eol); - prog->len++; - term = PC; - break; - } - } - - if (alt_label) { - EMIT_CHECKED(alt_label, REL(alt_label, PC) + 1); - } - return err ? NULL : re; -} - -int re1_5_sizecode(const char *re) -{ - ByteProg dummyprog = { - // Save 0, Save 1, Match; more bytes for "search" (vs "match") prefix code - .bytelen = 5 + NON_ANCHORED_PREFIX - }; - - if (_compilecode(re, &dummyprog, /*sizecode*/1) == NULL) return -1; - - return dummyprog.bytelen; -} - -int re1_5_compilecode(ByteProg *prog, const char *re) -{ - prog->len = 0; - prog->bytelen = 0; - prog->sub = 0; - - // Add code to implement non-anchored operation ("search"), - // for anchored operation ("match"), this code will be just skipped. - // TODO: Implement search in much more efficient manner - prog->insts[prog->bytelen++] = RSplit; - prog->insts[prog->bytelen++] = 3; - prog->insts[prog->bytelen++] = Any; - prog->insts[prog->bytelen++] = Jmp; - prog->insts[prog->bytelen++] = -5; - prog->len += 3; - - prog->insts[prog->bytelen++] = Save; - prog->insts[prog->bytelen++] = 0; - prog->len++; - - re = _compilecode(re, prog, /*sizecode*/0); - if (re == NULL || *re) return 1; - - prog->insts[prog->bytelen++] = Save; - prog->insts[prog->bytelen++] = 1; - prog->len++; - - prog->insts[prog->bytelen++] = Match; - prog->len++; - - return 0; -} - -#if 0 -int main(int argc, char *argv[]) -{ - int pc = 0; - ByteProg *code = re1_5_compilecode(argv[1]); - re1_5_dumpcode(code); -} -#endif |