diff options
Diffstat (limited to 'extmod')
| -rw-r--r-- | extmod/re1.5/charclass.c | 33 | ||||
| -rw-r--r-- | extmod/re1.5/compilecode.c | 229 | ||||
| -rw-r--r-- | extmod/re1.5/dumpcode.c | 65 | ||||
| -rw-r--r-- | extmod/re1.5/re1.5.h | 154 | ||||
| -rw-r--r-- | extmod/re1.5/recursiveloop.c | 87 |
5 files changed, 0 insertions, 568 deletions
diff --git a/extmod/re1.5/charclass.c b/extmod/re1.5/charclass.c deleted file mode 100644 index 7f6388c93..000000000 --- a/extmod/re1.5/charclass.c +++ /dev/null @@ -1,33 +0,0 @@ -#include "re1.5.h" - -int _re1_5_classmatch(const char *pc, const char *sp) -{ - // pc points to "cnt" byte after opcode - int is_positive = (pc[-1] == Class); - int cnt = *pc++; - while (cnt--) { - if (*sp >= *pc && *sp <= pc[1]) return is_positive; - pc += 2; - } - return !is_positive; -} - -int _re1_5_namedclassmatch(const char *pc, const char *sp) -{ - // pc points to name of class - int off = (*pc >> 5) & 1; - if ((*pc | 0x20) == 'd') { - if (!(*sp >= '0' && *sp <= '9')) { - off ^= 1; - } - } else if ((*pc | 0x20) == 's') { - if (!(*sp == ' ' || (*sp >= '\t' && *sp <= '\r'))) { - off ^= 1; - } - } else { // w - if (!((*sp >= 'A' && *sp <= 'Z') || (*sp >= 'a' && *sp <= 'z') || (*sp >= '0' && *sp <= '9') || *sp == '_')) { - off ^= 1; - } - } - return off; -} diff --git a/extmod/re1.5/compilecode.c b/extmod/re1.5/compilecode.c deleted file mode 100644 index add4f6ac2..000000000 --- a/extmod/re1.5/compilecode.c +++ /dev/null @@ -1,229 +0,0 @@ -// Copyright 2014 Paul Sokolovsky. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#include "re1.5.h" - -#define INSERT_CODE(at, num, pc) \ - ((code ? memmove(code + at + num, code + at, pc - at) : 0), pc += num) -#define REL(at, to) (to - at - 2) -#define EMIT(at, byte) (code ? (code[at] = byte) : (at)) -#define EMIT_CHECKED(at, byte) (_emit_checked(at, code, byte, &err)) -#define PC (prog->bytelen) - -static void _emit_checked(int at, char *code, int val, bool *err) { - *err |= val != (int8_t)val; - if (code) { - code[at] = val; - } -} - -static const char *_compilecode(const char *re, ByteProg *prog, int sizecode) -{ - char *code = sizecode ? NULL : prog->insts; - bool err = false; - int start = PC; - int term = PC; - int alt_label = 0; - - for (; *re && *re != ')'; re++) { - switch (*re) { - case '\\': - re++; - if (!*re) return NULL; // Trailing backslash - if ((*re | 0x20) == 'd' || (*re | 0x20) == 's' || (*re | 0x20) == 'w') { - term = PC; - EMIT(PC++, NamedClass); - EMIT(PC++, *re); - prog->len++; - break; - } - MP_FALLTHROUGH - default: - term = PC; - EMIT(PC++, Char); - EMIT(PC++, *re); - prog->len++; - break; - case '.': - term = PC; - EMIT(PC++, Any); - prog->len++; - break; - case '[': { - int cnt; - term = PC; - re++; - if (*re == '^') { - EMIT(PC++, ClassNot); - re++; - } else { - EMIT(PC++, Class); - } - PC++; // Skip # of pair byte - prog->len++; - for (cnt = 0; *re != ']'; re++, cnt++) { - if (*re == '\\') { - ++re; - } - if (!*re) return NULL; - EMIT(PC++, *re); - if (re[1] == '-' && re[2] != ']') { - re += 2; - } - EMIT(PC++, *re); - } - EMIT_CHECKED(term + 1, cnt); - break; - } - case '(': { - term = PC; - int sub = 0; - int capture = re[1] != '?' || re[2] != ':'; - - if (capture) { - sub = ++prog->sub; - EMIT(PC++, Save); - EMIT_CHECKED(PC++, 2 * sub); - prog->len++; - } else { - re += 2; - } - - re = _compilecode(re + 1, prog, sizecode); - if (re == NULL || *re != ')') return NULL; // error, or no matching paren - - if (capture) { - EMIT(PC++, Save); - EMIT_CHECKED(PC++, 2 * sub + 1); - prog->len++; - } - - break; - } - case '?': - if (PC == term) return NULL; // nothing to repeat - INSERT_CODE(term, 2, PC); - if (re[1] == '?') { - EMIT(term, RSplit); - re++; - } else { - EMIT(term, Split); - } - EMIT_CHECKED(term + 1, REL(term, PC)); - prog->len++; - term = PC; - break; - case '*': - if (PC == term) return NULL; // nothing to repeat - INSERT_CODE(term, 2, PC); - EMIT(PC, Jmp); - EMIT_CHECKED(PC + 1, REL(PC, term)); - PC += 2; - if (re[1] == '?') { - EMIT(term, RSplit); - re++; - } else { - EMIT(term, Split); - } - EMIT_CHECKED(term + 1, REL(term, PC)); - prog->len += 2; - term = PC; - break; - case '+': - if (PC == term) return NULL; // nothing to repeat - if (re[1] == '?') { - EMIT(PC, Split); - re++; - } else { - EMIT(PC, RSplit); - } - EMIT_CHECKED(PC + 1, REL(PC, term)); - PC += 2; - prog->len++; - term = PC; - break; - case '|': - if (alt_label) { - EMIT_CHECKED(alt_label, REL(alt_label, PC) + 1); - } - INSERT_CODE(start, 2, PC); - EMIT(PC++, Jmp); - alt_label = PC++; - EMIT(start, Split); - EMIT_CHECKED(start + 1, REL(start, PC)); - prog->len += 2; - term = PC; - break; - case '^': - EMIT(PC++, Bol); - prog->len++; - term = PC; - break; - case '$': - EMIT(PC++, Eol); - prog->len++; - term = PC; - break; - } - } - - if (alt_label) { - EMIT_CHECKED(alt_label, REL(alt_label, PC) + 1); - } - return err ? NULL : re; -} - -int re1_5_sizecode(const char *re) -{ - ByteProg dummyprog = { - // Save 0, Save 1, Match; more bytes for "search" (vs "match") prefix code - .bytelen = 5 + NON_ANCHORED_PREFIX - }; - - if (_compilecode(re, &dummyprog, /*sizecode*/1) == NULL) return -1; - - return dummyprog.bytelen; -} - -int re1_5_compilecode(ByteProg *prog, const char *re) -{ - prog->len = 0; - prog->bytelen = 0; - prog->sub = 0; - - // Add code to implement non-anchored operation ("search"), - // for anchored operation ("match"), this code will be just skipped. - // TODO: Implement search in much more efficient manner - prog->insts[prog->bytelen++] = RSplit; - prog->insts[prog->bytelen++] = 3; - prog->insts[prog->bytelen++] = Any; - prog->insts[prog->bytelen++] = Jmp; - prog->insts[prog->bytelen++] = -5; - prog->len += 3; - - prog->insts[prog->bytelen++] = Save; - prog->insts[prog->bytelen++] = 0; - prog->len++; - - re = _compilecode(re, prog, /*sizecode*/0); - if (re == NULL || *re) return 1; - - prog->insts[prog->bytelen++] = Save; - prog->insts[prog->bytelen++] = 1; - prog->len++; - - prog->insts[prog->bytelen++] = Match; - prog->len++; - - return 0; -} - -#if 0 -int main(int argc, char *argv[]) -{ - int pc = 0; - ByteProg *code = re1_5_compilecode(argv[1]); - re1_5_dumpcode(code); -} -#endif diff --git a/extmod/re1.5/dumpcode.c b/extmod/re1.5/dumpcode.c deleted file mode 100644 index d7781d849..000000000 --- a/extmod/re1.5/dumpcode.c +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright 2014 Paul Sokolovsky. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#include "re1.5.h" - -void re1_5_dumpcode(ByteProg *prog) -{ - int pc = 0; - char *code = prog->insts; - while (pc < prog->bytelen) { - printf("%2d: ", pc); - switch(code[pc++]) { - default: - assert(0); -// re1_5_fatal("printprog"); - case Split: - printf("split %d (%d)\n", pc + (signed char)code[pc] + 1, (signed char)code[pc]); - pc++; - break; - case RSplit: - printf("rsplit %d (%d)\n", pc + (signed char)code[pc] + 1, (signed char)code[pc]); - pc++; - break; - case Jmp: - printf("jmp %d (%d)\n", pc + (signed char)code[pc] + 1, (signed char)code[pc]); - pc++; - break; - case Char: - printf("char %c\n", code[pc++]); - break; - case Any: - printf("any\n"); - break; - case Class: - case ClassNot: { - int num = code[pc]; - printf("class%s %d", (code[pc - 1] == ClassNot ? "not" : ""), num); - pc++; - while (num--) { - printf(" 0x%02x-0x%02x", code[pc], code[pc + 1]); - pc += 2; - } - printf("\n"); - break; - } - case NamedClass: - printf("namedclass %c\n", code[pc++]); - break; - case Match: - printf("match\n"); - break; - case Save: - printf("save %d\n", (unsigned char)code[pc++]); - break; - case Bol: - printf("assert bol\n"); - break; - case Eol: - printf("assert eol\n"); - break; - } - } - printf("Bytes: %d, insts: %d\n", prog->bytelen, prog->len); -} diff --git a/extmod/re1.5/re1.5.h b/extmod/re1.5/re1.5.h deleted file mode 100644 index ba6f97b74..000000000 --- a/extmod/re1.5/re1.5.h +++ /dev/null @@ -1,154 +0,0 @@ -// Copyright 2007-2009 Russ Cox. All Rights Reserved. -// Copyright 2014 Paul Sokolovsky. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#ifndef _RE1_5_REGEXP__H -#define _RE1_5_REGEXP__H - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <stdarg.h> -#include <assert.h> - -#define nil ((void*)0) -#define nelem(x) (sizeof(x)/sizeof((x)[0])) - -typedef struct Regexp Regexp; -typedef struct Prog Prog; -typedef struct ByteProg ByteProg; -typedef struct Inst Inst; -typedef struct Subject Subject; - -struct Regexp -{ - int type; - int n; - int ch; - Regexp *left; - Regexp *right; -}; - -enum /* Regexp.type */ -{ - Alt = 1, - Cat, - Lit, - Dot, - Paren, - Quest, - Star, - Plus, -}; - -Regexp *parse(char*); -Regexp *reg(int type, Regexp *left, Regexp *right); -void printre(Regexp*); -#ifndef re1_5_fatal -void re1_5_fatal(char*); -#endif -#ifndef re1_5_stack_chk -#define re1_5_stack_chk() -#endif -void *mal(int); - -struct Prog -{ - Inst *start; - int len; -}; - -struct ByteProg -{ - int bytelen; - int len; - int sub; - char insts[0]; -}; - -struct Inst -{ - int opcode; - int c; - int n; - Inst *x; - Inst *y; - int gen; // global state, oooh! -}; - -enum /* Inst.opcode */ -{ - // Instructions which consume input bytes (and thus fail if none left) - CONSUMERS = 1, - Char = CONSUMERS, - Any, - Class, - ClassNot, - NamedClass, - - ASSERTS = 0x50, - Bol = ASSERTS, - Eol, - - // Instructions which take relative offset as arg - JUMPS = 0x60, - Jmp = JUMPS, - Split, - RSplit, - - // Other (special) instructions - Save = 0x7e, - Match = 0x7f, -}; - -#define inst_is_consumer(inst) ((inst) < ASSERTS) -#define inst_is_jump(inst) ((inst) & 0x70 == JUMPS) - -Prog *compile(Regexp*); -void printprog(Prog*); - -extern int gen; - -enum { - MAXSUB = 20 -}; - -typedef struct Sub Sub; - -struct Sub -{ - int ref; - int nsub; - const char *sub[MAXSUB]; -}; - -Sub *newsub(int n); -Sub *incref(Sub*); -Sub *copy(Sub*); -Sub *update(Sub*, int, const char*); -void decref(Sub*); - -struct Subject { - const char *begin; - const char *end; -}; - - -#define NON_ANCHORED_PREFIX 5 -#define HANDLE_ANCHORED(bytecode, is_anchored) ((is_anchored) ? (bytecode) + NON_ANCHORED_PREFIX : (bytecode)) - -int re1_5_backtrack(ByteProg*, Subject*, const char**, int, int); -int re1_5_pikevm(ByteProg*, Subject*, const char**, int, int); -int re1_5_recursiveloopprog(ByteProg*, Subject*, const char**, int, int); -int re1_5_recursiveprog(ByteProg*, Subject*, const char**, int, int); -int re1_5_thompsonvm(ByteProg*, Subject*, const char**, int, int); - -int re1_5_sizecode(const char *re); -int re1_5_compilecode(ByteProg *prog, const char *re); -void re1_5_dumpcode(ByteProg *prog); -void cleanmarks(ByteProg *prog); -int _re1_5_classmatch(const char *pc, const char *sp); -int _re1_5_namedclassmatch(const char *pc, const char *sp); - -#endif /*_RE1_5_REGEXP__H*/ diff --git a/extmod/re1.5/recursiveloop.c b/extmod/re1.5/recursiveloop.c deleted file mode 100644 index f8cb92629..000000000 --- a/extmod/re1.5/recursiveloop.c +++ /dev/null @@ -1,87 +0,0 @@ -// Copyright 2007-2009 Russ Cox. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#include "re1.5.h" - -static int -recursiveloop(char *pc, const char *sp, Subject *input, const char **subp, int nsubp) -{ - const char *old; - int off; - - re1_5_stack_chk(); - - for(;;) { - if(inst_is_consumer(*pc)) { - // If we need to match a character, but there's none left, it's fail - if(sp >= input->end) - return 0; - } - switch(*pc++) { - case Char: - if(*sp != *pc++) - return 0; - MP_FALLTHROUGH - case Any: - sp++; - continue; - case Class: - case ClassNot: - if (!_re1_5_classmatch(pc, sp)) - return 0; - pc += *(unsigned char*)pc * 2 + 1; - sp++; - continue; - case NamedClass: - if (!_re1_5_namedclassmatch(pc, sp)) - return 0; - pc++; - sp++; - continue; - case Match: - return 1; - case Jmp: - off = (signed char)*pc++; - pc = pc + off; - continue; - case Split: - off = (signed char)*pc++; - if(recursiveloop(pc, sp, input, subp, nsubp)) - return 1; - pc = pc + off; - continue; - case RSplit: - off = (signed char)*pc++; - if(recursiveloop(pc + off, sp, input, subp, nsubp)) - return 1; - continue; - case Save: - off = (unsigned char)*pc++; - if(off >= nsubp) { - continue; - } - old = subp[off]; - subp[off] = sp; - if(recursiveloop(pc, sp, input, subp, nsubp)) - return 1; - subp[off] = old; - return 0; - case Bol: - if(sp != input->begin) - return 0; - continue; - case Eol: - if(sp != input->end) - return 0; - continue; - } - re1_5_fatal("recursiveloop"); - } -} - -int -re1_5_recursiveloopprog(ByteProg *prog, Subject *input, const char **subp, int nsubp, int is_anchored) -{ - return recursiveloop(HANDLE_ANCHORED(prog->insts, is_anchored), input->begin, input, subp, nsubp); -} |
