summaryrefslogtreecommitdiff
path: root/extmod
diff options
context:
space:
mode:
Diffstat (limited to 'extmod')
-rw-r--r--extmod/re1.5/charclass.c33
-rw-r--r--extmod/re1.5/compilecode.c229
-rw-r--r--extmod/re1.5/dumpcode.c65
-rw-r--r--extmod/re1.5/re1.5.h154
-rw-r--r--extmod/re1.5/recursiveloop.c87
5 files changed, 0 insertions, 568 deletions
diff --git a/extmod/re1.5/charclass.c b/extmod/re1.5/charclass.c
deleted file mode 100644
index 7f6388c93..000000000
--- a/extmod/re1.5/charclass.c
+++ /dev/null
@@ -1,33 +0,0 @@
-#include "re1.5.h"
-
-int _re1_5_classmatch(const char *pc, const char *sp)
-{
- // pc points to "cnt" byte after opcode
- int is_positive = (pc[-1] == Class);
- int cnt = *pc++;
- while (cnt--) {
- if (*sp >= *pc && *sp <= pc[1]) return is_positive;
- pc += 2;
- }
- return !is_positive;
-}
-
-int _re1_5_namedclassmatch(const char *pc, const char *sp)
-{
- // pc points to name of class
- int off = (*pc >> 5) & 1;
- if ((*pc | 0x20) == 'd') {
- if (!(*sp >= '0' && *sp <= '9')) {
- off ^= 1;
- }
- } else if ((*pc | 0x20) == 's') {
- if (!(*sp == ' ' || (*sp >= '\t' && *sp <= '\r'))) {
- off ^= 1;
- }
- } else { // w
- if (!((*sp >= 'A' && *sp <= 'Z') || (*sp >= 'a' && *sp <= 'z') || (*sp >= '0' && *sp <= '9') || *sp == '_')) {
- off ^= 1;
- }
- }
- return off;
-}
diff --git a/extmod/re1.5/compilecode.c b/extmod/re1.5/compilecode.c
deleted file mode 100644
index add4f6ac2..000000000
--- a/extmod/re1.5/compilecode.c
+++ /dev/null
@@ -1,229 +0,0 @@
-// Copyright 2014 Paul Sokolovsky.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "re1.5.h"
-
-#define INSERT_CODE(at, num, pc) \
- ((code ? memmove(code + at + num, code + at, pc - at) : 0), pc += num)
-#define REL(at, to) (to - at - 2)
-#define EMIT(at, byte) (code ? (code[at] = byte) : (at))
-#define EMIT_CHECKED(at, byte) (_emit_checked(at, code, byte, &err))
-#define PC (prog->bytelen)
-
-static void _emit_checked(int at, char *code, int val, bool *err) {
- *err |= val != (int8_t)val;
- if (code) {
- code[at] = val;
- }
-}
-
-static const char *_compilecode(const char *re, ByteProg *prog, int sizecode)
-{
- char *code = sizecode ? NULL : prog->insts;
- bool err = false;
- int start = PC;
- int term = PC;
- int alt_label = 0;
-
- for (; *re && *re != ')'; re++) {
- switch (*re) {
- case '\\':
- re++;
- if (!*re) return NULL; // Trailing backslash
- if ((*re | 0x20) == 'd' || (*re | 0x20) == 's' || (*re | 0x20) == 'w') {
- term = PC;
- EMIT(PC++, NamedClass);
- EMIT(PC++, *re);
- prog->len++;
- break;
- }
- MP_FALLTHROUGH
- default:
- term = PC;
- EMIT(PC++, Char);
- EMIT(PC++, *re);
- prog->len++;
- break;
- case '.':
- term = PC;
- EMIT(PC++, Any);
- prog->len++;
- break;
- case '[': {
- int cnt;
- term = PC;
- re++;
- if (*re == '^') {
- EMIT(PC++, ClassNot);
- re++;
- } else {
- EMIT(PC++, Class);
- }
- PC++; // Skip # of pair byte
- prog->len++;
- for (cnt = 0; *re != ']'; re++, cnt++) {
- if (*re == '\\') {
- ++re;
- }
- if (!*re) return NULL;
- EMIT(PC++, *re);
- if (re[1] == '-' && re[2] != ']') {
- re += 2;
- }
- EMIT(PC++, *re);
- }
- EMIT_CHECKED(term + 1, cnt);
- break;
- }
- case '(': {
- term = PC;
- int sub = 0;
- int capture = re[1] != '?' || re[2] != ':';
-
- if (capture) {
- sub = ++prog->sub;
- EMIT(PC++, Save);
- EMIT_CHECKED(PC++, 2 * sub);
- prog->len++;
- } else {
- re += 2;
- }
-
- re = _compilecode(re + 1, prog, sizecode);
- if (re == NULL || *re != ')') return NULL; // error, or no matching paren
-
- if (capture) {
- EMIT(PC++, Save);
- EMIT_CHECKED(PC++, 2 * sub + 1);
- prog->len++;
- }
-
- break;
- }
- case '?':
- if (PC == term) return NULL; // nothing to repeat
- INSERT_CODE(term, 2, PC);
- if (re[1] == '?') {
- EMIT(term, RSplit);
- re++;
- } else {
- EMIT(term, Split);
- }
- EMIT_CHECKED(term + 1, REL(term, PC));
- prog->len++;
- term = PC;
- break;
- case '*':
- if (PC == term) return NULL; // nothing to repeat
- INSERT_CODE(term, 2, PC);
- EMIT(PC, Jmp);
- EMIT_CHECKED(PC + 1, REL(PC, term));
- PC += 2;
- if (re[1] == '?') {
- EMIT(term, RSplit);
- re++;
- } else {
- EMIT(term, Split);
- }
- EMIT_CHECKED(term + 1, REL(term, PC));
- prog->len += 2;
- term = PC;
- break;
- case '+':
- if (PC == term) return NULL; // nothing to repeat
- if (re[1] == '?') {
- EMIT(PC, Split);
- re++;
- } else {
- EMIT(PC, RSplit);
- }
- EMIT_CHECKED(PC + 1, REL(PC, term));
- PC += 2;
- prog->len++;
- term = PC;
- break;
- case '|':
- if (alt_label) {
- EMIT_CHECKED(alt_label, REL(alt_label, PC) + 1);
- }
- INSERT_CODE(start, 2, PC);
- EMIT(PC++, Jmp);
- alt_label = PC++;
- EMIT(start, Split);
- EMIT_CHECKED(start + 1, REL(start, PC));
- prog->len += 2;
- term = PC;
- break;
- case '^':
- EMIT(PC++, Bol);
- prog->len++;
- term = PC;
- break;
- case '$':
- EMIT(PC++, Eol);
- prog->len++;
- term = PC;
- break;
- }
- }
-
- if (alt_label) {
- EMIT_CHECKED(alt_label, REL(alt_label, PC) + 1);
- }
- return err ? NULL : re;
-}
-
-int re1_5_sizecode(const char *re)
-{
- ByteProg dummyprog = {
- // Save 0, Save 1, Match; more bytes for "search" (vs "match") prefix code
- .bytelen = 5 + NON_ANCHORED_PREFIX
- };
-
- if (_compilecode(re, &dummyprog, /*sizecode*/1) == NULL) return -1;
-
- return dummyprog.bytelen;
-}
-
-int re1_5_compilecode(ByteProg *prog, const char *re)
-{
- prog->len = 0;
- prog->bytelen = 0;
- prog->sub = 0;
-
- // Add code to implement non-anchored operation ("search"),
- // for anchored operation ("match"), this code will be just skipped.
- // TODO: Implement search in much more efficient manner
- prog->insts[prog->bytelen++] = RSplit;
- prog->insts[prog->bytelen++] = 3;
- prog->insts[prog->bytelen++] = Any;
- prog->insts[prog->bytelen++] = Jmp;
- prog->insts[prog->bytelen++] = -5;
- prog->len += 3;
-
- prog->insts[prog->bytelen++] = Save;
- prog->insts[prog->bytelen++] = 0;
- prog->len++;
-
- re = _compilecode(re, prog, /*sizecode*/0);
- if (re == NULL || *re) return 1;
-
- prog->insts[prog->bytelen++] = Save;
- prog->insts[prog->bytelen++] = 1;
- prog->len++;
-
- prog->insts[prog->bytelen++] = Match;
- prog->len++;
-
- return 0;
-}
-
-#if 0
-int main(int argc, char *argv[])
-{
- int pc = 0;
- ByteProg *code = re1_5_compilecode(argv[1]);
- re1_5_dumpcode(code);
-}
-#endif
diff --git a/extmod/re1.5/dumpcode.c b/extmod/re1.5/dumpcode.c
deleted file mode 100644
index d7781d849..000000000
--- a/extmod/re1.5/dumpcode.c
+++ /dev/null
@@ -1,65 +0,0 @@
-// Copyright 2014 Paul Sokolovsky.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "re1.5.h"
-
-void re1_5_dumpcode(ByteProg *prog)
-{
- int pc = 0;
- char *code = prog->insts;
- while (pc < prog->bytelen) {
- printf("%2d: ", pc);
- switch(code[pc++]) {
- default:
- assert(0);
-// re1_5_fatal("printprog");
- case Split:
- printf("split %d (%d)\n", pc + (signed char)code[pc] + 1, (signed char)code[pc]);
- pc++;
- break;
- case RSplit:
- printf("rsplit %d (%d)\n", pc + (signed char)code[pc] + 1, (signed char)code[pc]);
- pc++;
- break;
- case Jmp:
- printf("jmp %d (%d)\n", pc + (signed char)code[pc] + 1, (signed char)code[pc]);
- pc++;
- break;
- case Char:
- printf("char %c\n", code[pc++]);
- break;
- case Any:
- printf("any\n");
- break;
- case Class:
- case ClassNot: {
- int num = code[pc];
- printf("class%s %d", (code[pc - 1] == ClassNot ? "not" : ""), num);
- pc++;
- while (num--) {
- printf(" 0x%02x-0x%02x", code[pc], code[pc + 1]);
- pc += 2;
- }
- printf("\n");
- break;
- }
- case NamedClass:
- printf("namedclass %c\n", code[pc++]);
- break;
- case Match:
- printf("match\n");
- break;
- case Save:
- printf("save %d\n", (unsigned char)code[pc++]);
- break;
- case Bol:
- printf("assert bol\n");
- break;
- case Eol:
- printf("assert eol\n");
- break;
- }
- }
- printf("Bytes: %d, insts: %d\n", prog->bytelen, prog->len);
-}
diff --git a/extmod/re1.5/re1.5.h b/extmod/re1.5/re1.5.h
deleted file mode 100644
index ba6f97b74..000000000
--- a/extmod/re1.5/re1.5.h
+++ /dev/null
@@ -1,154 +0,0 @@
-// Copyright 2007-2009 Russ Cox. All Rights Reserved.
-// Copyright 2014 Paul Sokolovsky.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#ifndef _RE1_5_REGEXP__H
-#define _RE1_5_REGEXP__H
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdarg.h>
-#include <assert.h>
-
-#define nil ((void*)0)
-#define nelem(x) (sizeof(x)/sizeof((x)[0]))
-
-typedef struct Regexp Regexp;
-typedef struct Prog Prog;
-typedef struct ByteProg ByteProg;
-typedef struct Inst Inst;
-typedef struct Subject Subject;
-
-struct Regexp
-{
- int type;
- int n;
- int ch;
- Regexp *left;
- Regexp *right;
-};
-
-enum /* Regexp.type */
-{
- Alt = 1,
- Cat,
- Lit,
- Dot,
- Paren,
- Quest,
- Star,
- Plus,
-};
-
-Regexp *parse(char*);
-Regexp *reg(int type, Regexp *left, Regexp *right);
-void printre(Regexp*);
-#ifndef re1_5_fatal
-void re1_5_fatal(char*);
-#endif
-#ifndef re1_5_stack_chk
-#define re1_5_stack_chk()
-#endif
-void *mal(int);
-
-struct Prog
-{
- Inst *start;
- int len;
-};
-
-struct ByteProg
-{
- int bytelen;
- int len;
- int sub;
- char insts[0];
-};
-
-struct Inst
-{
- int opcode;
- int c;
- int n;
- Inst *x;
- Inst *y;
- int gen; // global state, oooh!
-};
-
-enum /* Inst.opcode */
-{
- // Instructions which consume input bytes (and thus fail if none left)
- CONSUMERS = 1,
- Char = CONSUMERS,
- Any,
- Class,
- ClassNot,
- NamedClass,
-
- ASSERTS = 0x50,
- Bol = ASSERTS,
- Eol,
-
- // Instructions which take relative offset as arg
- JUMPS = 0x60,
- Jmp = JUMPS,
- Split,
- RSplit,
-
- // Other (special) instructions
- Save = 0x7e,
- Match = 0x7f,
-};
-
-#define inst_is_consumer(inst) ((inst) < ASSERTS)
-#define inst_is_jump(inst) ((inst) & 0x70 == JUMPS)
-
-Prog *compile(Regexp*);
-void printprog(Prog*);
-
-extern int gen;
-
-enum {
- MAXSUB = 20
-};
-
-typedef struct Sub Sub;
-
-struct Sub
-{
- int ref;
- int nsub;
- const char *sub[MAXSUB];
-};
-
-Sub *newsub(int n);
-Sub *incref(Sub*);
-Sub *copy(Sub*);
-Sub *update(Sub*, int, const char*);
-void decref(Sub*);
-
-struct Subject {
- const char *begin;
- const char *end;
-};
-
-
-#define NON_ANCHORED_PREFIX 5
-#define HANDLE_ANCHORED(bytecode, is_anchored) ((is_anchored) ? (bytecode) + NON_ANCHORED_PREFIX : (bytecode))
-
-int re1_5_backtrack(ByteProg*, Subject*, const char**, int, int);
-int re1_5_pikevm(ByteProg*, Subject*, const char**, int, int);
-int re1_5_recursiveloopprog(ByteProg*, Subject*, const char**, int, int);
-int re1_5_recursiveprog(ByteProg*, Subject*, const char**, int, int);
-int re1_5_thompsonvm(ByteProg*, Subject*, const char**, int, int);
-
-int re1_5_sizecode(const char *re);
-int re1_5_compilecode(ByteProg *prog, const char *re);
-void re1_5_dumpcode(ByteProg *prog);
-void cleanmarks(ByteProg *prog);
-int _re1_5_classmatch(const char *pc, const char *sp);
-int _re1_5_namedclassmatch(const char *pc, const char *sp);
-
-#endif /*_RE1_5_REGEXP__H*/
diff --git a/extmod/re1.5/recursiveloop.c b/extmod/re1.5/recursiveloop.c
deleted file mode 100644
index f8cb92629..000000000
--- a/extmod/re1.5/recursiveloop.c
+++ /dev/null
@@ -1,87 +0,0 @@
-// Copyright 2007-2009 Russ Cox. All Rights Reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "re1.5.h"
-
-static int
-recursiveloop(char *pc, const char *sp, Subject *input, const char **subp, int nsubp)
-{
- const char *old;
- int off;
-
- re1_5_stack_chk();
-
- for(;;) {
- if(inst_is_consumer(*pc)) {
- // If we need to match a character, but there's none left, it's fail
- if(sp >= input->end)
- return 0;
- }
- switch(*pc++) {
- case Char:
- if(*sp != *pc++)
- return 0;
- MP_FALLTHROUGH
- case Any:
- sp++;
- continue;
- case Class:
- case ClassNot:
- if (!_re1_5_classmatch(pc, sp))
- return 0;
- pc += *(unsigned char*)pc * 2 + 1;
- sp++;
- continue;
- case NamedClass:
- if (!_re1_5_namedclassmatch(pc, sp))
- return 0;
- pc++;
- sp++;
- continue;
- case Match:
- return 1;
- case Jmp:
- off = (signed char)*pc++;
- pc = pc + off;
- continue;
- case Split:
- off = (signed char)*pc++;
- if(recursiveloop(pc, sp, input, subp, nsubp))
- return 1;
- pc = pc + off;
- continue;
- case RSplit:
- off = (signed char)*pc++;
- if(recursiveloop(pc + off, sp, input, subp, nsubp))
- return 1;
- continue;
- case Save:
- off = (unsigned char)*pc++;
- if(off >= nsubp) {
- continue;
- }
- old = subp[off];
- subp[off] = sp;
- if(recursiveloop(pc, sp, input, subp, nsubp))
- return 1;
- subp[off] = old;
- return 0;
- case Bol:
- if(sp != input->begin)
- return 0;
- continue;
- case Eol:
- if(sp != input->end)
- return 0;
- continue;
- }
- re1_5_fatal("recursiveloop");
- }
-}
-
-int
-re1_5_recursiveloopprog(ByteProg *prog, Subject *input, const char **subp, int nsubp, int is_anchored)
-{
- return recursiveloop(HANDLE_ANCHORED(prog->insts, is_anchored), input->begin, input, subp, nsubp);
-}