diff options
| author | Damien George <damien.p.george@gmail.com> | 2014-10-11 18:55:44 +0100 | 
|---|---|---|
| committer | Damien George <damien.p.george@gmail.com> | 2014-10-11 18:55:44 +0100 | 
| commit | 37671c9a973d20248048343cb168c50f6bbca959 (patch) | |
| tree | 1bac386c924a0a8bd289bd4a36f3af9974283a33 /extmod/re1.5/regexp.h | |
| parent | 1ce916aefdaee7bca2bcf2bc4ee7e1eb75002f67 (diff) | |
| parent | dd5ee9ff9c67575897ccfa8b4e8b10abecbe9800 (diff) | |
Merge branch 'pfalcon-modure'
Diffstat (limited to 'extmod/re1.5/regexp.h')
| -rw-r--r-- | extmod/re1.5/regexp.h | 138 | 
1 files changed, 138 insertions, 0 deletions
| diff --git a/extmod/re1.5/regexp.h b/extmod/re1.5/regexp.h new file mode 100644 index 000000000..78492eb0f --- /dev/null +++ b/extmod/re1.5/regexp.h @@ -0,0 +1,138 @@ +// Copyright 2007-2009 Russ Cox.  All Rights Reserved. +// Copyright 2014 Paul Sokolovsky. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <assert.h> + +#define nil ((void*)0) +#define nelem(x) (sizeof(x)/sizeof((x)[0])) + +typedef struct Regexp Regexp; +typedef struct Prog Prog; +typedef struct ByteProg ByteProg; +typedef struct Inst Inst; +typedef struct Subject Subject; + +struct Regexp +{ +	int type; +	int n; +	int ch; +	Regexp *left; +	Regexp *right; +}; + +enum	/* Regexp.type */ +{ +	Alt = 1, +	Cat, +	Lit, +	Dot, +	Paren, +	Quest, +	Star, +	Plus, +}; + +Regexp *parse(char*); +Regexp *reg(int type, Regexp *left, Regexp *right); +void printre(Regexp*); +#ifndef re1_5_fatal +void re1_5_fatal(char*); +#endif +void *mal(int); + +struct Prog +{ +	Inst *start; +	int len; +}; + +struct ByteProg +{ +	int bytelen; +	int len; +	int sub; +	char insts[0]; +}; + +struct Inst +{ +	int opcode; +	int c; +	int n; +	Inst *x; +	Inst *y; +	int gen;	// global state, oooh! +}; + +enum	/* Inst.opcode */ +{ +	// Instructions which consume input bytes (and thus fail if none left) +	CONSUMERS = 1, +	Char = CONSUMERS, +	Any, +	ASSERTS = 0x50, +	Bol = ASSERTS, +	Eol, +	// Instructions which take relative offset as arg +	JUMPS = 0x60, +	Jmp = JUMPS, +	Split, +	RSplit, +	// Other (special) instructions +	Save = 0x7e, +	Match = 0x7f, +}; + +#define inst_is_consumer(inst) ((inst) < ASSERTS) +#define inst_is_jump(inst) ((inst) & 0x70 == JUMPS) + +Prog *compile(Regexp*); +void printprog(Prog*); + +extern int gen; + +enum { +	MAXSUB = 20 +}; + +typedef struct Sub Sub; + +struct Sub +{ +	int ref; +	int nsub; +	const char *sub[MAXSUB]; +}; + +Sub *newsub(int n); +Sub *incref(Sub*); +Sub *copy(Sub*); +Sub *update(Sub*, int, const char*); +void decref(Sub*); + +struct Subject { +	const char *begin; +	const char *end; +}; + + +#define NON_ANCHORED_PREFIX 5 +#define HANDLE_ANCHORED(bytecode, is_anchored) ((is_anchored) ? (bytecode) + NON_ANCHORED_PREFIX : (bytecode)) + +int re1_5_backtrack(ByteProg*, Subject*, const char**, int, int); +int re1_5_pikevm(ByteProg*, Subject*, const char**, int, int); +int re1_5_recursiveloopprog(ByteProg*, Subject*, const char**, int, int); +int re1_5_recursiveprog(ByteProg*, Subject*, const char**, int, int); +int re1_5_thompsonvm(ByteProg*, Subject*, const char**, int, int); + +int re1_5_sizecode(const char *re); +int re1_5_compilecode(ByteProg *prog, const char *re); +void re1_5_dumpcode(ByteProg *prog); +void cleanmarks(ByteProg *prog); | 
