summary | refs | log | tree | commit | diff
path: root/src/include/parser
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2009-11-09 18:38:48 +0000
committer Tom Lane <tgl@sss.pgh.pa.us> 2009-11-09 18:38:48 +0000
commit 10bcfa189bedaeaa6bfe8d7841ed3b17f23c0df4 (patch)
tree 70b98c6fd252fb828a393d830322f64b37cd5e81 /src/include/parser
parent 2ace38d226246b83e5cc4d8f4063a82a485ddc95 (diff)
Re-refactor the core scanner's API, in order to get out from under the problem of different parsers having different YYSTYPE unions that they want to use with it.
I defined a new union core_YYSTYPE that is just the (very short) list of semantic values returned by the core scanner. I had originally worried that this would require an extra interface layer, but actually we can have parser.c's base_yylex (formerly filtered_base_yylex) take care of that at no extra cost. Names associated with the core scanner are now "core_yy_foo", with "base_yy_foo" being used in the core Bison parser and the parser.c interface layer. This solves the last serious stumbling block to eliminating plpgsql's separate lexer. One restriction that will still be present is that plpgsql and the core will have to agree on the token numbers assigned to tokens that can be returned by the core lexer. Since Bison doesn't seem willing to accept external assignments of those numbers, we'll have to live with decreeing that core and plpgsql grammars declare these tokens first and in the same order.
Diffstat (limited to 'src/include/parser')
-rw-r--r-- src/include/parser/gramparse.h 80
-rw-r--r-- src/include/parser/scanner.h 120
2 files changed, 131 insertions, 69 deletions
diff --git a/src/include/parser/gramparse.h b/src/include/parser/gramparse.h
index 09c99091361..41774028b5b 100644
--- a/src/include/parser/gramparse.h
+++ b/src/include/parser/gramparse.h
@@ -11,7 +11,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/parser/gramparse.h,v 1.49 2009/11/05 23:24:26 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/parser/gramparse.h,v 1.50 2009/11/09 18:38:48 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -20,20 +20,11 @@
#define GRAMPARSE_H
#include "nodes/parsenodes.h"
-#include "parser/keywords.h"
+#include "parser/scanner.h"
/*
- * We track token locations in terms of byte offsets from the start of the
- * source string, not the column number/line number representation that
- * bison uses by default. Also, to minimize overhead we track only one
- * location (usually the first token location) for each construct, not
- * the beginning and ending locations as bison does by default. It's
- * therefore sufficient to make YYLTYPE an int.
- */
-#define YYLTYPE int
-
-/*
- * After defining YYLTYPE, it's safe to include gram.h.
+ * NB: include gram.h only AFTER including scanner.h, because scanner.h
+ * is what #defines YYLTYPE.
*/
#include "parser/gram.h"
@@ -44,63 +35,25 @@
typedef struct base_yy_extra_type
{
/*
- * The string the lexer is physically scanning. We keep this mainly so
- * that we can cheaply compute the offset of the current token (yytext).
+ * Fields used by the core scanner.
*/
- char *scanbuf;
- Size scanbuflen;
+ core_yy_extra_type core_yy_extra;
/*
- * The keyword list to use.
- */
- const ScanKeyword *keywords;
- int num_keywords;
-
- /*
- * literalbuf is used to accumulate literal values when multiple rules
- * are needed to parse a single literal. Call startlit() to reset buffer
- * to empty, addlit() to add text. NOTE: the string in literalbuf is
- * NOT necessarily null-terminated, but there always IS room to add a
- * trailing null at offset literallen. We store a null only when we
- * need it.
- */
- char *literalbuf; /* palloc'd expandable buffer */
- int literallen; /* actual current string length */
- int literalalloc; /* current allocated buffer size */
-
- int xcdepth; /* depth of nesting in slash-star comments */
- char *dolqstart; /* current $foo$ quote start string */
-
- /* first part of UTF16 surrogate pair for Unicode escapes */
- int32 utf16_first_part;
-
- /* state variables for literal-lexing warnings */
- bool warn_on_first_escape;
- bool saw_non_ascii;
-
- /*
- * State variables for filtered_base_yylex().
+ * State variables for base_yylex().
*/
bool have_lookahead; /* is lookahead info valid? */
int lookahead_token; /* one-token lookahead */
- YYSTYPE lookahead_yylval; /* yylval for lookahead token */
+ core_YYSTYPE lookahead_yylval; /* yylval for lookahead token */
YYLTYPE lookahead_yylloc; /* yylloc for lookahead token */
/*
- * State variables that belong to the grammar, not the lexer. It's
- * simpler to keep these here than to invent a separate structure.
- * These fields are unused/undefined if the lexer is invoked on its own.
+ * State variables that belong to the grammar.
*/
-
List *parsetree; /* final parse result is delivered here */
} base_yy_extra_type;
/*
- * The type of yyscanner is opaque outside scan.l.
- */
-typedef void *base_yyscan_t;
-
-/*
* In principle we should use yyget_extra() to fetch the yyextra field
* from a yyscanner struct. However, flex always puts that field first,
* and this is sufficiently performance-critical to make it seem worth
@@ -110,22 +63,11 @@ typedef void *base_yyscan_t;
/* from parser.c */
-extern int filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp,
- base_yyscan_t yyscanner);
-
-/* from scan.l */
-extern base_yyscan_t scanner_init(const char *str,
- base_yy_extra_type *yyext,
- const ScanKeyword *keywords,
- int num_keywords);
-extern void scanner_finish(base_yyscan_t yyscanner);
extern int base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp,
- base_yyscan_t yyscanner);
-extern int scanner_errposition(int location, base_yyscan_t yyscanner);
-extern void scanner_yyerror(const char *message, base_yyscan_t yyscanner);
+ core_yyscan_t yyscanner);
/* from gram.y */
extern void parser_init(base_yy_extra_type *yyext);
-extern int base_yyparse(base_yyscan_t yyscanner);
+extern int base_yyparse(core_yyscan_t yyscanner);
#endif /* GRAMPARSE_H */
diff --git a/src/include/parser/scanner.h b/src/include/parser/scanner.h
new file mode 100644
index 00000000000..ccab1db862c
--- /dev/null
+++ b/src/include/parser/scanner.h
@@ -0,0 +1,120 @@
+/*-------------------------------------------------------------------------
+ *
+ * scanner.h
+ * API for the core scanner (flex machine)
+ *
+ * The core scanner is also used by PL/pgsql, so we provide a public API
+ * for it. However, the rest of the backend is only expected to use the
+ * higher-level API provided by parser.h.
+ *
+ *
+ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * $PostgreSQL: pgsql/src/include/parser/scanner.h,v 1.1 2009/11/09 18:38:48 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef SCANNER_H
+#define SCANNER_H
+
+#include "parser/keywords.h"
+
+/*
+ * The scanner returns extra data about scanned tokens in this union type.
+ * Note that this is a subset of the fields used in YYSTYPE of the bison
+ * parsers built atop the scanner.
+ */
+typedef union core_YYSTYPE
+{
+ int ival; /* for integer literals */
+ char *str; /* for identifiers and non-integer literals */
+ const char *keyword; /* canonical spelling of keywords */
+} core_YYSTYPE;
+
+/*
+ * We track token locations in terms of byte offsets from the start of the
+ * source string, not the column number/line number representation that
+ * bison uses by default. Also, to minimize overhead we track only one
+ * location (usually the first token location) for each construct, not
+ * the beginning and ending locations as bison does by default. It's
+ * therefore sufficient to make YYLTYPE an int.
+ */
+#define YYLTYPE int
+
+/*
+ * Another important component of the scanner's API is the token code numbers.
+ * However, those are not defined in this file, because bison insists on
+ * defining them for itself. The token codes used by the core scanner are
+ * the ASCII characters plus these:
+ * %token <str> IDENT FCONST SCONST BCONST XCONST Op
+ * %token <ival> ICONST PARAM
+ * %token TYPECAST DOT_DOT COLON_EQUALS
+ * The above token definitions *must* be the first ones declared in any
+ * bison parser built atop this scanner, so that they will have consistent
+ * numbers assigned to them (specifically, IDENT = 258 and so on).
+ */
+
+/*
+ * The YY_EXTRA data that a flex scanner allows us to pass around.
+ * Private state needed by the core scanner goes here. Note that the actual
+ * yy_extra struct may be larger and have this as its first component, thus
+ * allowing the calling parser to keep some fields of its own in YY_EXTRA.
+ */
+typedef struct core_yy_extra_type
+{
+ /*
+ * The string the scanner is physically scanning. We keep this mainly so
+ * that we can cheaply compute the offset of the current token (yytext).
+ */
+ char *scanbuf;
+ Size scanbuflen;
+
+ /*
+ * The keyword list to use.
+ */
+ const ScanKeyword *keywords;
+ int num_keywords;
+
+ /*
+ * literalbuf is used to accumulate literal values when multiple rules
+ * are needed to parse a single literal. Call startlit() to reset buffer
+ * to empty, addlit() to add text. NOTE: the string in literalbuf is
+ * NOT necessarily null-terminated, but there always IS room to add a
+ * trailing null at offset literallen. We store a null only when we
+ * need it.
+ */
+ char *literalbuf; /* palloc'd expandable buffer */
+ int literallen; /* actual current string length */
+ int literalalloc; /* current allocated buffer size */
+
+ int xcdepth; /* depth of nesting in slash-star comments */
+ char *dolqstart; /* current $foo$ quote start string */
+
+ /* first part of UTF16 surrogate pair for Unicode escapes */
+ int32 utf16_first_part;
+
+ /* state variables for literal-lexing warnings */
+ bool warn_on_first_escape;
+ bool saw_non_ascii;
+} core_yy_extra_type;
+
+/*
+ * The type of yyscanner is opaque outside scan.l.
+ */
+typedef void *core_yyscan_t;
+
+
+/* Entry points in parser/scan.l */
+extern core_yyscan_t scanner_init(const char *str,
+ core_yy_extra_type *yyext,
+ const ScanKeyword *keywords,
+ int num_keywords);
+extern void scanner_finish(core_yyscan_t yyscanner);
+extern int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp,
+ core_yyscan_t yyscanner);
+extern int scanner_errposition(int location, core_yyscan_t yyscanner);
+extern void scanner_yyerror(const char *message, core_yyscan_t yyscanner);
+
+#endif /* SCANNER_H */