summaryrefslogtreecommitdiff
path: root/py/parse.c
diff options
context:
space:
mode:
authorDamien George <damien.p.george@gmail.com>2014-05-25 22:06:06 +0100
committerDamien George <damien.p.george@gmail.com>2014-05-25 22:06:06 +0100
commit5042bce8fb268849ba1afcbeb946b840318c4e49 (patch)
tree590b0225d9b29e7f1a7de71942ef30c5b79745d0 /py/parse.c
parent3aaabd11a044701991ca929f873983a679841199 (diff)
py: Don't automatically intern strings in parser.
This completes non-automatic interning of strings in the parser, so that doc strings don't take up RAM. It complicates the parser and compiler, and bloats stmhal by about 300 bytes. It's complicated because now there are 2 kinds of parse-nodes that can be strings: interned leaves and non-interned structs.
Diffstat (limited to 'py/parse.c')
-rw-r--r--py/parse.c95
1 files changed, 54 insertions, 41 deletions
diff --git a/py/parse.c b/py/parse.c
index b9a1e404d..b255ebd35 100644
--- a/py/parse.c
+++ b/py/parse.c
@@ -28,7 +28,7 @@
#include <stdint.h>
#include <stdio.h>
#include <assert.h>
-#include <memory.h>
+#include <string.h>
#include "misc.h"
#include "mpconfig.h"
@@ -71,7 +71,7 @@ enum {
#include "grammar.h"
#undef DEF_RULE
RULE_maximum_number_of,
- RULE_string,
+ RULE_string, // special node for non-interned string
};
#define or(n) (RULE_ACT_OR | n)
@@ -172,26 +172,26 @@ mp_parse_node_t mp_parse_node_new_leaf(machine_int_t kind, machine_int_t arg) {
return (mp_parse_node_t)(kind | (arg << 5));
}
-uint mp_parse_node_free(mp_parse_node_t pn) {
- uint cnt = 0;
+void mp_parse_node_free(mp_parse_node_t pn) {
if (MP_PARSE_NODE_IS_STRUCT(pn)) {
mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn;
uint n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
uint rule_id = MP_PARSE_NODE_STRUCT_KIND(pns);
+ if (rule_id == RULE_string) {
+ return;
+ }
bool adjust = ADD_BLANK_NODE(rule_id);
if (adjust) {
n--;
}
for (uint i = 0; i < n; i++) {
- cnt += mp_parse_node_free(pns->nodes[i]);
+ mp_parse_node_free(pns->nodes[i]);
}
if (adjust) {
n++;
}
m_del_var(mp_parse_node_struct_t, mp_parse_node_t, n, pns);
- cnt++;
}
- return cnt;
}
#if MICROPY_DEBUG_PRINTERS
@@ -220,19 +220,21 @@ void mp_parse_node_print(mp_parse_node_t pn, int indent) {
case MP_PARSE_NODE_TOKEN: printf("tok(" INT_FMT ")\n", arg); break;
default: assert(0);
}
- } else if (MP_PARSE_NODE_IS_STRING(pn)) {
- mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
- printf("literal str(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
} else {
+ // node must be a mp_parse_node_struct_t
mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
- uint n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
+ if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_string) {
+ printf("literal str(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
+ } else {
+ uint n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
#ifdef USE_RULE_NAME
- printf("%s(%d) (n=%d)\n", rules[MP_PARSE_NODE_STRUCT_KIND(pns)]->rule_name, MP_PARSE_NODE_STRUCT_KIND(pns), n);
+ printf("%s(%d) (n=%d)\n", rules[MP_PARSE_NODE_STRUCT_KIND(pns)]->rule_name, MP_PARSE_NODE_STRUCT_KIND(pns), n);
#else
- printf("rule(%u) (n=%d)\n", (uint)MP_PARSE_NODE_STRUCT_KIND(pns), n);
+ printf("rule(%u) (n=%d)\n", (uint)MP_PARSE_NODE_STRUCT_KIND(pns), n);
#endif
- for (uint i = 0; i < n; i++) {
- mp_parse_node_print(pns->nodes[i], indent + 2);
+ for (uint i = 0; i < n; i++) {
+ mp_parse_node_print(pns->nodes[i], indent + 2);
+ }
}
}
}
@@ -279,7 +281,20 @@ STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) {
parser->result_stack[parser->result_stack_top++] = pn;
}
-STATIC void push_string(parser_t *parser, int src_line, const char *str, uint len);
+STATIC void push_result_string(parser_t *parser, int src_line, const char *str, uint len) {
+ mp_parse_node_struct_t *pn = m_new_obj_var_maybe(mp_parse_node_struct_t, mp_parse_node_t, 2);
+ if (pn == NULL) {
+ memory_error(parser);
+ return;
+ }
+ pn->source_line = src_line;
+ pn->kind_num_nodes = RULE_string | (2 << 8);
+ char *p = m_new(char, len);
+ memcpy(p, str, len);
+ pn->nodes[0] = (machine_int_t)p;
+ pn->nodes[1] = len;
+ push_result_node(parser, (mp_parse_node_t)pn);
+}
STATIC void push_result_token(parser_t *parser, const mp_lexer_t *lex) {
const mp_token_t *tok = mp_lexer_cur(lex);
@@ -326,10 +341,24 @@ STATIC void push_result_token(parser_t *parser, const mp_lexer_t *lex) {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_INTEGER, qstr_from_strn(str, len));
}
} else if (tok->kind == MP_TOKEN_STRING) {
-printf("Pushing string\n");
- push_string(parser, mp_lexer_cur(lex)->src_line, tok->str, tok->len);
- return;
-// pn = mp_parse_node_new_leaf(MP_PARSE_NODE_STRING, qstr_from_strn(tok->str, tok->len));
+ // Don't automatically intern all strings. doc strings (which are usually large)
+ // will be discarded by the compiler, and so we shouldn't intern them.
+ qstr qst = MP_QSTR_NULL;
+ if (tok->len <= MICROPY_ALLOC_PARSE_INTERN_STRING_LEN) {
+ // intern short strings
+ qst = qstr_from_strn(tok->str, tok->len);
+ } else {
+ // check if this string is already interned
+ qst = qstr_find_strn((const byte*)tok->str, tok->len);
+ }
+ if (qst != MP_QSTR_NULL) {
+ // qstr exists, make a leaf node
+ pn = mp_parse_node_new_leaf(MP_PARSE_NODE_STRING, qst);
+ } else {
+ // not interned, make a node holding a pointer to the string data
+ push_result_string(parser, mp_lexer_cur(lex)->src_line, tok->str, tok->len);
+ return;
+ }
} else if (tok->kind == MP_TOKEN_BYTES) {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_BYTES, qstr_from_strn(tok->str, tok->len));
} else {
@@ -338,21 +367,6 @@ printf("Pushing string\n");
push_result_node(parser, pn);
}
-STATIC void push_string(parser_t *parser, int src_line, const char *str, uint len) {
- mp_parse_node_struct_t *pn = m_new_obj_var_maybe(mp_parse_node_struct_t, mp_parse_node_t, 2);
- if (pn == NULL) {
- memory_error(parser);
- return;
- }
- pn->source_line = src_line;
- pn->kind_num_nodes = RULE_string | (2 << 8);
- char *p = m_new(char, len);
- memcpy(p, str, len);
- pn->nodes[0] = (machine_int_t)p;
- pn->nodes[1] = len;
- push_result_node(parser, (mp_parse_node_t)pn);
-}
-
STATIC void push_result_rule(parser_t *parser, int src_line, const rule_t *rule, int num_args) {
mp_parse_node_struct_t *pn = m_new_obj_var_maybe(mp_parse_node_struct_t, mp_parse_node_t, num_args);
if (pn == NULL) {
@@ -541,14 +555,13 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_p
}
}
-#if 1 && !MICROPY_ENABLE_DOC_STRING
- // this code discards lonely statement, such as doc strings
- // problem is that doc strings have already been interned, so this doesn't really help reduce RAM usage
+#if !MICROPY_EMIT_CPYTHON && !MICROPY_ENABLE_DOC_STRING
+ // this code discards lonely statements, such as doc strings
if (input_kind != MP_PARSE_SINGLE_INPUT && rule->rule_id == RULE_expr_stmt && peek_result(&parser, 0) == MP_PARSE_NODE_NULL) {
mp_parse_node_t p = peek_result(&parser, 1);
- if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p)) || MP_PARSE_NODE_IS_STRING(p)) {
- pop_result(parser);
- pop_result(parser);
+ if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p)) || MP_PARSE_NODE_IS_STRUCT_KIND(p, RULE_string)) {
+ pop_result(&parser);
+ pop_result(&parser);
push_result_rule(&parser, rule_src_line, rules[RULE_pass_stmt], 0);
break;
}