Allow qstrs with non-identifier characters, and construct a valid identifier for them.

Also, add qstrs for the strings appearing in the unix REPL loop, the overall effect
being fewer allocations for each command run.
This commit is contained in:
Paul Sokolovsky 2014-01-24 00:22:00 +02:00
parent d552db426b
commit ab5d08280b
3 changed files with 22 additions and 12 deletions

View File

@ -1,5 +1,6 @@
import argparse
import re
from htmlentitydefs import codepoint2name
# this must match the equivalent function in qstr.c
def compute_hash(qstr):
@ -10,7 +11,7 @@ def compute_hash(qstr):
def do_work(infiles):
# read the qstrs in from the input files
qstrs = []
qstrs = {}
for infile in infiles:
with open(infile, 'rt') as f:
line_number = 0
@ -23,28 +24,29 @@ def do_work(infiles):
continue
# verify line is of the correct form
match = re.match(r'Q\(([0-9A-Za-z_]+)\)$', line)
match = re.match(r'Q\((.+)\)$', line)
if not match:
print('({}:{}) bad qstr format, got {}'.format(infile, line_number, line))
return False
# get the qstr value
qstr = match.group(1)
ident = re.sub(r'[^A-Za-z0-9_]', lambda s: "_" + codepoint2name[ord(s.group(0))] + "_", qstr)
# don't add duplicates
if qstr in qstrs:
if ident in qstrs:
continue
# add the qstr to the list
qstrs.append(qstr)
qstrs[ident] = qstr
# process the qstrs, printing out the generated C header file
print('// This file was automatically generated by makeqstrdata.py')
print('')
for qstr in qstrs:
for ident, qstr in qstrs.items():
qhash = compute_hash(qstr)
qlen = len(qstr)
print('Q({}, (const byte*)"\\x{:02x}\\x{:02x}\\x{:02x}\\x{:02x}" "{}")'.format(qstr, qhash & 0xff, (qhash >> 8) & 0xff, qlen & 0xff, (qlen >> 8) & 0xff, qstr))
print('Q({}, (const byte*)"\\x{:02x}\\x{:02x}\\x{:02x}\\x{:02x}" "{}")'.format(ident, qhash & 0xff, (qhash >> 8) & 0xff, qlen & 0xff, (qlen >> 8) & 0xff, qstr))
return True

View File

@ -80,3 +80,11 @@ Q(sort)
Q(join)
Q(strip)
Q(format)
Q(<module>)
Q(<lambda>)
Q(<listcomp>)
Q(<dictcomp>)
Q(<setcomp>)
Q(<genexpr>)
Q(<stdin>)

View File

@ -18,7 +18,7 @@ scope_t *scope_new(scope_kind_t kind, mp_parse_node_t pn, qstr source_file, uint
scope->source_file = source_file;
switch (kind) {
case SCOPE_MODULE:
scope->simple_name = QSTR_FROM_STR_STATIC("<module>");
scope->simple_name = MP_QSTR__lt_module_gt_;
break;
case SCOPE_FUNCTION:
case SCOPE_CLASS:
@ -26,19 +26,19 @@ scope_t *scope_new(scope_kind_t kind, mp_parse_node_t pn, qstr source_file, uint
scope->simple_name = MP_PARSE_NODE_LEAF_ARG(((mp_parse_node_struct_t*)pn)->nodes[0]);
break;
case SCOPE_LAMBDA:
scope->simple_name = QSTR_FROM_STR_STATIC("<lambda>");
scope->simple_name = MP_QSTR__lt_lambda_gt_;
break;
case SCOPE_LIST_COMP:
scope->simple_name = QSTR_FROM_STR_STATIC("<listcomp>");
scope->simple_name = MP_QSTR__lt_listcomp_gt_;
break;
case SCOPE_DICT_COMP:
scope->simple_name = QSTR_FROM_STR_STATIC("<dictcomp>");
scope->simple_name = MP_QSTR__lt_dictcomp_gt_;
break;
case SCOPE_SET_COMP:
scope->simple_name = QSTR_FROM_STR_STATIC("<setcomp>");
scope->simple_name = MP_QSTR__lt_setcomp_gt_;
break;
case SCOPE_GEN_EXPR:
scope->simple_name = QSTR_FROM_STR_STATIC("<genexpr>");
scope->simple_name = MP_QSTR__lt_genexpr_gt_;
break;
default:
assert(0);