micropython/py/lexer.h
Damien d99b05282d Change object representation from 1 big union to individual structs.
A big change.  Micro Python objects are allocated as individual structs
with the first element being a pointer to the type information (which
is itself an object).  This scheme follows CPython.  Much more flexible,
not necessarily slower, uses same heap memory, and can allocate objects
statically.

Also change name prefix, from py_ to mp_ (mp for Micro Python).
2013-12-21 18:17:45 +00:00

141 lines
4.2 KiB
C

/* lexer.h -- simple tokeniser for Micro Python
 *
 * Uses an explicit (byte) length instead of null termination.
 * Tokens follow the same convention: UTF-8 text with a (byte) length.
 */
/*
 * Every kind of token the lexer can report.
 *
 * Enumerators take their implicit values (0, 1, 2, ...), so the order below
 * fixes each token's numeric value; the occasional "= N" remarks are just
 * counting aids.  Groups follow the name prefixes: KW_ for keywords, OP_ for
 * operators, DEL_ for delimiters.
 */
typedef enum _mp_token_kind_t {
    /* end of input and (going by their names) lexing-error markers */
    MP_TOKEN_END,                   /* = 0 */
    MP_TOKEN_INVALID,
    MP_TOKEN_DEDENT_MISMATCH,
    MP_TOKEN_LONELY_STRING_OPEN,

    /* line and block structure */
    MP_TOKEN_NEWLINE,               /* = 4 */
    MP_TOKEN_INDENT,
    MP_TOKEN_DEDENT,

    /* names and literals */
    MP_TOKEN_NAME,                  /* = 7 */
    MP_TOKEN_NUMBER,
    MP_TOKEN_STRING,
    MP_TOKEN_BYTES,
    MP_TOKEN_ELLIPSES,

    /* keywords */
    MP_TOKEN_KW_FALSE,              /* = 12 */
    MP_TOKEN_KW_NONE,
    MP_TOKEN_KW_TRUE,
    MP_TOKEN_KW_AND,
    MP_TOKEN_KW_AS,
    MP_TOKEN_KW_ASSERT,
    MP_TOKEN_KW_BREAK,
    MP_TOKEN_KW_CLASS,
    MP_TOKEN_KW_CONTINUE,
    MP_TOKEN_KW_DEF,                /* = 21 */
    MP_TOKEN_KW_DEL,
    MP_TOKEN_KW_ELIF,
    MP_TOKEN_KW_ELSE,
    MP_TOKEN_KW_EXCEPT,
    MP_TOKEN_KW_FINALLY,
    MP_TOKEN_KW_FOR,
    MP_TOKEN_KW_FROM,
    MP_TOKEN_KW_GLOBAL,
    MP_TOKEN_KW_IF,
    MP_TOKEN_KW_IMPORT,             /* = 31 */
    MP_TOKEN_KW_IN,
    MP_TOKEN_KW_IS,
    MP_TOKEN_KW_LAMBDA,
    MP_TOKEN_KW_NONLOCAL,
    MP_TOKEN_KW_NOT,
    MP_TOKEN_KW_OR,
    MP_TOKEN_KW_PASS,
    MP_TOKEN_KW_RAISE,
    MP_TOKEN_KW_RETURN,
    MP_TOKEN_KW_TRY,                /* = 41 */
    MP_TOKEN_KW_WHILE,
    MP_TOKEN_KW_WITH,
    MP_TOKEN_KW_YIELD,

    /* operators */
    MP_TOKEN_OP_PLUS,               /* = 45 */
    MP_TOKEN_OP_MINUS,
    MP_TOKEN_OP_STAR,
    MP_TOKEN_OP_DBL_STAR,
    MP_TOKEN_OP_SLASH,
    MP_TOKEN_OP_DBL_SLASH,
    MP_TOKEN_OP_PERCENT,
    MP_TOKEN_OP_LESS,
    MP_TOKEN_OP_DBL_LESS,
    MP_TOKEN_OP_MORE,
    MP_TOKEN_OP_DBL_MORE,           /* = 55 */
    MP_TOKEN_OP_AMPERSAND,
    MP_TOKEN_OP_PIPE,
    MP_TOKEN_OP_CARET,
    MP_TOKEN_OP_TILDE,
    MP_TOKEN_OP_LESS_EQUAL,
    MP_TOKEN_OP_MORE_EQUAL,
    MP_TOKEN_OP_DBL_EQUAL,
    MP_TOKEN_OP_NOT_EQUAL,

    /* delimiters: brackets and punctuation */
    MP_TOKEN_DEL_PAREN_OPEN,        /* = 64 */
    MP_TOKEN_DEL_PAREN_CLOSE,
    MP_TOKEN_DEL_BRACKET_OPEN,
    MP_TOKEN_DEL_BRACKET_CLOSE,
    MP_TOKEN_DEL_BRACE_OPEN,
    MP_TOKEN_DEL_BRACE_CLOSE,
    MP_TOKEN_DEL_COMMA,
    MP_TOKEN_DEL_COLON,
    MP_TOKEN_DEL_PERIOD,
    MP_TOKEN_DEL_SEMICOLON,
    MP_TOKEN_DEL_AT,                /* = 74 */

    /* delimiters: assignment forms and the minus-more token */
    MP_TOKEN_DEL_EQUAL,
    MP_TOKEN_DEL_PLUS_EQUAL,
    MP_TOKEN_DEL_MINUS_EQUAL,
    MP_TOKEN_DEL_STAR_EQUAL,
    MP_TOKEN_DEL_SLASH_EQUAL,
    MP_TOKEN_DEL_DBL_SLASH_EQUAL,
    MP_TOKEN_DEL_PERCENT_EQUAL,
    MP_TOKEN_DEL_AMPERSAND_EQUAL,
    MP_TOKEN_DEL_PIPE_EQUAL,
    MP_TOKEN_DEL_CARET_EQUAL,       /* = 84 */
    MP_TOKEN_DEL_DBL_MORE_EQUAL,
    MP_TOKEN_DEL_DBL_LESS_EQUAL,
    MP_TOKEN_DEL_DBL_STAR_EQUAL,
    MP_TOKEN_DEL_MINUS_MORE,        /* = 88, last */
} mp_token_kind_t;
/*
 * One token as reported by the lexer: where it came from, what kind it is,
 * and its text.  The text is given as a pointer plus a byte length (see the
 * note at the top of this file about lengths versus null termination).
 */
typedef struct _mp_token_t {
    /* origin of the token */
    const char *src_name;       // name of the source
    uint src_line;              // line within the source
    uint src_column;            // column within the source

    /* classification */
    mp_token_kind_t kind;       // which kind of token this is

    /* token text; str may only be used while this token is the current token */
    const char *str;            // start of the token's string
    uint len;                   // length of that string, in bytes
} mp_token_t;
// Value a next-char callback returns to signal end of stream.
#define MP_LEXER_CHAR_EOF (-1)

// Next-char callback contract:
//  - each call returns the next character in the stream;
//  - at end of stream it returns MP_LEXER_CHAR_EOF;
//  - it may be called again after that, and must then keep returning
//    MP_LEXER_CHAR_EOF.
// The void* argument is opaque here (presumably the stream_data handed to
// mp_lexer_new -- confirm in lexer.c).
typedef unichar (*mp_lexer_stream_next_char_t)(void *);

// Close callback for the stream; takes the same kind of opaque void* argument.
typedef void (*mp_lexer_stream_close_t)(void *);
// Lexer state.  Only forward-declared in this header, so users of the header
// handle it purely through mp_lexer_t pointers.
typedef struct _mp_lexer_t mp_lexer_t;
// Token display helpers.  Their behaviour is defined outside this header; the
// notes below are inferred from the names and signatures only.

// Presumably prints a description of tok -- confirm in the implementation.
void mp_token_show(const mp_token_t *tok);
// Presumably prints the leading part of an error message for tok (e.g. its
// source position) -- confirm in the implementation.
void mp_token_show_error_prefix(const mp_token_t *tok);
// Presumably reports the error message msg in the context of tok.
// NOTE(review): the meaning of the bool result is not visible from here.
bool mp_token_show_error(const mp_token_t *tok, const char *msg);
// Creates a lexer.
//   src_name         - name of the source
//   stream_data      - opaque pointer; presumably what gets passed to the two
//                      callbacks below (confirm in the implementation)
//   stream_next_char - callback that yields characters, returning
//                      MP_LEXER_CHAR_EOF at end of stream
//   stream_close     - callback that closes the stream
// Returns a pointer to the new lexer.
// NOTE(review): failure behaviour and ownership of src_name/stream_data are
// not visible from this header.
mp_lexer_t *mp_lexer_new(const char *src_name, void *stream_data, mp_lexer_stream_next_char_t stream_next_char, mp_lexer_stream_close_t stream_close);
// Releases a lexer obtained from mp_lexer_new (presumably also closing the
// stream via stream_close -- confirm in the implementation).
void mp_lexer_free(mp_lexer_t *lex);
// Moves the lexer on to the next token.  Note that a token's str is only
// valid while that token is the current one.
void mp_lexer_to_next(mp_lexer_t *lex);
// Returns a read-only pointer to a token of lex (going by the name, the
// current one).
const mp_token_t *mp_lexer_cur(const mp_lexer_t *lex);
// Presumably tests whether the current token of lex has the given kind --
// confirm in the implementation.
bool mp_lexer_is_kind(mp_lexer_t *lex, mp_token_kind_t kind);
/* unused
bool mp_lexer_is_str(mp_lexer_t *lex, const char *str);
bool mp_lexer_opt_kind(mp_lexer_t *lex, mp_token_kind_t kind);
bool mp_lexer_opt_str(mp_lexer_t *lex, const char *str);
*/
// Error-reporting helpers taking a lexer and a message.  Inferred from names
// and signatures only -- confirm details in the implementation.
// NOTE(review): the meaning of the bool result is not visible from here.

// Presumably reports msg in the context of lex's current token.
bool mp_lexer_show_error(mp_lexer_t *lex, const char *msg);
// Presumably the same report, formatted in the style of Python's own error
// messages.
bool mp_lexer_show_error_pythonic(mp_lexer_t *lex, const char *msg);