diff options
Diffstat (limited to 'src/libsyntax/parse/token.rs')
| -rw-r--r-- | src/libsyntax/parse/token.rs | 286 |
1 files changed, 286 insertions, 0 deletions
diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs new file mode 100644 index 00000000000..1c6f240cf82 --- /dev/null +++ b/src/libsyntax/parse/token.rs @@ -0,0 +1,286 @@ + +import util::interner; +import util::interner::interner; +import std::map::{hashmap, str_hash}; + +type str_num = uint; + +enum binop { + PLUS, + MINUS, + STAR, + SLASH, + PERCENT, + CARET, + AND, + OR, + SHL, + SHR, +} + +enum token { + /* Expression-operator symbols. */ + EQ, + LT, + LE, + EQEQ, + NE, + GE, + GT, + ANDAND, + OROR, + NOT, + TILDE, + BINOP(binop), + BINOPEQ(binop), + + /* Structural symbols */ + AT, + DOT, + ELLIPSIS, + COMMA, + SEMI, + COLON, + MOD_SEP, + RARROW, + LARROW, + DARROW, + LPAREN, + RPAREN, + LBRACKET, + RBRACKET, + LBRACE, + RBRACE, + POUND, + DOLLAR, + + /* Literals */ + LIT_INT(i64, ast::int_ty), + LIT_UINT(u64, ast::uint_ty), + LIT_FLOAT(str_num, ast::float_ty), + LIT_STR(str_num), + + /* Name components */ + IDENT(str_num, bool), + UNDERSCORE, + EOF, + +} + +fn binop_to_str(o: binop) -> str { + alt o { + PLUS { ret "+"; } + MINUS { ret "-"; } + STAR { ret "*"; } + SLASH { ret "/"; } + PERCENT { ret "%"; } + CARET { ret "^"; } + AND { ret "&"; } + OR { ret "|"; } + SHL { ret "<<"; } + SHR { ret ">>"; } + } +} + +fn to_str(in: interner<str>, t: token) -> str { + alt t { + EQ { ret "="; } + LT { ret "<"; } + LE { ret "<="; } + EQEQ { ret "=="; } + NE { ret "!="; } + GE { ret ">="; } + GT { ret ">"; } + NOT { ret "!"; } + TILDE { ret "~"; } + OROR { ret "||"; } + ANDAND { ret "&&"; } + BINOP(op) { ret binop_to_str(op); } + BINOPEQ(op) { ret binop_to_str(op) + "="; } + + /* Structural symbols */ + AT { + ret "@"; + } + DOT { ret "."; } + ELLIPSIS { ret "..."; } + COMMA { ret ","; } + SEMI { ret ";"; } + COLON { ret ":"; } + MOD_SEP { ret "::"; } + RARROW { ret "->"; } + LARROW { ret "<-"; } + DARROW { ret "<->"; } + LPAREN { ret "("; } + RPAREN { ret ")"; } + LBRACKET { ret "["; } + RBRACKET { ret "]"; } + LBRACE { ret "{"; } + RBRACE { ret "}"; } + POUND { ret "#"; } + DOLLAR { ret "$"; } + + /* Literals */ + LIT_INT(c, ast::ty_char) { + // FIXME: escape. + let mut tmp = "'"; + str::push_char(tmp, c as char); + str::push_char(tmp, '\''); + ret tmp; + } + LIT_INT(i, t) { + ret int::to_str(i as int, 10u) + ast_util::int_ty_to_str(t); + } + LIT_UINT(u, t) { + ret uint::to_str(u as uint, 10u) + ast_util::uint_ty_to_str(t); + } + LIT_FLOAT(s, t) { + ret interner::get::<str>(in, s) + + ast_util::float_ty_to_str(t); + } + LIT_STR(s) { // FIXME: escape. + ret "\"" + interner::get::<str>(in, s) + "\""; + } + + /* Name components */ + IDENT(s, _) { + ret interner::get::<str>(in, s); + } + UNDERSCORE { ret "_"; } + EOF { ret "<eof>"; } + } +} + + +pure fn can_begin_expr(t: token) -> bool { + alt t { + LPAREN { true } + LBRACE { true } + LBRACKET { true } + IDENT(_, _) { true } + UNDERSCORE { true } + TILDE { true } + LIT_INT(_, _) { true } + LIT_UINT(_, _) { true } + LIT_FLOAT(_, _) { true } + LIT_STR(_) { true } + POUND { true } + AT { true } + NOT { true } + BINOP(MINUS) { true } + BINOP(STAR) { true } + BINOP(AND) { true } + MOD_SEP { true } + _ { false } + } +} + +fn is_lit(t: token::token) -> bool { + ret alt t { + token::LIT_INT(_, _) { true } + token::LIT_UINT(_, _) { true } + token::LIT_FLOAT(_, _) { true } + token::LIT_STR(_) { true } + _ { false } + } +} + +fn is_ident(t: token::token) -> bool { + alt t { token::IDENT(_, _) { ret true; } _ { } } + ret false; +} + +fn is_plain_ident(t: token::token) -> bool { + ret alt t { token::IDENT(_, false) { true } _ { false } }; +} + +fn is_bar(t: token::token) -> bool { + alt t { token::BINOP(token::OR) | token::OROR { true } _ { false } } +} + +#[doc = " +All the valid words that have meaning in the Rust language. + +Rust keywords are either 'contextual' or 'restricted'. Contextual +keywords may be used as identifiers because their appearance in +the grammar is unambiguous. Restricted keywords may not appear +in positions that might otherwise contain _value identifiers_. +"] +fn keyword_table() -> hashmap<str, ()> { + let keywords = str_hash(); + for contextual_keyword_table().each_key {|word| + keywords.insert(word, ()); + } + for restricted_keyword_table().each_key {|word| + keywords.insert(word, ()); + } + ret keywords; +} + +#[doc = "Keywords that may be used as identifiers"] +fn contextual_keyword_table() -> hashmap<str, ()> { + let words = str_hash(); + let keys = [ + "as", + "bind", + "else", + "implements", + "move", + "of", + "priv", "pub", + "self", "send", "static", + "to", + "use", + "with" + ]; + for keys.each {|word| + words.insert(word, ()); + } + words +} + +#[doc = " +Keywords that may not appear in any position that might otherwise contain a +_value identifier_. Restricted keywords may still be used as other types of +identifiers. + +Reasons: + +* For some (most?), if used at the start of a line, they will cause the line + to be interpreted as a specific kind of statement, which would be confusing. + +* `true` or `false` as identifiers would always be shadowed by + the boolean constants +"] +fn restricted_keyword_table() -> hashmap<str, ()> { + let words = str_hash(); + let keys = [ + "alt", + "assert", + "be", "break", + "check", "claim", "class", "const", "cont", "copy", "crust", + "drop", + "else", "enum", "export", + "fail", "false", "fn", "for", + "if", "iface", "impl", "import", + "let", "log", "loop", + "mod", "mut", + "native", "new", + "pure", + "resource", "ret", + "true", "trait", "type", + "unchecked", "unsafe", + "while" + ]; + for keys.each {|word| + words.insert(word, ()); + } + words +} + +// Local Variables: +// fill-column: 78; +// indent-tabs-mode: nil +// c-basic-offset: 4 +// buffer-file-coding-system: utf-8-unix +// End: |
