diff options
| author | Graydon Hoare <graydon@mozilla.com> | 2011-12-29 14:45:18 -0800 |
|---|---|---|
| committer | Graydon Hoare <graydon@mozilla.com> | 2011-12-29 14:45:18 -0800 |
| commit | 36c55b20a89d45f394fceb5e83dd3a032e37810a (patch) | |
| tree | c57100f90f2df540829f1e6ea0520f29ef4b07ec /src/comp/syntax/parse | |
| parent | 5fd0a3be0cde0278b5927df8f670a25561eaa597 (diff) | |
| download | rust-36c55b20a89d45f394fceb5e83dd3a032e37810a.tar.gz rust-36c55b20a89d45f394fceb5e83dd3a032e37810a.zip | |
Add support in lexer for utf8 identifiers. No NFKC logic in char yet.
Diffstat (limited to 'src/comp/syntax/parse')
| -rw-r--r-- | src/comp/syntax/parse/lexer.rs | 6 |
1 file changed, 4 insertions, 2 deletions
```diff
diff --git a/src/comp/syntax/parse/lexer.rs b/src/comp/syntax/parse/lexer.rs
index 338c29ddb07..bee3d7783cb 100644
--- a/src/comp/syntax/parse/lexer.rs
+++ b/src/comp/syntax/parse/lexer.rs
@@ -309,14 +309,16 @@ fn next_token(rdr: reader) -> {tok: token::token, chpos: uint, bpos: uint} {
 fn next_token_inner(rdr: reader) -> token::token {
     let accum_str = "";
     let c = rdr.curr();
-    if is_alpha(c) || c == '_' {
-        while is_alnum(c) || c == '_' {
+    if char::is_XID_start(c) || c == '_' {
+        while char::is_XID_continue(c) {
             str::push_char(accum_str, c);
             rdr.bump();
             c = rdr.curr();
         }
         if str::eq(accum_str, "_") { ret token::UNDERSCORE; }
         let is_mod_name = c == ':' && rdr.next() == ':';
+
+        // FIXME: perform NFKC normalization here.
         ret token::IDENT(interner::intern::<str>(*rdr.get_interner(),
                                                  accum_str), is_mod_name);
     }
```
