diff options
| author | Charles Lew <crlf0710@gmail.com> | 2019-11-23 22:33:40 +0800 |
|---|---|---|
| committer | Charles Lew <crlf0710@gmail.com> | 2019-12-26 13:12:58 +0800 |
| commit | 0bcddfe23a4b96d970f953b99a2e4f28ece3c0d6 (patch) | |
| tree | 21f2e652d6e3d8111a7a075a2b931990c94312ca /src/librustc_parse | |
| parent | b13d65ab9274323de72539556f2c5f7eef29f4a1 (diff) | |
| download | rust-0bcddfe23a4b96d970f953b99a2e4f28ece3c0d6.tar.gz rust-0bcddfe23a4b96d970f953b99a2e4f28ece3c0d6.zip | |
Normalize identifiers in librustc_parse.
Diffstat (limited to 'src/librustc_parse')
| -rw-r--r-- | src/librustc_parse/Cargo.toml | 1 | ||||
| -rw-r--r-- | src/librustc_parse/lexer/mod.rs | 17 |
2 files changed, 16 insertions, 2 deletions
diff --git a/src/librustc_parse/Cargo.toml b/src/librustc_parse/Cargo.toml index fb5cb742ab6..73458a444f4 100644 --- a/src/librustc_parse/Cargo.toml +++ b/src/librustc_parse/Cargo.toml @@ -20,3 +20,4 @@ rustc_error_codes = { path = "../librustc_error_codes" } smallvec = { version = "1.0", features = ["union", "may_dangle"] } syntax_pos = { path = "../libsyntax_pos" } syntax = { path = "../libsyntax" } +unicode-normalization = "0.1.11" diff --git a/src/librustc_parse/lexer/mod.rs b/src/librustc_parse/lexer/mod.rs index e5d3927af86..d69cd14d544 100644 --- a/src/librustc_parse/lexer/mod.rs +++ b/src/librustc_parse/lexer/mod.rs @@ -220,8 +220,7 @@ impl<'a> StringReader<'a> { if is_raw_ident { ident_start = ident_start + BytePos(2); } - // FIXME: perform NFKC normalization here. (Issue #2253) - let sym = self.symbol_from(ident_start); + let sym = self.nfc_symbol_from(ident_start); if is_raw_ident { let span = self.mk_sp(start, self.pos); if !sym.can_be_raw() { @@ -470,6 +469,20 @@ impl<'a> StringReader<'a> { Symbol::intern(self.str_from_to(start, end)) } + /// As symbol_from, with the text normalized into Unicode NFC form. + fn nfc_symbol_from(&self, start: BytePos) -> Symbol { + use unicode_normalization::{is_nfc_quick, IsNormalized, UnicodeNormalization}; + debug!("taking an normalized ident from {:?} to {:?}", start, self.pos); + let sym = self.str_from(start); + match is_nfc_quick(sym.chars()) { + IsNormalized::Yes => Symbol::intern(sym), + _ => { + let sym_str: String = sym.chars().nfc().collect(); + Symbol::intern(&sym_str) + } + } + } + /// Slice of the source text spanning from `start` up to but excluding `end`. fn str_from_to(&self, start: BytePos, end: BytePos) -> &str { &self.src[self.src_index(start)..self.src_index(end)] |
