about summary refs log tree commit diff
path: root/src/librustc_parse
diff options
context:
space:
mode:
author Charles Lew <crlf0710@gmail.com> 2019-11-23 22:33:40 +0800
committer Charles Lew <crlf0710@gmail.com> 2019-12-26 13:12:58 +0800
commit 0bcddfe23a4b96d970f953b99a2e4f28ece3c0d6 (patch)
tree 21f2e652d6e3d8111a7a075a2b931990c94312ca /src/librustc_parse
parent b13d65ab9274323de72539556f2c5f7eef29f4a1 (diff)
download rust-0bcddfe23a4b96d970f953b99a2e4f28ece3c0d6.tar.gz
rust-0bcddfe23a4b96d970f953b99a2e4f28ece3c0d6.zip
Normalize identifiers in librustc_parse.
Diffstat (limited to 'src/librustc_parse')
-rw-r--r-- src/librustc_parse/Cargo.toml 1
-rw-r--r-- src/librustc_parse/lexer/mod.rs 17
2 files changed, 16 insertions, 2 deletions
diff --git a/src/librustc_parse/Cargo.toml b/src/librustc_parse/Cargo.toml
index fb5cb742ab6..73458a444f4 100644
--- a/src/librustc_parse/Cargo.toml
+++ b/src/librustc_parse/Cargo.toml
@@ -20,3 +20,4 @@ rustc_error_codes = { path = "../librustc_error_codes" }
 smallvec = { version = "1.0", features = ["union", "may_dangle"] }
 syntax_pos = { path = "../libsyntax_pos" }
 syntax = { path = "../libsyntax" }
+unicode-normalization = "0.1.11"
diff --git a/src/librustc_parse/lexer/mod.rs b/src/librustc_parse/lexer/mod.rs
index e5d3927af86..d69cd14d544 100644
--- a/src/librustc_parse/lexer/mod.rs
+++ b/src/librustc_parse/lexer/mod.rs
@@ -220,8 +220,7 @@ impl<'a> StringReader<'a> {
                 if is_raw_ident {
                     ident_start = ident_start + BytePos(2);
                 }
-                // FIXME: perform NFKC normalization here. (Issue #2253)
-                let sym = self.symbol_from(ident_start);
+                let sym = self.nfc_symbol_from(ident_start);
                 if is_raw_ident {
                     let span = self.mk_sp(start, self.pos);
                     if !sym.can_be_raw() {
@@ -470,6 +469,20 @@ impl<'a> StringReader<'a> {
         Symbol::intern(self.str_from_to(start, end))
     }
 
+    /// As `symbol_from`, but the text is normalized into Unicode NFC form before being interned.
+    fn nfc_symbol_from(&self, start: BytePos) -> Symbol {
+        use unicode_normalization::{is_nfc_quick, IsNormalized, UnicodeNormalization};
+        debug!("taking an normalized ident from {:?} to {:?}", start, self.pos);
+        let sym = self.str_from(start);
+        match is_nfc_quick(sym.chars()) {
+            IsNormalized::Yes => Symbol::intern(sym),
+            _ => {
+                let sym_str: String = sym.chars().nfc().collect();
+                Symbol::intern(&sym_str)
+            }
+        }
+    }
+
     /// Slice of the source text spanning from `start` up to but excluding `end`.
     fn str_from_to(&self, start: BytePos, end: BytePos) -> &str {
         &self.src[self.src_index(start)..self.src_index(end)]