about summary refs log tree commit diff
path: root/src/libsyntax/parse/lexer
diff options
context:
space:
mode:
author Malo Jaffré <jaffre.malo@gmail.com> 2017-08-17 20:02:13 +0200
committer Malo Jaffré <jaffre.malo@gmail.com> 2017-08-17 20:03:32 +0200
commit d4e0e5228111cd47294342a60b5f8af44c65e206 (patch)
tree 5364e51b1733e40ff74fb64564ff76e2603e2778 /src/libsyntax/parse/lexer
parent dd39ecf368a3cdb937e129f36a2a342d0c9358f0 (diff)
download rust-d4e0e5228111cd47294342a60b5f8af44c65e206.tar.gz
rust-d4e0e5228111cd47294342a60b5f8af44c65e206.zip
Accept underscores in unicode escapes
Fixes #43692.
Diffstat (limited to 'src/libsyntax/parse/lexer')
-rw-r--r-- src/libsyntax/parse/lexer/mod.rs 87
1 files changed, 47 insertions, 40 deletions
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 527d2e41396..a80b7a112b0 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -963,60 +963,67 @@ impl<'a> StringReader<'a> {
         true
     }
 
-    /// Scan over a \u{...} escape
+    /// Scan over a `\u{...}` escape
     ///
-    /// At this point, we have already seen the \ and the u, the { is the current character. We
-    /// will read at least one digit, and up to 6, and pass over the }.
+    /// At this point, we have already seen the `\` and the `u`, the `{` is the current character.
+    /// We will read a hex number (with `_` separators), with 1 to 6 actual digits,
+    /// and pass over the `}`.
     fn scan_unicode_escape(&mut self, delim: char) -> bool {
         self.bump(); // past the {
         let start_bpos = self.pos;
-        let mut count = 0;
-        let mut accum_int = 0;
         let mut valid = true;
 
-        while !self.ch_is('}') && count <= 6 {
-            let c = match self.ch {
-                Some(c) => c,
-                None => {
-                    panic!(self.fatal_span_(start_bpos,
-                                            self.pos,
-                                            "unterminated unicode escape (found EOF)"));
-                }
-            };
-            accum_int *= 16;
-            accum_int += c.to_digit(16).unwrap_or_else(|| {
-                if c == delim {
-                    panic!(self.fatal_span_(self.pos,
-                                            self.next_pos,
-                                            "unterminated unicode escape (needed a `}`)"));
-                } else {
-                    self.err_span_char(self.pos,
-                                       self.next_pos,
-                                       "invalid character in unicode escape",
-                                       c);
-                }
-                valid = false;
-                0
-            });
-            self.bump();
-            count += 1;
+        if let Some('_') = self.ch {
+            // disallow leading `_`
+            self.err_span_(self.pos,
+                           self.next_pos,
+                           "invalid start of unicode escape");
+            valid = false;
         }
 
+        let count = self.scan_digits(16, 16);
+
         if count > 6 {
             self.err_span_(start_bpos,
                            self.pos,
-                           "overlong unicode escape (can have at most 6 hex digits)");
+                           "overlong unicode escape (must have at most 6 hex digits)");
             valid = false;
         }
-
-        if valid && (char::from_u32(accum_int).is_none() || count == 0) {
-            self.err_span_(start_bpos,
-                           self.pos,
-                           "invalid unicode character escape");
-            valid = false;
+        loop {
+            match self.ch {
+                Some('}') => {
+                    if valid && count == 0 {
+                        self.err_span_(start_bpos,
+                                       self.pos,
+                                       "empty unicode escape (must have at least 1 hex digit)");
+                        valid = false;
+                    }
+                    self.bump(); // past the ending `}`
+                    break;
+                },
+                Some(c) => {
+                    if c == delim {
+                        self.err_span_(self.pos,
+                                       self.pos,
+                                       "unterminated unicode escape (needed a `}`)");
+                        valid = false;
+                        break;
+                    } else if valid {
+                        self.err_span_char(start_bpos,
+                                           self.pos,
+                                           "invalid character in unicode escape",
+                                           c);
+                        valid = false;
+                    }
+                },
+                None => {
+                    panic!(self.fatal_span_(start_bpos,
+                                            self.pos,
+                                            "unterminated unicode escape (found EOF)"));
+                }
+            }
+            self.bump();
         }
-
-        self.bump(); // past the ending }
         valid
     }