rollup merge of #19480: cmr/es6-escape

First half of bootstrapping https://github.com/rust-lang/rfcs/pull/446
author: Corey Richardson <corey@octayn.net> 2014-12-05 10:07:18 -0800
committer: Corey Richardson <corey@octayn.net> 2014-12-05 10:07:18 -0800
commit: 1b2b24a6afbb5ba9eee3f6594eb6926d022704ad (patch)
tree: c110e8738140a670c82a58943d8830d77346b41e /src/libsyntax/parse
parent: 7464a29a37e08626c4b9dbb479e4525e108f0ca6 (diff)
parent: 2e1a50121ef265214c5e2a7d82fe40b4928575ab (diff)
download: rust-1b2b24a6afbb5ba9eee3f6594eb6926d022704ad.tar.gz
rust-1b2b24a6afbb5ba9eee3f6594eb6926d022704ad.zip
2 files changed, 95 insertions, 8 deletions
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 57983a6dee6..27b65e0f527 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -764,6 +764,15 @@ impl<'a> StringReader<'a> {
         }
     }
 
+    // SNAP c9f6d69: warn, with a help note, that \uABCD and \U00ABCD12 escapes are deprecated.
+    #[allow(unused)] // the only call sites in this patch are still commented out
+    fn old_escape_warning(&mut self, sp: Span) {
+        self.span_diagnostic
+            .span_warn(sp, "\\U00ABCD12 and \\uABCD escapes are deprecated");
+        self.span_diagnostic
+            .span_help(sp, "use \\u{ABCD12} escapes instead");
+    }
+
     /// Scan for a single (possibly escaped) byte or char
     /// in a byte, (non-raw) byte string, char, or (non-raw) string literal.
     /// `start` is the position of `first_source_char`, which is already consumed.
@@ -782,12 +791,24 @@ impl<'a> StringReader<'a> {
                     Some(e) => {
                         return match e {
                             'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true,
-                            'x' => self.scan_hex_digits(2u, delim, !ascii_only),
+                            'x' => self.scan_byte_escape(delim, !ascii_only),
                             'u' if !ascii_only => {
-                                self.scan_hex_digits(4u, delim, false)
+                                if self.curr == Some('{') {
+                                    self.scan_unicode_escape(delim)
+                                } else {
+                                    let res = self.scan_hex_digits(4u, delim, false);
+                                    // SNAP c9f6d69
+                                    //let sp = codemap::mk_sp(escaped_pos, self.last_pos);
+                                    //self.old_escape_warning(sp);
+                                    res
+                                }
                             }
                             'U' if !ascii_only => {
-                                self.scan_hex_digits(8u, delim, false)
+                                let res = self.scan_hex_digits(8u, delim, false);
+                                // SNAP c9f6d69
+                                //let sp = codemap::mk_sp(escaped_pos, self.last_pos);
+                                //self.old_escape_warning(sp);
+                                res
                             }
                             '\n' if delim == '"' => {
                                 self.consume_whitespace();
@@ -848,6 +869,56 @@ impl<'a> StringReader<'a> {
         true
     }
 
+    /// Scan over a `\u{...}` escape: one to six hex digits naming a valid `char`, then `}`.
+    ///
+    /// On entry the `\` and the `u` have been consumed and `{` is the current character.
+    /// Malformed escapes go to `fatal_span_`/`fatal_span_char`; returns `true` when well formed.
+    fn scan_unicode_escape(&mut self, delim: char) -> bool {
+        self.bump(); // past the {
+        let start_bpos = self.last_pos;
+        let mut count: uint = 0;
+        let mut accum_int = 0;
+
+        while !self.curr_is('}') && count <= 6 { // a 7th digit is read so overlong input is caught
+            let c = match self.curr {
+                Some(c) => c,
+                None => {
+                    self.fatal_span_(start_bpos, self.last_pos,
+                                     "unterminated unicode escape (found EOF)");
+                }
+            };
+            accum_int *= 16; // make room for the next hex digit
+            accum_int += c.to_digit(16).unwrap_or_else(|| {
+                if c == delim {
+                    self.fatal_span_(self.last_pos, self.pos,
+                                     "unterminated unicode escape (needed a `}`)");
+                } else {
+                    self.fatal_span_char(self.last_pos, self.pos,
+                                   "illegal character in unicode escape", c);
+                }
+            }) as u32;
+            self.bump();
+            count += 1;
+        }
+
+        if count > 6 {
+            self.fatal_span_(start_bpos, self.last_pos,
+                          "overlong unicode escape (can have at most 6 hex digits)");
+        }
+
+        self.bump(); // past the ending }
+
+        let mut valid = count >= 1 && count <= 6; // false for an empty `\u{}`
+        if char::from_u32(accum_int).is_none() { // not a Unicode scalar value
+            valid = false;
+        }
+
+        if !valid {
+            self.fatal_span_(start_bpos, self.last_pos, "illegal unicode character escape");
+        }
+        valid
+    }
+
     /// Scan over a float exponent.
     fn scan_float_exponent(&mut self) {
         if self.curr_is('e') || self.curr_is('E') {
@@ -1273,6 +1344,10 @@ impl<'a> StringReader<'a> {
         return token::Byte(id);
     }
 
+    fn scan_byte_escape(&mut self, delim: char, below_0x7f_only: bool) -> bool {
+        self.scan_hex_digits(2, delim, below_0x7f_only) // `\x` escape: hex-digit count of 2
+    }
+
     fn scan_byte_string(&mut self) -> token::Lit {
         self.bump();
         let start = self.last_pos;
diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs
index b46f7cdfe22..8d0c2de048a 100644
--- a/src/libsyntax/parse/mod.rs
+++ b/src/libsyntax/parse/mod.rs
@@ -393,16 +393,28 @@ pub fn char_lit(lit: &str) -> (char, int) {
     let msg = format!("lexer should have rejected a bad character escape {}", lit);
     let msg2 = msg.as_slice();
 
-    let esc: |uint| -> Option<(char, int)> = |len|
+    fn esc(len: uint, lit: &str) -> Option<(char, int)> {
         num::from_str_radix(lit.slice(2, len), 16)
         .and_then(char::from_u32)
-        .map(|x| (x, len as int));
+        .map(|x| (x, len as int))
+    }
+
+    let unicode_escape: || -> Option<(char, int)> = ||
+        if lit.as_bytes()[2] == b'{' {
+            let idx = lit.find('}').expect(msg2);
+            let subslice = lit.slice(3, idx);
+            num::from_str_radix(subslice, 16)
+                .and_then(char::from_u32)
+                .map(|x| (x, subslice.char_len() as int + 4))
+        } else {
+            esc(6, lit)
+        };
 
     // Unicode escapes
     return match lit.as_bytes()[1] as char {
-        'x' | 'X' => esc(4),
-        'u' => esc(6),
-        'U' => esc(10),
+        'x' | 'X' => esc(4, lit),
+        'u' => unicode_escape(),
+        'U' => esc(10, lit),
         _ => None,
     }.expect(msg2);
 }
author	Corey Richardson <corey@octayn.net>	2014-12-05 10:07:18 -0800
committer	Corey Richardson <corey@octayn.net>	2014-12-05 10:07:18 -0800
commit	1b2b24a6afbb5ba9eee3f6594eb6926d022704ad (patch)
tree	c110e8738140a670c82a58943d8830d77346b41e /src/libsyntax/parse
parent	7464a29a37e08626c4b9dbb479e4525e108f0ca6 (diff)
parent	2e1a50121ef265214c5e2a7d82fe40b4928575ab (diff)
download	rust-1b2b24a6afbb5ba9eee3f6594eb6926d022704ad.tar.gz rust-1b2b24a6afbb5ba9eee3f6594eb6926d022704ad.zip