Add a b"xx" byte string literal of type &'static [u8].

author: Simon Sapin <simon.sapin@exyr.org> 2014-06-07 15:32:01 +0100
committer: Simon Sapin <simon.sapin@exyr.org> 2014-06-17 23:43:18 +0200
commit: d7e01b5809cd600a30bab29da698acb3d1b52409 (patch)
tree: 286205104bc60273b9dae149b21c10a4b94a1764 /src/libsyntax/parse/lexer
parent: bccdba02960b3cd428addbc2c856065ebb81eb04 (diff)
download: rust-d7e01b5809cd600a30bab29da698acb3d1b52409.tar.gz
rust-d7e01b5809cd600a30bab29da698acb3d1b52409.zip
1 files changed, 111 insertions, 48 deletions
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 7e4cb195cea..59bcf059fcd 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -654,7 +654,8 @@ impl<'a> StringReader<'a> {
             // Note: r as in r" or r#" is part of a raw string literal,
             // b as in b' is part of a byte literal.
             // They are not identifiers, and are handled further down.
-           ('r', Some('"')) | ('r', Some('#')) | ('b', Some('\'')) => false,
+           ('r', Some('"')) | ('r', Some('#')) |
+           ('b', Some('"')) | ('b', Some('\'')) => false,
            _ => true
         } {
             let start = self.last_pos;
@@ -859,62 +860,124 @@ impl<'a> StringReader<'a> {
           }
           'b' => {
             self.bump();
-            assert!(self.curr_is('\''), "Should have been a token::IDENT");
-            self.bump();
-            let start = self.last_pos;
-
-            // the eof will be picked up by the final `'` check below
-            let mut c2 = self.curr.unwrap_or('\x00');
-            self.bump();
+            return match self.curr {
+                Some('\'') => parse_byte(self),
+                Some('"') => parse_byte_string(self),
+                _ => unreachable!()  // Should have been a token::IDENT above.
+            };
 
-            match c2 {
-                '\\' => {
-                    // '\X' for some X must be a character constant:
-                    let escaped = self.curr;
-                    let escaped_pos = self.last_pos;
-                    self.bump();
-                    match escaped {
-                        None => {}
-                        Some(e) => {
-                            c2 = match e {
-                                'n' => '\n',
-                                'r' => '\r',
-                                't' => '\t',
-                                '\\' => '\\',
-                                '\'' => '\'',
-                                '"' => '"',
-                                '0' => '\x00',
-                                'x' => self.scan_numeric_escape(2u, '\''),
-                                c2 => {
-                                    self.err_span_char(escaped_pos, self.last_pos,
-                                                       "unknown byte escape", c2);
-                                    c2
+            fn parse_byte(self_: &mut StringReader) -> token::Token {
+                self_.bump();
+                let start = self_.last_pos;
+
+                // the eof will be picked up by the final `'` check below
+                let mut c2 = self_.curr.unwrap_or('\x00');
+                self_.bump();
+
+                match c2 {
+                    '\\' => {
+                        // '\X' for some X must be a character constant:
+                        let escaped = self_.curr;
+                        let escaped_pos = self_.last_pos;
+                        self_.bump();
+                        match escaped {
+                            None => {}
+                            Some(e) => {
+                                c2 = match e {
+                                    'n' => '\n',
+                                    'r' => '\r',
+                                    't' => '\t',
+                                    '\\' => '\\',
+                                    '\'' => '\'',
+                                    '"' => '"',
+                                    '0' => '\x00',
+                                    'x' => self_.scan_numeric_escape(2u, '\''),
+                                    c2 => {
+                                        self_.err_span_char(
+                                            escaped_pos, self_.last_pos,
+                                            "unknown byte escape", c2);
+                                        c2
+                                    }
                                 }
                             }
                         }
                     }
+                    '\t' | '\n' | '\r' | '\'' => {
+                        self_.err_span_char( start, self_.last_pos,
+                            "byte constant must be escaped", c2);
+                    }
+                    _ => if c2 > '\x7F' {
+                        self_.err_span_char( start, self_.last_pos,
+                            "byte constant must be ASCII. \
+                             Use a \\xHH escape for a non-ASCII byte", c2);
+                    }
                 }
-                '\t' | '\n' | '\r' | '\'' => {
-                    self.err_span_char( start, self.last_pos,
-                        "byte constant must be escaped", c2);
-                }
-                _ if c2 > '\x7F' => {
-                    self.err_span_char( start, self.last_pos,
-                        "byte constant must be ASCII. \
-                         Use a \\xHH escape for a non-ASCII byte", c2);
+                if !self_.curr_is('\'') {
+                    // Byte offsetting here is okay because the
+                    // character before position `start` are an
+                    // ascii single quote and ascii 'b'.
+                    self_.fatal_span_verbose(
+                        start - BytePos(2), self_.last_pos,
+                        "unterminated byte constant".to_string());
                 }
-                _ => {}
+                self_.bump(); // advance curr past token
+                return token::LIT_BYTE(c2 as u8);
             }
-            if !self.curr_is('\'') {
-                self.fatal_span_verbose(
-                                   // Byte offsetting here is okay because the
-                                   // character before position `start` are an
-                                   // ascii single quote and ascii 'b'.
-                                   start - BytePos(2), self.last_pos,
-                                   "unterminated byte constant".to_string());
+
+            fn parse_byte_string(self_: &mut StringReader) -> token::Token {
+                self_.bump();
+                let start = self_.last_pos;
+                let mut value = Vec::new();
+                while !self_.curr_is('"') {
+                    if self_.is_eof() {
+                        self_.fatal_span(start, self_.last_pos,
+                                         "unterminated double quote byte string");
+                    }
+
+                    let ch = self_.curr.unwrap();
+                    self_.bump();
+                    match ch {
+                      '\\' => {
+                        if self_.is_eof() {
+                            self_.fatal_span(start, self_.last_pos,
+                                             "unterminated double quote byte string");
+                        }
+
+                        let escaped = self_.curr.unwrap();
+                        let escaped_pos = self_.last_pos;
+                        self_.bump();
+                        match escaped {
+                          'n' => value.push('\n' as u8),
+                          'r' => value.push('\r' as u8),
+                          't' => value.push('\t' as u8),
+                          '\\' => value.push('\\' as u8),
+                          '\'' => value.push('\'' as u8),
+                          '"' => value.push('"' as u8),
+                          '\n' => self_.consume_whitespace(),
+                          '0' => value.push(0),
+                          'x' => {
+                            value.push(self_.scan_numeric_escape(2u, '"') as u8);
+                          }
+                          c2 => {
+                            self_.err_span_char(escaped_pos, self_.last_pos,
+                                                "unknown byte string escape", c2);
+                          }
+                        }
+                      }
+                      _ => {
+                        if ch <= '\x7F' {
+                            value.push(ch as u8)
+                        } else {
+                            self_.err_span_char(self_.last_pos, self_.last_pos,
+                                "byte string must be ASCII. \
+                                 Use a \\xHH escape for a non-ASCII byte", ch);
+                        }
+                      }
+                    }
+                }
+                self_.bump();
+                return token::LIT_BINARY(Rc::new(value));
             }
-            self.bump(); // advance curr past token
-            return token::LIT_BYTE(c2 as u8);
           }
           '"' => {
             let mut accum_str = String::new();
author	Simon Sapin <simon.sapin@exyr.org>	2014-06-07 15:32:01 +0100
committer	Simon Sapin <simon.sapin@exyr.org>	2014-06-17 23:43:18 +0200
commit	d7e01b5809cd600a30bab29da698acb3d1b52409 (patch)
tree	286205104bc60273b9dae149b21c10a4b94a1764 /src/libsyntax/parse/lexer
parent	bccdba02960b3cd428addbc2c856065ebb81eb04 (diff)
download	rust-d7e01b5809cd600a30bab29da698acb3d1b52409.tar.gz rust-d7e01b5809cd600a30bab29da698acb3d1b52409.zip