rollup merge of #19103: huonw/literal-suffixes

Futureproof Rust for fancier suffixed literals. The Rust compiler tokenises a literal followed immediately (no whitespace) by an identifier as a single token: (for example) the text sequences `"foo"bar`, `1baz` and `1u1024` are now a single token rather than the pairs `"foo"` `bar`, `1` `baz` and `1u` `1024` respectively. The compiler rejects all such suffixes in the parser, except for the 12 numeric suffixes we have now. I'm fairly sure this will affect very few programs, since it's not currently legal to have `<literal><identifier>` in a Rust program, except in a macro invocation. Any macro invocation relying on this behaviour can simply separate the two tokens with whitespace: `foo!("bar"baz)` becomes `foo!("bar" baz)`. This implements [RFC 463](https://github.com/rust-lang/rfcs/blob/master/text/0463-future-proof-literal-suffixes.md), and so closes https://github.com/rust-lang/rust/issues/19088.
author: Jakub Bukaj <jakub@jakub.cc> 2014-11-19 22:41:05 +0100
committer: Jakub Bukaj <jakub@jakub.cc> 2014-11-19 22:41:05 +0100
commit: f71b852d3865e878b953cb280724fb2ce1203103 (patch)
tree: 4cf08dabab8901fcbac433e2d3f964abeaab2d94 /src/libsyntax/parse
parent: 00ffcca0164504ecae8453750a053e2b7206264a (diff)
parent: a11078f8c3a69f0d4ea20ca10aaf96622770615b (diff)
download: rust-f71b852d3865e878b953cb280724fb2ce1203103.tar.gz
rust-f71b852d3865e878b953cb280724fb2ce1203103.zip
4 files changed, 283 insertions, 218 deletions
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 01a66243a96..fbca4868255 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -369,6 +369,25 @@ impl<'a> StringReader<'a> {
         self.nextnextch() == Some(c)
     }
 
+    /// Eats <XID_start><XID_continue>*, if possible.
+    fn scan_optional_raw_name(&mut self) -> Option<ast::Name> {
+        if !ident_start(self.curr) {
+            return None
+        }
+        let start = self.last_pos;
+        while ident_continue(self.curr) {
+            self.bump();
+        }
+
+        self.with_str_from(start, |string| {
+            if string == "_" {
+                None
+            } else {
+                Some(token::intern(string))
+            }
+        })
+    }
+
     /// PRECONDITION: self.curr is not whitespace
     /// Eats any kind of comment.
     fn scan_comment(&mut self) -> Option<TokenAndSpan> {
@@ -638,7 +657,7 @@ impl<'a> StringReader<'a> {
     }
 
     /// Lex a LIT_INTEGER or a LIT_FLOAT
-    fn scan_number(&mut self, c: char) -> token::Token {
+    fn scan_number(&mut self, c: char) -> token::Lit {
         let mut num_digits;
         let mut base = 10;
         let start_bpos = self.last_pos;
@@ -653,19 +672,9 @@ impl<'a> StringReader<'a> {
                 '0'...'9' | '_' | '.' => {
                     num_digits = self.scan_digits(10) + 1;
                 }
-                'u' | 'i' => {
-                    self.scan_int_suffix();
-                    return token::LitInteger(self.name_from(start_bpos));
-                },
-                'f' => {
-                    let last_pos = self.last_pos;
-                    self.scan_float_suffix();
-                    self.check_float_base(start_bpos, last_pos, base);
-                    return token::LitFloat(self.name_from(start_bpos));
-                }
                 _ => {
                     // just a 0
-                    return token::LitInteger(self.name_from(start_bpos));
+                    return token::Integer(self.name_from(start_bpos));
                 }
             }
         } else if c.is_digit_radix(10) {
@@ -676,9 +685,7 @@ impl<'a> StringReader<'a> {
 
         if num_digits == 0 {
             self.err_span_(start_bpos, self.last_pos, "no valid digits found for number");
-            // eat any suffix
-            self.scan_int_suffix();
-            return token::LitInteger(token::intern("0"));
+            return token::Integer(token::intern("0"));
         }
 
         // might be a float, but don't be greedy if this is actually an
@@ -692,29 +699,20 @@ impl<'a> StringReader<'a> {
             if self.curr.unwrap_or('\0').is_digit_radix(10) {
                 self.scan_digits(10);
                 self.scan_float_exponent();
-                self.scan_float_suffix();
             }
             let last_pos = self.last_pos;
             self.check_float_base(start_bpos, last_pos, base);
-            return token::LitFloat(self.name_from(start_bpos));
-        } else if self.curr_is('f') {
-            // or it might be an integer literal suffixed as a float
-            self.scan_float_suffix();
-            let last_pos = self.last_pos;
-            self.check_float_base(start_bpos, last_pos, base);
-            return token::LitFloat(self.name_from(start_bpos));
+            return token::Float(self.name_from(start_bpos));
         } else {
             // it might be a float if it has an exponent
             if self.curr_is('e') || self.curr_is('E') {
                 self.scan_float_exponent();
-                self.scan_float_suffix();
                 let last_pos = self.last_pos;
                 self.check_float_base(start_bpos, last_pos, base);
-                return token::LitFloat(self.name_from(start_bpos));
+                return token::Float(self.name_from(start_bpos));
             }
             // but we certainly have an integer!
-            self.scan_int_suffix();
-            return token::LitInteger(self.name_from(start_bpos));
+            return token::Integer(self.name_from(start_bpos));
         }
     }
 
@@ -850,55 +848,6 @@ impl<'a> StringReader<'a> {
         true
     }
 
-    /// Scan over an int literal suffix.
-    fn scan_int_suffix(&mut self) {
-        match self.curr {
-            Some('i') | Some('u') => {
-                self.bump();
-
-                if self.curr_is('8') {
-                    self.bump();
-                } else if self.curr_is('1') {
-                    if !self.nextch_is('6') {
-                        self.err_span_(self.last_pos, self.pos,
-                                      "illegal int suffix");
-                    } else {
-                        self.bump(); self.bump();
-                    }
-                } else if self.curr_is('3') {
-                    if !self.nextch_is('2') {
-                        self.err_span_(self.last_pos, self.pos,
-                                      "illegal int suffix");
-                    } else {
-                        self.bump(); self.bump();
-                    }
-                } else if self.curr_is('6') {
-                    if !self.nextch_is('4') {
-                        self.err_span_(self.last_pos, self.pos,
-                                      "illegal int suffix");
-                    } else {
-                        self.bump(); self.bump();
-                    }
-                }
-            },
-            _ => { }
-        }
-    }
-
-    /// Scan over a float literal suffix
-    fn scan_float_suffix(&mut self) {
-        if self.curr_is('f') {
-            if (self.nextch_is('3') && self.nextnextch_is('2'))
-            || (self.nextch_is('6') && self.nextnextch_is('4')) {
-                self.bump();
-                self.bump();
-                self.bump();
-            } else {
-                self.err_span_(self.last_pos, self.pos, "illegal float suffix");
-            }
-        }
-    }
-
     /// Scan over a float exponent.
     fn scan_float_exponent(&mut self) {
         if self.curr_is('e') || self.curr_is('E') {
@@ -967,7 +916,10 @@ impl<'a> StringReader<'a> {
         }
 
         if is_dec_digit(c) {
-            return self.scan_number(c.unwrap());
+            let num = self.scan_number(c.unwrap());
+            let suffix = self.scan_optional_raw_name();
+            debug!("next_token_inner: scanned number {}, {}", num, suffix);
+            return token::Literal(num, suffix)
         }
 
         if self.read_embedded_ident {
@@ -1126,17 +1078,19 @@ impl<'a> StringReader<'a> {
             }
             let id = if valid { self.name_from(start) } else { token::intern("0") };
             self.bump(); // advance curr past token
-            return token::LitChar(id);
+            let suffix = self.scan_optional_raw_name();
+            return token::Literal(token::Char(id), suffix);
           }
           'b' => {
             self.bump();
-            return match self.curr {
+            let lit = match self.curr {
                 Some('\'') => self.scan_byte(),
                 Some('"') => self.scan_byte_string(),
                 Some('r') => self.scan_raw_byte_string(),
                 _ => unreachable!()  // Should have been a token::Ident above.
             };
-
+            let suffix = self.scan_optional_raw_name();
+            return token::Literal(lit, suffix);
           }
           '"' => {
             let start_bpos = self.last_pos;
@@ -1157,7 +1111,8 @@ impl<'a> StringReader<'a> {
             let id = if valid { self.name_from(start_bpos + BytePos(1)) }
                      else { token::intern("??") };
             self.bump();
-            return token::LitStr(id);
+            let suffix = self.scan_optional_raw_name();
+            return token::Literal(token::Str_(id), suffix);
           }
           'r' => {
             let start_bpos = self.last_pos;
@@ -1224,7 +1179,8 @@ impl<'a> StringReader<'a> {
             } else {
                 token::intern("??")
             };
-            return token::LitStrRaw(id, hash_count);
+            let suffix = self.scan_optional_raw_name();
+            return token::Literal(token::StrRaw(id, hash_count), suffix);
           }
           '-' => {
             if self.nextch_is('>') {
@@ -1293,7 +1249,7 @@ impl<'a> StringReader<'a> {
      || (self.curr_is('#') && self.nextch_is('!') && !self.nextnextch_is('['))
     }
 
-    fn scan_byte(&mut self) -> token::Token {
+    fn scan_byte(&mut self) -> token::Lit {
         self.bump();
         let start = self.last_pos;
 
@@ -1314,10 +1270,10 @@ impl<'a> StringReader<'a> {
 
         let id = if valid { self.name_from(start) } else { token::intern("??") };
         self.bump(); // advance curr past token
-        return token::LitByte(id);
+        return token::Byte(id);
     }
 
-    fn scan_byte_string(&mut self) -> token::Token {
+    fn scan_byte_string(&mut self) -> token::Lit {
         self.bump();
         let start = self.last_pos;
         let mut valid = true;
@@ -1336,10 +1292,10 @@ impl<'a> StringReader<'a> {
         }
         let id = if valid { self.name_from(start) } else { token::intern("??") };
         self.bump();
-        return token::LitBinary(id);
+        return token::Binary(id);
     }
 
-    fn scan_raw_byte_string(&mut self) -> token::Token {
+    fn scan_raw_byte_string(&mut self) -> token::Lit {
         let start_bpos = self.last_pos;
         self.bump();
         let mut hash_count = 0u;
@@ -1387,8 +1343,9 @@ impl<'a> StringReader<'a> {
             self.bump();
         }
         self.bump();
-        return token::LitBinaryRaw(self.name_from_to(content_start_bpos, content_end_bpos),
-                                     hash_count);
+        return token::BinaryRaw(self.name_from_to(content_start_bpos,
+                                                  content_end_bpos),
+                                hash_count);
     }
 }
 
@@ -1535,17 +1492,17 @@ mod test {
 
     #[test] fn character_a() {
         assert_eq!(setup(&mk_sh(), "'a'".to_string()).next_token().tok,
-                   token::LitChar(token::intern("a")));
+                   token::Literal(token::Char(token::intern("a")), None));
     }
 
     #[test] fn character_space() {
         assert_eq!(setup(&mk_sh(), "' '".to_string()).next_token().tok,
-                   token::LitChar(token::intern(" ")));
+                   token::Literal(token::Char(token::intern(" ")), None));
     }
 
     #[test] fn character_escaped() {
         assert_eq!(setup(&mk_sh(), "'\\n'".to_string()).next_token().tok,
-                   token::LitChar(token::intern("\\n")));
+                   token::Literal(token::Char(token::intern("\\n")), None));
     }
 
     #[test] fn lifetime_name() {
@@ -1557,7 +1514,41 @@ mod test {
         assert_eq!(setup(&mk_sh(),
                          "r###\"\"#a\\b\x00c\"\"###".to_string()).next_token()
                                                                  .tok,
-                   token::LitStrRaw(token::intern("\"#a\\b\x00c\""), 3));
+                   token::Literal(token::StrRaw(token::intern("\"#a\\b\x00c\""), 3), None));
+    }
+
+    #[test] fn literal_suffixes() {
+        macro_rules! test {
+            ($input: expr, $tok_type: ident, $tok_contents: expr) => {{
+                assert_eq!(setup(&mk_sh(), format!("{}suffix", $input)).next_token().tok,
+                           token::Literal(token::$tok_type(token::intern($tok_contents)),
+                                          Some(token::intern("suffix"))));
+                // with a whitespace separator:
+                assert_eq!(setup(&mk_sh(), format!("{} suffix", $input)).next_token().tok,
+                           token::Literal(token::$tok_type(token::intern($tok_contents)),
+                                          None));
+            }}
+        }
+
+        test!("'a'", Char, "a");
+        test!("b'a'", Byte, "a");
+        test!("\"a\"", Str_, "a");
+        test!("b\"a\"", Binary, "a");
+        test!("1234", Integer, "1234");
+        test!("0b101", Integer, "0b101");
+        test!("0xABC", Integer, "0xABC");
+        test!("1.0", Float, "1.0");
+        test!("1.0e10", Float, "1.0e10");
+
+        assert_eq!(setup(&mk_sh(), "2u".to_string()).next_token().tok,
+                   token::Literal(token::Integer(token::intern("2")),
+                                  Some(token::intern("u"))));
+        assert_eq!(setup(&mk_sh(), "r###\"raw\"###suffix".to_string()).next_token().tok,
+                   token::Literal(token::StrRaw(token::intern("raw"), 3),
+                                  Some(token::intern("suffix"))));
+        assert_eq!(setup(&mk_sh(), "br###\"raw\"###suffix".to_string()).next_token().tok,
+                   token::Literal(token::BinaryRaw(token::intern("raw"), 3),
+                                  Some(token::intern("suffix"))));
     }
 
     #[test] fn line_doc_comments() {
@@ -1573,7 +1564,7 @@ mod test {
             token::Comment => { },
             _ => panic!("expected a comment!")
         }
-        assert_eq!(lexer.next_token().tok, token::LitChar(token::intern("a")));
+        assert_eq!(lexer.next_token().tok, token::Literal(token::Char(token::intern("a")), None));
     }
 
 }
diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs
index 3ce49b9d7a3..96659031e6a 100644
--- a/src/libsyntax/parse/mod.rs
+++ b/src/libsyntax/parse/mod.rs
@@ -511,28 +511,41 @@ pub fn raw_str_lit(lit: &str) -> String {
     res
 }
 
-pub fn float_lit(s: &str) -> ast::Lit_ {
-    debug!("float_lit: {}", s);
-    // FIXME #2252: bounds checking float literals is defered until trans
-    let s2 = s.chars().filter(|&c| c != '_').collect::<String>();
-    let s = s2.as_slice();
-
-    let mut ty = None;
-
-    if s.ends_with("f32") {
-        ty = Some(ast::TyF32);
-    } else if s.ends_with("f64") {
-        ty = Some(ast::TyF64);
-    }
+// check if `s` looks like i32 or u1234 etc.
+fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool {
+    s.len() > 1 &&
+        first_chars.contains(&s.char_at(0)) &&
+        s.slice_from(1).chars().all(|c| '0' <= c && c <= '9')
+}
 
+fn filtered_float_lit(data: token::InternedString, suffix: Option<&str>,
+                      sd: &SpanHandler, sp: Span) -> ast::Lit_ {
+    debug!("filtered_float_lit: {}, {}", data, suffix);
+    match suffix {
+        Some("f32") => ast::LitFloat(data, ast::TyF32),
+        Some("f64") => ast::LitFloat(data, ast::TyF64),
+        Some(suf) => {
+            if suf.len() >= 2 && looks_like_width_suffix(&['f'], suf) {
+                // if it looks like a width, lets try to be helpful.
+                sd.span_err(sp, &*format!("illegal width `{}` for float literal, \
+                                          valid widths are 32 and 64", suf.slice_from(1)));
+            } else {
+                sd.span_err(sp, &*format!("illegal suffix `{}` for float literal, \
+                                          valid suffixes are `f32` and `f64`", suf));
+            }
 
-    match ty {
-        Some(t) => {
-            ast::LitFloat(token::intern_and_get_ident(s.slice_to(s.len() - t.suffix_len())), t)
-        },
-        None => ast::LitFloatUnsuffixed(token::intern_and_get_ident(s))
+            ast::LitFloatUnsuffixed(data)
+        }
+        None => ast::LitFloatUnsuffixed(data)
     }
 }
+pub fn float_lit(s: &str, suffix: Option<&str>, sd: &SpanHandler, sp: Span) -> ast::Lit_ {
+    debug!("float_lit: {}, {}", s, suffix);
+    // FIXME #2252: bounds checking float literals is defered until trans
+    let s = s.chars().filter(|&c| c != '_').collect::<String>();
+    let data = token::intern_and_get_ident(&*s);
+    filtered_float_lit(data, suffix, sd, sp)
+}
 
 /// Parse a string representing a byte literal into its final form. Similar to `char_lit`
 pub fn byte_lit(lit: &str) -> (u8, uint) {
@@ -626,24 +639,19 @@ pub fn binary_lit(lit: &str) -> Rc<Vec<u8>> {
     Rc::new(res)
 }
 
-pub fn integer_lit(s: &str, sd: &SpanHandler, sp: Span) -> ast::Lit_ {
+pub fn integer_lit(s: &str, suffix: Option<&str>, sd: &SpanHandler, sp: Span) -> ast::Lit_ {
     // s can only be ascii, byte indexing is fine
 
     let s2 = s.chars().filter(|&c| c != '_').collect::<String>();
     let mut s = s2.as_slice();
 
-    debug!("parse_integer_lit: {}", s);
-
-    if s.len() == 1 {
-        let n = (s.char_at(0)).to_digit(10).unwrap();
-        return ast::LitInt(n as u64, ast::UnsuffixedIntLit(ast::Sign::new(n)));
-    }
+    debug!("integer_lit: {}, {}", s, suffix);
 
     let mut base = 10;
     let orig = s;
     let mut ty = ast::UnsuffixedIntLit(ast::Plus);
 
-    if s.char_at(0) == '0' {
+    if s.char_at(0) == '0' && s.len() > 1 {
         match s.char_at(1) {
             'x' => base = 16,
             'o' => base = 8,
@@ -652,57 +660,56 @@ pub fn integer_lit(s: &str, sd: &SpanHandler, sp: Span) -> ast::Lit_ {
         }
     }
 
+    // 1f64 and 2f32 etc. are valid float literals.
+    match suffix {
+        Some(suf) if looks_like_width_suffix(&['f'], suf) => {
+            match base {
+                16u => sd.span_err(sp, "hexadecimal float literal is not supported"),
+                8u => sd.span_err(sp, "octal float literal is not supported"),
+                2u => sd.span_err(sp, "binary float literal is not supported"),
+                _ => ()
+            }
+            let ident = token::intern_and_get_ident(&*s);
+            return filtered_float_lit(ident, suffix, sd, sp)
+        }
+        _ => {}
+    }
+
     if base != 10 {
         s = s.slice_from(2);
     }
 
-    let last = s.len() - 1;
-    match s.char_at(last) {
-        'i' => ty = ast::SignedIntLit(ast::TyI, ast::Plus),
-        'u' => ty = ast::UnsignedIntLit(ast::TyU),
-        '8' => {
-            if s.len() > 2 {
-                match s.char_at(last - 1) {
-                    'i' => ty = ast::SignedIntLit(ast::TyI8, ast::Plus),
-                    'u' => ty = ast::UnsignedIntLit(ast::TyU8),
-                    _ => { }
-                }
-            }
-        },
-        '6' => {
-            if s.len() > 3 && s.char_at(last - 1) == '1' {
-                match s.char_at(last - 2) {
-                    'i' => ty = ast::SignedIntLit(ast::TyI16, ast::Plus),
-                    'u' => ty = ast::UnsignedIntLit(ast::TyU16),
-                    _ => { }
-                }
-            }
-        },
-        '2' => {
-            if s.len() > 3 && s.char_at(last - 1) == '3' {
-                match s.char_at(last - 2) {
-                    'i' => ty = ast::SignedIntLit(ast::TyI32, ast::Plus),
-                    'u' => ty = ast::UnsignedIntLit(ast::TyU32),
-                    _ => { }
-                }
-            }
-        },
-        '4' => {
-            if s.len() > 3 && s.char_at(last - 1) == '6' {
-                match s.char_at(last - 2) {
-                    'i' => ty = ast::SignedIntLit(ast::TyI64, ast::Plus),
-                    'u' => ty = ast::UnsignedIntLit(ast::TyU64),
-                    _ => { }
+    if let Some(suf) = suffix {
+        if suf.is_empty() { sd.span_bug(sp, "found empty literal suffix in Some")}
+        ty = match suf {
+            "i"   => ast::SignedIntLit(ast::TyI, ast::Plus),
+            "i8"  => ast::SignedIntLit(ast::TyI8, ast::Plus),
+            "i16" => ast::SignedIntLit(ast::TyI16, ast::Plus),
+            "i32" => ast::SignedIntLit(ast::TyI32, ast::Plus),
+            "i64" => ast::SignedIntLit(ast::TyI64, ast::Plus),
+            "u"   => ast::UnsignedIntLit(ast::TyU),
+            "u8"  => ast::UnsignedIntLit(ast::TyU8),
+            "u16" => ast::UnsignedIntLit(ast::TyU16),
+            "u32" => ast::UnsignedIntLit(ast::TyU32),
+            "u64" => ast::UnsignedIntLit(ast::TyU64),
+            _ => {
+                // i<digits> and u<digits> look like widths, so lets
+                // give an error message along those lines
+                if looks_like_width_suffix(&['i', 'u'], suf) {
+                    sd.span_err(sp, &*format!("illegal width `{}` for integer literal; \
+                                              valid widths are 8, 16, 32 and 64",
+                                              suf.slice_from(1)));
+                } else {
+                    sd.span_err(sp, &*format!("illegal suffix `{}` for numeric literal", suf));
                 }
+
+                ty
             }
-        },
-        _ => { }
+        }
     }
 
-    debug!("The suffix is {}, base {}, the new string is {}, the original \
-           string was {}", ty, base, s, orig);
-
-    s = s.slice_to(s.len() - ty.suffix_len());
+    debug!("integer_lit: the type is {}, base {}, the new string is {}, the original \
+           string was {}, the original suffix was {}", ty, base, s, orig, suffix);
 
     let res: u64 = match ::std::num::from_str_radix(s, base) {
         Some(r) => r,
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
index d3ae9838c6d..a6fe3902395 100644
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -646,6 +646,20 @@ impl<'a> Parser<'a> {
         }
     }
 
+    pub fn expect_no_suffix(&mut self, sp: Span, kind: &str, suffix: Option<ast::Name>) {
+        match suffix {
+            None => {/* everything ok */}
+            Some(suf) => {
+                let text = suf.as_str();
+                if text.is_empty() {
+                    self.span_bug(sp, "found empty literal suffix in Some")
+                }
+                self.span_err(sp, &*format!("{} with a suffix is illegal", kind));
+            }
+        }
+    }
+
+
     /// Attempt to consume a `<`. If `<<` is seen, replace it with a single
     /// `<` and continue. If a `<` is not seen, return false.
     ///
@@ -968,6 +982,9 @@ impl<'a> Parser<'a> {
     pub fn span_err(&mut self, sp: Span, m: &str) {
         self.sess.span_diagnostic.span_err(sp, m)
     }
+    pub fn span_bug(&mut self, sp: Span, m: &str) -> ! {
+        self.sess.span_diagnostic.span_bug(sp, m)
+    }
     pub fn abort_if_errors(&mut self) {
         self.sess.span_diagnostic.handler().abort_if_errors();
     }
@@ -1640,24 +1657,53 @@ impl<'a> Parser<'a> {
     /// Matches token_lit = LIT_INTEGER | ...
     pub fn lit_from_token(&mut self, tok: &token::Token) -> Lit_ {
         match *tok {
-            token::LitByte(i) => LitByte(parse::byte_lit(i.as_str()).val0()),
-            token::LitChar(i) => LitChar(parse::char_lit(i.as_str()).val0()),
-            token::LitInteger(s) => parse::integer_lit(s.as_str(),
-                                                        &self.sess.span_diagnostic,
-                                                       self.last_span),
-            token::LitFloat(s) => parse::float_lit(s.as_str()),
-            token::LitStr(s) => {
-                LitStr(token::intern_and_get_ident(parse::str_lit(s.as_str()).as_slice()),
-                       ast::CookedStr)
-            }
-            token::LitStrRaw(s, n) => {
-                LitStr(token::intern_and_get_ident(parse::raw_str_lit(s.as_str()).as_slice()),
-                       ast::RawStr(n))
+            token::Literal(lit, suf) => {
+                let (suffix_illegal, out) = match lit {
+                    token::Byte(i) => (true, LitByte(parse::byte_lit(i.as_str()).val0())),
+                    token::Char(i) => (true, LitChar(parse::char_lit(i.as_str()).val0())),
+
+                    // there are some valid suffixes for integer and
+                    // float literals, so all the handling is done
+                    // internally.
+                    token::Integer(s) => {
+                        (false, parse::integer_lit(s.as_str(),
+                                                   suf.as_ref().map(|s| s.as_str()),
+                                                   &self.sess.span_diagnostic,
+                                                   self.last_span))
+                    }
+                    token::Float(s) => {
+                        (false, parse::float_lit(s.as_str(),
+                                                 suf.as_ref().map(|s| s.as_str()),
+                                                  &self.sess.span_diagnostic,
+                                                 self.last_span))
+                    }
+
+                    token::Str_(s) => {
+                        (true,
+                         LitStr(token::intern_and_get_ident(parse::str_lit(s.as_str()).as_slice()),
+                                ast::CookedStr))
+                    }
+                    token::StrRaw(s, n) => {
+                        (true,
+                         LitStr(
+                            token::intern_and_get_ident(
+                                parse::raw_str_lit(s.as_str()).as_slice()),
+                            ast::RawStr(n)))
+                    }
+                    token::Binary(i) =>
+                        (true, LitBinary(parse::binary_lit(i.as_str()))),
+                    token::BinaryRaw(i, _) =>
+                        (true,
+                         LitBinary(Rc::new(i.as_str().as_bytes().iter().map(|&x| x).collect()))),
+                };
+
+                if suffix_illegal {
+                    let sp = self.last_span;
+                    self.expect_no_suffix(sp, &*format!("{} literal", lit.short_name()), suf)
+                }
+
+                out
             }
-            token::LitBinary(i) =>
-                LitBinary(parse::binary_lit(i.as_str())),
-            token::LitBinaryRaw(i, _) =>
-                LitBinary(Rc::new(i.as_str().as_bytes().iter().map(|&x| x).collect())),
             _ => { self.unexpected_last(tok); }
         }
     }
@@ -2424,7 +2470,10 @@ impl<'a> Parser<'a> {
                         }
                     }
                   }
-                  token::LitInteger(n) => {
+                  token::Literal(token::Integer(n), suf) => {
+                    let sp = self.span;
+                    self.expect_no_suffix(sp, "tuple index", suf);
+
                     let index = n.as_str();
                     let dot = self.last_span.hi;
                     hi = self.span.hi;
@@ -2449,7 +2498,7 @@ impl<'a> Parser<'a> {
                         }
                     }
                   }
-                  token::LitFloat(n) => {
+                  token::Literal(token::Float(n), _suf) => {
                     self.bump();
                     let last_span = self.last_span;
                     let fstr = n.as_str();
@@ -5085,12 +5134,17 @@ impl<'a> Parser<'a> {
                 self.expect(&token::Semi);
                 (path, the_ident)
             },
-            token::LitStr(..) | token::LitStrRaw(..) => {
-                let path = self.parse_str();
+            token::Literal(token::Str_(..), suf) | token::Literal(token::StrRaw(..), suf) => {
+                let sp = self.span;
+                self.expect_no_suffix(sp, "extern crate name", suf);
+                // forgo the internal suffix check of `parse_str` to
+                // avoid repeats (this unwrap will always succeed due
+                // to the restriction of the `match`)
+                let (s, style, _) = self.parse_optional_str().unwrap();
                 self.expect_keyword(keywords::As);
                 let the_ident = self.parse_ident();
                 self.expect(&token::Semi);
-                (Some(path), the_ident)
+                (Some((s, style)), the_ident)
             },
             _ => {
                 let span = self.span;
@@ -5267,7 +5321,9 @@ impl<'a> Parser<'a> {
     /// the `extern` keyword, if one is found.
     fn parse_opt_abi(&mut self) -> Option<abi::Abi> {
         match self.token {
-            token::LitStr(s) | token::LitStrRaw(s, _) => {
+            token::Literal(token::Str_(s), suf) | token::Literal(token::StrRaw(s, _), suf) => {
+                let sp = self.span;
+                self.expect_no_suffix(sp, "ABI spec", suf);
                 self.bump();
                 let the_string = s.as_str();
                 match abi::lookup(the_string) {
@@ -5910,21 +5966,27 @@ impl<'a> Parser<'a> {
     }
 
     pub fn parse_optional_str(&mut self)
-                              -> Option<(InternedString, ast::StrStyle)> {
-        let (s, style) = match self.token {
-            token::LitStr(s) => (self.id_to_interned_str(s.ident()), ast::CookedStr),
-            token::LitStrRaw(s, n) => {
-                (self.id_to_interned_str(s.ident()), ast::RawStr(n))
+                              -> Option<(InternedString, ast::StrStyle, Option<ast::Name>)> {
+        let ret = match self.token {
+            token::Literal(token::Str_(s), suf) => {
+                (self.id_to_interned_str(s.ident()), ast::CookedStr, suf)
+            }
+            token::Literal(token::StrRaw(s, n), suf) => {
+                (self.id_to_interned_str(s.ident()), ast::RawStr(n), suf)
             }
             _ => return None
         };
         self.bump();
-        Some((s, style))
+        Some(ret)
     }
 
     pub fn parse_str(&mut self) -> (InternedString, StrStyle) {
         match self.parse_optional_str() {
-            Some(s) => { s }
+            Some((s, style, suf)) => {
+                let sp = self.last_span;
+                self.expect_no_suffix(sp, "str literal", suf);
+                (s, style)
+            }
             _ =>  self.fatal("expected string literal")
         }
     }
diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs
index 298328d73ef..4272b57a4dc 100644
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@@ -12,6 +12,7 @@ pub use self::BinOpToken::*;
 pub use self::Nonterminal::*;
 pub use self::DelimToken::*;
 pub use self::IdentStyle::*;
+pub use self::Lit::*;
 pub use self::Token::*;
 
 use ast;
@@ -59,6 +60,31 @@ pub enum IdentStyle {
     Plain,
 }
 
+#[deriving(Clone, Encodable, Decodable, PartialEq, Eq, Hash, Show)]
+pub enum Lit {
+    Byte(ast::Name),
+    Char(ast::Name),
+    Integer(ast::Name),
+    Float(ast::Name),
+    Str_(ast::Name),
+    StrRaw(ast::Name, uint), /* raw str delimited by n hash symbols */
+    Binary(ast::Name),
+    BinaryRaw(ast::Name, uint), /* raw binary str delimited by n hash symbols */
+}
+
+impl Lit {
+    pub fn short_name(&self) -> &'static str {
+        match *self {
+            Byte(_) => "byte",
+            Char(_) => "char",
+            Integer(_) => "integer",
+            Float(_) => "float",
+            Str_(_) | StrRaw(..) => "str",
+            Binary(_) | BinaryRaw(..) => "binary str"
+        }
+    }
+}
+
 #[allow(non_camel_case_types)]
 #[deriving(Clone, Encodable, Decodable, PartialEq, Eq, Hash, Show)]
 pub enum Token {
@@ -98,14 +124,7 @@ pub enum Token {
     CloseDelim(DelimToken),
 
     /* Literals */
-    LitByte(ast::Name),
-    LitChar(ast::Name),
-    LitInteger(ast::Name),
-    LitFloat(ast::Name),
-    LitStr(ast::Name),
-    LitStrRaw(ast::Name, uint), /* raw str delimited by n hash symbols */
-    LitBinary(ast::Name),
-    LitBinaryRaw(ast::Name, uint), /* raw binary str delimited by n hash symbols */
+    Literal(Lit, Option<ast::Name>),
 
     /* Name components */
     Ident(ast::Ident, IdentStyle),
@@ -145,14 +164,7 @@ impl Token {
             Ident(_, _)                 => true,
             Underscore                  => true,
             Tilde                       => true,
-            LitByte(_)                  => true,
-            LitChar(_)                  => true,
-            LitInteger(_)               => true,
-            LitFloat(_)                 => true,
-            LitStr(_)                   => true,
-            LitStrRaw(_, _)             => true,
-            LitBinary(_)                => true,
-            LitBinaryRaw(_, _)          => true,
+            Literal(_, _)               => true,
             Pound                       => true,
             At                          => true,
             Not                         => true,
@@ -173,15 +185,8 @@ impl Token {
     /// Returns `true` if the token is any literal
     pub fn is_lit(&self) -> bool {
         match *self {
-            LitByte(_)          => true,
-            LitChar(_)          => true,
-            LitInteger(_)       => true,
-            LitFloat(_)         => true,
-            LitStr(_)           => true,
-            LitStrRaw(_, _)     => true,
-            LitBinary(_)        => true,
-            LitBinaryRaw(_, _)  => true,
-            _                   => false,
+            Literal(_, _) => true,
+            _          => false,
         }
     }
author	Jakub Bukaj <jakub@jakub.cc>	2014-11-19 22:41:05 +0100
committer	Jakub Bukaj <jakub@jakub.cc>	2014-11-19 22:41:05 +0100
commit	f71b852d3865e878b953cb280724fb2ce1203103 (patch)
tree	4cf08dabab8901fcbac433e2d3f964abeaab2d94 /src/libsyntax/parse
parent	00ffcca0164504ecae8453750a053e2b7206264a (diff)
parent	a11078f8c3a69f0d4ea20ca10aaf96622770615b (diff)
download	rust-f71b852d3865e878b953cb280724fb2ce1203103.tar.gz rust-f71b852d3865e878b953cb280724fb2ce1203103.zip