Add a b'x' byte literal of type u8.

author: Simon Sapin <simon.sapin@exyr.org> 2014-06-06 16:04:04 +0100
committer: Simon Sapin <simon.sapin@exyr.org> 2014-06-17 23:41:03 +0200
commit: bccdba02960b3cd428addbc2c856065ebb81eb04 (patch)
tree: 09c67823ea70b74b18af5ba2470b337968d758aa /src
parent: 2fd618e77accd37426819952ad443e50bb3c9015 (diff)
download: rust-bccdba02960b3cd428addbc2c856065ebb81eb04.tar.gz
rust-bccdba02960b3cd428addbc2c856065ebb81eb04.zip
16 files changed, 169 insertions, 5 deletions
diff --git a/src/librustc/middle/const_eval.rs b/src/librustc/middle/const_eval.rs
index 13d0443a00f..3c5b0664f03 100644
--- a/src/librustc/middle/const_eval.rs
+++ b/src/librustc/middle/const_eval.rs
@@ -506,6 +506,7 @@ pub fn lit_to_const(lit: &Lit) -> const_val {
         LitBinary(ref data) => {
             const_binary(Rc::new(data.iter().map(|x| *x).collect()))
         }
+        LitByte(n) => const_uint(n as u64),
         LitChar(n) => const_uint(n as u64),
         LitInt(n, _) => const_int(n),
         LitUint(n, _) => const_uint(n),
diff --git a/src/librustc/middle/lint.rs b/src/librustc/middle/lint.rs
index 392821a6ad3..4c11693e7a6 100644
--- a/src/librustc/middle/lint.rs
+++ b/src/librustc/middle/lint.rs
@@ -805,6 +805,7 @@ fn check_type_limits(cx: &Context, e: &ast::Expr) {
                     } else { t };
                     let (min, max) = uint_ty_range(uint_type);
                     let lit_val: u64 = match lit.node {
+                        ast::LitByte(_v) => return,  // _v is u8, within range by definition
                         ast::LitInt(v, _) => v as u64,
                         ast::LitUint(v, _) => v,
                         ast::LitIntUnsuffixed(v) => v as u64,
diff --git a/src/librustc/middle/trans/consts.rs b/src/librustc/middle/trans/consts.rs
index 45019edc58b..f5e66a527e7 100644
--- a/src/librustc/middle/trans/consts.rs
+++ b/src/librustc/middle/trans/consts.rs
@@ -43,6 +43,7 @@ pub fn const_lit(cx: &CrateContext, e: &ast::Expr, lit: ast::Lit)
     -> ValueRef {
     let _icx = push_ctxt("trans_lit");
     match lit.node {
+        ast::LitByte(b) => C_integral(Type::uint_from_ty(cx, ast::TyU8), b as u64, false),
         ast::LitChar(i) => C_integral(Type::char(cx), i as u64, false),
         ast::LitInt(i, t) => C_integral(Type::int_from_ty(cx, t), i as u64, true),
         ast::LitUint(u, t) => C_integral(Type::uint_from_ty(cx, t), u, false),
diff --git a/src/librustc/middle/typeck/check/mod.rs b/src/librustc/middle/typeck/check/mod.rs
index 73b92e5b868..2516a00ff76 100644
--- a/src/librustc/middle/typeck/check/mod.rs
+++ b/src/librustc/middle/typeck/check/mod.rs
@@ -1715,6 +1715,7 @@ pub fn check_lit(fcx: &FnCtxt, lit: &ast::Lit) -> ty::t {
         ast::LitBinary(..) => {
             ty::mk_slice(tcx, ty::ReStatic, ty::mt{ ty: ty::mk_u8(), mutbl: ast::MutImmutable })
         }
+        ast::LitByte(_) => ty::mk_u8(),
         ast::LitChar(_) => ty::mk_char(),
         ast::LitInt(_, t) => ty::mk_mach_int(t),
         ast::LitUint(_, t) => ty::mk_mach_uint(t),
diff --git a/src/librustdoc/clean/mod.rs b/src/librustdoc/clean/mod.rs
index 823e0f6a1b3..5e84a90121f 100644
--- a/src/librustdoc/clean/mod.rs
+++ b/src/librustdoc/clean/mod.rs
@@ -1924,6 +1924,14 @@ fn lit_to_str(lit: &ast::Lit) -> String {
     match lit.node {
         ast::LitStr(ref st, _) => st.get().to_string(),
         ast::LitBinary(ref data) => format!("{:?}", data.as_slice()),
+        ast::LitByte(b) => {
+            let mut res = String::from_str("b'");
+            (b as char).escape_default(|c| {
+                res.push_char(c);
+            });
+            res.push_char('\'');
+            res
+        },
         ast::LitChar(c) => format!("'{}'", c),
         ast::LitInt(i, _t) => i.to_str(),
         ast::LitUint(u, _t) => u.to_str(),
diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs
index f0d7b029deb..8a63b55afed 100644
--- a/src/librustdoc/html/highlight.rs
+++ b/src/librustdoc/html/highlight.rs
@@ -140,7 +140,7 @@ fn doit(sess: &parse::ParseSess, mut lexer: lexer::StringReader,
             }
 
             // text literals
-            t::LIT_CHAR(..) | t::LIT_STR(..) | t::LIT_STR_RAW(..) => "string",
+            t::LIT_BYTE(..) | t::LIT_CHAR(..) | t::LIT_STR(..) | t::LIT_STR_RAW(..) => "string",
 
             // number literals
             t::LIT_INT(..) | t::LIT_UINT(..) | t::LIT_INT_UNSUFFIXED(..) |
diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs
index 86dd736ceea..aeafc0e306c 100644
--- a/src/libsyntax/ast.rs
+++ b/src/libsyntax/ast.rs
@@ -616,6 +616,7 @@ pub type Lit = Spanned<Lit_>;
 pub enum Lit_ {
     LitStr(InternedString, StrStyle),
     LitBinary(Rc<Vec<u8> >),
+    LitByte(u8),
     LitChar(char),
     LitInt(i64, IntTy),
     LitUint(u64, UintTy),
diff --git a/src/libsyntax/ext/concat.rs b/src/libsyntax/ext/concat.rs
index 83f45ca9f16..670e38327d6 100644
--- a/src/libsyntax/ext/concat.rs
+++ b/src/libsyntax/ext/concat.rs
@@ -47,6 +47,7 @@ pub fn expand_syntax_ext(cx: &mut base::ExtCtxt,
                     ast::LitBool(b) => {
                         accumulator.push_str(format!("{}", b).as_slice());
                     }
+                    ast::LitByte(..) |
                     ast::LitBinary(..) => {
                         cx.span_err(e.span, "cannot concatenate a binary literal");
                     }
diff --git a/src/libsyntax/ext/quote.rs b/src/libsyntax/ext/quote.rs
index 6514d8fa418..407715ab4da 100644
--- a/src/libsyntax/ext/quote.rs
+++ b/src/libsyntax/ext/quote.rs
@@ -436,6 +436,12 @@ fn mk_token(cx: &ExtCtxt, sp: Span, tok: &token::Token) -> Gc<ast::Expr> {
                                 vec!(mk_binop(cx, sp, binop)));
         }
 
+        LIT_BYTE(i) => {
+            let e_byte = cx.expr_lit(sp, ast::LitByte(i));
+
+            return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_BYTE"), vec!(e_byte));
+        }
+
         LIT_CHAR(i) => {
             let e_char = cx.expr_lit(sp, ast::LitChar(i));
 
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index f7eac0b323f..7e4cb195cea 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -650,10 +650,13 @@ impl<'a> StringReader<'a> {
     /// token, and updates the interner
     fn next_token_inner(&mut self) -> token::Token {
         let c = self.curr;
-        if ident_start(c) && !self.nextch_is('"') && !self.nextch_is('#') {
+        if ident_start(c) && match (c.unwrap(), self.nextch()) {
             // Note: r as in r" or r#" is part of a raw string literal,
-            // not an identifier, and is handled further down.
-
+            // b as in b' is part of a byte literal.
+            // They are not identifiers, and are handled further down.
+           ('r', Some('"')) | ('r', Some('#')) | ('b', Some('\'')) => false,
+           _ => true
+        } {
             let start = self.last_pos;
             while ident_continue(self.curr) {
                 self.bump();
@@ -854,6 +857,65 @@ impl<'a> StringReader<'a> {
             self.bump(); // advance curr past token
             return token::LIT_CHAR(c2);
           }
+          'b' => {
+            self.bump();
+            assert!(self.curr_is('\''), "Should have been a token::IDENT");
+            self.bump();
+            let start = self.last_pos;
+
+            // the eof will be picked up by the final `'` check below
+            let mut c2 = self.curr.unwrap_or('\x00');
+            self.bump();
+
+            match c2 {
+                '\\' => {
+                    // '\X' for some X is an escape sequence in a byte constant:
+                    let escaped = self.curr;
+                    let escaped_pos = self.last_pos;
+                    self.bump();
+                    match escaped {
+                        None => {}
+                        Some(e) => {
+                            c2 = match e {
+                                'n' => '\n',
+                                'r' => '\r',
+                                't' => '\t',
+                                '\\' => '\\',
+                                '\'' => '\'',
+                                '"' => '"',
+                                '0' => '\x00',
+                                'x' => self.scan_numeric_escape(2u, '\''),
+                                c2 => {
+                                    self.err_span_char(escaped_pos, self.last_pos,
+                                                       "unknown byte escape", c2);
+                                    c2
+                                }
+                            }
+                        }
+                    }
+                }
+                '\t' | '\n' | '\r' | '\'' => {
+                    self.err_span_char( start, self.last_pos,
+                        "byte constant must be escaped", c2);
+                }
+                _ if c2 > '\x7F' => {
+                    self.err_span_char( start, self.last_pos,
+                        "byte constant must be ASCII. \
+                         Use a \\xHH escape for a non-ASCII byte", c2);
+                }
+                _ => {}
+            }
+            if !self.curr_is('\'') {
+                self.fatal_span_verbose(
+                                   // Byte offsetting here is okay because the
+                                   // characters before position `start` are an
+                                   // ASCII 'b' and an ASCII single quote.
+                                   start - BytePos(2), self.last_pos,
+                                   "unterminated byte constant".to_string());
+            }
+            self.bump(); // advance curr past token
+            return token::LIT_BYTE(c2 as u8);
+          }
           '"' => {
             let mut accum_str = String::new();
             let start_bpos = self.last_pos;
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
index bbe0680ef14..0bd47ede214 100644
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -33,7 +33,7 @@ use ast::{ForeignItem, ForeignItemStatic, ForeignItemFn, ForeignMod};
 use ast::{Ident, NormalFn, Inherited, Item, Item_, ItemStatic};
 use ast::{ItemEnum, ItemFn, ItemForeignMod, ItemImpl};
 use ast::{ItemMac, ItemMod, ItemStruct, ItemTrait, ItemTy, Lit, Lit_};
-use ast::{LitBool, LitFloat, LitFloatUnsuffixed, LitInt, LitChar};
+use ast::{LitBool, LitFloat, LitFloatUnsuffixed, LitInt, LitChar, LitByte};
 use ast::{LitIntUnsuffixed, LitNil, LitStr, LitUint, Local, LocalLet};
 use ast::{MutImmutable, MutMutable, Mac_, MacInvocTT, Matcher, MatchNonterminal};
 use ast::{MatchSeq, MatchTok, Method, MutTy, BiMul, Mutability};
@@ -1512,6 +1512,7 @@ impl<'a> Parser<'a> {
     // matches token_lit = LIT_INT | ...
     pub fn lit_from_token(&mut self, tok: &token::Token) -> Lit_ {
         match *tok {
+            token::LIT_BYTE(i) => LitByte(i),
             token::LIT_CHAR(i) => LitChar(i),
             token::LIT_INT(i, it) => LitInt(i, it),
             token::LIT_UINT(u, ut) => LitUint(u, ut),
diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs
index a4a022708d9..b8f13624a32 100644
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@@ -78,6 +78,7 @@ pub enum Token {
     DOLLAR,
 
     /* Literals */
+    LIT_BYTE(u8),
     LIT_CHAR(char),
     LIT_INT(i64, ast::IntTy),
     LIT_UINT(u64, ast::UintTy),
@@ -193,6 +194,14 @@ pub fn to_str(t: &Token) -> String {
       DOLLAR => "$".to_string(),
 
       /* Literals */
+      LIT_BYTE(b) => {
+          let mut res = String::from_str("b'");
+          (b as char).escape_default(|c| {
+              res.push_char(c);
+          });
+          res.push_char('\'');
+          res
+      }
       LIT_CHAR(c) => {
           let mut res = String::from_str("'");
           c.escape_default(|c| {
@@ -273,6 +282,7 @@ pub fn can_begin_expr(t: &Token) -> bool {
       IDENT(_, _) => true,
       UNDERSCORE => true,
       TILDE => true,
+      LIT_BYTE(_) => true,
       LIT_CHAR(_) => true,
       LIT_INT(_, _) => true,
       LIT_UINT(_, _) => true,
@@ -311,6 +321,7 @@ pub fn close_delimiter_for(t: &Token) -> Option<Token> {
 
 pub fn is_lit(t: &Token) -> bool {
     match *t {
+      LIT_BYTE(_) => true,
       LIT_CHAR(_) => true,
       LIT_INT(_, _) => true,
       LIT_UINT(_, _) => true,
diff --git a/src/libsyntax/print/pprust.rs b/src/libsyntax/print/pprust.rs
index badfbe7eb15..6ea2eed293e 100644
--- a/src/libsyntax/print/pprust.rs
+++ b/src/libsyntax/print/pprust.rs
@@ -2305,6 +2305,12 @@ impl<'a> State<'a> {
         }
         match lit.node {
             ast::LitStr(ref st, style) => self.print_string(st.get(), style),
+            ast::LitByte(byte) => {
+                let mut res = String::from_str("b'");
+                (byte as char).escape_default(|c| res.push_char(c));
+                res.push_char('\'');
+                word(&mut self.s, res.as_slice())
+            }
             ast::LitChar(ch) => {
                 let mut res = String::from_str("'");
                 ch.escape_default(|c| res.push_char(c));
diff --git a/src/test/compile-fail/byte-literals.rs b/src/test/compile-fail/byte-literals.rs
new file mode 100644
index 00000000000..436078fa762
--- /dev/null
+++ b/src/test/compile-fail/byte-literals.rs
@@ -0,0 +1,25 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+
+// ignore-tidy-tab
+
+static FOO: u8 = b'\f';  //~ ERROR unknown byte escape
+
+pub fn main() {
+    b'\f';  //~ ERROR unknown byte escape
+    b'\x0Z';  //~ ERROR illegal character in numeric character escape: Z
+    b'	';  //~ ERROR byte constant must be escaped
+    b''';  //~ ERROR byte constant must be escaped
+    b'é';  //~ ERROR byte constant must be ASCII
+    b'a  //~ ERROR unterminated byte constant
+}
+
+
diff --git a/src/test/compile-fail/concat.rs b/src/test/compile-fail/concat.rs
index c34e402c90b..a3dc1174424 100644
--- a/src/test/compile-fail/concat.rs
+++ b/src/test/compile-fail/concat.rs
@@ -9,6 +9,7 @@
 // except according to those terms.
 
 fn main() {
+    concat!(b'f');  //~ ERROR: cannot concatenate a binary literal
     concat!(foo);   //~ ERROR: expected a literal
     concat!(foo()); //~ ERROR: expected a literal
 }
diff --git a/src/test/run-pass/byte-literals.rs b/src/test/run-pass/byte-literals.rs
new file mode 100644
index 00000000000..560b2f0337a
--- /dev/null
+++ b/src/test/run-pass/byte-literals.rs
@@ -0,0 +1,38 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+
+static FOO: u8 = b'\xF0';
+
+pub fn main() {
+    assert_eq!(b'a', 97u8);
+    assert_eq!(b'\n', 10u8);
+    assert_eq!(b'\r', 13u8);
+    assert_eq!(b'\t', 9u8);
+    assert_eq!(b'\\', 92u8);
+    assert_eq!(b'\'', 39u8);
+    assert_eq!(b'\"', 34u8);
+    assert_eq!(b'\0', 0u8);
+    assert_eq!(b'\xF0', 240u8);
+    assert_eq!(FOO, 240u8);
+
+    // FIXME: Do we want this to be valid?
+    assert_eq!([42, ..b'\t'].as_slice(), &[42, 42, 42, 42, 42, 42, 42, 42, 42]);
+
+    match 42 {
+        b'*' => {},
+        _ => fail!()
+    }
+
+    match 100 {
+        b'a' .. b'z' => {},
+        _ => fail!()
+    }
+}
author	Simon Sapin <simon.sapin@exyr.org>	2014-06-06 16:04:04 +0100
committer	Simon Sapin <simon.sapin@exyr.org>	2014-06-17 23:41:03 +0200
commit	bccdba02960b3cd428addbc2c856065ebb81eb04 (patch)
tree	09c67823ea70b74b18af5ba2470b337968d758aa /src
parent	2fd618e77accd37426819952ad443e50bb3c9015 (diff)
download	rust-bccdba02960b3cd428addbc2c856065ebb81eb04.tar.gz rust-bccdba02960b3cd428addbc2c856065ebb81eb04.zip