From bccdba02960b3cd428addbc2c856065ebb81eb04 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 6 Jun 2014 16:04:04 +0100 Subject: Add a b'x' byte literal of type u8. --- src/libsyntax/ast.rs | 1 + src/libsyntax/ext/concat.rs | 1 + src/libsyntax/ext/quote.rs | 6 ++++ src/libsyntax/parse/lexer/mod.rs | 68 ++++++++++++++++++++++++++++++++++++++-- src/libsyntax/parse/parser.rs | 3 +- src/libsyntax/parse/token.rs | 11 +++++++ src/libsyntax/print/pprust.rs | 6 ++++ 7 files changed, 92 insertions(+), 4 deletions(-) (limited to 'src/libsyntax') diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs index 86dd736ceea..aeafc0e306c 100644 --- a/src/libsyntax/ast.rs +++ b/src/libsyntax/ast.rs @@ -616,6 +616,7 @@ pub type Lit = Spanned<Lit_>; pub enum Lit_ { LitStr(InternedString, StrStyle), LitBinary(Rc<Vec<u8> >), + LitByte(u8), LitChar(char), LitInt(i64, IntTy), LitUint(u64, UintTy), diff --git a/src/libsyntax/ext/concat.rs b/src/libsyntax/ext/concat.rs index 83f45ca9f16..670e38327d6 100644 --- a/src/libsyntax/ext/concat.rs +++ b/src/libsyntax/ext/concat.rs @@ -47,6 +47,7 @@ pub fn expand_syntax_ext(cx: &mut base::ExtCtxt, ast::LitBool(b) => { accumulator.push_str(format!("{}", b).as_slice()); } + ast::LitByte(..) | ast::LitBinary(..) 
=> { cx.span_err(e.span, "cannot concatenate a binary literal"); } diff --git a/src/libsyntax/ext/quote.rs b/src/libsyntax/ext/quote.rs index 6514d8fa418..407715ab4da 100644 --- a/src/libsyntax/ext/quote.rs +++ b/src/libsyntax/ext/quote.rs @@ -436,6 +436,12 @@ fn mk_token(cx: &ExtCtxt, sp: Span, tok: &token::Token) -> Gc<ast::Expr> { vec!(mk_binop(cx, sp, binop))); } + LIT_BYTE(i) => { + let e_byte = cx.expr_lit(sp, ast::LitByte(i)); + + return cx.expr_call(sp, mk_token_path(cx, sp, "LIT_BYTE"), vec!(e_byte)); + } + LIT_CHAR(i) => { let e_char = cx.expr_lit(sp, ast::LitChar(i)); diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index f7eac0b323f..7e4cb195cea 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -650,10 +650,13 @@ impl<'a> StringReader<'a> { /// token, and updates the interner fn next_token_inner(&mut self) -> token::Token { let c = self.curr; - if ident_start(c) && !self.nextch_is('"') && !self.nextch_is('#') { + if ident_start(c) && match (c.unwrap(), self.nextch()) { // Note: r as in r" or r#" is part of a raw string literal, - // not an identifier, and is handled further down. - + // b as in b' is part of a byte literal. + // They are not identifiers, and are handled further down. 
+ ('r', Some('"')) | ('r', Some('#')) | ('b', Some('\'')) => false, + _ => true + } { let start = self.last_pos; while ident_continue(self.curr) { self.bump(); @@ -854,6 +857,65 @@ impl<'a> StringReader<'a> { self.bump(); // advance curr past token return token::LIT_CHAR(c2); } + 'b' => { + self.bump(); + assert!(self.curr_is('\''), "Should have been a token::IDENT"); + self.bump(); + let start = self.last_pos; + + // the eof will be picked up by the final `'` check below + let mut c2 = self.curr.unwrap_or('\x00'); + self.bump(); + + match c2 { + '\\' => { + // '\X' for some X must be a character constant: + let escaped = self.curr; + let escaped_pos = self.last_pos; + self.bump(); + match escaped { + None => {} + Some(e) => { + c2 = match e { + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + '\\' => '\\', + '\'' => '\'', + '"' => '"', + '0' => '\x00', + 'x' => self.scan_numeric_escape(2u, '\''), + c2 => { + self.err_span_char(escaped_pos, self.last_pos, + "unknown byte escape", c2); + c2 + } + } + } + } + } + '\t' | '\n' | '\r' | '\'' => { + self.err_span_char( start, self.last_pos, + "byte constant must be escaped", c2); + } + _ if c2 > '\x7F' => { + self.err_span_char( start, self.last_pos, + "byte constant must be ASCII. \ + Use a \\xHH escape for a non-ASCII byte", c2); + } + _ => {} + } + if !self.curr_is('\'') { + self.fatal_span_verbose( + // Byte offsetting here is okay because the + // character before position `start` are an + // ascii single quote and ascii 'b'. 
+ start - BytePos(2), self.last_pos, + "unterminated byte constant".to_string()); + } + self.bump(); // advance curr past token + return token::LIT_BYTE(c2 as u8); + } '"' => { let mut accum_str = String::new(); let start_bpos = self.last_pos; diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index bbe0680ef14..0bd47ede214 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -33,7 +33,7 @@ use ast::{ForeignItem, ForeignItemStatic, ForeignItemFn, ForeignMod}; use ast::{Ident, NormalFn, Inherited, Item, Item_, ItemStatic}; use ast::{ItemEnum, ItemFn, ItemForeignMod, ItemImpl}; use ast::{ItemMac, ItemMod, ItemStruct, ItemTrait, ItemTy, Lit, Lit_}; -use ast::{LitBool, LitFloat, LitFloatUnsuffixed, LitInt, LitChar}; +use ast::{LitBool, LitFloat, LitFloatUnsuffixed, LitInt, LitChar, LitByte}; use ast::{LitIntUnsuffixed, LitNil, LitStr, LitUint, Local, LocalLet}; use ast::{MutImmutable, MutMutable, Mac_, MacInvocTT, Matcher, MatchNonterminal}; use ast::{MatchSeq, MatchTok, Method, MutTy, BiMul, Mutability}; @@ -1512,6 +1512,7 @@ impl<'a> Parser<'a> { // matches token_lit = LIT_INT | ... 
pub fn lit_from_token(&mut self, tok: &token::Token) -> Lit_ { match *tok { + token::LIT_BYTE(i) => LitByte(i), token::LIT_CHAR(i) => LitChar(i), token::LIT_INT(i, it) => LitInt(i, it), token::LIT_UINT(u, ut) => LitUint(u, ut), diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index a4a022708d9..b8f13624a32 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -78,6 +78,7 @@ pub enum Token { DOLLAR, /* Literals */ + LIT_BYTE(u8), LIT_CHAR(char), LIT_INT(i64, ast::IntTy), LIT_UINT(u64, ast::UintTy), @@ -193,6 +194,14 @@ pub fn to_str(t: &Token) -> String { DOLLAR => "$".to_string(), /* Literals */ + LIT_BYTE(b) => { + let mut res = String::from_str("b'"); + (b as char).escape_default(|c| { + res.push_char(c); + }); + res.push_char('\''); + res + } LIT_CHAR(c) => { let mut res = String::from_str("'"); c.escape_default(|c| { @@ -273,6 +282,7 @@ pub fn can_begin_expr(t: &Token) -> bool { IDENT(_, _) => true, UNDERSCORE => true, TILDE => true, + LIT_BYTE(_) => true, LIT_CHAR(_) => true, LIT_INT(_, _) => true, LIT_UINT(_, _) => true, @@ -311,6 +321,7 @@ pub fn close_delimiter_for(t: &Token) -> Option<Token> { pub fn is_lit(t: &Token) -> bool { match *t { + LIT_BYTE(_) => true, LIT_CHAR(_) => true, LIT_INT(_, _) => true, LIT_UINT(_, _) => true, diff --git a/src/libsyntax/print/pprust.rs b/src/libsyntax/print/pprust.rs index badfbe7eb15..6ea2eed293e 100644 --- a/src/libsyntax/print/pprust.rs +++ b/src/libsyntax/print/pprust.rs @@ -2305,6 +2305,12 @@ impl<'a> State<'a> { } match lit.node { ast::LitStr(ref st, style) => self.print_string(st.get(), style), + ast::LitByte(byte) => { + let mut res = String::from_str("b'"); + (byte as char).escape_default(|c| res.push_char(c)); + res.push_char('\''); + word(&mut self.s, res.as_slice()) + } ast::LitChar(ch) => { let mut res = String::from_str("'"); ch.escape_default(|c| res.push_char(c)); -- cgit 1.4.1-3-g733a5