auto merge of #9308 : ben0x539/rust/lexer-error-spans, r=alexcrichton

Previously, when the lexer called `rdr.fatal(...)`, it would report the span of the last complete token instead of a span within the erroneous token (besides one span fixed in 1ac90bb). This branch adds wrappers around `rdr.fatal(...)` that set the span explicitly, so that all fatal errors in `libsyntax/parse/lexer.rs` now report the offending code more precisely. A number of tests try to verify that, though the `compile-fail` testing setup can only check that the spans are on the right lines, and the "unterminated string/block comment" errors can't have the line marked at all, so the test coverage is incomplete. This closes #9149. Also, the lexer errors now report the offending code in the error message, not just via the span, just like other errors do.
author: bors <bors@rust-lang.org> 2013-09-19 21:46:45 -0700
committer: bors <bors@rust-lang.org> 2013-09-19 21:46:45 -0700
commit: 7f826cb25ac3c1295a90bc8eb16e1cdf518fc6e1 (patch)
tree: 2be92973440fb858516695621584587bbe0afe15 /src/libsyntax/parse
parent: 407d179f4e0b625e6911ebaf72c87cd35935fb06 (diff)
parent: 567c567b2dabbab9d74cf565b252e5024900e290 (diff)
download: rust-7f826cb25ac3c1295a90bc8eb16e1cdf518fc6e1.tar.gz
rust-7f826cb25ac3c1295a90bc8eb16e1cdf518fc6e1.zip
1 files changed, 91 insertions, 26 deletions
diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs
index 0bc9e619274..d3b0866d4a7 100644
--- a/src/libsyntax/parse/lexer.rs
+++ b/src/libsyntax/parse/lexer.rs
@@ -149,6 +149,46 @@ impl reader for TtReader {
     fn dup(@mut self) -> @mut reader { dup_tt_reader(self) as @mut reader }
 }
 
+// report a lexical error spanning [`from_pos`, `to_pos`)
+fn fatal_span(rdr: @mut StringReader,
+              from_pos: BytePos,
+              to_pos: BytePos,
+              m: ~str)
+           -> ! {
+    rdr.peek_span = codemap::mk_sp(from_pos, to_pos);
+    rdr.fatal(m);
+}
+
+// report a lexical error spanning [`from_pos`, `to_pos`), appending an
+// escaped character to the error message
+fn fatal_span_char(rdr: @mut StringReader,
+                   from_pos: BytePos,
+                   to_pos: BytePos,
+                   m: ~str,
+                   c: char)
+                -> ! {
+    let mut m = m;
+    m.push_str(": ");
+    char::escape_default(c, |c| m.push_char(c));
+    fatal_span(rdr, from_pos, to_pos, m);
+}
+
+// report a lexical error spanning [`from_pos`, `to_pos`), appending the
+// offending string to the error message
+fn fatal_span_verbose(rdr: @mut StringReader,
+                      from_pos: BytePos,
+                      to_pos: BytePos,
+                      m: ~str)
+                   -> ! {
+    let mut m = m;
+    m.push_str(": ");
+    let s = rdr.src.slice(
+                  byte_offset(rdr, from_pos).to_uint(),
+                  byte_offset(rdr, to_pos).to_uint());
+    m.push_str(s);
+    fatal_span(rdr, from_pos, to_pos, m);
+}
+
 // EFFECT: advance peek_tok and peek_span to refer to the next token.
 // EFFECT: update the interner, maybe.
 fn string_advance_token(r: @mut StringReader) {
@@ -327,7 +367,8 @@ fn consume_block_comment(rdr: @mut StringReader)
             bump(rdr);
         }
         if is_eof(rdr) {
-            rdr.fatal(~"unterminated block doc-comment");
+            fatal_span(rdr, start_bpos, rdr.last_pos,
+                       ~"unterminated block doc-comment");
         } else {
             bump(rdr);
             bump(rdr);
@@ -344,8 +385,12 @@ fn consume_block_comment(rdr: @mut StringReader)
             }
         }
     } else {
+        let start_bpos = rdr.last_pos - BytePos(2u);
         loop {
-            if is_eof(rdr) { rdr.fatal(~"unterminated block comment"); }
+            if is_eof(rdr) {
+                fatal_span(rdr, start_bpos, rdr.last_pos,
+                           ~"unterminated block comment");
+            }
             if rdr.curr == '*' && nextch(rdr) == '/' {
                 bump(rdr);
                 bump(rdr);
@@ -361,7 +406,7 @@ fn consume_block_comment(rdr: @mut StringReader)
    if res.is_some() { res } else { consume_whitespace_and_comments(rdr) }
 }
 
-fn scan_exponent(rdr: @mut StringReader) -> Option<~str> {
+fn scan_exponent(rdr: @mut StringReader, start_bpos: BytePos) -> Option<~str> {
     let mut c = rdr.curr;
     let mut rslt = ~"";
     if c == 'e' || c == 'E' {
@@ -375,7 +420,10 @@ fn scan_exponent(rdr: @mut StringReader) -> Option<~str> {
         let exponent = scan_digits(rdr, 10u);
         if exponent.len() > 0u {
             return Some(rslt + exponent);
-        } else { rdr.fatal(~"scan_exponent: bad fp literal"); }
+        } else {
+            fatal_span(rdr, start_bpos, rdr.last_pos,
+                       ~"scan_exponent: bad fp literal");
+        }
     } else { return None::<~str>; }
 }
 
@@ -399,6 +447,7 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
     let mut base = 10u;
     let mut c = c;
     let mut n = nextch(rdr);
+    let start_bpos = rdr.last_pos;
     if c == '0' && n == 'x' {
         bump(rdr);
         bump(rdr);
@@ -442,11 +491,13 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
                       else { either::Right(ast::ty_u64) };
         }
         if num_str.len() == 0u {
-            rdr.fatal(~"no valid digits found for number");
+            fatal_span(rdr, start_bpos, rdr.last_pos,
+                       ~"no valid digits found for number");
         }
         let parsed = match from_str_radix::<u64>(num_str, base as uint) {
             Some(p) => p,
-            None => rdr.fatal(~"int literal is too large")
+            None => fatal_span(rdr, start_bpos, rdr.last_pos,
+                               ~"int literal is too large")
         };
 
         match tp {
@@ -464,12 +515,14 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
     }
     if is_float {
         match base {
-          16u => rdr.fatal(~"hexadecimal float literal is not supported"),
-          2u => rdr.fatal(~"binary float literal is not supported"),
+          16u => fatal_span(rdr, start_bpos, rdr.last_pos,
+                            ~"hexadecimal float literal is not supported"),
+          2u => fatal_span(rdr, start_bpos, rdr.last_pos,
+                           ~"binary float literal is not supported"),
           _ => ()
         }
     }
-    match scan_exponent(rdr) {
+    match scan_exponent(rdr, start_bpos) {
       Some(ref s) => {
         is_float = true;
         num_str.push_str(*s);
@@ -507,11 +560,13 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
         return token::LIT_FLOAT_UNSUFFIXED(str_to_ident(num_str));
     } else {
         if num_str.len() == 0u {
-            rdr.fatal(~"no valid digits found for number");
+            fatal_span(rdr, start_bpos, rdr.last_pos,
+                       ~"no valid digits found for number");
         }
         let parsed = match from_str_radix::<u64>(num_str, base as uint) {
             Some(p) => p,
-            None => rdr.fatal(~"int literal is too large")
+            None => fatal_span(rdr, start_bpos, rdr.last_pos,
+                               ~"int literal is too large")
         };
 
         debug!("lexing %s as an unsuffixed integer literal",
@@ -523,19 +578,23 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
 fn scan_numeric_escape(rdr: @mut StringReader, n_hex_digits: uint) -> char {
     let mut accum_int = 0;
     let mut i = n_hex_digits;
+    let start_bpos = rdr.last_pos;
     while i != 0u {
         let n = rdr.curr;
-        bump(rdr);
         if !is_hex_digit(n) {
-            rdr.fatal(fmt!("illegal numeric character escape: %d", n as int));
+            fatal_span_char(rdr, rdr.last_pos, rdr.pos,
+                            ~"illegal character in numeric character escape",
+                            n);
         }
+        bump(rdr);
         accum_int *= 16;
         accum_int += hex_digit_val(n);
         i -= 1u;
     }
     match char::from_u32(accum_int as u32) {
         Some(x) => x,
-        None => rdr.fatal(fmt!("illegal numeric character escape"))
+        None => fatal_span(rdr, start_bpos, rdr.last_pos,
+                           ~"illegal numeric character escape")
     }
 }
 
@@ -691,6 +750,7 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
         if c2 == '\\' {
             // '\X' for some X must be a character constant:
             let escaped = rdr.curr;
+            let escaped_pos = rdr.last_pos;
             bump(rdr);
             match escaped {
               'n' => { c2 = '\n'; }
@@ -704,25 +764,31 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
               'u' => { c2 = scan_numeric_escape(rdr, 4u); }
               'U' => { c2 = scan_numeric_escape(rdr, 8u); }
               c2 => {
-                rdr.fatal(fmt!("unknown character escape: %d", c2 as int));
+                fatal_span_char(rdr, escaped_pos, rdr.last_pos,
+                                ~"unknown character escape", c2);
               }
             }
         }
         if rdr.curr != '\'' {
-            rdr.fatal(~"unterminated character constant");
+            fatal_span_verbose(rdr,
+                               // Byte offsetting here is okay because the
+                               // character before position `start` is an
+                               // ascii single quote.
+                               start - BytePos(1u),
+                               rdr.last_pos,
+                               ~"unterminated character constant");
         }
         bump(rdr); // advance curr past token
         return token::LIT_CHAR(c2 as u32);
       }
       '"' => {
         let mut accum_str = ~"";
-        let n = rdr.last_pos;
+        let start_bpos = rdr.last_pos;
         bump(rdr);
         while rdr.curr != '"' {
             if is_eof(rdr) {
-                do with_str_from(rdr, n) |s| {
-                    rdr.fatal(fmt!("unterminated double quote string: %s", s));
-                }
+                fatal_span(rdr, start_bpos, rdr.last_pos,
+                           ~"unterminated double quote string");
             }
 
             let ch = rdr.curr;
@@ -730,6 +796,7 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
             match ch {
               '\\' => {
                 let escaped = rdr.curr;
+                let escaped_pos = rdr.last_pos;
                 bump(rdr);
                 match escaped {
                   'n' => accum_str.push_char('\n'),
@@ -750,7 +817,8 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
                     accum_str.push_char(scan_numeric_escape(rdr, 8u));
                   }
                   c2 => {
-                    rdr.fatal(fmt!("unknown string escape: %d", c2 as int));
+                    fatal_span_char(rdr, escaped_pos, rdr.last_pos,
+                                    ~"unknown string escape", c2);
                   }
                 }
               }
@@ -786,11 +854,8 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
       '^' => { return binop(rdr, token::CARET); }
       '%' => { return binop(rdr, token::PERCENT); }
       c => {
-          // So the error span points to the unrecognized character
-          rdr.peek_span = codemap::mk_sp(rdr.last_pos, rdr.pos);
-          let mut cs = ~"";
-          char::escape_default(c, |c| cs.push_char(c));
-          rdr.fatal(fmt!("unknown start of token: %s", cs));
+          fatal_span_char(rdr, rdr.last_pos, rdr.pos,
+                          ~"unknown start of token", c);
       }
     }
 }
author	bors <bors@rust-lang.org>	2013-09-19 21:46:45 -0700
committer	bors <bors@rust-lang.org>	2013-09-19 21:46:45 -0700
commit	7f826cb25ac3c1295a90bc8eb16e1cdf518fc6e1 (patch)
tree	2be92973440fb858516695621584587bbe0afe15 /src/libsyntax/parse
parent	407d179f4e0b625e6911ebaf72c87cd35935fb06 (diff)
parent	567c567b2dabbab9d74cf565b252e5024900e290 (diff)
download	rust-7f826cb25ac3c1295a90bc8eb16e1cdf518fc6e1.tar.gz rust-7f826cb25ac3c1295a90bc8eb16e1cdf518fc6e1.zip