diff options
| author | bors <bors@rust-lang.org> | 2013-09-19 21:46:45 -0700 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2013-09-19 21:46:45 -0700 |
| commit | 7f826cb25ac3c1295a90bc8eb16e1cdf518fc6e1 (patch) | |
| tree | 2be92973440fb858516695621584587bbe0afe15 /src/libsyntax/parse | |
| parent | 407d179f4e0b625e6911ebaf72c87cd35935fb06 (diff) | |
| parent | 567c567b2dabbab9d74cf565b252e5024900e290 (diff) | |
| download | rust-7f826cb25ac3c1295a90bc8eb16e1cdf518fc6e1.tar.gz rust-7f826cb25ac3c1295a90bc8eb16e1cdf518fc6e1.zip | |
auto merge of #9308 : ben0x539/rust/lexer-error-spans, r=alexcrichton
Previously, the lexer calling `rdr.fatal(...)` would report the span of the last complete token, instead of a span within the erroneous token (besides one span fixed in 1ac90bb). This branch adds wrappers around `rdr.fatal(...)` that sets the span explicilty, so that all fatal errors in `libsyntax/parse/lexer.rs` now report the offending code more precisely. A number of tests try to verify that, though the `compile-fail` testing setup can only check that the spans are on the right lines, and the "unterminated string/block comment" errors can't have the line marked at all, so that's incomplete. This closes #9149. Also, the lexer errors now report the offending code in the error message, not just via the span, just like other errors do.
Diffstat (limited to 'src/libsyntax/parse')
| -rw-r--r-- | src/libsyntax/parse/lexer.rs | 117 |
1 files changed, 91 insertions, 26 deletions
diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs index 0bc9e619274..d3b0866d4a7 100644 --- a/src/libsyntax/parse/lexer.rs +++ b/src/libsyntax/parse/lexer.rs @@ -149,6 +149,46 @@ impl reader for TtReader { fn dup(@mut self) -> @mut reader { dup_tt_reader(self) as @mut reader } } +// report a lexical error spanning [`from_pos`, `to_pos`) +fn fatal_span(rdr: @mut StringReader, + from_pos: BytePos, + to_pos: BytePos, + m: ~str) + -> ! { + rdr.peek_span = codemap::mk_sp(from_pos, to_pos); + rdr.fatal(m); +} + +// report a lexical error spanning [`from_pos`, `to_pos`), appending an +// escaped character to the error message +fn fatal_span_char(rdr: @mut StringReader, + from_pos: BytePos, + to_pos: BytePos, + m: ~str, + c: char) + -> ! { + let mut m = m; + m.push_str(": "); + char::escape_default(c, |c| m.push_char(c)); + fatal_span(rdr, from_pos, to_pos, m); +} + +// report a lexical error spanning [`from_pos`, `to_pos`), appending the +// offending string to the error message +fn fatal_span_verbose(rdr: @mut StringReader, + from_pos: BytePos, + to_pos: BytePos, + m: ~str) + -> ! { + let mut m = m; + m.push_str(": "); + let s = rdr.src.slice( + byte_offset(rdr, from_pos).to_uint(), + byte_offset(rdr, to_pos).to_uint()); + m.push_str(s); + fatal_span(rdr, from_pos, to_pos, m); +} + // EFFECT: advance peek_tok and peek_span to refer to the next token. // EFFECT: update the interner, maybe. fn string_advance_token(r: @mut StringReader) { @@ -327,7 +367,8 @@ fn consume_block_comment(rdr: @mut StringReader) bump(rdr); } if is_eof(rdr) { - rdr.fatal(~"unterminated block doc-comment"); + fatal_span(rdr, start_bpos, rdr.last_pos, + ~"unterminated block doc-comment"); } else { bump(rdr); bump(rdr); @@ -344,8 +385,12 @@ fn consume_block_comment(rdr: @mut StringReader) } } } else { + let start_bpos = rdr.last_pos - BytePos(2u); loop { - if is_eof(rdr) { rdr.fatal(~"unterminated block comment"); } + if is_eof(rdr) { + fatal_span(rdr, start_bpos, rdr.last_pos, + ~"unterminated block comment"); + } if rdr.curr == '*' && nextch(rdr) == '/' { bump(rdr); bump(rdr); @@ -361,7 +406,7 @@ fn consume_block_comment(rdr: @mut StringReader) if res.is_some() { res } else { consume_whitespace_and_comments(rdr) } } -fn scan_exponent(rdr: @mut StringReader) -> Option<~str> { +fn scan_exponent(rdr: @mut StringReader, start_bpos: BytePos) -> Option<~str> { let mut c = rdr.curr; let mut rslt = ~""; if c == 'e' || c == 'E' { @@ -375,7 +420,10 @@ fn scan_exponent(rdr: @mut StringReader) -> Option<~str> { let exponent = scan_digits(rdr, 10u); if exponent.len() > 0u { return Some(rslt + exponent); - } else { rdr.fatal(~"scan_exponent: bad fp literal"); } + } else { + fatal_span(rdr, start_bpos, rdr.last_pos, + ~"scan_exponent: bad fp literal"); + } } else { return None::<~str>; } } @@ -399,6 +447,7 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token { let mut base = 10u; let mut c = c; let mut n = nextch(rdr); + let start_bpos = rdr.last_pos; if c == '0' && n == 'x' { bump(rdr); bump(rdr); @@ -442,11 +491,13 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token { else { either::Right(ast::ty_u64) }; } if num_str.len() == 0u { - rdr.fatal(~"no valid digits found for number"); + fatal_span(rdr, start_bpos, rdr.last_pos, + ~"no valid digits found for number"); } let parsed = match from_str_radix::<u64>(num_str, base as uint) { Some(p) => p, - None => rdr.fatal(~"int literal is too large") + None => fatal_span(rdr, start_bpos, rdr.last_pos, + ~"int literal is too large") }; match tp { @@ -464,12 +515,14 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token { } if is_float { match base { - 16u => rdr.fatal(~"hexadecimal float literal is not supported"), - 2u => rdr.fatal(~"binary float literal is not supported"), + 16u => fatal_span(rdr, start_bpos, rdr.last_pos, + ~"hexadecimal float literal is not supported"), + 2u => fatal_span(rdr, start_bpos, rdr.last_pos, + ~"binary float literal is not supported"), _ => () } } - match scan_exponent(rdr) { + match scan_exponent(rdr, start_bpos) { Some(ref s) => { is_float = true; num_str.push_str(*s); @@ -507,11 +560,13 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token { return token::LIT_FLOAT_UNSUFFIXED(str_to_ident(num_str)); } else { if num_str.len() == 0u { - rdr.fatal(~"no valid digits found for number"); + fatal_span(rdr, start_bpos, rdr.last_pos, + ~"no valid digits found for number"); } let parsed = match from_str_radix::<u64>(num_str, base as uint) { Some(p) => p, - None => rdr.fatal(~"int literal is too large") + None => fatal_span(rdr, start_bpos, rdr.last_pos, + ~"int literal is too large") }; debug!("lexing %s as an unsuffixed integer literal", @@ -523,19 +578,23 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token { fn scan_numeric_escape(rdr: @mut StringReader, n_hex_digits: uint) -> char { let mut accum_int = 0; let mut i = n_hex_digits; + let start_bpos = rdr.last_pos; while i != 0u { let n = rdr.curr; - bump(rdr); if !is_hex_digit(n) { - rdr.fatal(fmt!("illegal numeric character escape: %d", n as int)); + fatal_span_char(rdr, rdr.last_pos, rdr.pos, + ~"illegal character in numeric character escape", + n); } + bump(rdr); accum_int *= 16; accum_int += hex_digit_val(n); i -= 1u; } match char::from_u32(accum_int as u32) { Some(x) => x, - None => rdr.fatal(fmt!("illegal numeric character escape")) + None => fatal_span(rdr, start_bpos, rdr.last_pos, + ~"illegal numeric character escape") } } @@ -691,6 +750,7 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token { if c2 == '\\' { // '\X' for some X must be a character constant: let escaped = rdr.curr; + let escaped_pos = rdr.last_pos; bump(rdr); match escaped { 'n' => { c2 = '\n'; } @@ -704,25 +764,31 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token { 'u' => { c2 = scan_numeric_escape(rdr, 4u); } 'U' => { c2 = scan_numeric_escape(rdr, 8u); } c2 => { - rdr.fatal(fmt!("unknown character escape: %d", c2 as int)); + fatal_span_char(rdr, escaped_pos, rdr.last_pos, + ~"unknown character escape", c2); } } } if rdr.curr != '\'' { - rdr.fatal(~"unterminated character constant"); + fatal_span_verbose(rdr, + // Byte offsetting here is okay because the + // character before position `start` is an + // ascii single quote. + start - BytePos(1u), + rdr.last_pos, + ~"unterminated character constant"); } bump(rdr); // advance curr past token return token::LIT_CHAR(c2 as u32); } '"' => { let mut accum_str = ~""; - let n = rdr.last_pos; + let start_bpos = rdr.last_pos; bump(rdr); while rdr.curr != '"' { if is_eof(rdr) { - do with_str_from(rdr, n) |s| { - rdr.fatal(fmt!("unterminated double quote string: %s", s)); - } + fatal_span(rdr, start_bpos, rdr.last_pos, + ~"unterminated double quote string"); } let ch = rdr.curr; @@ -730,6 +796,7 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token { match ch { '\\' => { let escaped = rdr.curr; + let escaped_pos = rdr.last_pos; bump(rdr); match escaped { 'n' => accum_str.push_char('\n'), @@ -750,7 +817,8 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token { accum_str.push_char(scan_numeric_escape(rdr, 8u)); } c2 => { - rdr.fatal(fmt!("unknown string escape: %d", c2 as int)); + fatal_span_char(rdr, escaped_pos, rdr.last_pos, + ~"unknown string escape", c2); } } } @@ -786,11 +854,8 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token { '^' => { return binop(rdr, token::CARET); } '%' => { return binop(rdr, token::PERCENT); } c => { - // So the error span points to the unrecognized character - rdr.peek_span = codemap::mk_sp(rdr.last_pos, rdr.pos); - let mut cs = ~""; - char::escape_default(c, |c| cs.push_char(c)); - rdr.fatal(fmt!("unknown start of token: %s", cs)); + fatal_span_char(rdr, rdr.last_pos, rdr.pos, + ~"unknown start of token", c); } } } |
