| author | Björn Steinbrink <bsteinbr@gmail.com> | 2013-06-07 20:00:37 +0200 |
|---|---|---|
| committer | Björn Steinbrink <bsteinbr@gmail.com> | 2013-06-08 03:24:47 +0200 |
| commit | 43cae880790b532e5b3e2fb99c64a4ec78030652 | (patch) |
| tree | 75071fc72f9db7179be51239702a2c810a76cee5 | /src/libsyntax/parse |
| parent | b8cf2f8056d29423430b723acd70e525d1249a32 | (diff) |
| download | rust-43cae880790b532e5b3e2fb99c64a4ec78030652.tar.gz, rust-43cae880790b532e5b3e2fb99c64a4ec78030652.zip | |
Lexer: Fix offset handling in get_str_from()
As the removed comment in get_str_from() noted, subtracting one byte to step back over the previously consumed character is bogus when that character is multibyte. Fortunately, we can just use last_pos instead of pos: it already points at the start of the current character, so the correct position falls out without any subtraction hackery.
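The old code's off-by-one correction assumed the character being stepped over was exactly one byte wide. A minimal sketch in present-day Rust of why that assumption breaks on multibyte input; the variable names `pos` and `last_pos` mirror the reader fields, but the snippet itself is purely illustrative:

```rust
fn main() {
    // 'é' occupies two bytes (0xC3 0xA9) in UTF-8.
    let src = "xé+";

    // After the reader consumes 'é':
    //   pos      -> byte offset just past 'é' (i.e. the start of '+')
    //   last_pos -> byte offset of 'é' itself
    let pos = 3;
    let last_pos = 1;

    // Old scheme: "step back one byte" to undo the consumed character.
    // pos - 1 == 2 lands in the middle of 'é', which is not a character
    // boundary, so slicing there would panic.
    assert!(!src.is_char_boundary(pos - 1));

    // New scheme: last_pos already names the first byte of the consumed
    // character, so it is always a valid slice boundary.
    assert!(src.is_char_boundary(last_pos));
    assert_eq!(&src[..last_pos], "x");
}
```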
Diffstat (limited to 'src/libsyntax/parse')
| -rw-r--r-- | src/libsyntax/parse/comments.rs | 2 |
| -rw-r--r-- | src/libsyntax/parse/lexer.rs | 16 |
2 files changed, 8 insertions, 10 deletions
```diff
diff --git a/src/libsyntax/parse/comments.rs b/src/libsyntax/parse/comments.rs
index dab8dd3b4b6..54fba29a19a 100644
--- a/src/libsyntax/parse/comments.rs
+++ b/src/libsyntax/parse/comments.rs
@@ -347,7 +347,7 @@ pub fn gather_comments_and_literals(span_diagnostic:
         }
-        let bstart = rdr.pos;
+        let bstart = rdr.last_pos;
         rdr.next_token(); //discard, and look ahead; we're working with internal state
         let TokenAndSpan {tok: tok, sp: sp} = rdr.peek();
diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs
index 84700f052c9..172e7f15458 100644
--- a/src/libsyntax/parse/lexer.rs
+++ b/src/libsyntax/parse/lexer.rs
@@ -161,22 +161,20 @@ fn string_advance_token(r: @mut StringReader) {
     }
 }
 
-fn byte_offset(rdr: &StringReader) -> BytePos {
-    (rdr.pos - rdr.filemap.start_pos)
+fn byte_offset(rdr: &StringReader, pos: BytePos) -> BytePos {
+    (pos - rdr.filemap.start_pos)
 }
 
 pub fn get_str_from(rdr: @mut StringReader, start: BytePos) -> ~str {
-    // I'm pretty skeptical about this subtraction. What if there's a
-    // multi-byte character before the mark?
-    return str::slice(*rdr.src, start.to_uint() - 1u,
-                      byte_offset(rdr).to_uint() - 1u).to_owned();
+    return str::slice(*rdr.src, start.to_uint(),
+                      byte_offset(rdr, rdr.last_pos).to_uint()).to_owned();
 }
 
 // EFFECT: advance the StringReader by one character. If a newline is
 // discovered, add it to the FileMap's list of line start offsets.
 pub fn bump(rdr: &mut StringReader) {
     rdr.last_pos = rdr.pos;
-    let current_byte_offset = byte_offset(rdr).to_uint();;
+    let current_byte_offset = byte_offset(rdr, rdr.pos).to_uint();
     if current_byte_offset < (*rdr.src).len() {
         assert!(rdr.curr != -1 as char);
         let last_char = rdr.curr;
@@ -202,7 +200,7 @@ pub fn is_eof(rdr: @mut StringReader) -> bool { rdr.curr == -1 as char }
 
 pub fn nextch(rdr: @mut StringReader) -> char {
-    let offset = byte_offset(rdr).to_uint();
+    let offset = byte_offset(rdr, rdr.pos).to_uint();
     if offset < (*rdr.src).len() { return str::char_at(*rdr.src, offset); }
     else { return -1 as char; }
@@ -692,7 +690,7 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
         return token::LIT_INT(c2 as i64, ast::ty_char);
       }
       '"' => {
-        let n = byte_offset(rdr);
+        let n = byte_offset(rdr, rdr.last_pos);
         bump(rdr);
         while rdr.curr != '"' {
             if is_eof(rdr) {
```
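For context, the patch relies on the reader invariant that, after each bump, pos points one past the current character while last_pos points at its first byte, so slicing up to last_pos needs no correction even across multibyte characters. Below is a hedged sketch of that invariant in modern Rust; the real 2013 lexer used @mut StringReader, ~str, and str::slice, and the hypothetical Reader type only mirrors the relevant fields:

```rust
/// Illustrative model of the two offsets the lexer tracks; field names echo
/// StringReader, but this is a sketch, not the 2013 type.
struct Reader<'a> {
    src: &'a str,
    pos: usize,      // byte offset just past the current character
    last_pos: usize, // byte offset of the current character itself
    curr: Option<char>,
}

impl<'a> Reader<'a> {
    fn new(src: &'a str) -> Self {
        let mut r = Reader { src, pos: 0, last_pos: 0, curr: None };
        r.bump();
        r
    }

    /// Advance by one character; a multibyte character moves `pos` by more
    /// than one byte, but `last_pos` always lands on a character boundary.
    fn bump(&mut self) {
        self.last_pos = self.pos;
        match self.src[self.pos..].chars().next() {
            Some(c) => {
                self.curr = Some(c);
                self.pos += c.len_utf8();
            }
            None => self.curr = None,
        }
    }

    /// The fixed get_str_from: slice straight up to `last_pos`,
    /// with no "minus one byte" correction.
    fn get_str_from(&self, start: usize) -> &'a str {
        &self.src[start..self.last_pos]
    }
}

fn main() {
    let mut r = Reader::new("\"héllo\"x");
    assert_eq!(r.curr, Some('"'));

    r.bump();               // step past the opening quote
    let start = r.last_pos; // like byte_offset(rdr, rdr.last_pos)

    while r.curr != Some('"') {
        r.bump();
    }
    // The slice ends exactly at the closing quote, even though 'é' is two bytes.
    assert_eq!(r.get_str_from(start), "héllo");
}
```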
