auto merge of #7005 : dotdash/rust/fix_get_str_from, r=bstrie

As the comment in `get_str_from` said, the subtraction is bogus for multibyte characters. Fortunately, we can just use `last_pos` instead of `pos` to get the correct position without any subtraction hackery.
author: bors <bors@rust-lang.org> 2013-06-08 00:34:22 -0700
committer: bors <bors@rust-lang.org> 2013-06-08 00:34:22 -0700
commit: 1d06aea0b76cc3e466a41cb9b2a6dcde7c197fb2 (patch)
tree: e85567117466fe02ae7a7a114bfc0b00d514c1b6 /src/libsyntax/parse
parent: 1cf57f7b673ca94a32ec82c31b2bce79a5ca9394 (diff)
parent: 43cae880790b532e5b3e2fb99c64a4ec78030652 (diff)
download: rust-1d06aea0b76cc3e466a41cb9b2a6dcde7c197fb2.tar.gz
rust-1d06aea0b76cc3e466a41cb9b2a6dcde7c197fb2.zip
2 files changed, 8 insertions, 10 deletions
diff --git a/src/libsyntax/parse/comments.rs b/src/libsyntax/parse/comments.rs
index dab8dd3b4b6..54fba29a19a 100644
--- a/src/libsyntax/parse/comments.rs
+++ b/src/libsyntax/parse/comments.rs
@@ -347,7 +347,7 @@ pub fn gather_comments_and_literals(span_diagnostic:
         }
 
 
-        let bstart = rdr.pos;
+        let bstart = rdr.last_pos;
         rdr.next_token();
         //discard, and look ahead; we're working with internal state
         let TokenAndSpan {tok: tok, sp: sp} = rdr.peek();
diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs
index 84700f052c9..172e7f15458 100644
--- a/src/libsyntax/parse/lexer.rs
+++ b/src/libsyntax/parse/lexer.rs
@@ -161,22 +161,20 @@ fn string_advance_token(r: @mut StringReader) {
     }
 }
 
-fn byte_offset(rdr: &StringReader) -> BytePos {
-    (rdr.pos - rdr.filemap.start_pos)
+fn byte_offset(rdr: &StringReader, pos: BytePos) -> BytePos {
+    (pos - rdr.filemap.start_pos)
 }
 
 pub fn get_str_from(rdr: @mut StringReader, start: BytePos) -> ~str {
-    // I'm pretty skeptical about this subtraction. What if there's a
-    // multi-byte character before the mark?
-    return str::slice(*rdr.src, start.to_uint() - 1u,
-                      byte_offset(rdr).to_uint() - 1u).to_owned();
+    return str::slice(*rdr.src, start.to_uint(),
+                      byte_offset(rdr, rdr.last_pos).to_uint()).to_owned();
 }
 
 // EFFECT: advance the StringReader by one character. If a newline is
 // discovered, add it to the FileMap's list of line start offsets.
 pub fn bump(rdr: &mut StringReader) {
     rdr.last_pos = rdr.pos;
-    let current_byte_offset = byte_offset(rdr).to_uint();;
+    let current_byte_offset = byte_offset(rdr, rdr.pos).to_uint();
     if current_byte_offset < (*rdr.src).len() {
         assert!(rdr.curr != -1 as char);
         let last_char = rdr.curr;
@@ -202,7 +200,7 @@ pub fn is_eof(rdr: @mut StringReader) -> bool {
     rdr.curr == -1 as char
 }
 pub fn nextch(rdr: @mut StringReader) -> char {
-    let offset = byte_offset(rdr).to_uint();
+    let offset = byte_offset(rdr, rdr.pos).to_uint();
     if offset < (*rdr.src).len() {
         return str::char_at(*rdr.src, offset);
     } else { return -1 as char; }
@@ -692,7 +690,7 @@ fn next_token_inner(rdr: @mut StringReader) -> token::Token {
         return token::LIT_INT(c2 as i64, ast::ty_char);
       }
       '"' => {
-        let n = byte_offset(rdr);
+        let n = byte_offset(rdr, rdr.last_pos);
         bump(rdr);
         while rdr.curr != '"' {
             if is_eof(rdr) {
author	bors <bors@rust-lang.org>	2013-06-08 00:34:22 -0700
committer	bors <bors@rust-lang.org>	2013-06-08 00:34:22 -0700
commit	1d06aea0b76cc3e466a41cb9b2a6dcde7c197fb2 (patch)
tree	e85567117466fe02ae7a7a114bfc0b00d514c1b6 /src/libsyntax/parse
parent	1cf57f7b673ca94a32ec82c31b2bce79a5ca9394 (diff)
parent	43cae880790b532e5b3e2fb99c64a4ec78030652 (diff)
download	rust-1d06aea0b76cc3e466a41cb9b2a6dcde7c197fb2.tar.gz rust-1d06aea0b76cc3e466a41cb9b2a6dcde7c197fb2.zip