Change spans to use byte offsets instead of char offsets

author: Brian Anderson <banderson@mozilla.com> 2012-11-15 19:37:29 -0800
committer: Brian Anderson <banderson@mozilla.com> 2012-11-16 12:06:44 -0800
commit: 81d20156cd44358e47e5081635f28ea31c01a757 (patch)
tree: 000e99c48bf31156f8574e9ea2d6830722503328 /src/libsyntax/parse
parent: 8cba337cce19c71c4030f26fba2b00842172b99e (diff)
download: rust-81d20156cd44358e47e5081635f28ea31c01a757.tar.gz rust-81d20156cd44358e47e5081635f28ea31c01a757.zip
5 files changed, 33 insertions, 28 deletions
diff --git a/src/libsyntax/parse/attr.rs b/src/libsyntax/parse/attr.rs
index 31528c10fe1..f0cb1d4ba3e 100644
--- a/src/libsyntax/parse/attr.rs
+++ b/src/libsyntax/parse/attr.rs
@@ -14,7 +14,7 @@ trait parser_attr {
         -> attr_or_ext;
     fn parse_outer_attributes() -> ~[ast::attribute];
     fn parse_attribute(style: ast::attr_style) -> ast::attribute;
-    fn parse_attribute_naked(style: ast::attr_style, lo: CharPos) ->
+    fn parse_attribute_naked(style: ast::attr_style, lo: BytePos) ->
         ast::attribute;
     fn parse_inner_attrs_and_next() ->
         {inner: ~[ast::attribute], next: ~[ast::attribute]};
@@ -85,7 +85,7 @@ impl Parser: parser_attr {
         return self.parse_attribute_naked(style, lo);
     }
 
-    fn parse_attribute_naked(style: ast::attr_style, lo: CharPos) ->
+    fn parse_attribute_naked(style: ast::attr_style, lo: BytePos) ->
         ast::attribute {
         self.expect(token::LBRACKET);
         let meta_item = self.parse_meta_item();
diff --git a/src/libsyntax/parse/comments.rs b/src/libsyntax/parse/comments.rs
index 92736b9f361..589a5f25ecf 100644
--- a/src/libsyntax/parse/comments.rs
+++ b/src/libsyntax/parse/comments.rs
@@ -28,7 +28,7 @@ impl cmnt_style : cmp::Eq {
     }
 }
 
-type cmnt = {style: cmnt_style, lines: ~[~str], pos: CharPos};
+type cmnt = {style: cmnt_style, lines: ~[~str], pos: BytePos};
 
 fn is_doc_comment(s: ~str) -> bool {
     s.starts_with(~"///") ||
@@ -131,7 +131,7 @@ fn consume_non_eol_whitespace(rdr: string_reader) {
 fn push_blank_line_comment(rdr: string_reader, comments: &mut ~[cmnt]) {
     debug!(">>> blank-line comment");
     let v: ~[~str] = ~[];
-    comments.push({style: blank_line, lines: v, pos: rdr.last_pos.ch});
+    comments.push({style: blank_line, lines: v, pos: rdr.last_pos.byte});
 }
 
 fn consume_whitespace_counting_blank_lines(rdr: string_reader,
@@ -148,7 +148,7 @@ fn consume_whitespace_counting_blank_lines(rdr: string_reader,
 fn read_shebang_comment(rdr: string_reader, code_to_the_left: bool,
                                             comments: &mut ~[cmnt]) {
     debug!(">>> shebang comment");
-    let p = rdr.last_pos.ch;
+    let p = rdr.last_pos.byte;
     debug!("<<< shebang comment");
     comments.push({
         style: if code_to_the_left { trailing } else { isolated },
@@ -160,7 +160,7 @@ fn read_shebang_comment(rdr: string_reader, code_to_the_left: bool,
 fn read_line_comments(rdr: string_reader, code_to_the_left: bool,
                                           comments: &mut ~[cmnt]) {
     debug!(">>> line comments");
-    let p = rdr.last_pos.ch;
+    let p = rdr.last_pos.byte;
     let mut lines: ~[~str] = ~[];
     while rdr.curr == '/' && nextch(rdr) == '/' {
         let line = read_one_line_comment(rdr);
@@ -209,7 +209,7 @@ fn trim_whitespace_prefix_and_push_line(lines: &mut ~[~str],
 fn read_block_comment(rdr: string_reader, code_to_the_left: bool,
                                           comments: &mut ~[cmnt]) {
     debug!(">>> block comment");
-    let p = rdr.last_pos.ch;
+    let p = rdr.last_pos.byte;
     let mut lines: ~[~str] = ~[];
     let mut col: CharPos = rdr.col;
     bump(rdr);
@@ -284,7 +284,7 @@ fn consume_comment(rdr: string_reader, code_to_the_left: bool,
     debug!("<<< consume comment");
 }
 
-type lit = {lit: ~str, pos: CharPos};
+type lit = {lit: ~str, pos: BytePos};
 
 fn gather_comments_and_literals(span_diagnostic: diagnostic::span_handler,
                                 path: ~str,
diff --git a/src/libsyntax/parse/common.rs b/src/libsyntax/parse/common.rs
index 66930009eb8..1811951fc0e 100644
--- a/src/libsyntax/parse/common.rs
+++ b/src/libsyntax/parse/common.rs
@@ -205,7 +205,7 @@ impl Parser: parser_common {
         if self.token == token::GT {
             self.bump();
         } else if self.token == token::BINOP(token::SHR) {
-            self.swap(token::GT, self.span.lo + CharPos(1u), self.span.hi);
+            self.swap(token::GT, self.span.lo + BytePos(1u), self.span.hi);
         } else {
             let mut s: ~str = ~"expected `";
             s += token_to_str(self.reader, token::GT);
diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs
index 1eadaec1fcd..00a0b40ab65 100644
--- a/src/libsyntax/parse/lexer.rs
+++ b/src/libsyntax/parse/lexer.rs
@@ -23,7 +23,7 @@ type string_reader = @{
     src: @~str,
     // The absolute offset within the codemap of the next character to read
     mut pos: FilePos,
-    // The absolute offset within the codemap of the last character to be read (curr)
+    // The absolute offset within the codemap of the last character read(curr)
     mut last_pos: FilePos,
     // The column of the next character to read
     mut col: CharPos,
@@ -123,9 +123,9 @@ fn string_advance_token(&&r: string_reader) {
     if is_eof(r) {
         r.peek_tok = token::EOF;
     } else {
-        let start_chpos = r.last_pos.ch;
+        let start_bytepos = r.last_pos.byte;
         r.peek_tok = next_token_inner(r);
-        r.peek_span = ast_util::mk_sp(start_chpos, r.last_pos.ch);
+        r.peek_span = ast_util::mk_sp(start_bytepos, r.last_pos.byte);
     };
 
 }
@@ -158,6 +158,11 @@ fn bump(rdr: string_reader) {
             rdr.filemap.next_line(rdr.last_pos);
             rdr.col = CharPos(0u);
         }
+
+        if byte_offset_diff > 1 {
+            rdr.filemap.record_multibyte_char(
+                BytePos(current_byte_offset), byte_offset_diff);
+        }
     } else {
         // XXX: What does this accomplish?
         if (rdr.curr != -1 as char) {
@@ -233,7 +238,7 @@ fn consume_any_line_comment(rdr: string_reader)
             bump(rdr);
             // line comments starting with "///" or "//!" are doc-comments
             if rdr.curr == '/' || rdr.curr == '!' {
-                let start_chpos = rdr.pos.ch - CharPos(2u);
+                let start_bpos = rdr.pos.byte - BytePos(2u);
                 let mut acc = ~"//";
                 while rdr.curr != '\n' && !is_eof(rdr) {
                     str::push_char(&mut acc, rdr.curr);
@@ -241,7 +246,7 @@ fn consume_any_line_comment(rdr: string_reader)
                 }
                 return Some({
                     tok: token::DOC_COMMENT(rdr.interner.intern(@acc)),
-                    sp: ast_util::mk_sp(start_chpos, rdr.pos.ch)
+                    sp: ast_util::mk_sp(start_bpos, rdr.pos.byte)
                 });
             } else {
                 while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); }
@@ -256,7 +261,7 @@ fn consume_any_line_comment(rdr: string_reader)
         if nextch(rdr) == '!' {
             let cmap = @CodeMap::new();
             (*cmap).files.push(rdr.filemap);
-            let loc = cmap.lookup_char_pos_adj(rdr.last_pos.ch);
+            let loc = cmap.lookup_char_pos_adj(rdr.last_pos.byte);
             if loc.line == 1u && loc.col == CharPos(0u) {
                 while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); }
                 return consume_whitespace_and_comments(rdr);
@@ -272,7 +277,7 @@ fn consume_block_comment(rdr: string_reader)
 
     // block comments starting with "/**" or "/*!" are doc-comments
     if rdr.curr == '*' || rdr.curr == '!' {
-        let start_chpos = rdr.pos.ch - CharPos(2u);
+        let start_bpos = rdr.pos.byte - BytePos(2u);
         let mut acc = ~"/*";
         while !(rdr.curr == '*' && nextch(rdr) == '/') && !is_eof(rdr) {
             str::push_char(&mut acc, rdr.curr);
@@ -286,7 +291,7 @@ fn consume_block_comment(rdr: string_reader)
             bump(rdr);
             return Some({
                 tok: token::DOC_COMMENT(rdr.interner.intern(@acc)),
-                sp: ast_util::mk_sp(start_chpos, rdr.pos.ch)
+                sp: ast_util::mk_sp(start_bpos, rdr.pos.byte)
             });
         }
     } else {
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
index bd314d6bcaf..74d06789ad8 100644
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -5,7 +5,7 @@ use either::{Either, Left, Right};
 use std::map::HashMap;
 use token::{can_begin_expr, is_ident, is_ident_or_path, is_plain_ident,
             INTERPOLATED, special_idents};
-use codemap::{span,FssNone, CharPos};
+use codemap::{span,FssNone, BytePos};
 use util::interner::Interner;
 use ast_util::{spanned, respan, mk_sp, ident_to_path, operator_prec};
 use lexer::reader;
@@ -244,7 +244,7 @@ impl Parser {
         self.token = next.tok;
         self.span = next.sp;
     }
-    fn swap(next: token::Token, +lo: CharPos, +hi: CharPos) {
+    fn swap(next: token::Token, +lo: BytePos, +hi: BytePos) {
         self.token = next;
         self.span = mk_sp(lo, hi);
     }
@@ -904,12 +904,12 @@ impl Parser {
         return spanned(lo, e.span.hi, {mutbl: m, ident: i, expr: e});
     }
 
-    fn mk_expr(+lo: CharPos, +hi: CharPos, +node: expr_) -> @expr {
+    fn mk_expr(+lo: BytePos, +hi: BytePos, +node: expr_) -> @expr {
         return @{id: self.get_id(), callee_id: self.get_id(),
               node: node, span: mk_sp(lo, hi)};
     }
 
-    fn mk_mac_expr(+lo: CharPos, +hi: CharPos, m: mac_) -> @expr {
+    fn mk_mac_expr(+lo: BytePos, +hi: BytePos, m: mac_) -> @expr {
         return @{id: self.get_id(),
               callee_id: self.get_id(),
               node: expr_mac({node: m, span: mk_sp(lo, hi)}),
@@ -1134,7 +1134,7 @@ impl Parser {
         return self.mk_expr(lo, hi, ex);
     }
 
-    fn parse_block_expr(lo: CharPos, blk_mode: blk_check_mode) -> @expr {
+    fn parse_block_expr(lo: BytePos, blk_mode: blk_check_mode) -> @expr {
         self.expect(token::LBRACE);
         let blk = self.parse_block_tail(lo, blk_mode);
         return self.mk_expr(blk.span.lo, blk.span.hi, expr_block(blk));
@@ -1146,7 +1146,7 @@ impl Parser {
         return self.parse_syntax_ext_naked(lo);
     }
 
-    fn parse_syntax_ext_naked(lo: CharPos) -> @expr {
+    fn parse_syntax_ext_naked(lo: BytePos) -> @expr {
         match self.token {
           token::IDENT(_, _) => (),
           _ => self.fatal(~"expected a syntax expander name")
@@ -2279,11 +2279,11 @@ impl Parser {
     // I guess that also means "already parsed the 'impure'" if
     // necessary, and this should take a qualifier.
     // some blocks start with "#{"...
-    fn parse_block_tail(lo: CharPos, s: blk_check_mode) -> blk {
+    fn parse_block_tail(lo: BytePos, s: blk_check_mode) -> blk {
         self.parse_block_tail_(lo, s, ~[])
     }
 
-    fn parse_block_tail_(lo: CharPos, s: blk_check_mode,
+    fn parse_block_tail_(lo: BytePos, s: blk_check_mode,
                          +first_item_attrs: ~[attribute]) -> blk {
         let mut stmts = ~[];
         let mut expr = None;
@@ -2581,7 +2581,7 @@ impl Parser {
         return {ident: id, tps: ty_params};
     }
 
-    fn mk_item(+lo: CharPos, +hi: CharPos, +ident: ident,
+    fn mk_item(+lo: BytePos, +hi: BytePos, +ident: ident,
                +node: item_, vis: visibility,
                +attrs: ~[attribute]) -> @item {
         return @{ident: ident,
@@ -3037,7 +3037,7 @@ impl Parser {
             items: items};
     }
 
-    fn parse_item_foreign_mod(lo: CharPos,
+    fn parse_item_foreign_mod(lo: BytePos,
                               visibility: visibility,
                               attrs: ~[attribute],
                               items_allowed: bool)
@@ -3092,7 +3092,7 @@ impl Parser {
         });
     }
 
-    fn parse_type_decl() -> {lo: CharPos, ident: ident} {
+    fn parse_type_decl() -> {lo: BytePos, ident: ident} {
         let lo = self.last_span.lo;
         let id = self.parse_ident();
         return {lo: lo, ident: id};
author	Brian Anderson <banderson@mozilla.com>	2012-11-15 19:37:29 -0800
committer	Brian Anderson <banderson@mozilla.com>	2012-11-16 12:06:44 -0800
commit	81d20156cd44358e47e5081635f28ea31c01a757 (patch)
tree	000e99c48bf31156f8574e9ea2d6830722503328 /src/libsyntax/parse
parent	8cba337cce19c71c4030f26fba2b00842172b99e (diff)
download	rust-81d20156cd44358e47e5081635f28ea31c01a757.tar.gz rust-81d20156cd44358e47e5081635f28ea31c01a757.zip