Add types for character position and byte position in the codemap

author: Brian Anderson <banderson@mozilla.com> 2012-11-12 19:32:48 -0800
committer: Brian Anderson <banderson@mozilla.com> 2012-11-14 15:18:29 -0800
commit: f67bfe97389a256fc95216c29a2b8a066ee16a2c (patch)
tree: a638c4c480ceb60138a39a05c5b04967ac814eb6 /src/libsyntax/parse
parent: 9ecf86343a136c71cbb2bb8da9bfd1734fec37f4 (diff)
download: rust-f67bfe97389a256fc95216c29a2b8a066ee16a2c.tar.gz rust-f67bfe97389a256fc95216c29a2b8a066ee16a2c.zip
5 files changed, 51 insertions, 46 deletions
diff --git a/src/libsyntax/parse/attr.rs b/src/libsyntax/parse/attr.rs
index 42101a431d6..31528c10fe1 100644
--- a/src/libsyntax/parse/attr.rs
+++ b/src/libsyntax/parse/attr.rs
@@ -14,7 +14,7 @@ trait parser_attr {
         -> attr_or_ext;
     fn parse_outer_attributes() -> ~[ast::attribute];
     fn parse_attribute(style: ast::attr_style) -> ast::attribute;
-    fn parse_attribute_naked(style: ast::attr_style, lo: uint) ->
+    fn parse_attribute_naked(style: ast::attr_style, lo: CharPos) ->
         ast::attribute;
     fn parse_inner_attrs_and_next() ->
         {inner: ~[ast::attribute], next: ~[ast::attribute]};
@@ -85,7 +85,7 @@ impl Parser: parser_attr {
         return self.parse_attribute_naked(style, lo);
     }
 
-    fn parse_attribute_naked(style: ast::attr_style, lo: uint) ->
+    fn parse_attribute_naked(style: ast::attr_style, lo: CharPos) ->
         ast::attribute {
         self.expect(token::LBRACKET);
         let meta_item = self.parse_meta_item();
diff --git a/src/libsyntax/parse/comments.rs b/src/libsyntax/parse/comments.rs
index a2f73fd189c..06b042a468a 100644
--- a/src/libsyntax/parse/comments.rs
+++ b/src/libsyntax/parse/comments.rs
@@ -3,7 +3,7 @@ use io::ReaderUtil;
 use util::interner;
 use lexer::{string_reader, bump, is_eof, nextch,
                is_whitespace, get_str_from, reader};
-use codemap::FileMap;
+use codemap::{FileMap, CharPos};
 
 export cmnt;
 export lit;
@@ -28,7 +28,7 @@ impl cmnt_style : cmp::Eq {
     }
 }
 
-type cmnt = {style: cmnt_style, lines: ~[~str], pos: uint};
+type cmnt = {style: cmnt_style, lines: ~[~str], pos: CharPos};
 
 fn is_doc_comment(s: ~str) -> bool {
     s.starts_with(~"///") ||
@@ -137,7 +137,7 @@ fn push_blank_line_comment(rdr: string_reader, comments: &mut ~[cmnt]) {
 fn consume_whitespace_counting_blank_lines(rdr: string_reader,
                                            comments: &mut ~[cmnt]) {
     while is_whitespace(rdr.curr) && !is_eof(rdr) {
-        if rdr.col == 0u && rdr.curr == '\n' {
+        if rdr.col == CharPos(0u) && rdr.curr == '\n' {
             push_blank_line_comment(rdr, comments);
         }
         bump(rdr);
@@ -181,6 +181,8 @@ fn read_line_comments(rdr: string_reader, code_to_the_left: bool,
     }
 }
 
+// FIXME #3961: This is not the right way to convert string byte
+// offsets to characters.
 fn all_whitespace(s: ~str, begin: uint, end: uint) -> bool {
     let mut i: uint = begin;
     while i != end {
@@ -190,9 +192,11 @@ fn all_whitespace(s: ~str, begin: uint, end: uint) -> bool {
 }
 
 fn trim_whitespace_prefix_and_push_line(lines: &mut ~[~str],
-                                        s: ~str, col: uint) {
+                                        s: ~str, col: CharPos) {
     let mut s1;
     let len = str::len(s);
+    // FIXME #3961: Doing bytewise comparison and slicing with CharPos
+    let col = col.to_uint();
     if all_whitespace(s, 0u, uint::min(len, col)) {
         if col < len {
             s1 = str::slice(s, col, len);
@@ -207,7 +211,7 @@ fn read_block_comment(rdr: string_reader, code_to_the_left: bool,
     debug!(">>> block comment");
     let p = rdr.chpos;
     let mut lines: ~[~str] = ~[];
-    let mut col: uint = rdr.col;
+    let mut col: CharPos = rdr.col;
     bump(rdr);
     bump(rdr);
 
@@ -280,7 +284,7 @@ fn consume_comment(rdr: string_reader, code_to_the_left: bool,
     debug!("<<< consume comment");
 }
 
-type lit = {lit: ~str, pos: uint};
+type lit = {lit: ~str, pos: CharPos};
 
 fn gather_comments_and_literals(span_diagnostic: diagnostic::span_handler,
                                 path: ~str,
@@ -289,7 +293,8 @@ fn gather_comments_and_literals(span_diagnostic: diagnostic::span_handler,
     let src = @str::from_bytes(srdr.read_whole_stream());
     let itr = parse::token::mk_fake_ident_interner();
     let rdr = lexer::new_low_level_string_reader
-        (span_diagnostic, @FileMap::new(path, src, 0u, 0u), itr);
+        (span_diagnostic, @FileMap::new(path, src,
+                                        CharPos(0u), BytePos(0u)), itr);
 
     let mut comments: ~[cmnt] = ~[];
     let mut literals: ~[lit] = ~[];
diff --git a/src/libsyntax/parse/common.rs b/src/libsyntax/parse/common.rs
index 50c22c08f4f..66930009eb8 100644
--- a/src/libsyntax/parse/common.rs
+++ b/src/libsyntax/parse/common.rs
@@ -205,7 +205,7 @@ impl Parser: parser_common {
         if self.token == token::GT {
             self.bump();
         } else if self.token == token::BINOP(token::SHR) {
-            self.swap(token::GT, self.span.lo + 1u, self.span.hi);
+            self.swap(token::GT, self.span.lo + CharPos(1u), self.span.hi);
         } else {
             let mut s: ~str = ~"expected `";
             s += token_to_str(self.reader, token::GT);
diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs
index 06b8b1e8eee..dbeaff28625 100644
--- a/src/libsyntax/parse/lexer.rs
+++ b/src/libsyntax/parse/lexer.rs
@@ -1,5 +1,5 @@
 use diagnostic::span_handler;
-use codemap::{span, CodeMap};
+use codemap::{span, CodeMap, CharPos, BytePos};
 use ext::tt::transcribe::{tt_reader,  new_tt_reader, dup_tt_reader,
                              tt_next_token};
 
@@ -21,10 +21,10 @@ trait reader {
 type string_reader = @{
     span_diagnostic: span_handler,
     src: @~str,
-    mut col: uint,
-    mut pos: uint,
+    mut col: CharPos,
+    mut pos: BytePos,
     mut curr: char,
-    mut chpos: uint,
+    mut chpos: CharPos,
     filemap: @codemap::FileMap,
     interner: @token::ident_interner,
     /* cached: */
@@ -46,15 +46,15 @@ fn new_low_level_string_reader(span_diagnostic: span_handler,
                                itr: @token::ident_interner)
     -> string_reader {
     let r = @{span_diagnostic: span_diagnostic, src: filemap.src,
-              mut col: 0u, mut pos: 0u, mut curr: -1 as char,
+              mut col: CharPos(0), mut pos: BytePos(0), mut curr: -1 as char,
               mut chpos: filemap.start_pos.ch,
               filemap: filemap, interner: itr,
               /* dummy values; not read */
               mut peek_tok: token::EOF,
-              mut peek_span: ast_util::mk_sp(0u,0u)};
-    if r.pos < (*filemap.src).len() {
-        let next = str::char_range_at(*r.src, r.pos);
-        r.pos = next.next;
+              mut peek_span: ast_util::dummy_sp()};
+    if r.pos.to_uint() < (*filemap.src).len() {
+        let next = str::char_range_at(*r.src, r.pos.to_uint());
+        r.pos = BytePos(next.next);
         r.curr = next.ch;
     }
     return r;
@@ -124,27 +124,27 @@ fn string_advance_token(&&r: string_reader) {
 
 }
 
-fn get_str_from(rdr: string_reader, start: uint) -> ~str unsafe {
+fn get_str_from(rdr: string_reader, start: BytePos) -> ~str unsafe {
     // I'm pretty skeptical about this subtraction. What if there's a
     // multi-byte character before the mark?
-    return str::slice(*rdr.src, start - 1u, rdr.pos - 1u);
+    return str::slice(*rdr.src, start.to_uint() - 1u, rdr.pos.to_uint() - 1u);
 }
 
 fn bump(rdr: string_reader) {
-    if rdr.pos < (*rdr.src).len() {
-        rdr.col += 1u;
-        rdr.chpos += 1u;
+    if rdr.pos.to_uint() < (*rdr.src).len() {
+        rdr.col += CharPos(1u);
+        rdr.chpos += CharPos(1u);
         if rdr.curr == '\n' {
             rdr.filemap.next_line(rdr.chpos, rdr.pos);
-            rdr.col = 0u;
+            rdr.col = CharPos(0u);
         }
-        let next = str::char_range_at(*rdr.src, rdr.pos);
-        rdr.pos = next.next;
+        let next = str::char_range_at(*rdr.src, rdr.pos.to_uint());
+        rdr.pos = BytePos(next.next);
         rdr.curr = next.ch;
     } else {
         if (rdr.curr != -1 as char) {
-            rdr.col += 1u;
-            rdr.chpos += 1u;
+            rdr.col += CharPos(1u);
+            rdr.chpos += CharPos(1u);
             rdr.curr = -1 as char;
         }
     }
@@ -153,8 +153,8 @@ fn is_eof(rdr: string_reader) -> bool {
     rdr.curr == -1 as char
 }
 fn nextch(rdr: string_reader) -> char {
-    if rdr.pos < (*rdr.src).len() {
-        return str::char_at(*rdr.src, rdr.pos);
+    if rdr.pos.to_uint() < (*rdr.src).len() {
+        return str::char_at(*rdr.src, rdr.pos.to_uint());
     } else { return -1 as char; }
 }
 
@@ -211,7 +211,7 @@ fn consume_any_line_comment(rdr: string_reader)
             bump(rdr);
             // line comments starting with "///" or "//!" are doc-comments
             if rdr.curr == '/' || rdr.curr == '!' {
-                let start_chpos = rdr.chpos - 2u;
+                let start_chpos = rdr.chpos - CharPos(2u);
                 let mut acc = ~"//";
                 while rdr.curr != '\n' && !is_eof(rdr) {
                     str::push_char(&mut acc, rdr.curr);
@@ -235,7 +235,7 @@ fn consume_any_line_comment(rdr: string_reader)
             let cmap = @CodeMap::new();
             (*cmap).files.push(rdr.filemap);
             let loc = cmap.lookup_char_pos_adj(rdr.chpos);
-            if loc.line == 1u && loc.col == 0u {
+            if loc.line == 1u && loc.col == CharPos(0u) {
                 while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); }
                 return consume_whitespace_and_comments(rdr);
             }
@@ -250,7 +250,7 @@ fn consume_block_comment(rdr: string_reader)
 
     // block comments starting with "/**" or "/*!" are doc-comments
     if rdr.curr == '*' || rdr.curr == '!' {
-        let start_chpos = rdr.chpos - 2u;
+        let start_chpos = rdr.chpos - CharPos(2u);
         let mut acc = ~"/*";
         while !(rdr.curr == '*' && nextch(rdr) == '/') && !is_eof(rdr) {
             str::push_char(&mut acc, rdr.curr);
@@ -584,7 +584,7 @@ fn next_token_inner(rdr: string_reader) -> token::Token {
         return token::LIT_INT(c2 as i64, ast::ty_char);
       }
       '"' => {
-        let n = rdr.chpos;
+        let n = rdr.pos;
         bump(rdr);
         while rdr.curr != '"' {
             if is_eof(rdr) {
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
index 43628b0908f..bd314d6bcaf 100644
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -5,7 +5,7 @@ use either::{Either, Left, Right};
 use std::map::HashMap;
 use token::{can_begin_expr, is_ident, is_ident_or_path, is_plain_ident,
             INTERPOLATED, special_idents};
-use codemap::{span,FssNone};
+use codemap::{span,FssNone, CharPos};
 use util::interner::Interner;
 use ast_util::{spanned, respan, mk_sp, ident_to_path, operator_prec};
 use lexer::reader;
@@ -244,7 +244,7 @@ impl Parser {
         self.token = next.tok;
         self.span = next.sp;
     }
-    fn swap(next: token::Token, lo: uint, hi: uint) {
+    fn swap(next: token::Token, +lo: CharPos, +hi: CharPos) {
         self.token = next;
         self.span = mk_sp(lo, hi);
     }
@@ -904,12 +904,12 @@ impl Parser {
         return spanned(lo, e.span.hi, {mutbl: m, ident: i, expr: e});
     }
 
-    fn mk_expr(lo: uint, hi: uint, +node: expr_) -> @expr {
+    fn mk_expr(+lo: CharPos, +hi: CharPos, +node: expr_) -> @expr {
         return @{id: self.get_id(), callee_id: self.get_id(),
               node: node, span: mk_sp(lo, hi)};
     }
 
-    fn mk_mac_expr(lo: uint, hi: uint, m: mac_) -> @expr {
+    fn mk_mac_expr(+lo: CharPos, +hi: CharPos, m: mac_) -> @expr {
         return @{id: self.get_id(),
               callee_id: self.get_id(),
               node: expr_mac({node: m, span: mk_sp(lo, hi)}),
@@ -1134,7 +1134,7 @@ impl Parser {
         return self.mk_expr(lo, hi, ex);
     }
 
-    fn parse_block_expr(lo: uint, blk_mode: blk_check_mode) -> @expr {
+    fn parse_block_expr(lo: CharPos, blk_mode: blk_check_mode) -> @expr {
         self.expect(token::LBRACE);
         let blk = self.parse_block_tail(lo, blk_mode);
         return self.mk_expr(blk.span.lo, blk.span.hi, expr_block(blk));
@@ -1146,7 +1146,7 @@ impl Parser {
         return self.parse_syntax_ext_naked(lo);
     }
 
-    fn parse_syntax_ext_naked(lo: uint) -> @expr {
+    fn parse_syntax_ext_naked(lo: CharPos) -> @expr {
         match self.token {
           token::IDENT(_, _) => (),
           _ => self.fatal(~"expected a syntax expander name")
@@ -2279,11 +2279,11 @@ impl Parser {
     // I guess that also means "already parsed the 'impure'" if
     // necessary, and this should take a qualifier.
     // some blocks start with "#{"...
-    fn parse_block_tail(lo: uint, s: blk_check_mode) -> blk {
+    fn parse_block_tail(lo: CharPos, s: blk_check_mode) -> blk {
         self.parse_block_tail_(lo, s, ~[])
     }
 
-    fn parse_block_tail_(lo: uint, s: blk_check_mode,
+    fn parse_block_tail_(lo: CharPos, s: blk_check_mode,
                          +first_item_attrs: ~[attribute]) -> blk {
         let mut stmts = ~[];
         let mut expr = None;
@@ -2581,7 +2581,7 @@ impl Parser {
         return {ident: id, tps: ty_params};
     }
 
-    fn mk_item(lo: uint, hi: uint, +ident: ident,
+    fn mk_item(+lo: CharPos, +hi: CharPos, +ident: ident,
                +node: item_, vis: visibility,
                +attrs: ~[attribute]) -> @item {
         return @{ident: ident,
@@ -3037,7 +3037,7 @@ impl Parser {
             items: items};
     }
 
-    fn parse_item_foreign_mod(lo: uint,
+    fn parse_item_foreign_mod(lo: CharPos,
                               visibility: visibility,
                               attrs: ~[attribute],
                               items_allowed: bool)
@@ -3092,7 +3092,7 @@ impl Parser {
         });
     }
 
-    fn parse_type_decl() -> {lo: uint, ident: ident} {
+    fn parse_type_decl() -> {lo: CharPos, ident: ident} {
         let lo = self.last_span.lo;
         let id = self.parse_ident();
         return {lo: lo, ident: id};
author	Brian Anderson <banderson@mozilla.com>	2012-11-12 19:32:48 -0800
committer	Brian Anderson <banderson@mozilla.com>	2012-11-14 15:18:29 -0800
commit	f67bfe97389a256fc95216c29a2b8a066ee16a2c (patch)
tree	a638c4c480ceb60138a39a05c5b04967ac814eb6 /src/libsyntax/parse
parent	9ecf86343a136c71cbb2bb8da9bfd1734fec37f4 (diff)
download	rust-f67bfe97389a256fc95216c29a2b8a066ee16a2c.tar.gz rust-f67bfe97389a256fc95216c29a2b8a066ee16a2c.zip