about summary refs log tree commit diff
path: root/src/libsyntax
diff options
context:
space:
mode:
author: Nick Cameron <ncameron@mozilla.com> 2014-02-28 12:53:36 +1300
committer: Alex Crichton <alex@alexcrichton.com> 2014-02-27 21:04:05 -0800
commit: a8d57a26dfe1c8e44e9af1923c900f147d13889b (patch)
tree: 6b251a76724c67aa0f3b7770f2138bea34bdf033 /src/libsyntax
parent: 53a3f281158bf584361ad63400ba0b4a88472b23 (diff)
download: rust-a8d57a26dfe1c8e44e9af1923c900f147d13889b.tar.gz
rust-a8d57a26dfe1c8e44e9af1923c900f147d13889b.zip
Fix bytepos_to_file_charpos.
Make bytepos_to_charpos relative to the start of the filemap rather than its previous behaviour, which was to be relative to the start of the codemap while ignoring multi-byte chars in earlier filemaps. Rename it to bytepos_to_file_charpos. Add tests for multi-byte chars.
Diffstat (limited to 'src/libsyntax')
-rw-r--r-- src/libsyntax/codemap.rs 62
1 files changed, 52 insertions, 10 deletions
diff --git a/src/libsyntax/codemap.rs b/src/libsyntax/codemap.rs
index 79ccc8df0f3..7b70e14e802 100644
--- a/src/libsyntax/codemap.rs
+++ b/src/libsyntax/codemap.rs
@@ -420,10 +420,10 @@ impl CodeMap {
     fn lookup_pos(&self, pos: BytePos) -> Loc {
         let FileMapAndLine {fm: f, line: a} = self.lookup_line(pos);
         let line = a + 1u; // Line numbers start at 1
-        let chpos = self.bytepos_to_charpos(pos);
+        let chpos = self.bytepos_to_file_charpos(pos);
         let lines = f.lines.borrow();
         let linebpos = lines.get()[a];
-        let linechpos = self.bytepos_to_charpos(linebpos);
+        let linechpos = self.bytepos_to_file_charpos(linebpos);
         debug!("codemap: byte pos {:?} is on the line at byte pos {:?}",
                pos, linebpos);
         debug!("codemap: char pos {:?} is on the line at char pos {:?}",
@@ -446,8 +446,8 @@ impl CodeMap {
         return FileMapAndBytePos {fm: fm, pos: offset};
     }
 
-    // Converts an absolute BytePos to a CharPos relative to the codemap.
-    fn bytepos_to_charpos(&self, bpos: BytePos) -> CharPos {
+    // Converts an absolute BytePos to a CharPos relative to the filemap.
+    fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos {
         debug!("codemap: converting {:?} to char pos", bpos);
         let idx = self.lookup_filemap_idx(bpos);
         let files = self.files.borrow();
@@ -471,7 +471,8 @@ impl CodeMap {
             }
         }
 
-        CharPos(bpos.to_uint() - total_extra_bytes)
+        assert!(map.start_pos.to_uint() + total_extra_bytes <= bpos.to_uint());
+        CharPos(bpos.to_uint() - map.start_pos.to_uint() - total_extra_bytes)
     }
 }
 
@@ -501,7 +502,7 @@ mod test {
         fm.next_line(BytePos(2));
     }
 
-    fn init_code_map() ->CodeMap {
+    fn init_code_map() -> CodeMap {
         let cm = CodeMap::new();
         let fm1 = cm.new_filemap(~"blork.rs",~"first line.\nsecond line");
         let fm2 = cm.new_filemap(~"empty.rs",~"");
@@ -532,14 +533,14 @@ mod test {
 
     #[test]
     fn t4() {
-        // Test bytepos_to_charpos
+        // Test bytepos_to_file_charpos
         let cm = init_code_map();
 
-        let cp1 = cm.bytepos_to_charpos(BytePos(22));
+        let cp1 = cm.bytepos_to_file_charpos(BytePos(22));
         assert_eq!(cp1, CharPos(22));
 
-        let cp2 = cm.bytepos_to_charpos(BytePos(23));
-        assert_eq!(cp2, CharPos(23));
+        let cp2 = cm.bytepos_to_file_charpos(BytePos(23));
+        assert_eq!(cp2, CharPos(0));
     }
 
     #[test]
@@ -557,4 +558,45 @@ mod test {
         assert_eq!(loc2.line, 1);
         assert_eq!(loc2.col, CharPos(0));
     }
+
+    fn init_code_map_mbc() -> CodeMap {
+        let cm = CodeMap::new();
+        // € is a three byte utf8 char.
+        let fm1 = cm.new_filemap(~"blork.rs",~"fir€st €€€€ line.\nsecond line");
+        let fm2 = cm.new_filemap(~"blork2.rs",~"first line€€.\n€ second line");
+
+        fm1.next_line(BytePos(0));
+        fm1.next_line(BytePos(22));
+        fm2.next_line(BytePos(39));
+        fm2.next_line(BytePos(57));
+
+        fm1.record_multibyte_char(BytePos(3), 3);
+        fm1.record_multibyte_char(BytePos(9), 3);
+        fm1.record_multibyte_char(BytePos(12), 3);
+        fm1.record_multibyte_char(BytePos(15), 3);
+        fm1.record_multibyte_char(BytePos(18), 3);
+        fm2.record_multibyte_char(BytePos(49), 3);
+        fm2.record_multibyte_char(BytePos(52), 3);
+        fm2.record_multibyte_char(BytePos(57), 3);
+
+        cm
+    }
+
+    #[test]
+    fn t6() {
+        // Test bytepos_to_file_charpos in the presence of multi-byte chars
+        let cm = init_code_map_mbc();
+
+        let cp1 = cm.bytepos_to_file_charpos(BytePos(3));
+        assert_eq!(cp1, CharPos(3));
+
+        let cp2 = cm.bytepos_to_file_charpos(BytePos(6));
+        assert_eq!(cp2, CharPos(4));
+
+        let cp3 = cm.bytepos_to_file_charpos(BytePos(55));
+        assert_eq!(cp3, CharPos(12));
+
+        let cp4 = cm.bytepos_to_file_charpos(BytePos(60));
+        assert_eq!(cp4, CharPos(15));
+    }
 }