about summary refs log tree commit diff
path: root/src/libsyntax
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2015-07-21 09:59:18 +0000
committer bors <bors@rust-lang.org> 2015-07-21 09:59:18 +0000
commit e33b128e01e1a5a2d23abac5b88274eea5cf9059 (patch)
tree d0da3b98226376b52150853f61b4faee90446dc4 /src/libsyntax
parent 247a0d185446fb145e1e5843c44c97b9a564e135 (diff)
parent f47d20aecdcd7db34d41ad1666fd3eee095cc943 (diff)
download rust-e33b128e01e1a5a2d23abac5b88274eea5cf9059.tar.gz
rust-e33b128e01e1a5a2d23abac5b88274eea5cf9059.zip
Auto merge of #26816 - nrc:zero-codemap, r=@jroesch
See commits for details
Diffstat (limited to 'src/libsyntax')
-rw-r--r-- src/libsyntax/codemap.rs 194
-rw-r--r-- src/libsyntax/diagnostic.rs 20
-rw-r--r-- src/libsyntax/ext/source_util.rs 4
-rw-r--r-- src/libsyntax/parse/lexer/mod.rs 1
-rw-r--r-- src/libsyntax/parse/mod.rs 11
-rw-r--r-- src/libsyntax/parse/parser.rs 24
6 files changed, 141 insertions, 113 deletions
diff --git a/src/libsyntax/codemap.rs b/src/libsyntax/codemap.rs
index 5ddcfaef9ea..e6bc3218897 100644
--- a/src/libsyntax/codemap.rs
+++ b/src/libsyntax/codemap.rs
@@ -115,6 +115,10 @@ impl Sub for CharPos {
 /// are *absolute* positions from the beginning of the codemap, not positions
 /// relative to FileMaps. Methods on the CodeMap can be used to relate spans back
 /// to the original source.
+/// You must be careful if the span crosses more than one file - you will not be
+/// able to use many of the functions on spans in codemap and you cannot assume
+/// that the length of the span = hi - lo; there may be space in the BytePos
+/// range between files.
 #[derive(Clone, Copy, Hash)]
 pub struct Span {
     pub lo: BytePos,
@@ -339,7 +343,7 @@ pub struct MultiByteChar {
     pub bytes: usize,
 }
 
-/// A single source in the CodeMap
+/// A single source in the CodeMap.
 pub struct FileMap {
     /// The name of the file that the source came from, source that doesn't
     /// originate from files has names between angle brackets by convention,
@@ -508,6 +512,9 @@ impl FileMap {
                 lines.get(line_number).map(|&line| {
                     let begin: BytePos = line - self.start_pos;
                     let begin = begin.to_usize();
+                    // We can't use `lines.get(line_number+1)` because we might
+                    // be parsing when we call this function and thus the current
+                    // line is the last one we have line info for.
                     let slice = &src[begin..];
                     match slice.find('\n') {
                         Some(e) => &slice[..e],
@@ -598,27 +605,27 @@ impl CodeMap {
         Ok(self.new_filemap(path.to_str().unwrap().to_string(), src))
     }
 
+    fn next_start_pos(&self) -> usize {
+        let files = self.files.borrow();
+        match files.last() {
+            None => 0,
+            // Add one so there is some space between files. This lets us distinguish
+            // positions in the codemap, even in the presence of zero-length files.
+            Some(last) => last.end_pos.to_usize() + 1,
+        }
+    }
+
+    /// Creates a new filemap without setting its line information. If you don't
+    /// intend to set the line information yourself, you should use new_filemap_and_lines.
     pub fn new_filemap(&self, filename: FileName, mut src: String) -> Rc<FileMap> {
+        let start_pos = self.next_start_pos();
         let mut files = self.files.borrow_mut();
-        let start_pos = match files.last() {
-            None => 0,
-            Some(last) => last.end_pos.to_usize(),
-        };
 
         // Remove utf-8 BOM if any.
         if src.starts_with("\u{feff}") {
             src.drain(..3);
         }
 
-        // Append '\n' in case it's not already there.
-        // This is a workaround to prevent CodeMap.lookup_filemap_idx from
-        // accidentally overflowing into the next filemap in case the last byte
-        // of span is also the last byte of filemap, which leads to incorrect
-        // results from CodeMap.span_to_*.
-        if !src.is_empty() && !src.ends_with("\n") {
-            src.push('\n');
-        }
-
         let end_pos = start_pos + src.len();
 
         let filemap = Rc::new(FileMap {
@@ -635,6 +642,21 @@ impl CodeMap {
         filemap
     }
 
+    /// Creates a new filemap and sets its line information.
+    pub fn new_filemap_and_lines(&self, filename: &str, src: &str) -> Rc<FileMap> {
+        let fm = self.new_filemap(filename.to_string(), src.to_owned());
+        let mut byte_pos: u32 = 0;
+        for line in src.lines() {
+            // register the start of this line
+            fm.next_line(BytePos(byte_pos));
+
+            // update byte_pos to include this line and the \n at the end
+            byte_pos += line.len() as u32 + 1;
+        }
+        fm
+    }
+
+
     /// Allocates a new FileMap representing a source file from an external
     /// crate. The source code of such an "imported filemap" is not available,
     /// but we still know enough to generate accurate debuginfo location
@@ -645,11 +667,8 @@ impl CodeMap {
                                 mut file_local_lines: Vec<BytePos>,
                                 mut file_local_multibyte_chars: Vec<MultiByteChar>)
                                 -> Rc<FileMap> {
+        let start_pos = self.next_start_pos();
         let mut files = self.files.borrow_mut();
-        let start_pos = match files.last() {
-            None => 0,
-            Some(last) => last.end_pos.to_usize(),
-        };
 
         let end_pos = Pos::from_usize(start_pos + source_len);
         let start_pos = Pos::from_usize(start_pos);
@@ -686,39 +705,61 @@ impl CodeMap {
 
     /// Lookup source information about a BytePos
     pub fn lookup_char_pos(&self, pos: BytePos) -> Loc {
-        let FileMapAndLine {fm: f, line: a} = self.lookup_line(pos);
-        let line = a + 1; // Line numbers start at 1
         let chpos = self.bytepos_to_file_charpos(pos);
-        let linebpos = (*f.lines.borrow())[a];
-        let linechpos = self.bytepos_to_file_charpos(linebpos);
-        debug!("byte pos {:?} is on the line at byte pos {:?}",
-               pos, linebpos);
-        debug!("char pos {:?} is on the line at char pos {:?}",
-               chpos, linechpos);
-        debug!("byte is on line: {}", line);
-        assert!(chpos >= linechpos);
-        Loc {
-            file: f,
-            line: line,
-            col: chpos - linechpos
+        match self.lookup_line(pos) {
+            Ok(FileMapAndLine { fm: f, line: a }) => {
+                let line = a + 1; // Line numbers start at 1
+                let linebpos = (*f.lines.borrow())[a];
+                let linechpos = self.bytepos_to_file_charpos(linebpos);
+                debug!("byte pos {:?} is on the line at byte pos {:?}",
+                       pos, linebpos);
+                debug!("char pos {:?} is on the line at char pos {:?}",
+                       chpos, linechpos);
+                debug!("byte is on line: {}", line);
+                assert!(chpos >= linechpos);
+                Loc {
+                    file: f,
+                    line: line,
+                    col: chpos - linechpos,
+                }
+            }
+            Err(f) => {
+                Loc {
+                    file: f,
+                    line: 0,
+                    col: chpos,
+                }
+            }
         }
     }
 
-    fn lookup_line(&self, pos: BytePos) -> FileMapAndLine {
+    // If the relevant filemap is empty, we don't return a line number.
+    fn lookup_line(&self, pos: BytePos) -> Result<FileMapAndLine, Rc<FileMap>> {
         let idx = self.lookup_filemap_idx(pos);
 
         let files = self.files.borrow();
         let f = (*files)[idx].clone();
+
+        let len = f.lines.borrow().len();
+        if len == 0 {
+            return Err(f);
+        }
+
         let mut a = 0;
         {
             let lines = f.lines.borrow();
             let mut b = lines.len();
             while b - a > 1 {
                 let m = (a + b) / 2;
-                if (*lines)[m] > pos { b = m; } else { a = m; }
+                if (*lines)[m] > pos {
+                    b = m;
+                } else {
+                    a = m;
+                }
             }
+            assert!(a <= lines.len());
         }
-        FileMapAndLine {fm: f, line: a}
+        Ok(FileMapAndLine { fm: f, line: a })
     }
 
     pub fn lookup_char_pos_adj(&self, pos: BytePos) -> LocWithOpt {
@@ -853,7 +894,7 @@ impl CodeMap {
         FileMapAndBytePos {fm: fm, pos: offset}
     }
 
-    /// Converts an absolute BytePos to a CharPos relative to the filemap and above.
+    /// Converts an absolute BytePos to a CharPos relative to the filemap.
     pub fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos {
         let idx = self.lookup_filemap_idx(bpos);
         let files = self.files.borrow();
@@ -880,12 +921,15 @@ impl CodeMap {
         CharPos(bpos.to_usize() - map.start_pos.to_usize() - total_extra_bytes)
     }
 
+    // Return the index of the filemap (in self.files) which contains pos.
     fn lookup_filemap_idx(&self, pos: BytePos) -> usize {
         let files = self.files.borrow();
         let files = &*files;
-        let len = files.len();
+        let count = files.len();
+
+        // Binary search for the filemap.
         let mut a = 0;
-        let mut b = len;
+        let mut b = count;
         while b - a > 1 {
             let m = (a + b) / 2;
             if files[m].start_pos > pos {
@@ -894,26 +938,8 @@ impl CodeMap {
                 a = m;
             }
         }
-        // There can be filemaps with length 0. These have the same start_pos as
-        // the previous filemap, but are not the filemaps we want (because they
-        // are length 0, they cannot contain what we are looking for). So,
-        // rewind until we find a useful filemap.
-        loop {
-            let lines = files[a].lines.borrow();
-            let lines = lines;
-            if !lines.is_empty() {
-                break;
-            }
-            if a == 0 {
-                panic!("position {} does not resolve to a source location",
-                      pos.to_usize());
-            }
-            a -= 1;
-        }
-        if a >= len {
-            panic!("position {} does not resolve to a source location",
-                  pos.to_usize())
-        }
+
+        assert!(a < count, "position {} does not resolve to a source location", pos.to_usize());
 
         return a;
     }
@@ -1027,10 +1053,13 @@ mod tests {
         let fm = cm.new_filemap("blork.rs".to_string(),
                                 "first line.\nsecond line".to_string());
         fm.next_line(BytePos(0));
+        // Test we can get lines with partial line info.
         assert_eq!(fm.get_line(0), Some("first line."));
-        // TESTING BROKEN BEHAVIOR:
+        // TESTING BROKEN BEHAVIOR: line break declared before actual line break.
         fm.next_line(BytePos(10));
         assert_eq!(fm.get_line(1), Some("."));
+        fm.next_line(BytePos(12));
+        assert_eq!(fm.get_line(2), Some("second line"));
     }
 
     #[test]
@@ -1056,9 +1085,9 @@ mod tests {
 
         fm1.next_line(BytePos(0));
         fm1.next_line(BytePos(12));
-        fm2.next_line(BytePos(24));
-        fm3.next_line(BytePos(24));
-        fm3.next_line(BytePos(34));
+        fm2.next_line(fm2.start_pos);
+        fm3.next_line(fm3.start_pos);
+        fm3.next_line(fm3.start_pos + BytePos(12));
 
         cm
     }
@@ -1068,11 +1097,15 @@ mod tests {
         // Test lookup_byte_offset
         let cm = init_code_map();
 
-        let fmabp1 = cm.lookup_byte_offset(BytePos(22));
+        let fmabp1 = cm.lookup_byte_offset(BytePos(23));
         assert_eq!(fmabp1.fm.name, "blork.rs");
-        assert_eq!(fmabp1.pos, BytePos(22));
+        assert_eq!(fmabp1.pos, BytePos(23));
+
+        let fmabp1 = cm.lookup_byte_offset(BytePos(24));
+        assert_eq!(fmabp1.fm.name, "empty.rs");
+        assert_eq!(fmabp1.pos, BytePos(0));
 
-        let fmabp2 = cm.lookup_byte_offset(BytePos(24));
+        let fmabp2 = cm.lookup_byte_offset(BytePos(25));
         assert_eq!(fmabp2.fm.name, "blork2.rs");
         assert_eq!(fmabp2.pos, BytePos(0));
     }
@@ -1085,7 +1118,7 @@ mod tests {
         let cp1 = cm.bytepos_to_file_charpos(BytePos(22));
         assert_eq!(cp1, CharPos(22));
 
-        let cp2 = cm.bytepos_to_file_charpos(BytePos(24));
+        let cp2 = cm.bytepos_to_file_charpos(BytePos(25));
         assert_eq!(cp2, CharPos(0));
     }
 
@@ -1099,7 +1132,7 @@ mod tests {
         assert_eq!(loc1.line, 2);
         assert_eq!(loc1.col, CharPos(10));
 
-        let loc2 = cm.lookup_char_pos(BytePos(24));
+        let loc2 = cm.lookup_char_pos(BytePos(25));
         assert_eq!(loc2.file.name, "blork2.rs");
         assert_eq!(loc2.line, 1);
         assert_eq!(loc2.col, CharPos(0));
@@ -1115,18 +1148,18 @@ mod tests {
                                  "first line€€.\n€ second line".to_string());
 
         fm1.next_line(BytePos(0));
-        fm1.next_line(BytePos(22));
-        fm2.next_line(BytePos(40));
-        fm2.next_line(BytePos(58));
+        fm1.next_line(BytePos(28));
+        fm2.next_line(fm2.start_pos);
+        fm2.next_line(fm2.start_pos + BytePos(20));
 
         fm1.record_multibyte_char(BytePos(3), 3);
         fm1.record_multibyte_char(BytePos(9), 3);
         fm1.record_multibyte_char(BytePos(12), 3);
         fm1.record_multibyte_char(BytePos(15), 3);
         fm1.record_multibyte_char(BytePos(18), 3);
-        fm2.record_multibyte_char(BytePos(50), 3);
-        fm2.record_multibyte_char(BytePos(53), 3);
-        fm2.record_multibyte_char(BytePos(58), 3);
+        fm2.record_multibyte_char(fm2.start_pos + BytePos(10), 3);
+        fm2.record_multibyte_char(fm2.start_pos + BytePos(13), 3);
+        fm2.record_multibyte_char(fm2.start_pos + BytePos(18), 3);
 
         cm
     }
@@ -1172,19 +1205,6 @@ mod tests {
         Span { lo: BytePos(left_index), hi: BytePos(right_index + 1), expn_id: NO_EXPANSION }
     }
 
-    fn new_filemap_and_lines(cm: &CodeMap, filename: &str, input: &str) -> Rc<FileMap> {
-        let fm = cm.new_filemap(filename.to_string(), input.to_string());
-        let mut byte_pos: u32 = 0;
-        for line in input.lines() {
-            // register the start of this line
-            fm.next_line(BytePos(byte_pos));
-
-            // update byte_pos to include this line and the \n at the end
-            byte_pos += line.len() as u32 + 1;
-        }
-        fm
-    }
-
     /// Test span_to_snippet and span_to_lines for a span coverting 3
     /// lines in the middle of a file.
     #[test]
@@ -1192,7 +1212,7 @@ mod tests {
         let cm = CodeMap::new();
         let inputtext = "aaaaa\nbbbbBB\nCCC\nDDDDDddddd\neee\n";
         let selection = "     \n    ^~\n~~~\n~~~~~     \n   \n";
-        new_filemap_and_lines(&cm, "blork.rs", inputtext);
+        cm.new_filemap_and_lines("blork.rs", inputtext);
         let span = span_from_selection(inputtext, selection);
 
         // check that we are extracting the text we thought we were extracting
diff --git a/src/libsyntax/diagnostic.rs b/src/libsyntax/diagnostic.rs
index e95813e44ba..f1d748595d6 100644
--- a/src/libsyntax/diagnostic.rs
+++ b/src/libsyntax/diagnostic.rs
@@ -841,12 +841,7 @@ mod test {
         tolv
         dreizehn
         ";
-        let file = cm.new_filemap("dummy.txt".to_string(), content.to_string());
-        for (i, b) in content.bytes().enumerate() {
-            if b == b'\n' {
-                file.next_line(BytePos(i as u32));
-            }
-        }
+        let file = cm.new_filemap_and_lines("dummy.txt", content);
         let start = file.lines.borrow()[7];
         let end = file.lines.borrow()[11];
         let sp = mk_sp(start, end);
@@ -858,11 +853,12 @@ mod test {
         println!("done");
         let vec = data.lock().unwrap().clone();
         let vec: &[u8] = &vec;
-        println!("{}", from_utf8(vec).unwrap());
-        assert_eq!(vec, "dummy.txt: 8 \n\
-                         dummy.txt: 9 \n\
-                         dummy.txt:10 \n\
-                         dummy.txt:11 \n\
-                         dummy.txt:12 \n".as_bytes());
+        let str = from_utf8(vec).unwrap();
+        println!("{}", str);
+        assert_eq!(str, "dummy.txt: 8         line8\n\
+                         dummy.txt: 9         line9\n\
+                         dummy.txt:10         line10\n\
+                         dummy.txt:11         e-lä-vän\n\
+                         dummy.txt:12         tolv\n");
     }
 }
diff --git a/src/libsyntax/ext/source_util.rs b/src/libsyntax/ext/source_util.rs
index 5418b1f43e4..22517dc5f1b 100644
--- a/src/libsyntax/ext/source_util.rs
+++ b/src/libsyntax/ext/source_util.rs
@@ -156,7 +156,7 @@ pub fn expand_include_str(cx: &mut ExtCtxt, sp: Span, tts: &[ast::TokenTree])
             // dependency information
             let filename = format!("{}", file.display());
             let interned = token::intern_and_get_ident(&src[..]);
-            cx.codemap().new_filemap(filename, src);
+            cx.codemap().new_filemap_and_lines(&filename, &src);
 
             base::MacEager::expr(cx.expr_str(sp, interned))
         }
@@ -187,7 +187,7 @@ pub fn expand_include_bytes(cx: &mut ExtCtxt, sp: Span, tts: &[ast::TokenTree])
             // Add this input file to the code map to make it available as
             // dependency information, but don't enter it's contents
             let filename = format!("{}", file.display());
-            cx.codemap().new_filemap(filename, "".to_string());
+            cx.codemap().new_filemap_and_lines(&filename, "");
 
             base::MacEager::expr(cx.expr_lit(sp, ast::LitBinary(Rc::new(bytes))))
         }
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index b6a3788dacc..621335ecd97 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -231,6 +231,7 @@ impl<'a> StringReader<'a> {
             None => {
                 if self.is_eof() {
                     self.peek_tok = token::Eof;
+                    self.peek_span = codemap::mk_sp(self.filemap.end_pos, self.filemap.end_pos);
                 } else {
                     let start_bytepos = self.last_pos;
                     self.peek_tok = self.next_token_inner();
diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs
index d6c28d41447..34a63fc92fe 100644
--- a/src/libsyntax/parse/mod.rs
+++ b/src/libsyntax/parse/mod.rs
@@ -11,7 +11,7 @@
 //! The main parser interface
 
 use ast;
-use codemap::{Span, CodeMap, FileMap};
+use codemap::{self, Span, CodeMap, FileMap};
 use diagnostic::{SpanHandler, Handler, Auto, FatalError};
 use parse::attr::ParserAttr;
 use parse::parser::Parser;
@@ -203,7 +203,14 @@ pub fn new_sub_parser_from_file<'a>(sess: &'a ParseSess,
 pub fn filemap_to_parser<'a>(sess: &'a ParseSess,
                              filemap: Rc<FileMap>,
                              cfg: ast::CrateConfig) -> Parser<'a> {
-    tts_to_parser(sess, filemap_to_tts(sess, filemap), cfg)
+    let end_pos = filemap.end_pos;
+    let mut parser = tts_to_parser(sess, filemap_to_tts(sess, filemap), cfg);
+
+    if parser.token == token::Eof && parser.span == codemap::DUMMY_SP {
+        parser.span = codemap::mk_sp(end_pos, end_pos);
+    }
+
+    parser
 }
 
 // must preserve old name for now, because quote! from the *existing*
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
index 28802d323c6..db1b2489f1d 100644
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -4824,8 +4824,14 @@ impl<'a> Parser<'a> {
             return Err(self.fatal(&format!("expected item, found `{}`", token_str)));
         }
 
+        let hi = if self.span == codemap::DUMMY_SP {
+            inner_lo
+        } else {
+            self.span.lo
+        };
+
         Ok(ast::Mod {
-            inner: mk_sp(inner_lo, self.span.lo),
+            inner: mk_sp(inner_lo, hi),
             items: items
         })
     }
@@ -4869,8 +4875,7 @@ impl<'a> Parser<'a> {
 
     fn push_mod_path(&mut self, id: Ident, attrs: &[Attribute]) {
         let default_path = self.id_to_interned_str(id);
-        let file_path = match ::attr::first_attr_value_str_by_name(attrs,
-                                                                   "path") {
+        let file_path = match ::attr::first_attr_value_str_by_name(attrs, "path") {
             Some(d) => d,
             None => default_path,
         };
@@ -5003,13 +5008,12 @@ impl<'a> Parser<'a> {
         included_mod_stack.push(path.clone());
         drop(included_mod_stack);
 
-        let mut p0 =
-            new_sub_parser_from_file(self.sess,
-                                     self.cfg.clone(),
-                                     &path,
-                                     owns_directory,
-                                     Some(name),
-                                     id_sp);
+        let mut p0 = new_sub_parser_from_file(self.sess,
+                                              self.cfg.clone(),
+                                              &path,
+                                              owns_directory,
+                                              Some(name),
+                                              id_sp);
         let mod_inner_lo = p0.span.lo;
         let mod_attrs = p0.parse_inner_attributes();
         let m0 = try!(p0.parse_mod_items(&token::Eof, mod_inner_lo));