diff options
| author | bors <bors@rust-lang.org> | 2015-03-04 14:47:51 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2015-03-04 14:47:51 +0000 |
| commit | 3b3bb0e682c2d252e9f62dd9df5cff9552af91ad (patch) | |
| tree | 32f1cf1ffe136e20b558935271eab570a36c25fc /src/libsyntax | |
| parent | bdf6e4fcf5ed8b8fe3c281040414e4f6a6afb8d6 (diff) | |
| parent | 2f8865556bd6cae123b3db4ceaa0c7977dacea8d (diff) | |
| download | rust-3b3bb0e682c2d252e9f62dd9df5cff9552af91ad.tar.gz rust-3b3bb0e682c2d252e9f62dd9df5cff9552af91ad.zip | |
Auto merge of #22235 - michaelwoerister:cross-crate-spans, r=michaelwoerister
This makes it possible to create proper debuginfo line information for items inlined from other crates (e.g. instantiations of generics). Only the codemap's 'metadata' is stored in a crate's metadata: that is, just the filename, the positions of line beginnings, etc., but not the actual source code itself.
Crate metadata size is increased by this change because spans in the encoded ASTs take up space now:
```
BEFORE AFTER
libcore 36 MiB 39.6 MiB +10%
libsyntax 51.1 MiB 60.5 MiB +18.4%
libcollections 11.2 MiB 12.8 MiB +14.3%
```
This only affects binaries containing metadata (rlibs and dylibs); executables should not be affected in size.
Fixes #19228 and probably #22226.
Diffstat (limited to 'src/libsyntax')
| -rw-r--r-- | src/libsyntax/codemap.rs | 317 | ||||
| -rw-r--r-- | src/libsyntax/parse/lexer/mod.rs | 33 | ||||
| -rw-r--r-- | src/libsyntax/parse/mod.rs | 154 |
3 files changed, 343 insertions, 161 deletions
diff --git a/src/libsyntax/codemap.rs b/src/libsyntax/codemap.rs index 099f6462942..162da2ac54c 100644 --- a/src/libsyntax/codemap.rs +++ b/src/libsyntax/codemap.rs @@ -29,6 +29,11 @@ use std::rc::Rc; use libc::c_uint; use serialize::{Encodable, Decodable, Encoder, Decoder}; + +// _____________________________________________________________________________ +// Pos, BytePos, CharPos +// + pub trait Pos { fn from_usize(n: usize) -> Self; fn to_usize(&self) -> usize; @@ -69,6 +74,18 @@ impl Sub for BytePos { } } +impl Encodable for BytePos { + fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> { + s.emit_u32(self.0) + } +} + +impl Decodable for BytePos { + fn decode<D: Decoder>(d: &mut D) -> Result<BytePos, D::Error> { + Ok(BytePos(try!{ d.read_u32() })) + } +} + impl Pos for CharPos { fn from_usize(n: usize) -> CharPos { CharPos(n) } fn to_usize(&self) -> usize { let CharPos(n) = *self; n } @@ -90,6 +107,10 @@ impl Sub for CharPos { } } +// _____________________________________________________________________________ +// Span, Spanned +// + /// Spans represent a region of code, used for error reporting. Positions in spans /// are *absolute* positions from the beginning of the codemap, not positions /// relative to FileMaps. Methods on the CodeMap can be used to relate spans back @@ -126,15 +147,20 @@ impl PartialEq for Span { impl Eq for Span {} impl Encodable for Span { - /* Note #1972 -- spans are encoded but not decoded */ fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> { - s.emit_nil() + // Encode spans as a single u64 in order to cut down on tagging overhead + // added by the RBML metadata encoding. The should be solved differently + // altogether some time (FIXME #21482) + s.emit_u64( (self.lo.0 as u64) | ((self.hi.0 as u64) << 32) ) } } impl Decodable for Span { - fn decode<D: Decoder>(_d: &mut D) -> Result<Span, D::Error> { - Ok(DUMMY_SP) + fn decode<D: Decoder>(d: &mut D) -> Result<Span, D::Error> { + let lo_hi: u64 = try! 
{ d.read_u64() }; + let lo = BytePos(lo_hi as u32); + let hi = BytePos((lo_hi >> 32) as u32); + Ok(mk_sp(lo, hi)) } } @@ -168,6 +194,10 @@ pub fn original_sp(cm: &CodeMap, sp: Span, enclosing_sp: Span) -> Span { } } +// _____________________________________________________________________________ +// Loc, LocWithOpt, FileMapAndLine, FileMapAndBytePos +// + /// A source code location used for error reporting pub struct Loc { /// Information about the original source @@ -192,6 +222,11 @@ pub struct LocWithOpt { pub struct FileMapAndLine { pub fm: Rc<FileMap>, pub line: usize } pub struct FileMapAndBytePos { pub fm: Rc<FileMap>, pub pos: BytePos } + +// _____________________________________________________________________________ +// MacroFormat, NameAndSpan, ExpnInfo, ExpnId +// + /// The syntax with which a macro was invoked. #[derive(Clone, Copy, Hash, Debug)] pub enum MacroFormat { @@ -254,6 +289,10 @@ impl ExpnId { } } +// _____________________________________________________________________________ +// FileMap, MultiByteChar, FileName, FileLines +// + pub type FileName = String; pub struct FileLines { @@ -262,7 +301,7 @@ pub struct FileLines { } /// Identifies an offset of a multi-byte character in a FileMap -#[derive(Copy)] +#[derive(Copy, RustcEncodable, RustcDecodable, Eq, PartialEq)] pub struct MultiByteChar { /// The absolute offset of the character in the CodeMap pub pos: BytePos, @@ -277,13 +316,134 @@ pub struct FileMap { /// e.g. 
`<anon>` pub name: FileName, /// The complete source code - pub src: String, + pub src: Option<Rc<String>>, /// The start position of this source in the CodeMap pub start_pos: BytePos, + /// The end position of this source in the CodeMap + pub end_pos: BytePos, /// Locations of lines beginnings in the source code - pub lines: RefCell<Vec<BytePos> >, + pub lines: RefCell<Vec<BytePos>>, /// Locations of multi-byte characters in the source code - pub multibyte_chars: RefCell<Vec<MultiByteChar> >, + pub multibyte_chars: RefCell<Vec<MultiByteChar>>, +} + +impl Encodable for FileMap { + fn encode<S: Encoder>(&self, s: &mut S) -> Result<(), S::Error> { + s.emit_struct("FileMap", 5, |s| { + try! { s.emit_struct_field("name", 0, |s| self.name.encode(s)) }; + try! { s.emit_struct_field("start_pos", 1, |s| self.start_pos.encode(s)) }; + try! { s.emit_struct_field("end_pos", 2, |s| self.end_pos.encode(s)) }; + try! { s.emit_struct_field("lines", 3, |s| { + let lines = self.lines.borrow(); + // store the length + try! { s.emit_u32(lines.len() as u32) }; + + if lines.len() > 0 { + // In order to preserve some space, we exploit the fact that + // the lines list is sorted and individual lines are + // probably not that long. Because of that we can store lines + // as a difference list, using as little space as possible + // for the differences. + let max_line_length = if lines.len() == 1 { + 0 + } else { + lines.as_slice() + .windows(2) + .map(|w| w[1] - w[0]) + .map(|bp| bp.to_usize()) + .max() + .unwrap() + }; + + let bytes_per_diff: u8 = match max_line_length { + 0 ... 0xFF => 1, + 0x100 ... 0xFFFF => 2, + _ => 4 + }; + + // Encode the number of bytes used per diff. + try! { bytes_per_diff.encode(s) }; + + // Encode the first element. + try! { lines[0].encode(s) }; + + let diff_iter = (&lines[..]).windows(2) + .map(|w| (w[1] - w[0])); + + match bytes_per_diff { + 1 => for diff in diff_iter { try! { (diff.0 as u8).encode(s) } }, + 2 => for diff in diff_iter { try! 
{ (diff.0 as u16).encode(s) } }, + 4 => for diff in diff_iter { try! { (diff.0 as u32).encode(s) } }, + _ => unreachable!() + } + } + + Ok(()) + }) + }; + s.emit_struct_field("multibyte_chars", 4, |s| { + (*self.multibyte_chars.borrow()).encode(s) + }) + }) + } +} + +impl Decodable for FileMap { + fn decode<D: Decoder>(d: &mut D) -> Result<FileMap, D::Error> { + + d.read_struct("FileMap", 5, |d| { + let name: String = try! { + d.read_struct_field("name", 0, |d| Decodable::decode(d)) + }; + let start_pos: BytePos = try! { + d.read_struct_field("start_pos", 1, |d| Decodable::decode(d)) + }; + let end_pos: BytePos = try! { + d.read_struct_field("end_pos", 2, |d| Decodable::decode(d)) + }; + let lines: Vec<BytePos> = try! { + d.read_struct_field("lines", 3, |d| { + let num_lines: u32 = try! { Decodable::decode(d) }; + let mut lines = Vec::with_capacity(num_lines as usize); + + if num_lines > 0 { + // Read the number of bytes used per diff. + let bytes_per_diff: u8 = try! { Decodable::decode(d) }; + + // Read the first element. + let mut line_start: BytePos = try! { Decodable::decode(d) }; + lines.push(line_start); + + for _ in 1..num_lines { + let diff = match bytes_per_diff { + 1 => try! { d.read_u8() } as u32, + 2 => try! { d.read_u16() } as u32, + 4 => try! { d.read_u32() }, + _ => unreachable!() + }; + + line_start = line_start + BytePos(diff); + + lines.push(line_start); + } + } + + Ok(lines) + }) + }; + let multibyte_chars: Vec<MultiByteChar> = try! 
{ + d.read_struct_field("multibyte_chars", 4, |d| Decodable::decode(d)) + }; + Ok(FileMap { + name: name, + start_pos: start_pos, + end_pos: end_pos, + src: None, + lines: RefCell::new(lines), + multibyte_chars: RefCell::new(multibyte_chars) + }) + }) + } } impl FileMap { @@ -307,16 +467,21 @@ impl FileMap { /// get a line from the list of pre-computed line-beginnings /// pub fn get_line(&self, line_number: usize) -> Option<String> { - let lines = self.lines.borrow(); - lines.get(line_number).map(|&line| { - let begin: BytePos = line - self.start_pos; - let begin = begin.to_usize(); - let slice = &self.src[begin..]; - match slice.find('\n') { - Some(e) => &slice[..e], - None => slice - }.to_string() - }) + match self.src { + Some(ref src) => { + let lines = self.lines.borrow(); + lines.get(line_number).map(|&line| { + let begin: BytePos = line - self.start_pos; + let begin = begin.to_usize(); + let slice = &src[begin..]; + match slice.find('\n') { + Some(e) => &slice[..e], + None => slice + }.to_string() + }) + } + None => None + } } pub fn record_multibyte_char(&self, pos: BytePos, bytes: usize) { @@ -332,8 +497,17 @@ impl FileMap { !(self.name.starts_with("<") && self.name.ends_with(">")) } + + pub fn is_imported(&self) -> bool { + self.src.is_none() + } } + +// _____________________________________________________________________________ +// CodeMap +// + pub struct CodeMap { pub files: RefCell<Vec<Rc<FileMap>>>, expansions: RefCell<Vec<ExpnInfo>> @@ -351,7 +525,7 @@ impl CodeMap { let mut files = self.files.borrow_mut(); let start_pos = match files.last() { None => 0, - Some(last) => last.start_pos.to_usize() + last.src.len(), + Some(last) => last.end_pos.to_usize(), }; // Remove utf-8 BOM if any. 
@@ -372,10 +546,13 @@ impl CodeMap { src.push('\n'); } + let end_pos = start_pos + src.len(); + let filemap = Rc::new(FileMap { name: filename, - src: src.to_string(), + src: Some(Rc::new(src)), start_pos: Pos::from_usize(start_pos), + end_pos: Pos::from_usize(end_pos), lines: RefCell::new(Vec::new()), multibyte_chars: RefCell::new(Vec::new()), }); @@ -385,6 +562,45 @@ impl CodeMap { filemap } + /// Allocates a new FileMap representing a source file from an external + /// crate. The source code of such an "imported filemap" is not available, + /// but we still know enough to generate accurate debuginfo location + /// information for things inlined from other crates. + pub fn new_imported_filemap(&self, + filename: FileName, + source_len: usize, + file_local_lines: Vec<BytePos>, + file_local_multibyte_chars: Vec<MultiByteChar>) + -> Rc<FileMap> { + let mut files = self.files.borrow_mut(); + let start_pos = match files.last() { + None => 0, + Some(last) => last.end_pos.to_usize(), + }; + + let end_pos = Pos::from_usize(start_pos + source_len); + let start_pos = Pos::from_usize(start_pos); + + let lines = file_local_lines.map_in_place(|pos| pos + start_pos); + let multibyte_chars = file_local_multibyte_chars.map_in_place(|mbc| MultiByteChar { + pos: mbc.pos + start_pos, + bytes: mbc.bytes + }); + + let filemap = Rc::new(FileMap { + name: filename, + src: None, + start_pos: start_pos, + end_pos: end_pos, + lines: RefCell::new(lines), + multibyte_chars: RefCell::new(multibyte_chars), + }); + + files.push(filemap.clone()); + + filemap + } + pub fn mk_substr_filename(&self, sp: Span) -> String { let pos = self.lookup_char_pos(sp.lo); (format!("<{}:{}:{}>", @@ -442,30 +658,42 @@ impl CodeMap { return Err(SpanSnippetError::IllFormedSpan(sp)); } - let begin = self.lookup_byte_offset(sp.lo); - let end = self.lookup_byte_offset(sp.hi); + let local_begin = self.lookup_byte_offset(sp.lo); + let local_end = self.lookup_byte_offset(sp.hi); - if begin.fm.start_pos != 
end.fm.start_pos { + if local_begin.fm.start_pos != local_end.fm.start_pos { return Err(SpanSnippetError::DistinctSources(DistinctSources { - begin: (begin.fm.name.clone(), - begin.fm.start_pos), - end: (end.fm.name.clone(), - end.fm.start_pos) + begin: (local_begin.fm.name.clone(), + local_begin.fm.start_pos), + end: (local_end.fm.name.clone(), + local_end.fm.start_pos) })); } else { - let start = begin.pos.to_usize(); - let limit = end.pos.to_usize(); - if start > limit || limit > begin.fm.src.len() { - return Err(SpanSnippetError::MalformedForCodemap( - MalformedCodemapPositions { - name: begin.fm.name.clone(), - source_len: begin.fm.src.len(), - begin_pos: begin.pos, - end_pos: end.pos, - })); - } + match local_begin.fm.src { + Some(ref src) => { + let start_index = local_begin.pos.to_usize(); + let end_index = local_end.pos.to_usize(); + let source_len = (local_begin.fm.end_pos - + local_begin.fm.start_pos).to_usize(); + + if start_index > end_index || end_index > source_len { + return Err(SpanSnippetError::MalformedForCodemap( + MalformedCodemapPositions { + name: local_begin.fm.name.clone(), + source_len: source_len, + begin_pos: local_begin.pos, + end_pos: local_end.pos, + })); + } - return Ok((&begin.fm.src[start..limit]).to_string()) + return Ok((&src[start_index..end_index]).to_string()) + } + None => { + return Err(SpanSnippetError::SourceNotAvailable { + filename: local_begin.fm.name.clone() + }); + } + } } } @@ -478,6 +706,7 @@ impl CodeMap { panic!("asking for {} which we don't know about", filename); } + /// For a global BytePos compute the local offset within the containing FileMap pub fn lookup_byte_offset(&self, bpos: BytePos) -> FileMapAndBytePos { let idx = self.lookup_filemap_idx(bpos); let fm = (*self.files.borrow())[idx].clone(); @@ -639,11 +868,16 @@ impl CodeMap { } } +// _____________________________________________________________________________ +// SpanSnippetError, DistinctSources, MalformedCodemapPositions +// + #[derive(Clone, 
PartialEq, Eq, Debug)] pub enum SpanSnippetError { IllFormedSpan(Span), DistinctSources(DistinctSources), MalformedForCodemap(MalformedCodemapPositions), + SourceNotAvailable { filename: String } } #[derive(Clone, PartialEq, Eq, Debug)] @@ -660,6 +894,11 @@ pub struct MalformedCodemapPositions { end_pos: BytePos } + +// _____________________________________________________________________________ +// Tests +// + #[cfg(test)] mod test { use super::*; diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index bbe1ddfd4cf..8d3e93d35dd 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -76,6 +76,10 @@ pub struct StringReader<'a> { // are revised to go directly to token-trees. /// Is \x00<name>,<ctxt>\x00 is interpreted as encoded ast::Ident? read_embedded_ident: bool, + + // cache a direct reference to the source text, so that we don't have to + // retrieve it via `self.filemap.src.as_ref().unwrap()` all the time. + source_text: Rc<String> } impl<'a> Reader for StringReader<'a> { @@ -141,7 +145,14 @@ pub fn make_reader_with_embedded_idents<'b>(span_diagnostic: &'b SpanHandler, impl<'a> StringReader<'a> { /// For comments.rs, which hackily pokes into pos and curr pub fn new_raw<'b>(span_diagnostic: &'b SpanHandler, - filemap: Rc<codemap::FileMap>) -> StringReader<'b> { + filemap: Rc<codemap::FileMap>) -> StringReader<'b> { + if filemap.src.is_none() { + span_diagnostic.handler.bug(&format!("Cannot lex filemap without source: {}", + filemap.name)[..]); + } + + let source_text = (*filemap.src.as_ref().unwrap()).clone(); + let mut sr = StringReader { span_diagnostic: span_diagnostic, pos: filemap.start_pos, @@ -153,6 +164,7 @@ impl<'a> StringReader<'a> { peek_tok: token::Eof, peek_span: codemap::DUMMY_SP, read_embedded_ident: false, + source_text: source_text }; sr.bump(); sr @@ -213,7 +225,7 @@ impl<'a> StringReader<'a> { m.push_str(": "); let from = self.byte_offset(from_pos).to_usize(); let to = 
self.byte_offset(to_pos).to_usize(); - m.push_str(&self.filemap.src[from..to]); + m.push_str(&self.source_text[from..to]); self.fatal_span_(from_pos, to_pos, &m[..]); } @@ -270,9 +282,8 @@ impl<'a> StringReader<'a> { fn with_str_from_to<T, F>(&self, start: BytePos, end: BytePos, f: F) -> T where F: FnOnce(&str) -> T, { - f(&self.filemap.src[ - self.byte_offset(start).to_usize().. - self.byte_offset(end).to_usize()]) + f(&self.source_text[self.byte_offset(start).to_usize().. + self.byte_offset(end).to_usize()]) } /// Converts CRLF to LF in the given string, raising an error on bare CR. @@ -321,12 +332,10 @@ impl<'a> StringReader<'a> { pub fn bump(&mut self) { self.last_pos = self.pos; let current_byte_offset = self.byte_offset(self.pos).to_usize(); - if current_byte_offset < self.filemap.src.len() { + if current_byte_offset < self.source_text.len() { assert!(self.curr.is_some()); let last_char = self.curr.unwrap(); - let next = self.filemap - .src - .char_range_at(current_byte_offset); + let next = self.source_text.char_range_at(current_byte_offset); let byte_offset_diff = next.next - current_byte_offset; self.pos = self.pos + Pos::from_usize(byte_offset_diff); self.curr = Some(next.ch); @@ -346,8 +355,8 @@ impl<'a> StringReader<'a> { pub fn nextch(&self) -> Option<char> { let offset = self.byte_offset(self.pos).to_usize(); - if offset < self.filemap.src.len() { - Some(self.filemap.src.char_at(offset)) + if offset < self.source_text.len() { + Some(self.source_text.char_at(offset)) } else { None } @@ -359,7 +368,7 @@ impl<'a> StringReader<'a> { pub fn nextnextch(&self) -> Option<char> { let offset = self.byte_offset(self.pos).to_usize(); - let s = &*self.filemap.src; + let s = &self.source_text[..]; if offset >= s.len() { return None } let str::CharRange { next, .. 
} = s.char_range_at(offset); if next < s.len() { diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs index 4d099529cb4..66589d5e3d1 100644 --- a/src/libsyntax/parse/mod.rs +++ b/src/libsyntax/parse/mod.rs @@ -751,6 +751,7 @@ pub fn integer_lit(s: &str, suffix: Option<&str>, sd: &SpanHandler, sp: Span) -> #[cfg(test)] mod test { use super::*; + use std::rc::Rc; use serialize::json; use codemap::{Span, BytePos, Pos, Spanned, NO_EXPANSION}; use owned_slice::OwnedSlice; @@ -855,117 +856,50 @@ mod test { } #[test] - fn string_to_tts_1 () { + fn string_to_tts_1() { let tts = string_to_tts("fn a (b : i32) { b; }".to_string()); - assert_eq!(json::encode(&tts).unwrap(), - "[\ - {\ - \"variant\":\"TtToken\",\ - \"fields\":[\ - null,\ - {\ - \"variant\":\"Ident\",\ - \"fields\":[\ - \"fn\",\ - \"Plain\"\ - ]\ - }\ - ]\ - },\ - {\ - \"variant\":\"TtToken\",\ - \"fields\":[\ - null,\ - {\ - \"variant\":\"Ident\",\ - \"fields\":[\ - \"a\",\ - \"Plain\"\ - ]\ - }\ - ]\ - },\ - {\ - \"variant\":\"TtDelimited\",\ - \"fields\":[\ - null,\ - {\ - \"delim\":\"Paren\",\ - \"open_span\":null,\ - \"tts\":[\ - {\ - \"variant\":\"TtToken\",\ - \"fields\":[\ - null,\ - {\ - \"variant\":\"Ident\",\ - \"fields\":[\ - \"b\",\ - \"Plain\"\ - ]\ - }\ - ]\ - },\ - {\ - \"variant\":\"TtToken\",\ - \"fields\":[\ - null,\ - \"Colon\"\ - ]\ - },\ - {\ - \"variant\":\"TtToken\",\ - \"fields\":[\ - null,\ - {\ - \"variant\":\"Ident\",\ - \"fields\":[\ - \"i32\",\ - \"Plain\"\ - ]\ - }\ - ]\ - }\ - ],\ - \"close_span\":null\ - }\ - ]\ - },\ - {\ - \"variant\":\"TtDelimited\",\ - \"fields\":[\ - null,\ - {\ - \"delim\":\"Brace\",\ - \"open_span\":null,\ - \"tts\":[\ - {\ - \"variant\":\"TtToken\",\ - \"fields\":[\ - null,\ - {\ - \"variant\":\"Ident\",\ - \"fields\":[\ - \"b\",\ - \"Plain\"\ - ]\ - }\ - ]\ - },\ - {\ - \"variant\":\"TtToken\",\ - \"fields\":[\ - null,\ - \"Semi\"\ - ]\ - }\ - ],\ - \"close_span\":null\ - }\ - ]\ - }\ -]" - ); + + let expected = vec![ + ast::TtToken(sp(0, 
2), + token::Ident(str_to_ident("fn"), + token::IdentStyle::Plain)), + ast::TtToken(sp(3, 4), + token::Ident(str_to_ident("a"), + token::IdentStyle::Plain)), + ast::TtDelimited( + sp(5, 14), + Rc::new(ast::Delimited { + delim: token::DelimToken::Paren, + open_span: sp(5, 6), + tts: vec![ + ast::TtToken(sp(6, 7), + token::Ident(str_to_ident("b"), + token::IdentStyle::Plain)), + ast::TtToken(sp(8, 9), + token::Colon), + ast::TtToken(sp(10, 13), + token::Ident(str_to_ident("i32"), + token::IdentStyle::Plain)), + ], + close_span: sp(13, 14), + })), + ast::TtDelimited( + sp(15, 21), + Rc::new(ast::Delimited { + delim: token::DelimToken::Brace, + open_span: sp(15, 16), + tts: vec![ + ast::TtToken(sp(17, 18), + token::Ident(str_to_ident("b"), + token::IdentStyle::Plain)), + ast::TtToken(sp(18, 19), + token::Semi) + ], + close_span: sp(20, 21), + })) + ]; + + assert_eq!(tts, expected); } #[test] fn ret_expr() { |
