| author | bors <bors@rust-lang.org> | 2019-07-06 02:58:36 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2019-07-06 02:58:36 +0000 |
| commit | b820c761744db080ff7a4ba3ac88d259065cb836 (patch) | |
| tree | d3d1512abe1b1b3b5bf5c8f73eff3e54bb0da547 /src/libsyntax | |
| parent | 481068a707679257e2a738b40987246e0420e787 (diff) | |
| parent | 46edb516dfc207450d84b89c4914c83e4d82006a (diff) | |
Auto merge of #62428 - Centril:rollup-2udow5e, r=Centril
Rollup of 7 pull requests

Successful merges:

- #62151 (Update linked OpenSSL version)
- #62245 (Miri engine: support extra function (pointer) values)
- #62257 (forward read_c_str method from Memory to Alloc)
- #62264 (Fix perf regression from Miri Machine trait changes)
- #62296 (request at least ptr-size alignment from posix_memalign)
- #62329 (Remove support for 1-token lookahead from the lexer)
- #62377 (Add test for ICE #62375)

Failed merges:

r? @ghost
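Of the merged PRs, #62329 is the one that accounts for the `src/libsyntax` changes shown below: the lexer's cached `peek_token`/`peek_span_src_raw` pair is removed, and the token-tree reader instead tracks whether any trivia (whitespace or comments) separated two neighbouring tokens. A minimal, self-contained sketch of that joint-ness-tracking idea, using toy token types rather than rustc's actual `StringReader` and `Token`:

```rust
// Toy stand-ins for the real lexer types; names are illustrative only.
#[derive(Clone, Copy, PartialEq, Debug)]
enum TokenKind { Whitespace, Comment, Plus, Eq, Ident, Eof }

#[derive(Clone, Copy, PartialEq, Debug)]
enum IsJoint { Joint, NonJoint }

struct Lexer { tokens: Vec<TokenKind>, idx: usize }

impl Lexer {
    /// Like the patched `StringReader::next_token`: returns every token,
    /// trivia included, with no lookahead cache.
    fn next_token(&mut self) -> TokenKind {
        let tok = self.tokens.get(self.idx).copied().unwrap_or(TokenKind::Eof);
        self.idx += 1;
        tok
    }
}

struct TreeReader { lexer: Lexer, token: TokenKind, joint_to_prev: IsJoint }

impl TreeReader {
    /// Mirrors the patched `TokenTreesReader::real_token`: skip trivia and
    /// record whether any trivia separated this token from the previous one.
    fn real_token(&mut self) {
        self.joint_to_prev = IsJoint::Joint;
        loop {
            let token = self.lexer.next_token();
            match token {
                TokenKind::Whitespace | TokenKind::Comment => {
                    self.joint_to_prev = IsJoint::NonJoint;
                }
                _ => {
                    self.token = token;
                    return;
                }
            }
        }
    }
}

fn main() {
    // `+=` lexes as `Plus`, `Eq` with no trivia in between; whitespace then
    // separates the following identifier.
    let lexer = Lexer {
        tokens: vec![TokenKind::Plus, TokenKind::Eq, TokenKind::Whitespace, TokenKind::Ident],
        idx: 0,
    };
    let mut reader = TreeReader { lexer, token: TokenKind::Eof, joint_to_prev: IsJoint::NonJoint };

    reader.real_token(); // Plus
    reader.real_token(); // Eq -- no trivia skipped, so joint with Plus
    assert_eq!(reader.joint_to_prev, IsJoint::Joint);

    reader.real_token(); // Ident -- whitespace was skipped, so not joint
    assert_eq!(reader.joint_to_prev, IsJoint::NonJoint);
}
```

Tracking a `joint_to_prev` flag while skipping trivia answers the same question the old code answered by comparing the raw source spans of adjacent tokens, but without the lexer having to keep a one-token lookahead cache.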
Diffstat (limited to 'src/libsyntax')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/libsyntax/parse/lexer/comments.rs | 4 |
| -rw-r--r-- | src/libsyntax/parse/lexer/mod.rs | 218 |
| -rw-r--r-- | src/libsyntax/parse/lexer/tokentrees.rs | 27 |
| -rw-r--r-- | src/libsyntax/parse/mod.rs | 2 |
4 files changed, 94 insertions, 157 deletions
diff --git a/src/libsyntax/parse/lexer/comments.rs b/src/libsyntax/parse/lexer/comments.rs
index 97d3fc002e9..988f1aa38d9 100644
--- a/src/libsyntax/parse/lexer/comments.rs
+++ b/src/libsyntax/parse/lexer/comments.rs
@@ -268,7 +268,7 @@ fn read_block_comment(rdr: &mut StringReader<'_>,
     while level > 0 {
         debug!("=== block comment level {}", level);
         if rdr.is_eof() {
-            rdr.fatal("unterminated block comment").raise();
+            rdr.fatal_span_(rdr.pos, rdr.pos, "unterminated block comment").raise();
         }
         if rdr.ch_is('\n') {
             trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col);
@@ -346,7 +346,7 @@ pub fn gather_comments(sess: &ParseSess, path: FileName, srdr: &mut dyn Read) ->
     srdr.read_to_string(&mut src).unwrap();
     let cm = SourceMap::new(sess.source_map().path_mapping().clone());
     let source_file = cm.new_source_file(path, src);
-    let mut rdr = lexer::StringReader::new_raw(sess, source_file, None);
+    let mut rdr = lexer::StringReader::new(sess, source_file, None);
 
     let mut comments: Vec<Comment> = Vec::new();
     let mut code_to_the_left = false; // Only code
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 3717bb435f6..1abbf0ff1ee 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -38,9 +38,6 @@ pub struct StringReader<'a> {
     crate source_file: Lrc<syntax_pos::SourceFile>,
     /// Stop reading src at this index.
     crate end_src_index: usize,
-    // cached:
-    peek_token: Token,
-    peek_span_src_raw: Span,
     fatal_errs: Vec<DiagnosticBuilder<'a>>,
     // cache a direct reference to the source text, so that we don't have to
     // retrieve it via `self.source_file.src.as_ref().unwrap()` all the time.
@@ -49,15 +46,59 @@ pub struct StringReader<'a> {
 }
 
 impl<'a> StringReader<'a> {
-    fn mk_sp(&self, lo: BytePos, hi: BytePos) -> Span {
-        self.mk_sp_and_raw(lo, hi).0
+    pub fn new(sess: &'a ParseSess,
+               source_file: Lrc<syntax_pos::SourceFile>,
+               override_span: Option<Span>) -> Self {
+        let mut sr = StringReader::new_internal(sess, source_file, override_span);
+        sr.bump();
+        sr
+    }
+
+    pub fn retokenize(sess: &'a ParseSess, mut span: Span) -> Self {
+        let begin = sess.source_map().lookup_byte_offset(span.lo());
+        let end = sess.source_map().lookup_byte_offset(span.hi());
+
+        // Make the range zero-length if the span is invalid.
+        if span.lo() > span.hi() || begin.sf.start_pos != end.sf.start_pos {
+            span = span.shrink_to_lo();
+        }
+
+        let mut sr = StringReader::new_internal(sess, begin.sf, None);
+
+        // Seek the lexer to the right byte range.
+        sr.next_pos = span.lo();
+        sr.end_src_index = sr.src_index(span.hi());
+
+        sr.bump();
+
+        sr
     }
 
-    fn mk_sp_and_raw(&self, lo: BytePos, hi: BytePos) -> (Span, Span) {
-        let raw = Span::new(lo, hi, NO_EXPANSION);
-        let real = self.override_span.unwrap_or(raw);
+    fn new_internal(sess: &'a ParseSess, source_file: Lrc<syntax_pos::SourceFile>,
+                    override_span: Option<Span>) -> Self
+    {
+        if source_file.src.is_none() {
+            sess.span_diagnostic.bug(&format!("Cannot lex source_file without source: {}",
+                                              source_file.name));
+        }
+
+        let src = (*source_file.src.as_ref().unwrap()).clone();
+
+        StringReader {
+            sess,
+            next_pos: source_file.start_pos,
+            pos: source_file.start_pos,
+            ch: Some('\n'),
+            source_file,
+            end_src_index: src.len(),
+            src,
+            fatal_errs: Vec::new(),
+            override_span,
+        }
+    }
 
-        (real, raw)
+    fn mk_sp(&self, lo: BytePos, hi: BytePos) -> Span {
+        self.override_span.unwrap_or_else(|| Span::new(lo, hi, NO_EXPANSION))
     }
 
     fn unwrap_or_abort(&mut self, res: Result<Token, ()>) -> Token {
@@ -70,35 +111,32 @@ impl<'a> StringReader<'a> {
         }
     }
 
-    fn next_token(&mut self) -> Token where Self: Sized {
-        let res = self.try_next_token();
-        self.unwrap_or_abort(res)
-    }
-
-    /// Returns the next token. EFFECT: advances the string_reader.
+    /// Returns the next token, including trivia like whitespace or comments.
+    ///
+    /// `Err(())` means that some errors were encountered, which can be
+    /// retrieved using `buffer_fatal_errors`.
     pub fn try_next_token(&mut self) -> Result<Token, ()> {
         assert!(self.fatal_errs.is_empty());
-        let ret_val = self.peek_token.take();
-        self.advance_token()?;
-        Ok(ret_val)
-    }
-
-    fn try_real_token(&mut self) -> Result<Token, ()> {
-        let mut t = self.try_next_token()?;
-        loop {
-            match t.kind {
-                token::Whitespace | token::Comment | token::Shebang(_) => {
-                    t = self.try_next_token()?;
-                }
-                _ => break,
+        match self.scan_whitespace_or_comment() {
+            Some(comment) => Ok(comment),
+            None => {
+                let (kind, start_pos, end_pos) = if self.is_eof() {
+                    (token::Eof, self.source_file.end_pos, self.source_file.end_pos)
+                } else {
+                    let start_pos = self.pos;
+                    (self.next_token_inner()?, start_pos, self.pos)
+                };
+                let span = self.mk_sp(start_pos, end_pos);
+                Ok(Token::new(kind, span))
             }
         }
-
-        Ok(t)
     }
 
-    pub fn real_token(&mut self) -> Token {
-        let res = self.try_real_token();
+    /// Returns the next token, including trivia like whitespace or comments.
+    ///
+    /// Aborts in case of an error.
+    pub fn next_token(&mut self) -> Token {
+        let res = self.try_next_token();
         self.unwrap_or_abort(res)
     }
 
@@ -120,10 +158,6 @@ impl<'a> StringReader<'a> {
         FatalError.raise();
     }
 
-    fn fatal(&self, m: &str) -> FatalError {
-        self.fatal_span(self.peek_token.span, m)
-    }
-
    crate fn emit_fatal_errors(&mut self) {
         for err in &mut self.fatal_errs {
             err.emit();
@@ -142,81 +176,6 @@ impl<'a> StringReader<'a> {
         buffer
     }
 
-    pub fn peek(&self) -> &Token {
-        &self.peek_token
-    }
-
-    /// For comments.rs, which hackily pokes into next_pos and ch
-    fn new_raw(sess: &'a ParseSess,
-               source_file: Lrc<syntax_pos::SourceFile>,
-               override_span: Option<Span>) -> Self {
-        let mut sr = StringReader::new_raw_internal(sess, source_file, override_span);
-        sr.bump();
-
-        sr
-    }
-
-    fn new_raw_internal(sess: &'a ParseSess, source_file: Lrc<syntax_pos::SourceFile>,
-                        override_span: Option<Span>) -> Self
-    {
-        if source_file.src.is_none() {
-            sess.span_diagnostic.bug(&format!("Cannot lex source_file without source: {}",
-                                              source_file.name));
-        }
-
-        let src = (*source_file.src.as_ref().unwrap()).clone();
-
-        StringReader {
-            sess,
-            next_pos: source_file.start_pos,
-            pos: source_file.start_pos,
-            ch: Some('\n'),
-            source_file,
-            end_src_index: src.len(),
-            peek_token: Token::dummy(),
-            peek_span_src_raw: syntax_pos::DUMMY_SP,
-            src,
-            fatal_errs: Vec::new(),
-            override_span,
-        }
-    }
-
-    pub fn new_or_buffered_errs(sess: &'a ParseSess,
-                                source_file: Lrc<syntax_pos::SourceFile>,
-                                override_span: Option<Span>) -> Result<Self, Vec<Diagnostic>> {
-        let mut sr = StringReader::new_raw(sess, source_file, override_span);
-        if sr.advance_token().is_err() {
-            Err(sr.buffer_fatal_errors())
-        } else {
-            Ok(sr)
-        }
-    }
-
-    pub fn retokenize(sess: &'a ParseSess, mut span: Span) -> Self {
-        let begin = sess.source_map().lookup_byte_offset(span.lo());
-        let end = sess.source_map().lookup_byte_offset(span.hi());
-
-        // Make the range zero-length if the span is invalid.
-        if span.lo() > span.hi() || begin.sf.start_pos != end.sf.start_pos {
-            span = span.shrink_to_lo();
-        }
-
-        let mut sr = StringReader::new_raw_internal(sess, begin.sf, None);
-
-        // Seek the lexer to the right byte range.
-        sr.next_pos = span.lo();
-        sr.end_src_index = sr.src_index(span.hi());
-
-        sr.bump();
-
-        if sr.advance_token().is_err() {
-            sr.emit_fatal_errors();
-            FatalError.raise();
-        }
-
-        sr
-    }
-
     #[inline]
     fn ch_is(&self, c: char) -> bool {
         self.ch == Some(c)
@@ -269,30 +228,6 @@ impl<'a> StringReader<'a> {
         self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), &m[..])
     }
 
-    /// Advance peek_token to refer to the next token, and
-    /// possibly update the interner.
-    fn advance_token(&mut self) -> Result<(), ()> {
-        match self.scan_whitespace_or_comment() {
-            Some(comment) => {
-                self.peek_span_src_raw = comment.span;
-                self.peek_token = comment;
-            }
-            None => {
-                let (kind, start_pos, end_pos) = if self.is_eof() {
-                    (token::Eof, self.source_file.end_pos, self.source_file.end_pos)
-                } else {
-                    let start_pos = self.pos;
-                    (self.next_token_inner()?, start_pos, self.pos)
-                };
-                let (real, raw) = self.mk_sp_and_raw(start_pos, end_pos);
-                self.peek_token = Token::new(kind, real);
-                self.peek_span_src_raw = raw;
-            }
-        }
-
-        Ok(())
-    }
-
     #[inline]
     fn src_index(&self, pos: BytePos) -> usize {
         (pos - self.source_file.start_pos).to_usize()
@@ -1462,12 +1397,7 @@ mod tests {
                       teststr: String)
                       -> StringReader<'a> {
         let sf = sm.new_source_file(PathBuf::from(teststr.clone()).into(), teststr);
-        let mut sr = StringReader::new_raw(sess, sf, None);
-        if sr.advance_token().is_err() {
-            sr.emit_fatal_errors();
-            FatalError.raise();
-        }
-        sr
+        StringReader::new(sess, sf, None)
     }
 
     #[test]
@@ -1489,17 +1419,17 @@
         assert_eq!(tok1.kind, tok2.kind);
         assert_eq!(tok1.span, tok2.span);
         assert_eq!(string_reader.next_token(), token::Whitespace);
-        // the 'main' id is already read:
-        assert_eq!(string_reader.pos.clone(), BytePos(28));
         // read another token:
         let tok3 = string_reader.next_token();
+        assert_eq!(string_reader.pos.clone(), BytePos(28));
         let tok4 = Token::new(
             mk_ident("main"),
             Span::new(BytePos(24), BytePos(28), NO_EXPANSION),
         );
         assert_eq!(tok3.kind, tok4.kind);
         assert_eq!(tok3.span, tok4.span);
-        // the lparen is already read:
+
+        assert_eq!(string_reader.next_token(), token::OpenDelim(token::Paren));
         assert_eq!(string_reader.pos.clone(), BytePos(29))
     })
 }
diff --git a/src/libsyntax/parse/lexer/tokentrees.rs b/src/libsyntax/parse/lexer/tokentrees.rs
index 99d9d40a45b..830fbec58de 100644
--- a/src/libsyntax/parse/lexer/tokentrees.rs
+++ b/src/libsyntax/parse/lexer/tokentrees.rs
@@ -4,13 +4,14 @@ use crate::print::pprust::token_to_string;
 use crate::parse::lexer::{StringReader, UnmatchedBrace};
 use crate::parse::token::{self, Token};
 use crate::parse::PResult;
-use crate::tokenstream::{DelimSpan, IsJoint::*, TokenStream, TokenTree, TreeAndJoint};
+use crate::tokenstream::{DelimSpan, IsJoint::{self, *}, TokenStream, TokenTree, TreeAndJoint};
 
 impl<'a> StringReader<'a> {
     crate fn into_token_trees(self) -> (PResult<'a, TokenStream>, Vec<UnmatchedBrace>) {
         let mut tt_reader = TokenTreesReader {
             string_reader: self,
             token: Token::dummy(),
+            joint_to_prev: Joint,
             open_braces: Vec::new(),
             unmatched_braces: Vec::new(),
             matching_delim_spans: Vec::new(),
@@ -24,6 +25,7 @@ impl<'a> StringReader<'a> {
 struct TokenTreesReader<'a> {
     string_reader: StringReader<'a>,
     token: Token,
+    joint_to_prev: IsJoint,
     /// Stack of open delimiters and their spans. Used for error message.
     open_braces: Vec<(token::DelimToken, Span)>,
     unmatched_braces: Vec<UnmatchedBrace>,
@@ -203,21 +205,26 @@ impl<'a> TokenTreesReader<'a> {
             },
             _ => {
                 let tt = TokenTree::Token(self.token.take());
-                // Note that testing for joint-ness here is done via the raw
-                // source span as the joint-ness is a property of the raw source
-                // rather than wanting to take `override_span` into account.
-                // Additionally, we actually check if the *next* pair of tokens
-                // is joint, but this is equivalent to checking the current pair.
-                let raw = self.string_reader.peek_span_src_raw;
                 self.real_token();
-                let is_joint = raw.hi() == self.string_reader.peek_span_src_raw.lo()
-                    && self.token.is_op();
+                let is_joint = self.joint_to_prev == Joint && self.token.is_op();
                 Ok((tt, if is_joint { Joint } else { NonJoint }))
             }
         }
     }
 
     fn real_token(&mut self) {
-        self.token = self.string_reader.real_token();
+        self.joint_to_prev = Joint;
+        loop {
+            let token = self.string_reader.next_token();
+            match token.kind {
+                token::Whitespace | token::Comment | token::Shebang(_) => {
+                    self.joint_to_prev = NonJoint;
+                }
+                _ => {
+                    self.token = token;
+                    return;
+                },
+            }
+        }
     }
 }
diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs
index 4056905d5dd..4c4551b1757 100644
--- a/src/libsyntax/parse/mod.rs
+++ b/src/libsyntax/parse/mod.rs
@@ -308,7 +308,7 @@ pub fn maybe_file_to_stream(
     source_file: Lrc<SourceFile>,
     override_span: Option<Span>,
 ) -> Result<(TokenStream, Vec<lexer::UnmatchedBrace>), Vec<Diagnostic>> {
-    let srdr = lexer::StringReader::new_or_buffered_errs(sess, source_file, override_span)?;
+    let srdr = lexer::StringReader::new(sess, source_file, override_span);
     let (token_trees, unmatched_braces) = srdr.into_token_trees();
     match token_trees {
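One consequence of dropping the peek cache is visible in the `mk_sp` hunk above: with no separate "raw" span left to return, `mk_sp_and_raw` collapses into a single line where `override_span`, when set, simply wins over the span computed from byte positions. A small sketch of that surviving behaviour, with a toy `Span` and reader standing in for rustc's actual types:

```rust
// Toy stand-ins; rustc's real Span and StringReader are more involved.
#[derive(Clone, Copy, Debug, PartialEq)]
struct Span { lo: u32, hi: u32 }

struct Reader { override_span: Option<Span> }

impl Reader {
    /// Mirrors the simplified `mk_sp` in the patch: the override, when
    /// present, replaces the span computed from byte positions.
    fn mk_sp(&self, lo: u32, hi: u32) -> Span {
        self.override_span.unwrap_or_else(|| Span { lo, hi })
    }
}

fn main() {
    let plain = Reader { override_span: None };
    assert_eq!(plain.mk_sp(4, 7), Span { lo: 4, hi: 7 });

    let overridden = Reader { override_span: Some(Span { lo: 0, hi: 1 }) };
    assert_eq!(overridden.mk_sp(4, 7), Span { lo: 0, hi: 1 });
}
```

Callers that re-lex source at a different location pass `override_span` (it is threaded through `maybe_file_to_stream` above), so every token they pull gets the override; ordinary lexing gets a freshly built span.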
