diff options
| author | Mazdak Farrokhzad <twingoow@gmail.com> | 2019-10-11 14:39:52 +0200 |
|---|---|---|
| committer | Mazdak Farrokhzad <twingoow@gmail.com> | 2019-11-07 13:59:13 +0100 |
| commit | 27f97aa468b5079bfd159e6fee9a04d5501a8818 (patch) | |
| tree | 014c6f7ebc01fdde85bbd4d12d10e74a377d6f36 /src/libsyntax/parse | |
| parent | a1571b68552b0d56d85080c5f92fdab233775de4 (diff) | |
| download | rust-27f97aa468b5079bfd159e6fee9a04d5501a8818.tar.gz rust-27f97aa468b5079bfd159e6fee9a04d5501a8818.zip | |
move syntax::parse::lexer::comments -> syntax::util::comments
Diffstat (limited to 'src/libsyntax/parse')
| -rw-r--r-- | src/libsyntax/parse/lexer/comments.rs | 255 | ||||
| -rw-r--r-- | src/libsyntax/parse/lexer/comments/tests.rs | 47 | ||||
| -rw-r--r-- | src/libsyntax/parse/lexer/mod.rs | 21 | ||||
| -rw-r--r-- | src/libsyntax/parse/lexer/tests.rs | 1 | ||||
| -rw-r--r-- | src/libsyntax/parse/parser/attr.rs | 2 | ||||
| -rw-r--r-- | src/libsyntax/parse/parser/mod.rs | 2 |
6 files changed, 6 insertions, 322 deletions
diff --git a/src/libsyntax/parse/lexer/comments.rs b/src/libsyntax/parse/lexer/comments.rs deleted file mode 100644 index 33415bdcb62..00000000000 --- a/src/libsyntax/parse/lexer/comments.rs +++ /dev/null @@ -1,255 +0,0 @@ -pub use CommentStyle::*; - -use super::is_block_doc_comment; - -use crate::ast; -use crate::source_map::SourceMap; -use crate::sess::ParseSess; - -use syntax_pos::{BytePos, CharPos, Pos, FileName}; - -use std::usize; - -#[cfg(test)] -mod tests; - -#[derive(Clone, Copy, PartialEq, Debug)] -pub enum CommentStyle { - /// No code on either side of each line of the comment - Isolated, - /// Code exists to the left of the comment - Trailing, - /// Code before /* foo */ and after the comment - Mixed, - /// Just a manual blank line "\n\n", for layout - BlankLine, -} - -#[derive(Clone)] -pub struct Comment { - pub style: CommentStyle, - pub lines: Vec<String>, - pub pos: BytePos, -} - -fn is_doc_comment(s: &str) -> bool { - (s.starts_with("///") && super::is_doc_comment(s)) || s.starts_with("//!") || - (s.starts_with("/**") && is_block_doc_comment(s)) || s.starts_with("/*!") -} - -pub fn doc_comment_style(comment: &str) -> ast::AttrStyle { - assert!(is_doc_comment(comment)); - if comment.starts_with("//!") || comment.starts_with("/*!") { - ast::AttrStyle::Inner - } else { - ast::AttrStyle::Outer - } -} - -pub fn strip_doc_comment_decoration(comment: &str) -> String { - /// remove whitespace-only lines from the start/end of lines - fn vertical_trim(lines: Vec<String>) -> Vec<String> { - let mut i = 0; - let mut j = lines.len(); - // first line of all-stars should be omitted - if !lines.is_empty() && lines[0].chars().all(|c| c == '*') { - i += 1; - } - - while i < j && lines[i].trim().is_empty() { - i += 1; - } - // like the first, a last line of all stars should be omitted - if j > i && - lines[j - 1] - .chars() - .skip(1) - .all(|c| c == '*') { - j -= 1; - } - - while j > i && lines[j - 1].trim().is_empty() { - j -= 1; - } - - lines[i..j].to_vec() - } - - /// remove a "[ \t]*\*" block from each line, if possible - fn horizontal_trim(lines: Vec<String>) -> Vec<String> { - let mut i = usize::MAX; - let mut can_trim = true; - let mut first = true; - - for line in &lines { - for (j, c) in line.chars().enumerate() { - if j > i || !"* \t".contains(c) { - can_trim = false; - break; - } - if c == '*' { - if first { - i = j; - first = false; - } else if i != j { - can_trim = false; - } - break; - } - } - if i >= line.len() { - can_trim = false; - } - if !can_trim { - break; - } - } - - if can_trim { - lines.iter() - .map(|line| (&line[i + 1..line.len()]).to_string()) - .collect() - } else { - lines - } - } - - // one-line comments lose their prefix - const ONELINERS: &[&str] = &["///!", "///", "//!", "//"]; - - for prefix in ONELINERS { - if comment.starts_with(*prefix) { - return (&comment[prefix.len()..]).to_string(); - } - } - - if comment.starts_with("/*") { - let lines = comment[3..comment.len() - 2] - .lines() - .map(|s| s.to_string()) - .collect::<Vec<String>>(); - - let lines = vertical_trim(lines); - let lines = horizontal_trim(lines); - - return lines.join("\n"); - } - - panic!("not a doc-comment: {}", comment); -} - -/// Returns `None` if the first `col` chars of `s` contain a non-whitespace char. -/// Otherwise returns `Some(k)` where `k` is first char offset after that leading -/// whitespace. Note that `k` may be outside bounds of `s`. -fn all_whitespace(s: &str, col: CharPos) -> Option<usize> { - let mut idx = 0; - for (i, ch) in s.char_indices().take(col.to_usize()) { - if !ch.is_whitespace() { - return None; - } - idx = i + ch.len_utf8(); - } - Some(idx) -} - -fn trim_whitespace_prefix(s: &str, col: CharPos) -> &str { - let len = s.len(); - match all_whitespace(&s, col) { - Some(col) => if col < len { &s[col..] } else { "" }, - None => s, - } -} - -fn split_block_comment_into_lines( - text: &str, - col: CharPos, -) -> Vec<String> { - let mut res: Vec<String> = vec![]; - let mut lines = text.lines(); - // just push the first line - res.extend(lines.next().map(|it| it.to_string())); - // for other lines, strip common whitespace prefix - for line in lines { - res.push(trim_whitespace_prefix(line, col).to_string()) - } - res -} - -// it appears this function is called only from pprust... that's -// probably not a good thing. -crate fn gather_comments(sess: &ParseSess, path: FileName, src: String) -> Vec<Comment> { - let cm = SourceMap::new(sess.source_map().path_mapping().clone()); - let source_file = cm.new_source_file(path, src); - let text = (*source_file.src.as_ref().unwrap()).clone(); - - let text: &str = text.as_str(); - let start_bpos = source_file.start_pos; - let mut pos = 0; - let mut comments: Vec<Comment> = Vec::new(); - let mut code_to_the_left = false; - - if let Some(shebang_len) = rustc_lexer::strip_shebang(text) { - comments.push(Comment { - style: Isolated, - lines: vec![text[..shebang_len].to_string()], - pos: start_bpos, - }); - pos += shebang_len; - } - - for token in rustc_lexer::tokenize(&text[pos..]) { - let token_text = &text[pos..pos + token.len]; - match token.kind { - rustc_lexer::TokenKind::Whitespace => { - if let Some(mut idx) = token_text.find('\n') { - code_to_the_left = false; - while let Some(next_newline) = &token_text[idx + 1..].find('\n') { - idx = idx + 1 + next_newline; - comments.push(Comment { - style: BlankLine, - lines: vec![], - pos: start_bpos + BytePos((pos + idx) as u32), - }); - } - } - } - rustc_lexer::TokenKind::BlockComment { terminated: _ } => { - if !is_block_doc_comment(token_text) { - let code_to_the_right = match text[pos + token.len..].chars().next() { - Some('\r') | Some('\n') => false, - _ => true, - }; - let style = match (code_to_the_left, code_to_the_right) { - (true, true) | (false, true) => Mixed, - (false, false) => Isolated, - (true, false) => Trailing, - }; - - // Count the number of chars since the start of the line by rescanning. - let pos_in_file = start_bpos + BytePos(pos as u32); - let line_begin_in_file = source_file.line_begin_pos(pos_in_file); - let line_begin_pos = (line_begin_in_file - start_bpos).to_usize(); - let col = CharPos(text[line_begin_pos..pos].chars().count()); - - let lines = split_block_comment_into_lines(token_text, col); - comments.push(Comment { style, lines, pos: pos_in_file }) - } - } - rustc_lexer::TokenKind::LineComment => { - if !is_doc_comment(token_text) { - comments.push(Comment { - style: if code_to_the_left { Trailing } else { Isolated }, - lines: vec![token_text.to_string()], - pos: start_bpos + BytePos(pos as u32), - }) - } - } - _ => { - code_to_the_left = true; - } - } - pos += token.len; - } - - comments -} diff --git a/src/libsyntax/parse/lexer/comments/tests.rs b/src/libsyntax/parse/lexer/comments/tests.rs deleted file mode 100644 index f9cd69fb50d..00000000000 --- a/src/libsyntax/parse/lexer/comments/tests.rs +++ /dev/null @@ -1,47 +0,0 @@ -use super::*; - -#[test] -fn test_block_doc_comment_1() { - let comment = "/**\n * Test \n ** Test\n * Test\n*/"; - let stripped = strip_doc_comment_decoration(comment); - assert_eq!(stripped, " Test \n* Test\n Test"); -} - -#[test] -fn test_block_doc_comment_2() { - let comment = "/**\n * Test\n * Test\n*/"; - let stripped = strip_doc_comment_decoration(comment); - assert_eq!(stripped, " Test\n Test"); -} - -#[test] -fn test_block_doc_comment_3() { - let comment = "/**\n let a: *i32;\n *a = 5;\n*/"; - let stripped = strip_doc_comment_decoration(comment); - assert_eq!(stripped, " let a: *i32;\n *a = 5;"); -} - -#[test] -fn test_block_doc_comment_4() { - let comment = "/*******************\n test\n *********************/"; - let stripped = strip_doc_comment_decoration(comment); - assert_eq!(stripped, " test"); -} - -#[test] -fn test_line_doc_comment() { - let stripped = strip_doc_comment_decoration("/// test"); - assert_eq!(stripped, " test"); - let stripped = strip_doc_comment_decoration("///! test"); - assert_eq!(stripped, " test"); - let stripped = strip_doc_comment_decoration("// test"); - assert_eq!(stripped, " test"); - let stripped = strip_doc_comment_decoration("// test"); - assert_eq!(stripped, " test"); - let stripped = strip_doc_comment_decoration("///test"); - assert_eq!(stripped, "test"); - let stripped = strip_doc_comment_decoration("///!test"); - assert_eq!(stripped, "test"); - let stripped = strip_doc_comment_decoration("//test"); - assert_eq!(stripped, "test"); -} diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 5499a3cae5f..b1b7b08c78a 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -1,6 +1,7 @@ use crate::token::{self, Token, TokenKind}; use crate::sess::ParseSess; use crate::symbol::{sym, Symbol}; +use crate::util::comments; use errors::{FatalError, DiagnosticBuilder}; use syntax_pos::{BytePos, Pos, Span}; @@ -15,7 +16,6 @@ use log::debug; #[cfg(test)] mod tests; -pub mod comments; mod tokentrees; mod unicode_chars; mod unescape_error_reporting; @@ -179,7 +179,7 @@ impl<'a> StringReader<'a> { rustc_lexer::TokenKind::LineComment => { let string = self.str_from(start); // comments with only more "/"s are not doc comments - let tok = if is_doc_comment(string) { + let tok = if comments::is_line_doc_comment(string) { self.forbid_bare_cr(start, string, "bare CR not allowed in doc-comment"); token::DocComment(Symbol::intern(string)) } else { @@ -192,7 +192,7 @@ impl<'a> StringReader<'a> { let string = self.str_from(start); // block comments starting with "/**" or "/*!" are doc-comments // but comments with only "*"s between two "/"s are not - let is_doc_comment = is_block_doc_comment(string); + let is_doc_comment = comments::is_block_doc_comment(string); if !terminated { let msg = if is_doc_comment { @@ -643,18 +643,3 @@ impl<'a> StringReader<'a> { } } } - -fn is_doc_comment(s: &str) -> bool { - let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/') || - s.starts_with("//!"); - debug!("is {:?} a doc comment? {}", s, res); - res -} - -fn is_block_doc_comment(s: &str) -> bool { - // Prevent `/**/` from being parsed as a doc comment - let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*') || - s.starts_with("/*!")) && s.len() >= 5; - debug!("is {:?} a doc comment? {}", s, res); - res -} diff --git a/src/libsyntax/parse/lexer/tests.rs b/src/libsyntax/parse/lexer/tests.rs index b9b82b2bcef..baa6fb59537 100644 --- a/src/libsyntax/parse/lexer/tests.rs +++ b/src/libsyntax/parse/lexer/tests.rs @@ -3,6 +3,7 @@ use super::*; use crate::symbol::Symbol; use crate::source_map::{SourceMap, FilePathMapping}; use crate::token; +use crate::util::comments::is_doc_comment; use crate::with_default_globals; use errors::{Handler, emitter::EmitterWriter}; diff --git a/src/libsyntax/parse/parser/attr.rs b/src/libsyntax/parse/parser/attr.rs index 920e7a521ef..31f0a02a483 100644 --- a/src/libsyntax/parse/parser/attr.rs +++ b/src/libsyntax/parse/parser/attr.rs @@ -1,7 +1,7 @@ use super::{SeqSep, Parser, TokenType, PathStyle}; use crate::attr; use crate::ast; -use crate::parse::lexer::comments; +use crate::util::comments; use crate::token::{self, Nonterminal, DelimToken}; use crate::tokenstream::{TokenStream, TokenTree}; use crate::source_map::Span; diff --git a/src/libsyntax/parse/parser/mod.rs b/src/libsyntax/parse/parser/mod.rs index f7f7b0e83d4..455f4172f5f 100644 --- a/src/libsyntax/parse/parser/mod.rs +++ b/src/libsyntax/parse/parser/mod.rs @@ -17,7 +17,7 @@ use crate::ast::{ }; use crate::parse::{Directory, DirectoryOwnership}; use crate::parse::lexer::UnmatchedBrace; -use crate::parse::lexer::comments::{doc_comment_style, strip_doc_comment_decoration}; +use crate::util::comments::{doc_comment_style, strip_doc_comment_decoration}; use crate::token::{self, Token, TokenKind, DelimToken}; use crate::print::pprust; use crate::ptr::P; |
