move syntax::parse::lexer::comments -> syntax::util::comments

author: Mazdak Farrokhzad <twingoow@gmail.com> 2019-10-11 14:39:52 +0200
committer: Mazdak Farrokhzad <twingoow@gmail.com> 2019-11-07 13:59:13 +0100
commit: 27f97aa468b5079bfd159e6fee9a04d5501a8818 (patch)
tree: 014c6f7ebc01fdde85bbd4d12d10e74a377d6f36 /src/libsyntax/parse
parent: a1571b68552b0d56d85080c5f92fdab233775de4 (diff)
download: rust-27f97aa468b5079bfd159e6fee9a04d5501a8818.tar.gz rust-27f97aa468b5079bfd159e6fee9a04d5501a8818.zip
6 files changed, 6 insertions, 322 deletions
diff --git a/src/libsyntax/parse/lexer/comments.rs b/src/libsyntax/parse/lexer/comments.rs
deleted file mode 100644
index 33415bdcb62..00000000000
--- a/src/libsyntax/parse/lexer/comments.rs
+++ /dev/null
@@ -1,255 +0,0 @@
-pub use CommentStyle::*;
-
-use super::is_block_doc_comment;
-
-use crate::ast;
-use crate::source_map::SourceMap;
-use crate::sess::ParseSess;
-
-use syntax_pos::{BytePos, CharPos, Pos, FileName};
-
-use std::usize;
-
-#[cfg(test)]
-mod tests;
-
-#[derive(Clone, Copy, PartialEq, Debug)]
-pub enum CommentStyle {
-    /// No code on either side of each line of the comment
-    Isolated,
-    /// Code exists to the left of the comment
-    Trailing,
-    /// Code before /* foo */ and after the comment
-    Mixed,
-    /// Just a manual blank line "\n\n", for layout
-    BlankLine,
-}
-
-#[derive(Clone)]
-pub struct Comment {
-    pub style: CommentStyle,
-    pub lines: Vec<String>,
-    pub pos: BytePos,
-}
-
-fn is_doc_comment(s: &str) -> bool {
-    (s.starts_with("///") && super::is_doc_comment(s)) || s.starts_with("//!") ||
-    (s.starts_with("/**") && is_block_doc_comment(s)) || s.starts_with("/*!")
-}
-
-pub fn doc_comment_style(comment: &str) -> ast::AttrStyle {
-    assert!(is_doc_comment(comment));
-    if comment.starts_with("//!") || comment.starts_with("/*!") {
-        ast::AttrStyle::Inner
-    } else {
-        ast::AttrStyle::Outer
-    }
-}
-
-pub fn strip_doc_comment_decoration(comment: &str) -> String {
-    /// remove whitespace-only lines from the start/end of lines
-    fn vertical_trim(lines: Vec<String>) -> Vec<String> {
-        let mut i = 0;
-        let mut j = lines.len();
-        // first line of all-stars should be omitted
-        if !lines.is_empty() && lines[0].chars().all(|c| c == '*') {
-            i += 1;
-        }
-
-        while i < j && lines[i].trim().is_empty() {
-            i += 1;
-        }
-        // like the first, a last line of all stars should be omitted
-        if j > i &&
-           lines[j - 1]
-               .chars()
-               .skip(1)
-               .all(|c| c == '*') {
-            j -= 1;
-        }
-
-        while j > i && lines[j - 1].trim().is_empty() {
-            j -= 1;
-        }
-
-        lines[i..j].to_vec()
-    }
-
-    /// remove a "[ \t]*\*" block from each line, if possible
-    fn horizontal_trim(lines: Vec<String>) -> Vec<String> {
-        let mut i = usize::MAX;
-        let mut can_trim = true;
-        let mut first = true;
-
-        for line in &lines {
-            for (j, c) in line.chars().enumerate() {
-                if j > i || !"* \t".contains(c) {
-                    can_trim = false;
-                    break;
-                }
-                if c == '*' {
-                    if first {
-                        i = j;
-                        first = false;
-                    } else if i != j {
-                        can_trim = false;
-                    }
-                    break;
-                }
-            }
-            if i >= line.len() {
-                can_trim = false;
-            }
-            if !can_trim {
-                break;
-            }
-        }
-
-        if can_trim {
-            lines.iter()
-                 .map(|line| (&line[i + 1..line.len()]).to_string())
-                 .collect()
-        } else {
-            lines
-        }
-    }
-
-    // one-line comments lose their prefix
-    const ONELINERS: &[&str] = &["///!", "///", "//!", "//"];
-
-    for prefix in ONELINERS {
-        if comment.starts_with(*prefix) {
-            return (&comment[prefix.len()..]).to_string();
-        }
-    }
-
-    if comment.starts_with("/*") {
-        let lines = comment[3..comment.len() - 2]
-                        .lines()
-                        .map(|s| s.to_string())
-                        .collect::<Vec<String>>();
-
-        let lines = vertical_trim(lines);
-        let lines = horizontal_trim(lines);
-
-        return lines.join("\n");
-    }
-
-    panic!("not a doc-comment: {}", comment);
-}
-
-/// Returns `None` if the first `col` chars of `s` contain a non-whitespace char.
-/// Otherwise returns `Some(k)` where `k` is first char offset after that leading
-/// whitespace. Note that `k` may be outside bounds of `s`.
-fn all_whitespace(s: &str, col: CharPos) -> Option<usize> {
-    let mut idx = 0;
-    for (i, ch) in s.char_indices().take(col.to_usize()) {
-        if !ch.is_whitespace() {
-            return None;
-        }
-        idx = i + ch.len_utf8();
-    }
-    Some(idx)
-}
-
-fn trim_whitespace_prefix(s: &str, col: CharPos) -> &str {
-    let len = s.len();
-    match all_whitespace(&s, col) {
-        Some(col) => if col < len { &s[col..] } else { "" },
-        None => s,
-    }
-}
-
-fn split_block_comment_into_lines(
-    text: &str,
-    col: CharPos,
-) -> Vec<String> {
-    let mut res: Vec<String> = vec![];
-    let mut lines = text.lines();
-    // just push the first line
-    res.extend(lines.next().map(|it| it.to_string()));
-    // for other lines, strip common whitespace prefix
-    for line in lines {
-        res.push(trim_whitespace_prefix(line, col).to_string())
-    }
-    res
-}
-
-// it appears this function is called only from pprust... that's
-// probably not a good thing.
-crate fn gather_comments(sess: &ParseSess, path: FileName, src: String) -> Vec<Comment> {
-    let cm = SourceMap::new(sess.source_map().path_mapping().clone());
-    let source_file = cm.new_source_file(path, src);
-    let text = (*source_file.src.as_ref().unwrap()).clone();
-
-    let text: &str = text.as_str();
-    let start_bpos = source_file.start_pos;
-    let mut pos = 0;
-    let mut comments: Vec<Comment> = Vec::new();
-    let mut code_to_the_left = false;
-
-    if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
-        comments.push(Comment {
-            style: Isolated,
-            lines: vec![text[..shebang_len].to_string()],
-            pos: start_bpos,
-        });
-        pos += shebang_len;
-    }
-
-    for token in rustc_lexer::tokenize(&text[pos..]) {
-        let token_text = &text[pos..pos + token.len];
-        match token.kind {
-            rustc_lexer::TokenKind::Whitespace => {
-                if let Some(mut idx) = token_text.find('\n') {
-                    code_to_the_left = false;
-                    while let Some(next_newline) = &token_text[idx + 1..].find('\n') {
-                        idx = idx + 1 + next_newline;
-                        comments.push(Comment {
-                            style: BlankLine,
-                            lines: vec![],
-                            pos: start_bpos + BytePos((pos + idx) as u32),
-                        });
-                    }
-                }
-            }
-            rustc_lexer::TokenKind::BlockComment { terminated: _ } => {
-                if !is_block_doc_comment(token_text) {
-                    let code_to_the_right = match text[pos + token.len..].chars().next() {
-                        Some('\r') | Some('\n') => false,
-                        _ => true,
-                    };
-                    let style = match (code_to_the_left, code_to_the_right) {
-                        (true, true) | (false, true) => Mixed,
-                        (false, false) => Isolated,
-                        (true, false) => Trailing,
-                    };
-
-                    // Count the number of chars since the start of the line by rescanning.
-                    let pos_in_file = start_bpos + BytePos(pos as u32);
-                    let line_begin_in_file = source_file.line_begin_pos(pos_in_file);
-                    let line_begin_pos = (line_begin_in_file - start_bpos).to_usize();
-                    let col = CharPos(text[line_begin_pos..pos].chars().count());
-
-                    let lines = split_block_comment_into_lines(token_text, col);
-                    comments.push(Comment { style, lines, pos: pos_in_file })
-                }
-            }
-            rustc_lexer::TokenKind::LineComment => {
-                if !is_doc_comment(token_text) {
-                    comments.push(Comment {
-                        style: if code_to_the_left { Trailing } else { Isolated },
-                        lines: vec![token_text.to_string()],
-                        pos: start_bpos + BytePos(pos as u32),
-                    })
-                }
-            }
-            _ => {
-                code_to_the_left = true;
-            }
-        }
-        pos += token.len;
-    }
-
-    comments
-}
diff --git a/src/libsyntax/parse/lexer/comments/tests.rs b/src/libsyntax/parse/lexer/comments/tests.rs
deleted file mode 100644
index f9cd69fb50d..00000000000
--- a/src/libsyntax/parse/lexer/comments/tests.rs
+++ /dev/null
@@ -1,47 +0,0 @@
-use super::*;
-
-#[test]
-fn test_block_doc_comment_1() {
-    let comment = "/**\n * Test \n **  Test\n *   Test\n*/";
-    let stripped = strip_doc_comment_decoration(comment);
-    assert_eq!(stripped, " Test \n*  Test\n   Test");
-}
-
-#[test]
-fn test_block_doc_comment_2() {
-    let comment = "/**\n * Test\n *  Test\n*/";
-    let stripped = strip_doc_comment_decoration(comment);
-    assert_eq!(stripped, " Test\n  Test");
-}
-
-#[test]
-fn test_block_doc_comment_3() {
-    let comment = "/**\n let a: *i32;\n *a = 5;\n*/";
-    let stripped = strip_doc_comment_decoration(comment);
-    assert_eq!(stripped, " let a: *i32;\n *a = 5;");
-}
-
-#[test]
-fn test_block_doc_comment_4() {
-    let comment = "/*******************\n test\n *********************/";
-    let stripped = strip_doc_comment_decoration(comment);
-    assert_eq!(stripped, " test");
-}
-
-#[test]
-fn test_line_doc_comment() {
-    let stripped = strip_doc_comment_decoration("/// test");
-    assert_eq!(stripped, " test");
-    let stripped = strip_doc_comment_decoration("///! test");
-    assert_eq!(stripped, " test");
-    let stripped = strip_doc_comment_decoration("// test");
-    assert_eq!(stripped, " test");
-    let stripped = strip_doc_comment_decoration("// test");
-    assert_eq!(stripped, " test");
-    let stripped = strip_doc_comment_decoration("///test");
-    assert_eq!(stripped, "test");
-    let stripped = strip_doc_comment_decoration("///!test");
-    assert_eq!(stripped, "test");
-    let stripped = strip_doc_comment_decoration("//test");
-    assert_eq!(stripped, "test");
-}
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 5499a3cae5f..b1b7b08c78a 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -1,6 +1,7 @@
 use crate::token::{self, Token, TokenKind};
 use crate::sess::ParseSess;
 use crate::symbol::{sym, Symbol};
+use crate::util::comments;
 
 use errors::{FatalError, DiagnosticBuilder};
 use syntax_pos::{BytePos, Pos, Span};
@@ -15,7 +16,6 @@ use log::debug;
 #[cfg(test)]
 mod tests;
 
-pub mod comments;
 mod tokentrees;
 mod unicode_chars;
 mod unescape_error_reporting;
@@ -179,7 +179,7 @@ impl<'a> StringReader<'a> {
             rustc_lexer::TokenKind::LineComment => {
                 let string = self.str_from(start);
                 // comments with only more "/"s are not doc comments
-                let tok = if is_doc_comment(string) {
+                let tok = if comments::is_line_doc_comment(string) {
                     self.forbid_bare_cr(start, string, "bare CR not allowed in doc-comment");
                     token::DocComment(Symbol::intern(string))
                 } else {
@@ -192,7 +192,7 @@ impl<'a> StringReader<'a> {
                 let string = self.str_from(start);
                 // block comments starting with "/**" or "/*!" are doc-comments
                 // but comments with only "*"s between two "/"s are not
-                let is_doc_comment = is_block_doc_comment(string);
+                let is_doc_comment = comments::is_block_doc_comment(string);
 
                 if !terminated {
                     let msg = if is_doc_comment {
@@ -643,18 +643,3 @@ impl<'a> StringReader<'a> {
         }
     }
 }
-
-fn is_doc_comment(s: &str) -> bool {
-    let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/') ||
-              s.starts_with("//!");
-    debug!("is {:?} a doc comment? {}", s, res);
-    res
-}
-
-fn is_block_doc_comment(s: &str) -> bool {
-    // Prevent `/**/` from being parsed as a doc comment
-    let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*') ||
-               s.starts_with("/*!")) && s.len() >= 5;
-    debug!("is {:?} a doc comment? {}", s, res);
-    res
-}
diff --git a/src/libsyntax/parse/lexer/tests.rs b/src/libsyntax/parse/lexer/tests.rs
index b9b82b2bcef..baa6fb59537 100644
--- a/src/libsyntax/parse/lexer/tests.rs
+++ b/src/libsyntax/parse/lexer/tests.rs
@@ -3,6 +3,7 @@ use super::*;
 use crate::symbol::Symbol;
 use crate::source_map::{SourceMap, FilePathMapping};
 use crate::token;
+use crate::util::comments::is_doc_comment;
 use crate::with_default_globals;
 
 use errors::{Handler, emitter::EmitterWriter};
diff --git a/src/libsyntax/parse/parser/attr.rs b/src/libsyntax/parse/parser/attr.rs
index 920e7a521ef..31f0a02a483 100644
--- a/src/libsyntax/parse/parser/attr.rs
+++ b/src/libsyntax/parse/parser/attr.rs
@@ -1,7 +1,7 @@
 use super::{SeqSep, Parser, TokenType, PathStyle};
 use crate::attr;
 use crate::ast;
-use crate::parse::lexer::comments;
+use crate::util::comments;
 use crate::token::{self, Nonterminal, DelimToken};
 use crate::tokenstream::{TokenStream, TokenTree};
 use crate::source_map::Span;
diff --git a/src/libsyntax/parse/parser/mod.rs b/src/libsyntax/parse/parser/mod.rs
index f7f7b0e83d4..455f4172f5f 100644
--- a/src/libsyntax/parse/parser/mod.rs
+++ b/src/libsyntax/parse/parser/mod.rs
@@ -17,7 +17,7 @@ use crate::ast::{
 };
 use crate::parse::{Directory, DirectoryOwnership};
 use crate::parse::lexer::UnmatchedBrace;
-use crate::parse::lexer::comments::{doc_comment_style, strip_doc_comment_decoration};
+use crate::util::comments::{doc_comment_style, strip_doc_comment_decoration};
 use crate::token::{self, Token, TokenKind, DelimToken};
 use crate::print::pprust;
 use crate::ptr::P;
author	Mazdak Farrokhzad <twingoow@gmail.com>	2019-10-11 14:39:52 +0200
committer	Mazdak Farrokhzad <twingoow@gmail.com>	2019-11-07 13:59:13 +0100
commit	27f97aa468b5079bfd159e6fee9a04d5501a8818 (patch)
tree	014c6f7ebc01fdde85bbd4d12d10e74a377d6f36 /src/libsyntax/parse
parent	a1571b68552b0d56d85080c5f92fdab233775de4 (diff)
download	rust-27f97aa468b5079bfd159e6fee9a04d5501a8818.tar.gz rust-27f97aa468b5079bfd159e6fee9a04d5501a8818.zip