Diffstat (limited to 'src/libsyntax/parse/lexer/comments.rs')
-rw-r--r--  src/libsyntax/parse/lexer/comments.rs | 290
1 file changed, 81 insertions(+), 209 deletions(-)
diff --git a/src/libsyntax/parse/lexer/comments.rs b/src/libsyntax/parse/lexer/comments.rs
index 6ed2a7adad1..d8f22072d7d 100644
--- a/src/libsyntax/parse/lexer/comments.rs
+++ b/src/libsyntax/parse/lexer/comments.rs
@@ -2,11 +2,10 @@ pub use CommentStyle::*;
 
 use crate::ast;
 use crate::source_map::SourceMap;
-use crate::parse::lexer::{is_block_doc_comment, is_pattern_whitespace};
-use crate::parse::lexer::{self, ParseSess, StringReader};
+use crate::parse::lexer::is_block_doc_comment;
+use crate::parse::lexer::ParseSess;
 
 use syntax_pos::{BytePos, CharPos, Pos, FileName};
-use log::debug;
 
 use std::usize;
 
@@ -135,66 +134,6 @@ pub fn strip_doc_comment_decoration(comment: &str) -> String {
     panic!("not a doc-comment: {}", comment);
 }
 
-fn push_blank_line_comment(rdr: &StringReader<'_>, comments: &mut Vec<Comment>) {
-    debug!(">>> blank-line comment");
-    comments.push(Comment {
-        style: BlankLine,
-        lines: Vec::new(),
-        pos: rdr.pos,
-    });
-}
-
-fn consume_whitespace_counting_blank_lines(
-    rdr: &mut StringReader<'_>,
-    comments: &mut Vec<Comment>
-) {
-    while is_pattern_whitespace(rdr.ch) && !rdr.is_eof() {
-        if rdr.ch_is('\n') {
-            push_blank_line_comment(rdr, &mut *comments);
-        }
-        rdr.bump();
-    }
-}
-
-fn read_shebang_comment(rdr: &mut StringReader<'_>,
-                        code_to_the_left: bool,
-                        comments: &mut Vec<Comment>) {
-    debug!(">>> shebang comment");
-    let p = rdr.pos;
-    debug!("<<< shebang comment");
-    comments.push(Comment {
-        style: if code_to_the_left { Trailing } else { Isolated },
-        lines: vec![rdr.read_one_line_comment()],
-        pos: p,
-    });
-}
-
-fn read_line_comments(rdr: &mut StringReader<'_>,
-                      code_to_the_left: bool,
-                      comments: &mut Vec<Comment>) {
-    debug!(">>> line comments");
-    let p = rdr.pos;
-    let mut lines: Vec<String> = Vec::new();
-    while rdr.ch_is('/') && rdr.nextch_is('/') {
-        let line = rdr.read_one_line_comment();
-        debug!("{}", line);
-        // Doc comments are not put in comments.
-        if is_doc_comment(&line[..]) {
-            break;
-        }
-        lines.push(line);
-        rdr.consume_non_eol_whitespace();
-    }
-    debug!("<<< line comments");
-    if !lines.is_empty() {
-        comments.push(Comment {
-            style: if code_to_the_left { Trailing } else { Isolated },
-            lines,
-            pos: p,
-        });
-    }
-}
-
 /// Returns `None` if the first `col` chars of `s` contain a non-whitespace char.
 /// Otherwise returns `Some(k)` where `k` is first char offset after that leading
 /// whitespace. Note that `k` may be outside bounds of `s`.
@@ -209,170 +148,103 @@ fn all_whitespace(s: &str, col: CharPos) -> Option<usize> {
     Some(idx)
 }
 
-fn trim_whitespace_prefix_and_push_line(lines: &mut Vec<String>, s: String, col: CharPos) {
+fn trim_whitespace_prefix(s: &str, col: CharPos) -> &str {
     let len = s.len();
-    let s1 = match all_whitespace(&s[..], col) {
-        Some(col) => {
-            if col < len {
-                s[col..len].to_string()
-            } else {
-                String::new()
-            }
-        }
+    match all_whitespace(&s, col) {
+        Some(col) => if col < len { &s[col..] } else { "" },
         None => s,
-    };
-    debug!("pushing line: {}", s1);
-    lines.push(s1);
-}
-
-fn read_block_comment(rdr: &mut StringReader<'_>,
-                      code_to_the_left: bool,
-                      comments: &mut Vec<Comment>) {
-    debug!(">>> block comment");
-    let p = rdr.pos;
-    let mut lines: Vec<String> = Vec::new();
-
-    // Count the number of chars since the start of the line by rescanning.
-    let src_index = rdr.src_index(rdr.source_file.line_begin_pos(rdr.pos));
-    let end_src_index = rdr.src_index(rdr.pos);
-    assert!(src_index <= end_src_index,
-            "src_index={}, end_src_index={}, line_begin_pos={}",
-            src_index, end_src_index, rdr.source_file.line_begin_pos(rdr.pos).to_u32());
-
-    let col = CharPos(rdr.src[src_index..end_src_index].chars().count());
-
-    rdr.bump();
-    rdr.bump();
-
-    let mut curr_line = String::from("/*");
-
-    // doc-comments are not really comments, they are attributes
-    if (rdr.ch_is('*') && !rdr.nextch_is('*')) || rdr.ch_is('!') {
-        while !(rdr.ch_is('*') && rdr.nextch_is('/')) && !rdr.is_eof() {
-            curr_line.push(rdr.ch.unwrap());
-            rdr.bump();
-        }
-        if !rdr.is_eof() {
-            curr_line.push_str("*/");
-            rdr.bump();
-            rdr.bump();
-        }
-        if is_block_doc_comment(&curr_line[..]) {
-            return;
-        }
-        assert!(!curr_line.contains('\n'));
-        lines.push(curr_line);
-    } else {
-        let mut level: isize = 1;
-        while level > 0 {
-            debug!("=== block comment level {}", level);
-            if rdr.is_eof() {
-                rdr.fatal_span_(rdr.pos, rdr.pos, "unterminated block comment").raise();
-            }
-            if rdr.ch_is('\n') {
-                trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col);
-                curr_line = String::new();
-                rdr.bump();
-            } else {
-                curr_line.push(rdr.ch.unwrap());
-                if rdr.ch_is('/') && rdr.nextch_is('*') {
-                    rdr.bump();
-                    rdr.bump();
-                    curr_line.push('*');
-                    level += 1;
-                } else {
-                    if rdr.ch_is('*') && rdr.nextch_is('/') {
-                        rdr.bump();
-                        rdr.bump();
-                        curr_line.push('/');
-                        level -= 1;
-                    } else {
-                        rdr.bump();
-                    }
-                }
-            }
-        }
-        if !curr_line.is_empty() {
-            trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col);
-        }
-    }
-
-    let mut style = if code_to_the_left {
-        Trailing
-    } else {
-        Isolated
-    };
-    rdr.consume_non_eol_whitespace();
-    if !rdr.is_eof() && !rdr.ch_is('\n') && lines.len() == 1 {
-        style = Mixed;
     }
-    debug!("<<< block comment");
-    comments.push(Comment {
-        style,
-        lines,
-        pos: p,
-    });
 }
-
-fn consume_comment(rdr: &mut StringReader<'_>,
-                   comments: &mut Vec<Comment>,
-                   code_to_the_left: &mut bool,
-                   anything_to_the_left: &mut bool) {
-    debug!(">>> consume comment");
-    if rdr.ch_is('/') && rdr.nextch_is('/') {
-        read_line_comments(rdr, *code_to_the_left, comments);
-        *code_to_the_left = false;
-        *anything_to_the_left = false;
-    } else if rdr.ch_is('/') && rdr.nextch_is('*') {
-        read_block_comment(rdr, *code_to_the_left, comments);
-        *anything_to_the_left = true;
-    } else if rdr.ch_is('#') && rdr.nextch_is('!') {
-        read_shebang_comment(rdr, *code_to_the_left, comments);
-        *code_to_the_left = false;
-        *anything_to_the_left = false;
-    } else {
-        panic!();
+fn split_block_comment_into_lines(
+    text: &str,
+    col: CharPos,
+) -> Vec<String> {
+    let mut res: Vec<String> = vec![];
+    let mut lines = text.lines();
+    // just push the first line
+    res.extend(lines.next().map(|it| it.to_string()));
+    // for other lines, strip common whitespace prefix
+    for line in lines {
+        res.push(trim_whitespace_prefix(line, col).to_string())
     }
-    debug!("<<< consume comment");
+    res
 }
 
 // it appears this function is called only from pprust... that's
 // probably not a good thing.
-pub fn gather_comments(sess: &ParseSess, path: FileName, src: String) -> Vec<Comment>
-{
+pub fn gather_comments(sess: &ParseSess, path: FileName, src: String) -> Vec<Comment> {
     let cm = SourceMap::new(sess.source_map().path_mapping().clone());
     let source_file = cm.new_source_file(path, src);
-    let mut rdr = lexer::StringReader::new(sess, source_file, None);
+    let text = (*source_file.src.as_ref().unwrap()).clone();
+    let text: &str = text.as_str();
+    let start_bpos = source_file.start_pos;
+    let mut pos = 0;
     let mut comments: Vec<Comment> = Vec::new();
-    let mut code_to_the_left = false; // Only code
-    let mut anything_to_the_left = false; // Code or comments
-
-    while !rdr.is_eof() {
-        loop {
-            // Eat all the whitespace and count blank lines.
-            rdr.consume_non_eol_whitespace();
-            if rdr.ch_is('\n') {
-                if anything_to_the_left {
-                    rdr.bump(); // The line is not blank, do not count.
+    let mut code_to_the_left = false;
+
+    if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
+        comments.push(Comment {
+            style: Isolated,
+            lines: vec![text[..shebang_len].to_string()],
+            pos: start_bpos,
+        });
+        pos += shebang_len;
+    }
+
+    for token in rustc_lexer::tokenize(&text[pos..]) {
+        let token_text = &text[pos..pos + token.len];
+        match token.kind {
+            rustc_lexer::TokenKind::Whitespace => {
+                if let Some(mut idx) = token_text.find('\n') {
+                    code_to_the_left = false;
+                    while let Some(next_newline) = &token_text[idx + 1..].find('\n') {
+                        idx = idx + 1 + next_newline;
+                        comments.push(Comment {
+                            style: BlankLine,
+                            lines: vec![],
+                            pos: start_bpos + BytePos((pos + idx) as u32),
+                        });
+                    }
+                }
+            }
+            rustc_lexer::TokenKind::BlockComment { terminated: _ } => {
+                if !is_block_doc_comment(token_text) {
+                    let code_to_the_right = match text[pos + token.len..].chars().next() {
+                        Some('\r') | Some('\n') => false,
+                        _ => true,
+                    };
+                    let style = match (code_to_the_left, code_to_the_right) {
+                        (true, true) | (false, true) => Mixed,
+                        (false, false) => Isolated,
+                        (true, false) => Trailing,
+                    };
+
+                    // Count the number of chars since the start of the line by rescanning.
+                    let pos_in_file = start_bpos + BytePos(pos as u32);
+                    let line_begin_in_file = source_file.line_begin_pos(pos_in_file);
+                    let line_begin_pos = (line_begin_in_file - start_bpos).to_usize();
+                    let col = CharPos(text[line_begin_pos..pos].chars().count());
+
+                    let lines = split_block_comment_into_lines(token_text, col);
+                    comments.push(Comment { style, lines, pos: pos_in_file })
                 }
-                consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
-                code_to_the_left = false;
-                anything_to_the_left = false;
             }
-            // Eat one comment group
-            if rdr.peeking_at_comment() {
-                consume_comment(&mut rdr, &mut comments,
-                                &mut code_to_the_left, &mut anything_to_the_left);
-            } else {
-                break
+            rustc_lexer::TokenKind::LineComment => {
+                if !is_doc_comment(token_text) {
+                    comments.push(Comment {
+                        style: if code_to_the_left { Trailing } else { Isolated },
+                        lines: vec![token_text.to_string()],
+                        pos: start_bpos + BytePos(pos as u32),
+                    })
+                }
+            }
+            _ => {
+                code_to_the_left = true;
             }
         }
-
-        rdr.next_token();
-        code_to_the_left = true;
-        anything_to_the_left = true;
+        pos += token.len;
     }
     comments
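
For context on the new approach: gather_comments() no longer drives a StringReader character by character; it walks the raw source text with rustc_lexer, keeping a running byte offset and classifying whole comment tokens. The sketch below is not part of the patch; it isolates that scanning pattern, assuming only the rustc_lexer API the diff itself relies on (strip_shebang, tokenize, and Token { kind, len }).

    // Illustrative sketch only: scan a source string with rustc_lexer and
    // report where each comment token starts, mirroring the offset
    // bookkeeping of the new gather_comments().
    fn list_comments(text: &str) {
        let mut pos = 0;
        // A shebang is not a token, so it is stripped up front, as in the patch.
        if let Some(shebang_len) = rustc_lexer::strip_shebang(text) {
            println!("shebang: {:?}", &text[..shebang_len]);
            pos += shebang_len;
        }
        for token in rustc_lexer::tokenize(&text[pos..]) {
            let token_text = &text[pos..pos + token.len];
            match token.kind {
                rustc_lexer::TokenKind::LineComment => {
                    println!("line comment at byte {}: {:?}", pos, token_text);
                }
                rustc_lexer::TokenKind::BlockComment { terminated } => {
                    println!("block comment at byte {} (terminated: {}): {:?}",
                             pos, terminated, token_text);
                }
                _ => {}
            }
            pos += token.len;
        }
    }

Run over a file that starts with a shebang and mixes //, /* */ and doc comments, this prints one entry per comment token; the patched gather_comments() performs the same walk but additionally filters out doc comments and tags each remaining comment as Isolated, Trailing, or Mixed, emitting separate BlankLine entries for blank lines between items.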
