author     Gareth Daniel Smith <garethdanielsmith@gmail.com>   2012-06-30 11:54:54 +0100
committer  Gareth Daniel Smith <garethdanielsmith@gmail.com>   2012-06-30 11:54:54 +0100
commit     0b653ab9539140bb04941de9a36c03cf10bfc28b
tree       491d1b3f128a281ffb3a12240e6858a919f5f3dd  /src/libsyntax/parse
parent     d7823de5e2bfc749c2fb4fcfe4d65d54b28e3a92
initial draft of fix for issue #2498:
1. make /// ... and //! ... and /** ... */ and /*! ... */ into sugar for #[doc = ...] attributes.
2. add a script in etc/ to help convert doc-attributes to doc-comments
3. add some functions to core::str to help with (1)
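
For illustration, the sugar introduced here corresponds to the equivalence modern Rust still provides: a doc-comment is carried through the compiler as an ordinary #[doc = ...] attribute. A minimal sketch in current syntax (whitespace handling is approximate and differs in detail from this early patch):

/// Adds one to its argument.
fn succ(x: i32) -> i32 { x + 1 }

// The doc-comment above is sugar for an explicit doc attribute:
#[doc = " Adds one to its argument."]
fn succ_attr(x: i32) -> i32 { x + 1 }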
Diffstat (limited to 'src/libsyntax/parse')
-rw-r--r--  src/libsyntax/parse/attr.rs      |  86
-rw-r--r--  src/libsyntax/parse/comments.rs  | 128
-rw-r--r--  src/libsyntax/parse/lexer.rs     |  66
-rw-r--r--  src/libsyntax/parse/token.rs     |   5
4 files changed, 243 insertions(+), 42 deletions(-)
diff --git a/src/libsyntax/parse/attr.rs b/src/libsyntax/parse/attr.rs
index a349621091b..3b7f31fb79f 100644
--- a/src/libsyntax/parse/attr.rs
+++ b/src/libsyntax/parse/attr.rs
@@ -15,7 +15,8 @@ impl parser_attr for parser {
         -> attr_or_ext
     {
         let expect_item_next = vec::is_not_empty(first_item_attrs);
-        if self.token == token::POUND {
+        alt self.token {
+          token::POUND {
             let lo = self.span.lo;
             if self.look_ahead(1u) == token::LBRACKET {
                 self.bump();
@@ -30,15 +31,40 @@ impl parser_attr for parser {
                 self.bump();
                 ret some(right(self.parse_syntax_ext_naked(lo)));
             } else { ret none; }
-        } else { ret none; }
+        }
+        token::DOC_COMMENT(_) {
+          ret some(left(self.parse_outer_attributes()));
+        }
+        _ {
+          ret none;
+        }
+      }
     }
 
     // Parse attributes that appear before an item
     fn parse_outer_attributes() -> [ast::attribute]/~ {
         let mut attrs: [ast::attribute]/~ = []/~;
-        while self.token == token::POUND
-            && self.look_ahead(1u) == token::LBRACKET {
-            vec::push(attrs, self.parse_attribute(ast::attr_outer));
+        loop {
+            alt copy self.token {
+              token::POUND {
+                if self.look_ahead(1u) != token::LBRACKET {
+                    break;
+                }
+                attrs += [self.parse_attribute(ast::attr_outer)]/~;
+              }
+              token::DOC_COMMENT(s) {
+                let attr = ::attr::mk_sugared_doc_attr(
+                        *self.get_str(s), self.span.lo, self.span.hi);
+                if attr.node.style != ast::attr_outer {
+                  self.fatal("expected outer comment");
+                }
+                attrs += [attr]/~;
+                self.bump();
+              }
+              _ {
+                break;
+              }
+            }
         }
         ret attrs;
     }
@@ -55,7 +81,8 @@ impl parser_attr for parser {
         let meta_item = self.parse_meta_item();
         self.expect(token::RBRACKET);
         let mut hi = self.span.hi;
-        ret spanned(lo, hi, {style: style, value: *meta_item});
+        ret spanned(lo, hi, {style: style, value: *meta_item,
+                             is_sugared_doc: false});
     }
 
     // Parse attributes that appear after the opening of an item, each
@@ -68,22 +95,41 @@ impl parser_attr for parser {
         {inner: [ast::attribute]/~, next: [ast::attribute]/~} {
         let mut inner_attrs: [ast::attribute]/~ = []/~;
         let mut next_outer_attrs: [ast::attribute]/~ = []/~;
-        while self.token == token::POUND {
-            if self.look_ahead(1u) != token::LBRACKET {
-                // This is an extension
-                break;
-            }
-            let attr = self.parse_attribute(ast::attr_inner);
-            if self.token == token::SEMI {
+        loop {
+            alt copy self.token {
+              token::POUND {
+                if self.look_ahead(1u) != token::LBRACKET {
+                    // This is an extension
+                    break;
+                }
+                let attr = self.parse_attribute(ast::attr_inner);
+                if self.token == token::SEMI {
+                    self.bump();
+                    inner_attrs += [attr]/~;
+                } else {
+                    // It's not really an inner attribute
+                    let outer_attr =
+                        spanned(attr.span.lo, attr.span.hi,
+                            {style: ast::attr_outer, value: attr.node.value,
+                             is_sugared_doc: false});
+                    next_outer_attrs += [outer_attr]/~;
+                    break;
+                }
+              }
+              token::DOC_COMMENT(s) {
+                let attr = ::attr::mk_sugared_doc_attr(
+                        *self.get_str(s), self.span.lo, self.span.hi);
                 self.bump();
-                vec::push(inner_attrs, attr);
-            } else {
-                // It's not really an inner attribute
-                let outer_attr =
-                    spanned(attr.span.lo, attr.span.hi,
-                            {style: ast::attr_outer, value: attr.node.value});
-                vec::push(next_outer_attrs, outer_attr);
+                if attr.node.style == ast::attr_inner {
+                  inner_attrs += [attr]/~;
+                } else {
+                  next_outer_attrs += [attr]/~;
+                  break;
+                }
+              }
+              _ {
                 break;
+              }
             }
         }
         ret {inner: inner_attrs, next: next_outer_attrs};
diff --git a/src/libsyntax/parse/comments.rs b/src/libsyntax/parse/comments.rs
index b6ab87ad1bc..7a6a9f0f3d7 100644
--- a/src/libsyntax/parse/comments.rs
+++ b/src/libsyntax/parse/comments.rs
@@ -8,6 +8,7 @@ export cmnt;
 export lit;
 export cmnt_style;
 export gather_comments_and_literals;
+export is_doc_comment, doc_comment_style, strip_doc_comment_decoration;
 
 enum cmnt_style {
     isolated, // No code on either side of each line of the comment
@@ -18,6 +19,81 @@ enum cmnt_style {
 
 type cmnt = {style: cmnt_style, lines: [str]/~, pos: uint};
 
+fn is_doc_comment(s: str) -> bool {
+    s.starts_with("///") ||
+    s.starts_with("//!") ||
+    s.starts_with("/**") ||
+    s.starts_with("/*!")
+}
+
+fn doc_comment_style(comment: str) -> ast::attr_style {
+    assert is_doc_comment(comment);
+    if comment.starts_with("//!") || comment.starts_with("/*!") {
+        ast::attr_inner
+    } else {
+        ast::attr_outer
+    }
+}
+
+fn strip_doc_comment_decoration(comment: str) -> str {
+
+    /// remove whitespace-only lines from the start/end of lines
+    fn vertical_trim(lines: [str]/~) -> [str]/~ {
+        let mut i = 0u, j = lines.len();
+        while i < j && lines[i].trim().is_empty() {
+            i += 1u;
+        }
+        while j > i && lines[j - 1u].trim().is_empty() {
+            j -= 1u;
+        }
+        ret lines.slice(i, j);
+    }
+
+    // drop leftmost columns that contain only values in chars
+    fn block_trim(lines: [str]/~, chars: str, max: option<uint>) -> [str]/~ {
+
+        let mut i = max.get_default(uint::max_value);
+        for lines.each {|line|
+            if line.trim().is_empty() {
+                cont;
+            }
+            for line.each_chari {|j, c|
+                if j >= i {
+                    break;
+                }
+                if !chars.contains_char(c) {
+                    i = j;
+                    break;
+                }
+            }
+        }
+
+        ret lines.map {|line|
+            let chars = str::chars(line);
+            if i > chars.len() {
+                ""
+            } else {
+                str::from_chars(chars.slice(i, chars.len()))
+            }
+        };
+    }
+
+    if comment.starts_with("//") {
+        ret comment.slice(3u, comment.len()).trim();
+    }
+
+    if comment.starts_with("/*") {
+        let lines = str::lines_any(comment.slice(3u, comment.len() - 2u));
+        let lines = vertical_trim(lines);
+        let lines = block_trim(lines, "\t ", none);
+        let lines = block_trim(lines, "*", some(1u));
+        let lines = block_trim(lines, "\t ", none);
+        ret str::connect(lines, "\n");
+    }
+
+    fail "not a doc-comment: " + comment;
+}
+
 fn read_to_eol(rdr: string_reader) -> str {
     let mut val = "";
     while rdr.curr != '\n' && !is_eof(rdr) {
@@ -57,29 +133,41 @@ fn consume_whitespace_counting_blank_lines(rdr: string_reader,
     }
 }
 
-fn read_shebang_comment(rdr: string_reader, code_to_the_left: bool) -> cmnt {
+
+fn read_shebang_comment(rdr: string_reader, code_to_the_left: bool,
+                                                        &comments: [cmnt]/~) {
     #debug(">>> shebang comment");
     let p = rdr.chpos;
     #debug("<<< shebang comment");
-    ret {style: if code_to_the_left { trailing } else { isolated },
-         lines: [read_one_line_comment(rdr)]/~,
-         pos: p};
+    vec::push(comments, {
+        style: if code_to_the_left { trailing } else { isolated },
+        lines: [read_one_line_comment(rdr)]/~,
+        pos: p
+    });
 }
 
-fn read_line_comments(rdr: string_reader, code_to_the_left: bool) -> cmnt {
+fn read_line_comments(rdr: string_reader, code_to_the_left: bool,
+                                                        &comments: [cmnt]/~) {
     #debug(">>> line comments");
     let p = rdr.chpos;
     let mut lines: [str]/~ = []/~;
     while rdr.curr == '/' && nextch(rdr) == '/' {
         let line = read_one_line_comment(rdr);
         log(debug, line);
+        if is_doc_comment(line) { // doc-comments are not put in comments
+            break;
+        }
         vec::push(lines, line);
         consume_non_eol_whitespace(rdr);
     }
     #debug("<<< line comments");
-    ret {style: if code_to_the_left { trailing } else { isolated },
-         lines: lines,
-         pos: p};
+    if !lines.is_empty() {
+        vec::push(comments, {
+            style: if code_to_the_left { trailing } else { isolated },
+            lines: lines,
+            pos: p
+        });
+    }
 }
 
 fn all_whitespace(s: str, begin: uint, end: uint) -> bool {
@@ -101,13 +189,27 @@ fn trim_whitespace_prefix_and_push_line(&lines: [str]/~,
     vec::push(lines, s1);
 }
 
-fn read_block_comment(rdr: string_reader, code_to_the_left: bool) -> cmnt {
+fn read_block_comment(rdr: string_reader, code_to_the_left: bool,
+                                                        &comments: [cmnt]/~) {
     #debug(">>> block comment");
     let p = rdr.chpos;
     let mut lines: [str]/~ = []/~;
     let mut col: uint = rdr.col;
     bump(rdr);
     bump(rdr);
+
+    // doc-comments are not really comments, they are attributes
+    if rdr.curr == '*' || rdr.curr == '!' {
+        while !(rdr.curr == '*' && nextch(rdr) == '/') && !is_eof(rdr) {
+            bump(rdr);
+        }
+        if !is_eof(rdr) {
+            bump(rdr);
+            bump(rdr);
+        }
+        ret;
+    }
+
     let mut curr_line = "/*";
     let mut level: int = 1;
     while level > 0 {
@@ -143,7 +245,7 @@ fn read_block_comment(rdr: string_reader, code_to_the_left: bool) -> cmnt {
         style = mixed;
     }
     #debug("<<< block comment");
-    ret {style: style, lines: lines, pos: p};
+    vec::push(comments, {style: style, lines: lines, pos: p});
 }
 
 fn peeking_at_comment(rdr: string_reader) -> bool {
@@ -156,11 +258,11 @@ fn consume_comment(rdr: string_reader, code_to_the_left: bool,
                    &comments: [cmnt]/~) {
     #debug(">>> consume comment");
     if rdr.curr == '/' && nextch(rdr) == '/' {
-        vec::push(comments, read_line_comments(rdr, code_to_the_left));
+        read_line_comments(rdr, code_to_the_left, comments);
     } else if rdr.curr == '/' && nextch(rdr) == '*' {
-        vec::push(comments, read_block_comment(rdr, code_to_the_left));
+        read_block_comment(rdr, code_to_the_left, comments);
     } else if rdr.curr == '#' && nextch(rdr) == '!' {
-        vec::push(comments, read_shebang_comment(rdr, code_to_the_left));
+        read_shebang_comment(rdr, code_to_the_left, comments);
     } else { fail; }
     #debug("<<< consume comment");
 }
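
For readers less familiar with the 2012 syntax above, here is a rough modern-Rust sketch of the decoration stripping performed by strip_doc_comment_decoration in comments.rs (the name strip_doc_decoration and the exact trimming rules are illustrative simplifications, not the original API):

// Approximate behaviour: strip "///"/"//!" prefixes on line doc-comments; for
// block doc-comments, drop blank edge lines plus the leading "*" gutter.
fn strip_doc_decoration(comment: &str) -> String {
    if let Some(rest) = comment
        .strip_prefix("///")
        .or_else(|| comment.strip_prefix("//!"))
    {
        return rest.trim().to_string();
    }
    if comment.starts_with("/**") || comment.starts_with("/*!") {
        let body = &comment[3..comment.len() - 2]; // drop the opening token and "*/"
        let lines: Vec<&str> = body.lines().collect();
        // vertical trim: drop leading/trailing whitespace-only lines
        let start = lines.iter().position(|l| !l.trim().is_empty()).unwrap_or(0);
        let end = lines.iter().rposition(|l| !l.trim().is_empty()).map_or(start, |i| i + 1);
        return lines[start..end]
            .iter()
            // horizontal trim: drop the " * " gutter on each remaining line
            .map(|l| l.trim_start().trim_start_matches('*').trim())
            .collect::<Vec<_>>()
            .join("\n");
    }
    panic!("not a doc-comment: {}", comment);
}

fn main() {
    assert_eq!(strip_doc_decoration("/// one line"), "one line");
    assert_eq!(
        strip_doc_decoration("/**\n * first\n * second\n */"),
        "first\nsecond"
    );
}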
diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs
index 8687e011635..fec6d23a03b 100644
--- a/src/libsyntax/parse/lexer.rs
+++ b/src/libsyntax/parse/lexer.rs
@@ -161,7 +161,11 @@ impl tt_reader_as_reader of reader for tt_reader {
 }
 
 fn string_advance_token(&&r: string_reader) {
-    consume_whitespace_and_comments(r);
+    for consume_whitespace_and_comments(r).each {|comment|
+        r.peek_tok = comment.tok;
+        r.peek_span = comment.sp;
+        ret;
+    }
 
     if is_eof(r) {
         r.peek_tok = token::EOF;
@@ -277,22 +281,41 @@ fn is_hex_digit(c: char) -> bool {
 
 fn is_bin_digit(c: char) -> bool { ret c == '0' || c == '1'; }
 
-fn consume_whitespace_and_comments(rdr: string_reader) {
+// might return a sugared-doc-attr
+fn consume_whitespace_and_comments(rdr: string_reader)
+                                -> option<{tok: token::token, sp: span}> {
     while is_whitespace(rdr.curr) { bump(rdr); }
     ret consume_any_line_comment(rdr);
 }
 
-fn consume_any_line_comment(rdr: string_reader) {
+// might return a sugared-doc-attr
+fn consume_any_line_comment(rdr: string_reader)
+                                -> option<{tok: token::token, sp: span}> {
     if rdr.curr == '/' {
         alt nextch(rdr) {
           '/' {
-            while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); }
-            // Restart whitespace munch.
-
-            ret consume_whitespace_and_comments(rdr);
+            bump(rdr);
+            bump(rdr);
+            // line comments starting with "///" or "//!" are doc-comments
+            if rdr.curr == '/' || rdr.curr == '!' {
+                let start_chpos = rdr.chpos - 2u;
+                let mut acc = "//";
+                while rdr.curr != '\n' && !is_eof(rdr) {
+                    str::push_char(acc, rdr.curr);
+                    bump(rdr);
+                }
+                ret some({
+                    tok: token::DOC_COMMENT(intern(*rdr.interner, @acc)),
+                    sp: ast_util::mk_sp(start_chpos, rdr.chpos)
+                });
+            } else {
+                while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); }
+                // Restart whitespace munch.
+                ret consume_whitespace_and_comments(rdr);
+            }
           }
           '*' { bump(rdr); bump(rdr); ret consume_block_comment(rdr); }
-          _ { ret; }
+          _ {}
         }
     } else if rdr.curr == '#' {
         if nextch(rdr) == '!' {
@@ -305,9 +328,34 @@ fn consume_any_line_comment(rdr: string_reader) {
             }
         }
     }
+    ret none;
 }
 
-fn consume_block_comment(rdr: string_reader) {
+// might return a sugared-doc-attr
+fn consume_block_comment(rdr: string_reader)
+                                -> option<{tok: token::token, sp: span}> {
+
+    // block comments starting with "/**" or "/*!" are doc-comments
+    if rdr.curr == '*' || rdr.curr == '!' {
+        let start_chpos = rdr.chpos - 2u;
+        let mut acc = "/*";
+        while !(rdr.curr == '*' && nextch(rdr) == '/') && !is_eof(rdr) {
+            str::push_char(acc, rdr.curr);
+            bump(rdr);
+        }
+        if is_eof(rdr) {
+            rdr.fatal("unterminated block doc-comment");
+        } else {
+            acc += "*/";
+            bump(rdr);
+            bump(rdr);
+            ret some({
+                tok: token::DOC_COMMENT(intern(*rdr.interner, @acc)),
+                sp: ast_util::mk_sp(start_chpos, rdr.chpos)
+            });
+        }
+    }
+
     let mut level: int = 1;
     while level > 0 {
         if is_eof(rdr) { rdr.fatal("unterminated block comment"); }
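
The lexer hunks above make "///" and "//!" lines produce a DOC_COMMENT token instead of being silently skipped like ordinary "//" comments. A small self-contained modern-Rust sketch of that decision (the Tok type and lex_line_comment are illustrative stand-ins, not the original reader API):

// After the lexer has seen "//", the third character decides whether this is
// a doc-comment (kept as a token) or a plain comment (swallowed).
#[derive(Debug, PartialEq)]
enum Tok {
    DocComment(String),
    Skipped,
}

fn lex_line_comment(src: &str) -> Tok {
    let rest = &src[2..]; // caller guarantees src starts with "//"
    if rest.starts_with('/') || rest.starts_with('!') {
        // "///" or "//!": accumulate the whole line as a DOC_COMMENT token.
        let line = src.lines().next().unwrap_or(src);
        Tok::DocComment(line.to_string())
    } else {
        // Ordinary "//" comment: the lexer just consumes it.
        Tok::Skipped
    }
}

fn main() {
    assert_eq!(
        lex_line_comment("/// docs here\nfn f() {}"),
        Tok::DocComment("/// docs here".to_string())
    );
    assert_eq!(lex_line_comment("// just a comment\n"), Tok::Skipped);
}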
diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs
index 8ab37e95d21..b3db69b5be6 100644
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@@ -80,6 +80,7 @@ enum token {
 
     //ACTUALLY(whole_nonterminal),
 
+    DOC_COMMENT(str_num),
     EOF,
 }
 
@@ -170,11 +171,15 @@ fn to_str(in: interner<@str>, t: token) -> str {
             + str::escape_default(*interner::get(in, s))
             + "\""
       }
+
       /* Name components */
       IDENT(s, _) {
         *interner::get(in, s)
       }
       UNDERSCORE { "_" }
+
+      /* Other */
+      DOC_COMMENT(s) { *interner::get(in, s) }
       EOF { "<eof>" }
     }
 }