diff options
| author | Gareth Daniel Smith <garethdanielsmith@gmail.com> | 2012-06-30 11:54:54 +0100 |
|---|---|---|
| committer | Gareth Daniel Smith <garethdanielsmith@gmail.com> | 2012-06-30 11:54:54 +0100 |
| commit | 0b653ab9539140bb04941de9a36c03cf10bfc28b (patch) | |
| tree | 491d1b3f128a281ffb3a12240e6858a919f5f3dd /src/libsyntax/parse | |
| parent | d7823de5e2bfc749c2fb4fcfe4d65d54b28e3a92 (diff) | |
| download | rust-0b653ab9539140bb04941de9a36c03cf10bfc28b.tar.gz rust-0b653ab9539140bb04941de9a36c03cf10bfc28b.zip | |
initial draft of fix for issue #2498:
1. Make `/// ...`, `//! ...`, `/** ... */`, and `/*! ... */` into sugar for `#[doc = ...]` attributes.
2. Add a script in `etc/` to help convert doc-attributes to doc-comments.
3. Add some functions to `core::str` to help with (1).
Diffstat (limited to 'src/libsyntax/parse')
| -rw-r--r-- | src/libsyntax/parse/attr.rs | 86 | ||||
| -rw-r--r-- | src/libsyntax/parse/comments.rs | 128 | ||||
| -rw-r--r-- | src/libsyntax/parse/lexer.rs | 66 | ||||
| -rw-r--r-- | src/libsyntax/parse/token.rs | 5 |
4 files changed, 243 insertions(+), 42 deletions(-)
diff --git a/src/libsyntax/parse/attr.rs b/src/libsyntax/parse/attr.rs index a349621091b..3b7f31fb79f 100644 --- a/src/libsyntax/parse/attr.rs +++ b/src/libsyntax/parse/attr.rs @@ -15,7 +15,8 @@ impl parser_attr for parser { -> attr_or_ext { let expect_item_next = vec::is_not_empty(first_item_attrs); - if self.token == token::POUND { + alt self.token { + token::POUND { let lo = self.span.lo; if self.look_ahead(1u) == token::LBRACKET { self.bump(); @@ -30,15 +31,40 @@ impl parser_attr for parser { self.bump(); ret some(right(self.parse_syntax_ext_naked(lo))); } else { ret none; } - } else { ret none; } + } + token::DOC_COMMENT(_) { + ret some(left(self.parse_outer_attributes())); + } + _ { + ret none; + } + } } // Parse attributes that appear before an item fn parse_outer_attributes() -> [ast::attribute]/~ { let mut attrs: [ast::attribute]/~ = []/~; - while self.token == token::POUND - && self.look_ahead(1u) == token::LBRACKET { - vec::push(attrs, self.parse_attribute(ast::attr_outer)); + loop { + alt copy self.token { + token::POUND { + if self.look_ahead(1u) != token::LBRACKET { + break; + } + attrs += [self.parse_attribute(ast::attr_outer)]/~; + } + token::DOC_COMMENT(s) { + let attr = ::attr::mk_sugared_doc_attr( + *self.get_str(s), self.span.lo, self.span.hi); + if attr.node.style != ast::attr_outer { + self.fatal("expected outer comment"); + } + attrs += [attr]/~; + self.bump(); + } + _ { + break; + } + } } ret attrs; } @@ -55,7 +81,8 @@ impl parser_attr for parser { let meta_item = self.parse_meta_item(); self.expect(token::RBRACKET); let mut hi = self.span.hi; - ret spanned(lo, hi, {style: style, value: *meta_item}); + ret spanned(lo, hi, {style: style, value: *meta_item, + is_sugared_doc: false}); } // Parse attributes that appear after the opening of an item, each @@ -68,22 +95,41 @@ impl parser_attr for parser { {inner: [ast::attribute]/~, next: [ast::attribute]/~} { let mut inner_attrs: [ast::attribute]/~ = []/~; let mut next_outer_attrs: 
[ast::attribute]/~ = []/~; - while self.token == token::POUND { - if self.look_ahead(1u) != token::LBRACKET { - // This is an extension - break; - } - let attr = self.parse_attribute(ast::attr_inner); - if self.token == token::SEMI { + loop { + alt copy self.token { + token::POUND { + if self.look_ahead(1u) != token::LBRACKET { + // This is an extension + break; + } + let attr = self.parse_attribute(ast::attr_inner); + if self.token == token::SEMI { + self.bump(); + inner_attrs += [attr]/~; + } else { + // It's not really an inner attribute + let outer_attr = + spanned(attr.span.lo, attr.span.hi, + {style: ast::attr_outer, value: attr.node.value, + is_sugared_doc: false}); + next_outer_attrs += [outer_attr]/~; + break; + } + } + token::DOC_COMMENT(s) { + let attr = ::attr::mk_sugared_doc_attr( + *self.get_str(s), self.span.lo, self.span.hi); self.bump(); - vec::push(inner_attrs, attr); - } else { - // It's not really an inner attribute - let outer_attr = - spanned(attr.span.lo, attr.span.hi, - {style: ast::attr_outer, value: attr.node.value}); - vec::push(next_outer_attrs, outer_attr); + if attr.node.style == ast::attr_inner { + inner_attrs += [attr]/~; + } else { + next_outer_attrs += [attr]/~; + break; + } + } + _ { break; + } } } ret {inner: inner_attrs, next: next_outer_attrs}; diff --git a/src/libsyntax/parse/comments.rs b/src/libsyntax/parse/comments.rs index b6ab87ad1bc..7a6a9f0f3d7 100644 --- a/src/libsyntax/parse/comments.rs +++ b/src/libsyntax/parse/comments.rs @@ -8,6 +8,7 @@ export cmnt; export lit; export cmnt_style; export gather_comments_and_literals; +export is_doc_comment, doc_comment_style, strip_doc_comment_decoration; enum cmnt_style { isolated, // No code on either side of each line of the comment @@ -18,6 +19,81 @@ enum cmnt_style { type cmnt = {style: cmnt_style, lines: [str]/~, pos: uint}; +fn is_doc_comment(s: str) -> bool { + s.starts_with("///") || + s.starts_with("//!") || + s.starts_with("/**") || + s.starts_with("/*!") +} + +fn 
doc_comment_style(comment: str) -> ast::attr_style { + assert is_doc_comment(comment); + if comment.starts_with("//!") || comment.starts_with("/*!") { + ast::attr_inner + } else { + ast::attr_outer + } +} + +fn strip_doc_comment_decoration(comment: str) -> str { + + /// remove whitespace-only lines from the start/end of lines + fn vertical_trim(lines: [str]/~) -> [str]/~ { + let mut i = 0u, j = lines.len(); + while i < j && lines[i].trim().is_empty() { + i += 1u; + } + while j > i && lines[j - 1u].trim().is_empty() { + j -= 1u; + } + ret lines.slice(i, j); + } + + // drop leftmost columns that contain only values in chars + fn block_trim(lines: [str]/~, chars: str, max: option<uint>) -> [str]/~ { + + let mut i = max.get_default(uint::max_value); + for lines.each {|line| + if line.trim().is_empty() { + cont; + } + for line.each_chari {|j, c| + if j >= i { + break; + } + if !chars.contains_char(c) { + i = j; + break; + } + } + } + + ret lines.map {|line| + let chars = str::chars(line); + if i > chars.len() { + "" + } else { + str::from_chars(chars.slice(i, chars.len())) + } + }; + } + + if comment.starts_with("//") { + ret comment.slice(3u, comment.len()).trim(); + } + + if comment.starts_with("/*") { + let lines = str::lines_any(comment.slice(3u, comment.len() - 2u)); + let lines = vertical_trim(lines); + let lines = block_trim(lines, "\t ", none); + let lines = block_trim(lines, "*", some(1u)); + let lines = block_trim(lines, "\t ", none); + ret str::connect(lines, "\n"); + } + + fail "not a doc-comment: " + comment; +} + fn read_to_eol(rdr: string_reader) -> str { let mut val = ""; while rdr.curr != '\n' && !is_eof(rdr) { @@ -57,29 +133,41 @@ fn consume_whitespace_counting_blank_lines(rdr: string_reader, } } -fn read_shebang_comment(rdr: string_reader, code_to_the_left: bool) -> cmnt { + +fn read_shebang_comment(rdr: string_reader, code_to_the_left: bool, + &comments: [cmnt]/~) { #debug(">>> shebang comment"); let p = rdr.chpos; #debug("<<< shebang comment"); - 
ret {style: if code_to_the_left { trailing } else { isolated }, - lines: [read_one_line_comment(rdr)]/~, - pos: p}; + vec::push(comments, { + style: if code_to_the_left { trailing } else { isolated }, + lines: [read_one_line_comment(rdr)]/~, + pos: p + }); } -fn read_line_comments(rdr: string_reader, code_to_the_left: bool) -> cmnt { +fn read_line_comments(rdr: string_reader, code_to_the_left: bool, + &comments: [cmnt]/~) { #debug(">>> line comments"); let p = rdr.chpos; let mut lines: [str]/~ = []/~; while rdr.curr == '/' && nextch(rdr) == '/' { let line = read_one_line_comment(rdr); log(debug, line); + if is_doc_comment(line) { // doc-comments are not put in comments + break; + } vec::push(lines, line); consume_non_eol_whitespace(rdr); } #debug("<<< line comments"); - ret {style: if code_to_the_left { trailing } else { isolated }, - lines: lines, - pos: p}; + if !lines.is_empty() { + vec::push(comments, { + style: if code_to_the_left { trailing } else { isolated }, + lines: lines, + pos: p + }); + } } fn all_whitespace(s: str, begin: uint, end: uint) -> bool { @@ -101,13 +189,27 @@ fn trim_whitespace_prefix_and_push_line(&lines: [str]/~, vec::push(lines, s1); } -fn read_block_comment(rdr: string_reader, code_to_the_left: bool) -> cmnt { +fn read_block_comment(rdr: string_reader, code_to_the_left: bool, + &comments: [cmnt]/~) { #debug(">>> block comment"); let p = rdr.chpos; let mut lines: [str]/~ = []/~; let mut col: uint = rdr.col; bump(rdr); bump(rdr); + + // doc-comments are not really comments, they are attributes + if rdr.curr == '*' || rdr.curr == '!' 
{ + while !(rdr.curr == '*' && nextch(rdr) == '/') && !is_eof(rdr) { + bump(rdr); + } + if !is_eof(rdr) { + bump(rdr); + bump(rdr); + } + ret; + } + let mut curr_line = "/*"; let mut level: int = 1; while level > 0 { @@ -143,7 +245,7 @@ fn read_block_comment(rdr: string_reader, code_to_the_left: bool) -> cmnt { style = mixed; } #debug("<<< block comment"); - ret {style: style, lines: lines, pos: p}; + vec::push(comments, {style: style, lines: lines, pos: p}); } fn peeking_at_comment(rdr: string_reader) -> bool { @@ -156,11 +258,11 @@ fn consume_comment(rdr: string_reader, code_to_the_left: bool, &comments: [cmnt]/~) { #debug(">>> consume comment"); if rdr.curr == '/' && nextch(rdr) == '/' { - vec::push(comments, read_line_comments(rdr, code_to_the_left)); + read_line_comments(rdr, code_to_the_left, comments); } else if rdr.curr == '/' && nextch(rdr) == '*' { - vec::push(comments, read_block_comment(rdr, code_to_the_left)); + read_block_comment(rdr, code_to_the_left, comments); } else if rdr.curr == '#' && nextch(rdr) == '!' 
{ - vec::push(comments, read_shebang_comment(rdr, code_to_the_left)); + read_shebang_comment(rdr, code_to_the_left, comments); } else { fail; } #debug("<<< consume comment"); } diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs index 8687e011635..fec6d23a03b 100644 --- a/src/libsyntax/parse/lexer.rs +++ b/src/libsyntax/parse/lexer.rs @@ -161,7 +161,11 @@ impl tt_reader_as_reader of reader for tt_reader { } fn string_advance_token(&&r: string_reader) { - consume_whitespace_and_comments(r); + for consume_whitespace_and_comments(r).each {|comment| + r.peek_tok = comment.tok; + r.peek_span = comment.sp; + ret; + } if is_eof(r) { r.peek_tok = token::EOF; @@ -277,22 +281,41 @@ fn is_hex_digit(c: char) -> bool { fn is_bin_digit(c: char) -> bool { ret c == '0' || c == '1'; } -fn consume_whitespace_and_comments(rdr: string_reader) { +// might return a sugared-doc-attr +fn consume_whitespace_and_comments(rdr: string_reader) + -> option<{tok: token::token, sp: span}> { while is_whitespace(rdr.curr) { bump(rdr); } ret consume_any_line_comment(rdr); } -fn consume_any_line_comment(rdr: string_reader) { +// might return a sugared-doc-attr +fn consume_any_line_comment(rdr: string_reader) + -> option<{tok: token::token, sp: span}> { if rdr.curr == '/' { alt nextch(rdr) { '/' { - while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); } - // Restart whitespace munch. - - ret consume_whitespace_and_comments(rdr); + bump(rdr); + bump(rdr); + // line comments starting with "///" or "//!" are doc-comments + if rdr.curr == '/' || rdr.curr == '!' { + let start_chpos = rdr.chpos - 2u; + let mut acc = "//"; + while rdr.curr != '\n' && !is_eof(rdr) { + str::push_char(acc, rdr.curr); + bump(rdr); + } + ret some({ + tok: token::DOC_COMMENT(intern(*rdr.interner, @acc)), + sp: ast_util::mk_sp(start_chpos, rdr.chpos) + }); + } else { + while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); } + // Restart whitespace munch. 
+ ret consume_whitespace_and_comments(rdr); + } } '*' { bump(rdr); bump(rdr); ret consume_block_comment(rdr); } - _ { ret; } + _ {} } } else if rdr.curr == '#' { if nextch(rdr) == '!' { @@ -305,9 +328,34 @@ fn consume_any_line_comment(rdr: string_reader) { } } } + ret none; } -fn consume_block_comment(rdr: string_reader) { +// might return a sugared-doc-attr +fn consume_block_comment(rdr: string_reader) + -> option<{tok: token::token, sp: span}> { + + // block comments starting with "/**" or "/*!" are doc-comments + if rdr.curr == '*' || rdr.curr == '!' { + let start_chpos = rdr.chpos - 2u; + let mut acc = "/*"; + while !(rdr.curr == '*' && nextch(rdr) == '/') && !is_eof(rdr) { + str::push_char(acc, rdr.curr); + bump(rdr); + } + if is_eof(rdr) { + rdr.fatal("unterminated block doc-comment"); + } else { + acc += "*/"; + bump(rdr); + bump(rdr); + ret some({ + tok: token::DOC_COMMENT(intern(*rdr.interner, @acc)), + sp: ast_util::mk_sp(start_chpos, rdr.chpos) + }); + } + } + let mut level: int = 1; while level > 0 { if is_eof(rdr) { rdr.fatal("unterminated block comment"); } diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index 8ab37e95d21..b3db69b5be6 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -80,6 +80,7 @@ enum token { //ACTUALLY(whole_nonterminal), + DOC_COMMENT(str_num), EOF, } @@ -170,11 +171,15 @@ fn to_str(in: interner<@str>, t: token) -> str { + str::escape_default(*interner::get(in, s)) + "\"" } + /* Name components */ IDENT(s, _) { *interner::get(in, s) } UNDERSCORE { "_" } + + /* Other */ + DOC_COMMENT(s) { *interner::get(in, s) } EOF { "<eof>" } } } |
