diff options
| author | Brian Anderson <banderson@mozilla.com> | 2012-11-16 18:54:48 -0800 |
|---|---|---|
| committer | Brian Anderson <banderson@mozilla.com> | 2012-11-17 18:38:39 -0800 |
| commit | 1a1e99c27d0d701f315926f400aa325ddfc8a9e7 (patch) | |
| tree | 27286028e54cd6af98f1a3294c43984306b2215e /src/libsyntax/parse | |
| parent | 428c58b9f983d31f9c7df2d48d45f6a22996692e (diff) | |
| parent | e621e68c60d02bb33a2d808071f3f07674db871c (diff) | |
| download | rust-1a1e99c27d0d701f315926f400aa325ddfc8a9e7.tar.gz rust-1a1e99c27d0d701f315926f400aa325ddfc8a9e7.zip | |
Merge remote-tracking branch 'brson/codemap'
Conflicts: src/libsyntax/ext/source_util.rs
Diffstat (limited to 'src/libsyntax/parse')
| -rw-r--r-- | src/libsyntax/parse/attr.rs | 4 | ||||
| -rw-r--r-- | src/libsyntax/parse/comments.rs | 29 | ||||
| -rw-r--r-- | src/libsyntax/parse/common.rs | 2 | ||||
| -rw-r--r-- | src/libsyntax/parse/eval.rs | 15 | ||||
| -rw-r--r-- | src/libsyntax/parse/lexer.rs | 103 | ||||
| -rw-r--r-- | src/libsyntax/parse/parser.rs | 27 |
6 files changed, 101 insertions, 79 deletions
diff --git a/src/libsyntax/parse/attr.rs b/src/libsyntax/parse/attr.rs index 42101a431d6..f0cb1d4ba3e 100644 --- a/src/libsyntax/parse/attr.rs +++ b/src/libsyntax/parse/attr.rs @@ -14,7 +14,7 @@ trait parser_attr { -> attr_or_ext; fn parse_outer_attributes() -> ~[ast::attribute]; fn parse_attribute(style: ast::attr_style) -> ast::attribute; - fn parse_attribute_naked(style: ast::attr_style, lo: uint) -> + fn parse_attribute_naked(style: ast::attr_style, lo: BytePos) -> ast::attribute; fn parse_inner_attrs_and_next() -> {inner: ~[ast::attribute], next: ~[ast::attribute]}; @@ -85,7 +85,7 @@ impl Parser: parser_attr { return self.parse_attribute_naked(style, lo); } - fn parse_attribute_naked(style: ast::attr_style, lo: uint) -> + fn parse_attribute_naked(style: ast::attr_style, lo: BytePos) -> ast::attribute { self.expect(token::LBRACKET); let meta_item = self.parse_meta_item(); diff --git a/src/libsyntax/parse/comments.rs b/src/libsyntax/parse/comments.rs index 4f265e1919c..2a8bbe3b6d8 100644 --- a/src/libsyntax/parse/comments.rs +++ b/src/libsyntax/parse/comments.rs @@ -3,6 +3,7 @@ use io::ReaderUtil; use util::interner; use lexer::{string_reader, bump, is_eof, nextch, is_whitespace, get_str_from, reader}; +use codemap::{FileMap, CharPos}; export cmnt; export lit; @@ -27,7 +28,7 @@ impl cmnt_style : cmp::Eq { } } -type cmnt = {style: cmnt_style, lines: ~[~str], pos: uint}; +type cmnt = {style: cmnt_style, lines: ~[~str], pos: BytePos}; fn is_doc_comment(s: ~str) -> bool { s.starts_with(~"///") || @@ -130,13 +131,13 @@ fn consume_non_eol_whitespace(rdr: string_reader) { fn push_blank_line_comment(rdr: string_reader, comments: &mut ~[cmnt]) { debug!(">>> blank-line comment"); let v: ~[~str] = ~[]; - comments.push({style: blank_line, lines: v, pos: rdr.chpos}); + comments.push({style: blank_line, lines: v, pos: rdr.last_pos}); } fn consume_whitespace_counting_blank_lines(rdr: string_reader, comments: &mut ~[cmnt]) { while is_whitespace(rdr.curr) && !is_eof(rdr) { - if rdr.col == 0u && rdr.curr == '\n' { + if rdr.col == CharPos(0u) && rdr.curr == '\n' { push_blank_line_comment(rdr, comments); } bump(rdr); @@ -147,7 +148,7 @@ fn consume_whitespace_counting_blank_lines(rdr: string_reader, fn read_shebang_comment(rdr: string_reader, code_to_the_left: bool, comments: &mut ~[cmnt]) { debug!(">>> shebang comment"); - let p = rdr.chpos; + let p = rdr.last_pos; debug!("<<< shebang comment"); comments.push({ style: if code_to_the_left { trailing } else { isolated }, @@ -159,7 +160,7 @@ fn read_shebang_comment(rdr: string_reader, code_to_the_left: bool, fn read_line_comments(rdr: string_reader, code_to_the_left: bool, comments: &mut ~[cmnt]) { debug!(">>> line comments"); - let p = rdr.chpos; + let p = rdr.last_pos; let mut lines: ~[~str] = ~[]; while rdr.curr == '/' && nextch(rdr) == '/' { let line = read_one_line_comment(rdr); @@ -180,6 +181,8 @@ fn read_line_comments(rdr: string_reader, code_to_the_left: bool, } } +// FIXME #3961: This is not the right way to convert string byte +// offsets to characters. fn all_whitespace(s: ~str, begin: uint, end: uint) -> bool { let mut i: uint = begin; while i != end { @@ -189,9 +192,11 @@ fn all_whitespace(s: ~str, begin: uint, end: uint) -> bool { } fn trim_whitespace_prefix_and_push_line(lines: &mut ~[~str], - s: ~str, col: uint) { + s: ~str, col: CharPos) { let mut s1; let len = str::len(s); + // FIXME #3961: Doing bytewise comparison and slicing with CharPos + let col = col.to_uint(); if all_whitespace(s, 0u, uint::min(len, col)) { if col < len { s1 = str::slice(s, col, len); @@ -204,9 +209,9 @@ fn trim_whitespace_prefix_and_push_line(lines: &mut ~[~str], fn read_block_comment(rdr: string_reader, code_to_the_left: bool, comments: &mut ~[cmnt]) { debug!(">>> block comment"); - let p = rdr.chpos; + let p = rdr.last_pos; let mut lines: ~[~str] = ~[]; - let mut col: uint = rdr.col; + let mut col: CharPos = rdr.col; bump(rdr); bump(rdr); @@ -279,7 +284,7 @@ fn consume_comment(rdr: string_reader, code_to_the_left: bool, debug!("<<< consume comment"); } -type lit = {lit: ~str, pos: uint}; +type lit = {lit: ~str, pos: BytePos}; fn gather_comments_and_literals(span_diagnostic: diagnostic::span_handler, path: ~str, @@ -287,8 +292,10 @@ fn gather_comments_and_literals(span_diagnostic: diagnostic::span_handler, {cmnts: ~[cmnt], lits: ~[lit]} { let src = @str::from_bytes(srdr.read_whole_stream()); let itr = parse::token::mk_fake_ident_interner(); - let rdr = lexer::new_low_level_string_reader - (span_diagnostic, codemap::new_filemap(path, src, 0u, 0u), itr); + let cm = CodeMap::new(); + let filemap = cm.new_filemap(path, src); + let rdr = lexer::new_low_level_string_reader( + span_diagnostic, filemap, itr); let mut comments: ~[cmnt] = ~[]; let mut literals: ~[lit] = ~[]; diff --git a/src/libsyntax/parse/common.rs b/src/libsyntax/parse/common.rs index 50c22c08f4f..1811951fc0e 100644 --- a/src/libsyntax/parse/common.rs +++ b/src/libsyntax/parse/common.rs @@ -205,7 +205,7 @@ impl Parser: parser_common { if self.token == token::GT { self.bump(); } else if self.token == token::BINOP(token::SHR) { - self.swap(token::GT, self.span.lo + 1u, self.span.hi); + self.swap(token::GT, self.span.lo + BytePos(1u), self.span.hi); } else { let mut s: ~str = ~"expected `"; s += token_to_str(self.reader, token::GT); diff --git a/src/libsyntax/parse/eval.rs b/src/libsyntax/parse/eval.rs index 56c9d4de9f3..f08f1954464 100644 --- a/src/libsyntax/parse/eval.rs +++ b/src/libsyntax/parse/eval.rs @@ -62,12 +62,10 @@ fn parse_companion_mod(cx: ctx, prefix: &Path, suffix: &Option<Path>) let modpath = &companion_file(prefix, suffix); if file_exists(modpath) { debug!("found companion mod"); - let (p0, r0) = new_parser_etc_from_file(cx.sess, cx.cfg, - modpath, SOURCE_FILE); + let p0 = new_parser_from_file(cx.sess, cx.cfg, + modpath, SOURCE_FILE); let inner_attrs = p0.parse_inner_attrs_and_next(); let m0 = p0.parse_mod_items(token::EOF, inner_attrs.next); - cx.sess.chpos = r0.chpos; - cx.sess.byte_pos = cx.sess.byte_pos + r0.pos; return (m0.view_items, m0.items, inner_attrs.inner); } else { return (~[], ~[], ~[]); @@ -93,9 +91,9 @@ fn eval_crate_directive(cx: ctx, cdir: @ast::crate_directive, prefix: &Path, } else { prefix.push_many(file_path.components) }; - let (p0, r0) = - new_parser_etc_from_file(cx.sess, cx.cfg, - &full_path, SOURCE_FILE); + let p0 = + new_parser_from_file(cx.sess, cx.cfg, + &full_path, SOURCE_FILE); let inner_attrs = p0.parse_inner_attrs_and_next(); let mod_attrs = vec::append(attrs, inner_attrs.inner); let first_item_outer_attrs = inner_attrs.next; @@ -104,9 +102,6 @@ fn eval_crate_directive(cx: ctx, cdir: @ast::crate_directive, prefix: &Path, let i = p0.mk_item(cdir.span.lo, cdir.span.hi, /* FIXME (#2543) */ copy id, ast::item_mod(m0), vis, mod_attrs); - // Thread defids, chpos and byte_pos through the parsers - cx.sess.chpos = r0.chpos; - cx.sess.byte_pos = cx.sess.byte_pos + r0.pos; items.push(i); } ast::cdir_dir_mod(vis, id, cdirs, attrs) => { diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs index 482813f3fd0..d65be043f86 100644 --- a/src/libsyntax/parse/lexer.rs +++ b/src/libsyntax/parse/lexer.rs @@ -1,5 +1,5 @@ use diagnostic::span_handler; -use codemap::span; +use codemap::{span, CodeMap, CharPos, BytePos}; use ext::tt::transcribe::{tt_reader, new_tt_reader, dup_tt_reader, tt_next_token}; @@ -21,11 +21,15 @@ trait reader { type string_reader = @{ span_diagnostic: span_handler, src: @~str, - mut col: uint, - mut pos: uint, + // The absolute offset within the codemap of the next character to read + mut pos: BytePos, + // The absolute offset within the codemap of the last character read(curr) + mut last_pos: BytePos, + // The column of the next character to read + mut col: CharPos, + // The last character to be read mut curr: char, - mut chpos: uint, - filemap: codemap::filemap, + filemap: @codemap::FileMap, interner: @token::ident_interner, /* cached: */ mut peek_tok: token::Token, @@ -33,7 +37,7 @@ type string_reader = @{ }; fn new_string_reader(span_diagnostic: span_handler, - filemap: codemap::filemap, + filemap: @codemap::FileMap, itr: @token::ident_interner) -> string_reader { let r = new_low_level_string_reader(span_diagnostic, filemap, itr); string_advance_token(r); /* fill in peek_* */ @@ -42,27 +46,29 @@ fn new_string_reader(span_diagnostic: span_handler, /* For comments.rs, which hackily pokes into 'pos' and 'curr' */ fn new_low_level_string_reader(span_diagnostic: span_handler, - filemap: codemap::filemap, + filemap: @codemap::FileMap, itr: @token::ident_interner) -> string_reader { + // Force the initial reader bump to start on a fresh line + let initial_char = '\n'; let r = @{span_diagnostic: span_diagnostic, src: filemap.src, - mut col: 0u, mut pos: 0u, mut curr: -1 as char, - mut chpos: filemap.start_pos.ch, + mut pos: filemap.start_pos, + mut last_pos: filemap.start_pos, + mut col: CharPos(0), + mut curr: initial_char, filemap: filemap, interner: itr, /* dummy values; not read */ mut peek_tok: token::EOF, - mut peek_span: ast_util::mk_sp(0u,0u)}; - if r.pos < (*filemap.src).len() { - let next = str::char_range_at(*r.src, r.pos); - r.pos = next.next; - r.curr = next.ch; - } + mut peek_span: ast_util::dummy_sp()}; + bump(r); return r; } fn dup_string_reader(&&r: string_reader) -> string_reader { @{span_diagnostic: r.span_diagnostic, src: r.src, - mut col: r.col, mut pos: r.pos, mut curr: r.curr, mut chpos: r.chpos, + mut pos: r.pos, + mut last_pos: r.last_pos, + mut col: r.col, mut curr: r.curr, filemap: r.filemap, interner: r.interner, mut peek_tok: r.peek_tok, mut peek_span: r.peek_span} } @@ -117,34 +123,48 @@ fn string_advance_token(&&r: string_reader) { if is_eof(r) { r.peek_tok = token::EOF; } else { - let start_chpos = r.chpos; + let start_bytepos = r.last_pos; r.peek_tok = next_token_inner(r); - r.peek_span = ast_util::mk_sp(start_chpos, r.chpos); + r.peek_span = ast_util::mk_sp(start_bytepos, r.last_pos); }; } -fn get_str_from(rdr: string_reader, start: uint) -> ~str unsafe { +fn byte_offset(rdr: string_reader) -> BytePos { + (rdr.pos - rdr.filemap.start_pos) +} + +fn get_str_from(rdr: string_reader, start: BytePos) -> ~str unsafe { // I'm pretty skeptical about this subtraction. What if there's a // multi-byte character before the mark? - return str::slice(*rdr.src, start - 1u, rdr.pos - 1u); + return str::slice(*rdr.src, start.to_uint() - 1u, + byte_offset(rdr).to_uint() - 1u); } fn bump(rdr: string_reader) { - if rdr.pos < (*rdr.src).len() { - rdr.col += 1u; - rdr.chpos += 1u; - if rdr.curr == '\n' { - codemap::next_line(rdr.filemap, rdr.chpos, rdr.pos); - rdr.col = 0u; - } - let next = str::char_range_at(*rdr.src, rdr.pos); - rdr.pos = next.next; + rdr.last_pos = rdr.pos; + let current_byte_offset = byte_offset(rdr).to_uint();; + if current_byte_offset < (*rdr.src).len() { + let last_char = rdr.curr; + let next = str::char_range_at(*rdr.src, current_byte_offset); + let byte_offset_diff = next.next - current_byte_offset; + rdr.pos = rdr.pos + BytePos(byte_offset_diff); rdr.curr = next.ch; + rdr.col += CharPos(1u); + if last_char == '\n' { + rdr.filemap.next_line(rdr.last_pos); + rdr.col = CharPos(0u); + } + + if byte_offset_diff > 1 { + rdr.filemap.record_multibyte_char( + BytePos(current_byte_offset), byte_offset_diff); + } } else { + // XXX: What does this accomplish? if (rdr.curr != -1 as char) { - rdr.col += 1u; - rdr.chpos += 1u; + rdr.pos = rdr.pos + BytePos(1u); + rdr.col += CharPos(1u); rdr.curr = -1 as char; } } @@ -153,8 +173,9 @@ fn is_eof(rdr: string_reader) -> bool { rdr.curr == -1 as char } fn nextch(rdr: string_reader) -> char { - if rdr.pos < (*rdr.src).len() { - return str::char_at(*rdr.src, rdr.pos); + let offset = byte_offset(rdr).to_uint(); + if offset < (*rdr.src).len() { + return str::char_at(*rdr.src, offset); } else { return -1 as char; } } @@ -211,7 +232,7 @@ fn consume_any_line_comment(rdr: string_reader) bump(rdr); // line comments starting with "///" or "//!" are doc-comments if rdr.curr == '/' || rdr.curr == '!' { - let start_chpos = rdr.chpos - 2u; + let start_bpos = rdr.pos - BytePos(2u); let mut acc = ~"//"; while rdr.curr != '\n' && !is_eof(rdr) { str::push_char(&mut acc, rdr.curr); @@ -219,7 +240,7 @@ fn consume_any_line_comment(rdr: string_reader) } return Some({ tok: token::DOC_COMMENT(rdr.interner.intern(@acc)), - sp: ast_util::mk_sp(start_chpos, rdr.chpos) + sp: ast_util::mk_sp(start_bpos, rdr.pos) }); } else { while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); } @@ -232,10 +253,10 @@ fn consume_any_line_comment(rdr: string_reader) } } else if rdr.curr == '#' { if nextch(rdr) == '!' { - let cmap = codemap::new_codemap(); + let cmap = @CodeMap::new(); (*cmap).files.push(rdr.filemap); - let loc = codemap::lookup_char_pos_adj(cmap, rdr.chpos); - if loc.line == 1u && loc.col == 0u { + let loc = cmap.lookup_char_pos_adj(rdr.last_pos); + if loc.line == 1u && loc.col == CharPos(0u) { while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); } return consume_whitespace_and_comments(rdr); } @@ -250,7 +271,7 @@ fn consume_block_comment(rdr: string_reader) // block comments starting with "/**" or "/*!" are doc-comments if rdr.curr == '*' || rdr.curr == '!' { - let start_chpos = rdr.chpos - 2u; + let start_bpos = rdr.pos - BytePos(2u); let mut acc = ~"/*"; while !(rdr.curr == '*' && nextch(rdr) == '/') && !is_eof(rdr) { str::push_char(&mut acc, rdr.curr); @@ -264,7 +285,7 @@ fn consume_block_comment(rdr: string_reader) bump(rdr); return Some({ tok: token::DOC_COMMENT(rdr.interner.intern(@acc)), - sp: ast_util::mk_sp(start_chpos, rdr.chpos) + sp: ast_util::mk_sp(start_bpos, rdr.pos) }); } } else { @@ -590,7 +611,7 @@ fn next_token_inner(rdr: string_reader) -> token::Token { return token::LIT_INT(c2 as i64, ast::ty_char); } '"' => { - let n = rdr.chpos; + let n = byte_offset(rdr); bump(rdr); while rdr.curr != '"' { if is_eof(rdr) { diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index 49c3d38ce55..2f96f6ba0a0 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -5,7 +5,7 @@ use either::{Either, Left, Right}; use std::map::HashMap; use token::{can_begin_expr, is_ident, is_ident_or_path, is_plain_ident, INTERPOLATED, special_idents}; -use codemap::{span,fss_none}; +use codemap::{span,FssNone, BytePos}; use util::interner::Interner; use ast_util::{spanned, respan, mk_sp, ident_to_path, operator_prec}; use lexer::reader; @@ -244,7 +244,7 @@ impl Parser { self.token = next.tok; self.span = next.sp; } - fn swap(next: token::Token, lo: uint, hi: uint) { + fn swap(next: token::Token, +lo: BytePos, +hi: BytePos) { self.token = next; self.span = mk_sp(lo, hi); } @@ -906,12 +906,12 @@ impl Parser { return spanned(lo, e.span.hi, {mutbl: m, ident: i, expr: e}); } - fn mk_expr(lo: uint, hi: uint, +node: expr_) -> @expr { + fn mk_expr(+lo: BytePos, +hi: BytePos, +node: expr_) -> @expr { return @{id: self.get_id(), callee_id: self.get_id(), node: node, span: mk_sp(lo, hi)}; } - fn mk_mac_expr(lo: uint, hi: uint, m: mac_) -> @expr { + fn mk_mac_expr(+lo: BytePos, +hi: BytePos, m: mac_) -> @expr { return @{id: self.get_id(), callee_id: self.get_id(), node: expr_mac({node: m, span: mk_sp(lo, hi)}), @@ -1141,7 +1141,7 @@ impl Parser { return self.mk_expr(lo, hi, ex); } - fn parse_block_expr(lo: uint, blk_mode: blk_check_mode) -> @expr { + fn parse_block_expr(lo: BytePos, blk_mode: blk_check_mode) -> @expr { self.expect(token::LBRACE); let blk = self.parse_block_tail(lo, blk_mode); return self.mk_expr(blk.span.lo, blk.span.hi, expr_block(blk)); @@ -1153,7 +1153,7 @@ impl Parser { return self.parse_syntax_ext_naked(lo); } - fn parse_syntax_ext_naked(lo: uint) -> @expr { + fn parse_syntax_ext_naked(lo: BytePos) -> @expr { match self.token { token::IDENT(_, _) => (), _ => self.fatal(~"expected a syntax expander name") @@ -2287,11 +2287,11 @@ impl Parser { // I guess that also means "already parsed the 'impure'" if // necessary, and this should take a qualifier. // some blocks start with "#{"... - fn parse_block_tail(lo: uint, s: blk_check_mode) -> blk { + fn parse_block_tail(lo: BytePos, s: blk_check_mode) -> blk { self.parse_block_tail_(lo, s, ~[]) } - fn parse_block_tail_(lo: uint, s: blk_check_mode, + fn parse_block_tail_(lo: BytePos, s: blk_check_mode, +first_item_attrs: ~[attribute]) -> blk { let mut stmts = ~[]; let mut expr = None; @@ -2589,7 +2589,7 @@ impl Parser { return {ident: id, tps: ty_params}; } - fn mk_item(lo: uint, hi: uint, +ident: ident, + fn mk_item(+lo: BytePos, +hi: BytePos, +ident: ident, +node: item_, vis: visibility, +attrs: ~[attribute]) -> @item { return @{ident: ident, @@ -3041,7 +3041,7 @@ impl Parser { items: items}; } - fn parse_item_foreign_mod(lo: uint, + fn parse_item_foreign_mod(lo: BytePos, visibility: visibility, attrs: ~[attribute], items_allowed: bool) @@ -3096,7 +3096,7 @@ impl Parser { }); } - fn parse_type_decl() -> {lo: uint, ident: ident} { + fn parse_type_decl() -> {lo: BytePos, ident: ident} { let lo = self.last_span.lo; let id = self.parse_ident(); return {lo: lo, ident: id}; @@ -3425,9 +3425,8 @@ impl Parser { }; let m = ast::mac_invoc_tt(pth, tts); let m: ast::mac = {node: m, - span: {lo: self.span.lo, - hi: self.span.hi, - expn_info: None}}; + span: mk_sp(self.span.lo, + self.span.hi)}; let item_ = item_mac(m); return iovi_item(self.mk_item(lo, self.last_span.hi, id, item_, visibility, attrs)); |
