diff options
| field | value | date |
|---|---|---|
| author | Marijn Haverbeke <marijnh@gmail.com> | 2011-04-08 16:48:17 +0200 |
| committer | Marijn Haverbeke <marijnh@gmail.com> | 2011-04-09 01:05:18 +0200 |
| commit | 094d31f5e40d086a3f1aeb4ff7ea93f59a755d4e (patch) | |
| tree | 703dec3bdbb9df9309f8d7ea198254063170237e /src | |
| parent | cae703c0b1fabbc6ed3f6501198778d18a110fc2 (diff) | |
| download | rust-094d31f5e40d086a3f1aeb4ff7ea93f59a755d4e.tar.gz rust-094d31f5e40d086a3f1aeb4ff7ea93f59a755d4e.zip | |
Make lexer buffer the whole file
This way, it won't have to go through a bunch of calls for each byte fetched.
Diffstat (limited to 'src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/comp/front/lexer.rs | 125 |
| -rw-r--r-- | src/lib/io.rs | 8 |
2 files changed, 74 insertions, 59 deletions
diff --git a/src/comp/front/lexer.rs b/src/comp/front/lexer.rs index 5a9c2b11dc0..fb3a7e2706a 100644 --- a/src/comp/front/lexer.rs +++ b/src/comp/front/lexer.rs @@ -11,24 +11,26 @@ import util.common; import util.common.new_str_hash; state type reader = state obj { - fn is_eof() -> bool; - fn curr() -> char; - fn next() -> char; - impure fn bump(); - fn mark(); - fn get_filename() -> str; - fn get_mark_pos() -> common.pos; - fn get_curr_pos() -> common.pos; - fn get_keywords() -> hashmap[str,token.token]; - fn get_reserved() -> hashmap[str,()]; + fn is_eof() -> bool; + fn curr() -> char; + fn next() -> char; + impure fn init(); + impure fn bump(); + fn mark(); + fn get_filename() -> str; + fn get_mark_pos() -> common.pos; + fn get_curr_pos() -> common.pos; + fn get_keywords() -> hashmap[str,token.token]; + fn get_reserved() -> hashmap[str,()]; }; impure fn new_reader(io.reader rdr, str filename) -> reader { - state obj reader(io.reader rdr, + state obj reader(str file, str filename, - mutable char c, - mutable char n, + uint len, + mutable uint pos, + mutable char ch, mutable uint mark_line, mutable uint mark_col, mutable uint line, @@ -36,63 +38,68 @@ impure fn new_reader(io.reader rdr, str filename) -> reader hashmap[str,token.token] keywords, hashmap[str,()] reserved) { - fn is_eof() -> bool { - ret c == (-1) as char; - } + fn is_eof() -> bool { + ret ch == -1 as char; + } - fn get_curr_pos() -> common.pos { - ret rec(line=line, col=col); - } + fn get_curr_pos() -> common.pos { + ret rec(line=line, col=col); + } - fn get_mark_pos() -> common.pos { - ret rec(line=mark_line, col=mark_col); - } + fn get_mark_pos() -> common.pos { + ret rec(line=mark_line, col=mark_col); + } - fn get_filename() -> str { - ret filename; - } + fn get_filename() -> str { + ret filename; + } - fn curr() -> char { - ret c; - } + fn curr() -> char { + ret ch; + } - fn next() -> char { - ret n; + fn next() -> char { + if (pos < len) {ret _str.char_at(file, pos);} + else {ret -1 as char;} + 
} + + impure fn init() { + if (pos < len) { + auto next = _str.char_range_at(file, pos); + pos = next._1; + ch = next._0; } + } - impure fn bump() { - - let char prev = c; - - c = n; - - if (c == (-1) as char) { - ret; - } - - if (prev == '\n') { + impure fn bump() { + if (pos < len) { + if (ch == '\n') { line += 1u; col = 0u; } else { col += 1u; } - - n = rdr.read_char(); + auto next = _str.char_range_at(file, pos); + pos = next._1; + ch = next._0; + } else { + ch = -1 as char; } + } - fn mark() { - mark_line = line; - mark_col = col; - } + fn mark() { + mark_line = line; + mark_col = col; + } - fn get_keywords() -> hashmap[str,token.token] { - ret keywords; - } + fn get_keywords() -> hashmap[str,token.token] { + ret keywords; + } - fn get_reserved() -> hashmap[str,()] { - ret reserved; - } + fn get_reserved() -> hashmap[str,()] { + ret reserved; } + } auto keywords = new_str_hash[token.token](); @@ -208,13 +215,14 @@ impure fn new_reader(io.reader rdr, str filename) -> reader reserved.insert("m128", ()); // IEEE 754-2008 'decimal128' reserved.insert("dec", ()); // One of m32, m64, m128 - ret reader(rdr, filename, rdr.read_char(), - rdr.read_char(), 1u, 0u, 1u, 0u, keywords, reserved); + auto file = _str.unsafe_from_bytes(rdr.read_whole_stream()); + auto rd = reader(file, filename, _str.byte_len(file), 0u, -1 as char, + 1u, 0u, 1u, 0u, keywords, reserved); + rd.init(); + ret rd; } - - fn in_range(char c, char lo, char hi) -> bool { ret lo <= c && c <= hi; } @@ -689,7 +697,6 @@ impure fn next_token(reader rdr) -> token.token { case ('"') { rdr.bump(); - // FIXME: general utf8-consumption support. 
while (rdr.curr() != '"') { alt (rdr.curr()) { case ('\\') { @@ -850,7 +857,7 @@ impure fn read_block_comment(reader rdr) -> cmnt { impure fn gather_comments(str path) -> vec[cmnt] { auto srdr = io.file_reader(path); - auto rdr = lexer.new_reader(srdr, path); + auto rdr = new_reader(srdr, path); let vec[cmnt] comments = vec(); while (!rdr.is_eof()) { while (true) { diff --git a/src/lib/io.rs b/src/lib/io.rs index 4a3274ac999..61f06795444 100644 --- a/src/lib/io.rs +++ b/src/lib/io.rs @@ -41,6 +41,7 @@ type reader = impure fn read_le_uint(uint size) -> uint; impure fn read_le_int(uint size) -> int; impure fn read_be_uint(uint size) -> uint; + impure fn read_whole_stream() -> vec[u8]; impure fn seek(int offset, seek_style whence); impure fn tell() -> uint; // FIXME: eventually u64 @@ -170,6 +171,13 @@ state obj new_reader(buf_reader rdr) { } ret val; } + impure fn read_whole_stream() -> vec[u8] { + let vec[u8] buf = vec(); + while (!rdr.eof()) { + buf += rdr.read(2048u); + } + ret buf; + } impure fn seek(int offset, seek_style whence) { ret rdr.seek(offset, whence); } |
