diff options
| author | bors <bors@rust-lang.org> | 2021-12-03 13:20:14 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2021-12-03 13:20:14 +0000 |
| commit | 2a9e0831d6603d87220cedd1b1293e2eb82ef55c (patch) | |
| tree | adc34a1951d77214c5f31dec639ab79ffbcb0810 /compiler/rustc_lexer/src/lib.rs | |
| parent | d47a6cc3f2dab0ef046c2bb7b76a9ee8d1a0be92 (diff) | |
| parent | 1f147a2ed7671cacd8ab423d8979a1ccfa4443ab (diff) | |
| download | rust-2a9e0831d6603d87220cedd1b1293e2eb82ef55c.tar.gz rust-2a9e0831d6603d87220cedd1b1293e2eb82ef55c.zip | |
Auto merge of #91393 - Julian-Wollersberger:lexer_optimization, r=petrochenkov
Optimize `rustc_lexer`. The `cursor.first()` method in `rustc_lexer` now calls the `chars.next()` method instead of `chars.nth_char(0)`. This allows LLVM to optimize the code better. The biggest win is that `eat_while()` is now fully inlined and generates better assembly. This improves the lexer's performance by 35% in a micro-benchmark I made (lexing all 18 MB of code in the compiler directory). However, lexing is only a small part of the overall compilation time, so I don't know how significant the improvement is in practice. Big thanks to criterion and `cargo asm`.
Diffstat (limited to 'compiler/rustc_lexer/src/lib.rs')
| -rw-r--r-- | compiler/rustc_lexer/src/lib.rs | 20 |
1 files changed, 7 insertions, 13 deletions
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index 44b002fa93f..5b8300ab530 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -227,14 +227,15 @@ pub fn first_token(input: &str) -> Token { } /// Creates an iterator that produces tokens from the input string. -pub fn tokenize(mut input: &str) -> impl Iterator<Item = Token> + '_ { +pub fn tokenize(input: &str) -> impl Iterator<Item = Token> + '_ { + let mut cursor = Cursor::new(input); std::iter::from_fn(move || { - if input.is_empty() { - return None; + if cursor.is_eof() { + None + } else { + cursor.reset_len_consumed(); + Some(cursor.advance_token()) } - let token = first_token(input); - input = &input[token.len..]; - Some(token) }) } @@ -832,11 +833,4 @@ impl Cursor<'_> { self.eat_while(is_id_continue); } - - /// Eats symbols while predicate returns true or until the end of file is reached. - fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) { - while predicate(self.first()) && !self.is_eof() { - self.bump(); - } - } } |
