diff options
| author | Corey Farwell <coreyf@rwell.org> | 2017-02-05 09:14:40 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2017-02-05 09:14:40 -0500 |
| commit | 8163b4b1f2ce90b93ae9b16cae15fc6973e1a569 (patch) | |
| tree | 781c442e33e7e80d70ca173a2a0174beba32ca76 | |
| parent | 4f8ce9efb9ff27af430d398ca472049e3595aaa6 (diff) | |
| parent | bfabe817de438c63777aa8c01d6998b5158f7fdb (diff) | |
| download | rust-8163b4b1f2ce90b93ae9b16cae15fc6973e1a569.tar.gz rust-8163b4b1f2ce90b93ae9b16cae15fc6973e1a569.zip | |
Rollup merge of #39107 - llogiq:branchless_filter_count, r=alexcrichton
branchless `.filter(_).count()`

I found that the branchless version is slower only when there are few or no branch mispredictions, which usually isn't the case. I measured changes ranging from a 5% slowdown (perfect prediction) to a 60% speedup (real-world data).
| -rw-r--r-- | src/libcore/iter/mod.rs | 22 | ||||
| -rw-r--r-- | src/libcoretest/iter.rs | 6 |
2 files changed, 27 insertions, 1 deletions
```diff
diff --git a/src/libcore/iter/mod.rs b/src/libcore/iter/mod.rs
index 3999db0d63c..d9b8c5ea589 100644
--- a/src/libcore/iter/mod.rs
+++ b/src/libcore/iter/mod.rs
@@ -1086,7 +1086,7 @@ impl<I: Iterator, P> Iterator for Filter<I, P> where P: FnMut(&I::Item) -> bool
 
     #[inline]
     fn next(&mut self) -> Option<I::Item> {
-        for x in self.iter.by_ref() {
+        for x in &mut self.iter {
             if (self.predicate)(&x) {
                 return Some(x);
             }
@@ -1099,6 +1099,26 @@ impl<I: Iterator, P> Iterator for Filter<I, P> where P: FnMut(&I::Item) -> bool
         let (_, upper) = self.iter.size_hint();
         (0, upper) // can't know a lower bound, due to the predicate
     }
+
+    // this special case allows the compiler to make `.filter(_).count()`
+    // branchless. Barring perfect branch prediction (which is unattainable in
+    // the general case), this will be much faster in >90% of cases (containing
+    // virtually all real workloads) and only a tiny bit slower in the rest.
+    //
+    // Having this specialization thus allows us to write `.filter(p).count()`
+    // where we would otherwise write `.map(|x| p(x) as usize).sum()`, which is
+    // less readable and also less backwards-compatible to Rust before 1.10.
+    //
+    // Using the branchless version will also simplify the LLVM byte code, thus
+    // leaving more budget for LLVM optimizations.
+    #[inline]
+    fn count(mut self) -> usize {
+        let mut count = 0;
+        for x in &mut self.iter {
+            count += (self.predicate)(&x) as usize;
+        }
+        count
+    }
 }
 
 #[stable(feature = "rust1", since = "1.0.0")]
diff --git a/src/libcoretest/iter.rs b/src/libcoretest/iter.rs
index 6d02f76c33d..c7833dbd156 100644
--- a/src/libcoretest/iter.rs
+++ b/src/libcoretest/iter.rs
@@ -192,6 +192,12 @@ fn test_iterator_enumerate_count() {
 }
 
 #[test]
+fn test_iterator_filter_count() {
+    let xs = [0, 1, 2, 3, 4, 5, 6, 7, 8];
+    assert_eq!(xs.iter().filter(|&&x| x % 2 == 0).count(), 5);
+}
+
+#[test]
 fn test_iterator_peekable() {
     let xs = vec![0, 1, 2, 3, 4, 5];
     let mut it = xs.iter().cloned().peekable();
```
