about summary refs log tree commit diff
path: root/src/libcore/tests
diff options
context:
space:
mode:
authorManish Goregaokar <manishsmail@gmail.com>2020-07-11 08:53:16 -0700
committerGitHub <noreply@github.com>2020-07-11 08:53:16 -0700
commit1979fa86f9fd8cc53384d2dabe775bcbf012a5ad (patch)
tree44744e4fcffb6dd96a97cecb0c96357805f19275 /src/libcore/tests
parent084ac77cf29e786df7251392bed0b6e6c7ea8786 (diff)
parenta150dcc872b4f003c4a0e4cd7bb0e7c51ec791b2 (diff)
downloadrust-1979fa86f9fd8cc53384d2dabe775bcbf012a5ad.tar.gz
rust-1979fa86f9fd8cc53384d2dabe775bcbf012a5ad.zip
Rollup merge of #74066 - thomcc:optimize-is-ascii, r=nagisa
Optimize is_ascii for str and [u8].

This optimizes the `is_ascii` function for `[u8]` and `str`. I've been surprised for a while that this hadn't already been done, so I just did it.

Benchmarks comparing before/after look like:

```
test ascii::long_readonly::is_ascii_slice_iter_all              ... bench:         174 ns/iter (+/- 79) = 40172 MB/s
test ascii::long_readonly::is_ascii_slice_libcore               ... bench:          16 ns/iter (+/- 5) = 436875 MB/s
test ascii::medium_readonly::is_ascii_slice_iter_all            ... bench:          12 ns/iter (+/- 3) = 2666 MB/s
test ascii::medium_readonly::is_ascii_slice_libcore             ... bench:           2 ns/iter (+/- 0) = 16000 MB/s
test ascii::short_readonly::is_ascii_slice_iter_all             ... bench:           3 ns/iter (+/- 0) = 2333 MB/s
test ascii::short_readonly::is_ascii_slice_libcore              ... bench:           4 ns/iter (+/- 0) = 1750 MB/s
```

(Taken on an x86_64 MacBook with a 2.9 GHz 6-core Intel Core i9)

Where `is_ascii_slice_iter_all` is the old version, and `is_ascii_slice_libcore` is the new.

I tried to document the code well, so hopefully it's understandable. It has fairly exhaustive tests ensuring size/align doesn't get violated -- because `miri` doesn't really help a lot for this sort of code right now, I tried to `debug_assert` all the safety invariants I'm depending on. (Of course, none of those assertions are required for correctness or soundness -- they just allow us to test that this sort of pointer manipulation is sound and such.)

Anyway, thanks. Let me know if you have questions/desired changes.
Diffstat (limited to 'src/libcore/tests')
-rw-r--r--src/libcore/tests/ascii.rs56
1 files changed, 56 insertions, 0 deletions
diff --git a/src/libcore/tests/ascii.rs b/src/libcore/tests/ascii.rs
index 71275d40c46..57f2de16b2b 100644
--- a/src/libcore/tests/ascii.rs
+++ b/src/libcore/tests/ascii.rs
@@ -343,3 +343,59 @@ fn test_is_ascii_control() {
         " ",
     );
 }
+
+// `is_ascii` does a good amount of pointer manipulation and has
+// alignment-dependent computation. This is all sanity-checked via
+// `debug_assert!`s, so we test various sizes/alignments thoroughly versus an
+// "obviously correct" baseline function.
+#[test]
+fn test_is_ascii_align_size_thoroughly() {
+    // The "obviously-correct" baseline: checks each byte individually via `u8::is_ascii`.
+    fn is_ascii_baseline(s: &[u8]) -> bool {
+        s.iter().all(|b| b.is_ascii())
+    }
+
+    // Helper building `l` copies of `b0` followed by `l` copies of `b1` (`2 * l` bytes total).
+    fn repeat_concat(b0: u8, b1: u8, l: usize) -> Vec<u8> {
+        use core::iter::repeat;
+        repeat(b0).take(l).chain(repeat(b1).take(l)).collect()
+    }
+
+    // Miri is too slow for much of this, and in miri `align_offset` always
+    // returns `usize::max_value()` anyway (at the moment), so we just test
+    // lightly.
+    let iter = if cfg!(miri) { 0..5 } else { 0..100 }; // repeat counts used to build each case
+
+    for i in iter {
+        #[cfg(not(miri))]
+        let cases = &[
+            b"a".repeat(i), // all ASCII
+            b"\0".repeat(i), // smallest ASCII byte
+            b"\x7f".repeat(i), // largest ASCII byte
+            b"\x80".repeat(i), // smallest non-ASCII byte
+            b"\xff".repeat(i), // largest non-ASCII byte
+            repeat_concat(b'a', 0x80u8, i), // ASCII run followed by a non-ASCII run
+            repeat_concat(0x80u8, b'a', i), // non-ASCII run followed by an ASCII run
+        ];
+
+        #[cfg(miri)]
+        let cases = &[repeat_concat(b'a', 0x80u8, i)];
+
+        for case in cases {
+            for pos in 0..=case.len() { // inclusive upper bound, so empty slices are tested too
+                // Drop `pos` bytes from the front of `case`: potentially misaligned head.
+                let prefix = &case[pos..];
+                assert_eq!(is_ascii_baseline(prefix), prefix.is_ascii(),);
+
+                // Drop `pos` bytes from the back of `case`: potentially misaligned tail.
+                let suffix = &case[..case.len() - pos];
+
+                assert_eq!(is_ascii_baseline(suffix), suffix.is_ascii(),);
+
+                // Trim `pos / 2` bytes off each end: both head and tail may be misaligned.
+                let mid = &case[(pos / 2)..(case.len() - (pos / 2))];
+                assert_eq!(is_ascii_baseline(mid), mid.is_ascii(),);
+            }
+        }
+    }
+}