about summary refs log tree commit diff
path: root/src/libcore/tests
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2018-01-01 19:04:33 +0000
committerbors <bors@rust-lang.org>2018-01-01 19:04:33 +0000
commitb65f0bedd2f22d9661ecb7092f07746dc2ccfb0d (patch)
tree142a5b0bdf6197e8a87847d881d9b8616494f3ff /src/libcore/tests
parent5deba220d4c42b5313d7e71731ce5e8698866684 (diff)
parent5cf55165fae5c8538db5c00e252ad9ba42aaf246 (diff)
downloadrust-b65f0bedd2f22d9661ecb7092f07746dc2ccfb0d.tar.gz
rust-b65f0bedd2f22d9661ecb7092f07746dc2ccfb0d.zip
Auto merge of #46735 - Manishearth:memchr-find, r=burntsushi
Use memchr for str::find(char)

This is a 10x improvement for searching for characters.

This also contains the patches from https://github.com/rust-lang/rust/pull/46713 . Feel free to land both separately or together.

cc @mystor @alexcrichton

r? @bluss

fixes #46693
Diffstat (limited to 'src/libcore/tests')
-rw-r--r--src/libcore/tests/lib.rs2
-rw-r--r--src/libcore/tests/pattern.rs264
2 files changed, 266 insertions, 0 deletions
diff --git a/src/libcore/tests/lib.rs b/src/libcore/tests/lib.rs
index 0e445cdac35..c4b85b82981 100644
--- a/src/libcore/tests/lib.rs
+++ b/src/libcore/tests/lib.rs
@@ -28,6 +28,7 @@
 #![feature(iter_rfind)]
 #![feature(iter_rfold)]
 #![feature(nonzero)]
+#![feature(pattern)]
 #![feature(raw)]
 #![feature(refcell_replace_swap)]
 #![feature(sip_hash_13)]
@@ -61,6 +62,7 @@ mod nonzero;
 mod num;
 mod ops;
 mod option;
+mod pattern;
 mod ptr;
 mod result;
 mod slice;
diff --git a/src/libcore/tests/pattern.rs b/src/libcore/tests/pattern.rs
new file mode 100644
index 00000000000..d0fd15263b2
--- /dev/null
+++ b/src/libcore/tests/pattern.rs
@@ -0,0 +1,264 @@
+// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use std::str::pattern::*;
+
+// This macro makes it easier to write
+// tests that do a series of iterations
+macro_rules! search_asserts {
+    ($haystack:expr, $needle:expr, $testname:expr, [$($func:ident),*], $result:expr) => {
+        let mut searcher = $needle.into_searcher($haystack);
+        let arr = [$( Step::from(searcher.$func()) ),+];
+        assert_eq!(&arr[..], &$result, $testname);
+    }
+}
+
+/// Combined enum for the results of next() and next_match()/next_reject()
+#[derive(Debug, PartialEq, Eq)]
+enum Step {
+    // variant names purposely chosen to
+    // be the same length for easy alignment
+    Matches(usize, usize),
+    Rejects(usize, usize),
+    InRange(usize, usize),
+    Done
+}
+
+use self::Step::*;
+
+impl From<SearchStep> for Step {
+    fn from(x: SearchStep) -> Self {
+        match x {
+            SearchStep::Match(a, b) => Matches(a, b),
+            SearchStep::Reject(a, b) => Rejects(a, b),
+            SearchStep::Done => Done
+        }
+    }
+}
+
+impl From<Option<(usize, usize)>> for Step {
+    fn from(x: Option<(usize, usize)>) -> Self {
+        match x {
+            Some((a, b)) => InRange(a, b),
+            None => Done
+        }
+    }
+}
+
+// ignore-tidy-linelength
+
+// FIXME(Manishearth) these tests focus on single-character searching  (CharSearcher)
+// and on next()/next_match(), not next_reject(). This is because
+// the memchr changes make next_match() for single chars complex, but next_reject()
+// continues to use next() under the hood. We should add more test cases for all
+// of these, as well as tests for StrSearcher and higher level tests for str::find() (etc)
+
+#[test]
+fn test_simple_iteration() {
+    search_asserts! ("abcdeabcd", 'a', "forward iteration for ASCII string",
+        // a            b              c              d              e              a              b              c              d              EOF
+        [next,          next,          next,          next,          next,          next,          next,          next,          next,          next],
+        [Matches(0, 1), Rejects(1, 2), Rejects(2, 3), Rejects(3, 4), Rejects(4, 5), Matches(5, 6), Rejects(6, 7), Rejects(7, 8), Rejects(8, 9), Done]
+    );
+
+    search_asserts! ("abcdeabcd", 'a', "reverse iteration for ASCII string",
+        // d            c              b              a            e                d              c              b              a             EOF
+        [next_back,     next_back,     next_back,     next_back,     next_back,     next_back,     next_back,     next_back,     next_back,     next_back],
+        [Rejects(8, 9), Rejects(7, 8), Rejects(6, 7), Matches(5, 6), Rejects(4, 5), Rejects(3, 4), Rejects(2, 3), Rejects(1, 2), Matches(0, 1), Done]
+    );
+
+    search_asserts! ("我爱我的猫", '我', "forward iteration for Chinese string",
+        // 我           愛             我             的              貓               EOF
+        [next,          next,          next,          next,           next,            next],
+        [Matches(0, 3), Rejects(3, 6), Matches(6, 9), Rejects(9, 12), Rejects(12, 15), Done]
+    );
+
+    search_asserts! ("我的猫说meow", 'm', "forward iteration for mixed string",
+        // 我           的             猫             说              m                e                o                w                EOF
+        [next,          next,          next,          next,           next,            next,            next,            next,            next],
+        [Rejects(0, 3), Rejects(3, 6), Rejects(6, 9), Rejects(9, 12), Matches(12, 13), Rejects(13, 14), Rejects(14, 15), Rejects(15, 16), Done]
+    );
+
+    search_asserts! ("我的猫说meow", '猫', "reverse iteration for mixed string",
+        // w             o                 e                m                说              猫             的             我             EOF
+        [next_back,       next_back,       next_back,       next_back,       next_back,      next_back,      next_back,    next_back,     next_back],
+        [Rejects(15, 16), Rejects(14, 15), Rejects(13, 14), Rejects(12, 13), Rejects(9, 12), Matches(6, 9), Rejects(3, 6), Rejects(0, 3), Done]
+    );
+}
+
+#[test]
+fn test_simple_search() {
+    search_asserts!("abcdeabcdeabcde", 'a', "next_match for ASCII string",
+        [next_match,    next_match,    next_match,      next_match],
+        [InRange(0, 1), InRange(5, 6), InRange(10, 11), Done]
+    );
+
+    search_asserts!("abcdeabcdeabcde", 'a', "next_match_back for ASCII string",
+        [next_match_back, next_match_back, next_match_back, next_match_back],
+        [InRange(10, 11), InRange(5, 6),   InRange(0, 1),   Done]
+    );
+
+    search_asserts!("abcdeab", 'a', "next_reject for ASCII string",
+        [next_reject,   next_reject,   next_match,    next_reject,   next_reject],
+        [InRange(1, 2), InRange(2, 3), InRange(5, 6), InRange(6, 7), Done]
+    );
+
+    search_asserts!("abcdeabcdeabcde", 'a', "next_reject_back for ASCII string",
+        [next_reject_back, next_reject_back, next_match_back, next_reject_back, next_reject_back, next_reject_back],
+        [InRange(14, 15),  InRange(13, 14),  InRange(10, 11), InRange(9, 10),   InRange(8, 9),    InRange(7, 8)]
+    );
+}
+
+// Á, 각, ก, 😀 all end in 0x81
+// 🁀, ᘀ do not end in 0x81 but contain the byte
+// ꁁ has 0x81 as its second and third bytes.
+//
+// The memchr-using implementation of next_match
+// and next_match_back temporarily violate
+// the property that the search is always on a unicode boundary,
+// which is fine as long as this never reaches next() or next_back().
+// So we test if next() is correct after each next_match() as well.
+const STRESS: &str = "Áa🁀bÁꁁfg😁각กᘀ각aÁ각ꁁก😁a";
+
+#[test]
+fn test_stress_indices() {
+    // this isn't really a test, more of documentation on the indices of each character in the stresstest string
+
+    search_asserts!(STRESS, 'x', "Indices of characters in stress test",
+        [next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next],
+        [Rejects(0, 2), // Á
+         Rejects(2, 3), // a
+         Rejects(3, 7), // 🁀
+         Rejects(7, 8), // b
+         Rejects(8, 10), // Á
+         Rejects(10, 13), // ꁁ
+         Rejects(13, 14), // f
+         Rejects(14, 15), // g
+         Rejects(15, 19), // 😀
+         Rejects(19, 22), // 각
+         Rejects(22, 25), // ก
+         Rejects(25, 28), // ᘀ
+         Rejects(28, 31), // 각
+         Rejects(31, 32), // a
+         Rejects(32, 34), // Á
+         Rejects(34, 37), // 각
+         Rejects(37, 40), // ꁁ
+         Rejects(40, 43), // ก
+         Rejects(43, 47), // 😀
+         Rejects(47, 48), // a
+         Done]
+    );
+}
+
+#[test]
+fn test_forward_search_shared_bytes() {
+    search_asserts!(STRESS, 'Á', "Forward search for two-byte Latin character",
+        [next_match,    next_match,     next_match,      next_match],
+        [InRange(0, 2), InRange(8, 10), InRange(32, 34), Done]
+    );
+
+    search_asserts!(STRESS, 'Á', "Forward search for two-byte Latin character; check if next() still works",
+        [next_match,    next,          next_match,     next,             next_match,     next,            next_match],
+        [InRange(0, 2), Rejects(2, 3), InRange(8, 10), Rejects(10, 13), InRange(32, 34), Rejects(34, 37), Done]
+    );
+
+    search_asserts!(STRESS, '각', "Forward search for three-byte Hangul character",
+        [next_match,      next,            next_match,      next_match,      next_match],
+        [InRange(19, 22), Rejects(22, 25), InRange(28, 31), InRange(34, 37), Done]
+    );
+
+    search_asserts!(STRESS, '각', "Forward search for three-byte Hangul character; check if next() still works",
+        [next_match,      next,            next_match,      next,            next_match,      next,            next_match],
+        [InRange(19, 22), Rejects(22, 25), InRange(28, 31), Rejects(31, 32), InRange(34, 37), Rejects(37, 40), Done]
+    );
+
+    search_asserts!(STRESS, 'ก', "Forward search for three-byte Thai character",
+        [next_match,      next,            next_match,      next,            next_match],
+        [InRange(22, 25), Rejects(25, 28), InRange(40, 43), Rejects(43, 47), Done]
+    );
+
+    search_asserts!(STRESS, 'ก', "Forward search for three-byte Thai character; check if next() still works",
+        [next_match,      next,            next_match,      next,            next_match],
+        [InRange(22, 25), Rejects(25, 28), InRange(40, 43), Rejects(43, 47), Done]
+    );
+
+    search_asserts!(STRESS, '😁', "Forward search for four-byte emoji",
+        [next_match,      next,            next_match,      next,            next_match],
+        [InRange(15, 19), Rejects(19, 22), InRange(43, 47), Rejects(47, 48), Done]
+    );
+
+    search_asserts!(STRESS, '😁', "Forward search for four-byte emoji; check if next() still works",
+        [next_match,      next,            next_match,      next,            next_match],
+        [InRange(15, 19), Rejects(19, 22), InRange(43, 47), Rejects(47, 48), Done]
+    );
+
+    search_asserts!(STRESS, 'ꁁ', "Forward search for three-byte Yi character with repeated bytes",
+        [next_match,      next,            next_match,      next,            next_match],
+        [InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(40, 43), Done]
+    );
+
+    search_asserts!(STRESS, 'ꁁ', "Forward search for three-byte Yi character with repeated bytes; check if next() still works",
+        [next_match,      next,            next_match,      next,            next_match],
+        [InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(40, 43), Done]
+    );
+}
+
+#[test]
+fn test_reverse_search_shared_bytes() {
+    search_asserts!(STRESS, 'Á', "Reverse search for two-byte Latin character",
+        [next_match_back, next_match_back, next_match_back, next_match_back],
+        [InRange(32, 34), InRange(8, 10),  InRange(0, 2),   Done]
+    );
+
+    search_asserts!(STRESS, 'Á', "Reverse search for two-byte Latin character; check if next_back() still works",
+        [next_match_back, next_back,       next_match_back, next_back,     next_match_back, next_back],
+        [InRange(32, 34), Rejects(31, 32), InRange(8, 10),  Rejects(7, 8), InRange(0, 2),   Done]
+    );
+
+    search_asserts!(STRESS, '각', "Reverse search for three-byte Hangul character",
+        [next_match_back, next_back,        next_match_back, next_match_back, next_match_back],
+        [InRange(34, 37), Rejects(32, 34), InRange(28, 31),  InRange(19, 22), Done]
+    );
+
+    search_asserts!(STRESS, '각', "Reverse search for three-byte Hangul character; check if next_back() still works",
+        [next_match_back, next_back,       next_match_back, next_back,       next_match_back, next_back,       next_match_back],
+        [InRange(34, 37), Rejects(32, 34), InRange(28, 31), Rejects(25, 28), InRange(19, 22), Rejects(15, 19), Done]
+    );
+
+    search_asserts!(STRESS, 'ก', "Reverse search for three-byte Thai character",
+        [next_match_back, next_back,       next_match_back, next_back,       next_match_back],
+        [InRange(40, 43), Rejects(37, 40), InRange(22, 25), Rejects(19, 22), Done]
+    );
+
+    search_asserts!(STRESS, 'ก', "Reverse search for three-byte Thai character; check if next_back() still works",
+        [next_match_back, next_back,       next_match_back, next_back,       next_match_back],
+        [InRange(40, 43), Rejects(37, 40), InRange(22, 25), Rejects(19, 22), Done]
+    );
+
+    search_asserts!(STRESS, '😁', "Reverse search for four-byte emoji",
+        [next_match_back, next_back,       next_match_back, next_back,       next_match_back],
+        [InRange(43, 47), Rejects(40, 43), InRange(15, 19), Rejects(14, 15), Done]
+    );
+
+    search_asserts!(STRESS, '😁', "Reverse search for four-byte emoji; check if next_back() still works",
+        [next_match_back, next_back,       next_match_back, next_back,       next_match_back],
+        [InRange(43, 47), Rejects(40, 43), InRange(15, 19), Rejects(14, 15), Done]
+    );
+
+    search_asserts!(STRESS, 'ꁁ', "Reverse search for three-byte Yi character with repeated bytes",
+        [next_match_back, next_back,       next_match_back, next_back,      next_match_back],
+        [InRange(37, 40), Rejects(34, 37), InRange(10, 13), Rejects(8, 10), Done]
+    );
+
+    search_asserts!(STRESS, 'ꁁ', "Reverse search for three-byte Yi character with repeated bytes; check if next_back() still works",
+        [next_match_back, next_back,       next_match_back, next_back,      next_match_back],
+        [InRange(37, 40), Rejects(34, 37), InRange(10, 13), Rejects(8, 10), Done]
+    );
+}