about summary refs log tree commit diff
path: root/library/core/tests/pattern.rs
diff options
context:
space:
mode:
Diffstat (limited to 'library/core/tests/pattern.rs')
-rw-r--r--library/core/tests/pattern.rs503
1 files changed, 503 insertions, 0 deletions
diff --git a/library/core/tests/pattern.rs b/library/core/tests/pattern.rs
new file mode 100644
index 00000000000..d4bec996d89
--- /dev/null
+++ b/library/core/tests/pattern.rs
@@ -0,0 +1,503 @@
+use std::str::pattern::*;
+
+// Builds a searcher for `$needle` over `$haystack`, calls each listed method on it in order,
+// and asserts that the resulting `Step`s equal `$result` (`$testname` is the failure message).
+macro_rules! search_asserts {
+    ($haystack:expr, $needle:expr, $testname:expr, [$($func:ident),*], $result:expr) => {
+        let mut searcher = $needle.into_searcher($haystack);
+        let arr = [$( Step::from(searcher.$func()) ),*];
+        assert_eq!(&arr[..], &$result, $testname);
+    }
+}
+
+/// Combined result of `next()` (a `SearchStep`) and of `next_match()`/`next_reject()` (an `Option`)
+#[derive(Debug, PartialEq, Eq)]
+enum Step {
+    // variant names purposely chosen to
+    // be the same length for easy alignment
+    Matches(usize, usize), // from `SearchStep::Match`
+    Rejects(usize, usize), // from `SearchStep::Reject`
+    InRange(usize, usize), // from `Some((a, b))`
+    Done,                  // from `SearchStep::Done` or `None`
+}
+
+use self::Step::*;
+
+impl From<SearchStep> for Step { // maps each `SearchStep` variant onto its `Step` counterpart
+    fn from(step: SearchStep) -> Self {
+        match step {
+            SearchStep::Done => Done,
+            SearchStep::Reject(start, end) => Rejects(start, end),
+            SearchStep::Match(start, end) => Matches(start, end),
+        }
+    }
+}
+
+impl From<Option<(usize, usize)>> for Step { // `Some` range -> `InRange`, `None` -> `Done`
+    fn from(range: Option<(usize, usize)>) -> Self {
+        match range {
+            None => Done,
+            Some((start, end)) => InRange(start, end),
+        }
+    }
+}
+
+// FIXME(Manishearth) these tests focus on single-character searching (CharSearcher)
+// and on next()/next_match(), not next_reject(). This is because
+// the memchr changes make next_match() for single chars complex, but next_reject()
+// continues to use next() under the hood. We should add more test cases for all
+// of these, as well as tests for StrSearcher and higher-level tests for str::find() (etc.).
+
+#[test]
+fn test_simple_iteration() { // walks whole haystacks one char at a time with next()/next_back()
+    search_asserts!(
+        "abcdeabcd",
+        'a',
+        "forward iteration for ASCII string",
+        // a            b              c              d              e              a              b              c              d              EOF
+        [next, next, next, next, next, next, next, next, next, next],
+        [
+            Matches(0, 1),
+            Rejects(1, 2),
+            Rejects(2, 3),
+            Rejects(3, 4),
+            Rejects(4, 5),
+            Matches(5, 6),
+            Rejects(6, 7),
+            Rejects(7, 8),
+            Rejects(8, 9),
+            Done
+        ]
+    );
+
+    search_asserts!(
+        "abcdeabcd",
+        'a',
+        "reverse iteration for ASCII string",
+        // d            c              b              a            e                d              c              b              a             EOF
+        [
+            next_back, next_back, next_back, next_back, next_back, next_back, next_back, next_back,
+            next_back, next_back
+        ],
+        [
+            Rejects(8, 9),
+            Rejects(7, 8),
+            Rejects(6, 7),
+            Matches(5, 6),
+            Rejects(4, 5),
+            Rejects(3, 4),
+            Rejects(2, 3),
+            Rejects(1, 2),
+            Matches(0, 1),
+            Done
+        ]
+    );
+
+    search_asserts!(
+        "我爱我的猫",
+        '我',
+        "forward iteration for Chinese string",
+        // 我           爱             我             的              猫               EOF
+        [next, next, next, next, next, next],
+        [Matches(0, 3), Rejects(3, 6), Matches(6, 9), Rejects(9, 12), Rejects(12, 15), Done]
+    );
+
+    search_asserts!(
+        "我的猫说meow",
+        'm',
+        "forward iteration for mixed string",
+        // 我           的             猫             说              m                e                o                w                EOF
+        [next, next, next, next, next, next, next, next, next],
+        [
+            Rejects(0, 3),
+            Rejects(3, 6),
+            Rejects(6, 9),
+            Rejects(9, 12),
+            Matches(12, 13),
+            Rejects(13, 14),
+            Rejects(14, 15),
+            Rejects(15, 16),
+            Done
+        ]
+    );
+
+    search_asserts!(
+        "我的猫说meow",
+        '猫',
+        "reverse iteration for mixed string",
+        // w             o                 e                m                说              猫             的             我             EOF
+        [
+            next_back, next_back, next_back, next_back, next_back, next_back, next_back, next_back,
+            next_back
+        ],
+        [
+            Rejects(15, 16),
+            Rejects(14, 15),
+            Rejects(13, 14),
+            Rejects(12, 13),
+            Rejects(9, 12),
+            Matches(6, 9),
+            Rejects(3, 6),
+            Rejects(0, 3),
+            Done
+        ]
+    );
+}
+
+#[test]
+fn test_simple_search() { // exercises the next_match/next_reject family on ASCII haystacks
+    search_asserts!(
+        "abcdeabcdeabcde",
+        'a',
+        "next_match for ASCII string",
+        [next_match, next_match, next_match, next_match],
+        [InRange(0, 1), InRange(5, 6), InRange(10, 11), Done]
+    );
+
+    search_asserts!(
+        "abcdeabcdeabcde",
+        'a',
+        "next_match_back for ASCII string",
+        [next_match_back, next_match_back, next_match_back, next_match_back],
+        [InRange(10, 11), InRange(5, 6), InRange(0, 1), Done]
+    );
+
+    search_asserts!(
+        "abcdeab",
+        'a',
+        "next_reject for ASCII string", // the next_match in the middle skips the rejects at 3..5
+        [next_reject, next_reject, next_match, next_reject, next_reject],
+        [InRange(1, 2), InRange(2, 3), InRange(5, 6), InRange(6, 7), Done]
+    );
+
+    search_asserts!(
+        "abcdeabcdeabcde",
+        'a',
+        "next_reject_back for ASCII string", // sequence ends before the searcher reaches Done
+        [
+            next_reject_back,
+            next_reject_back,
+            next_match_back,
+            next_reject_back,
+            next_reject_back,
+            next_reject_back
+        ],
+        [
+            InRange(14, 15),
+            InRange(13, 14),
+            InRange(10, 11),
+            InRange(9, 10),
+            InRange(8, 9),
+            InRange(7, 8)
+        ]
+    );
+}
+
+// Á, 각, ก, 😁 all end in 0x81
+// 🁀, ᘀ do not end in 0x81 but contain the byte (NOTE(review): ᘀ looks like E1 98 80 - confirm)
+// ꁁ has 0x81 as its second and third bytes.
+//
+// The memchr-using implementation of next_match
+// and next_match_back temporarily violate
+// the property that the search is always on a unicode boundary,
+// which is fine as long as this never reaches next() or next_back().
+// So we test if next() is correct after each next_match() as well.
+const STRESS: &str = "Áa🁀bÁꁁfg😁각กᘀ각aÁ각ꁁก😁a";
+
+#[test]
+fn test_stress_indices() {
+    // this isn't really a test, more of documentation on the indices of each character in the stress test string
+
+    search_asserts!(
+        STRESS,
+        'x',
+        "Indices of characters in stress test",
+        [
+            next, next, next, next, next, next, next, next, next, next, next, next, next, next,
+            next, next, next, next, next, next, next
+        ],
+        [
+            Rejects(0, 2),   // Á
+            Rejects(2, 3),   // a
+            Rejects(3, 7),   // 🁀
+            Rejects(7, 8),   // b
+            Rejects(8, 10),  // Á
+            Rejects(10, 13), // ꁁ
+            Rejects(13, 14), // f
+            Rejects(14, 15), // g
+            Rejects(15, 19), // 😁
+            Rejects(19, 22), // 각
+            Rejects(22, 25), // ก
+            Rejects(25, 28), // ᘀ
+            Rejects(28, 31), // 각
+            Rejects(31, 32), // a
+            Rejects(32, 34), // Á
+            Rejects(34, 37), // 각
+            Rejects(37, 40), // ꁁ
+            Rejects(40, 43), // ก
+            Rejects(43, 47), // 😁
+            Rejects(47, 48), // a
+            Done
+        ]
+    );
+}
+
+#[test]
+fn test_forward_search_shared_bytes() { // needles whose bytes also occur inside other STRESS chars
+    search_asserts!(
+        STRESS,
+        'Á',
+        "Forward search for two-byte Latin character",
+        [next_match, next_match, next_match, next_match],
+        [InRange(0, 2), InRange(8, 10), InRange(32, 34), Done]
+    );
+
+    search_asserts!(
+        STRESS,
+        'Á',
+        "Forward search for two-byte Latin character; check if next() still works",
+        [next_match, next, next_match, next, next_match, next, next_match],
+        [
+            InRange(0, 2),
+            Rejects(2, 3),
+            InRange(8, 10),
+            Rejects(10, 13),
+            InRange(32, 34),
+            Rejects(34, 37),
+            Done
+        ]
+    );
+
+    search_asserts!(
+        STRESS,
+        '각',
+        "Forward search for three-byte Hangul character",
+        [next_match, next, next_match, next_match, next_match],
+        [InRange(19, 22), Rejects(22, 25), InRange(28, 31), InRange(34, 37), Done]
+    );
+
+    search_asserts!(
+        STRESS,
+        '각',
+        "Forward search for three-byte Hangul character; check if next() still works",
+        [next_match, next, next_match, next, next_match, next, next_match],
+        [
+            InRange(19, 22),
+            Rejects(22, 25),
+            InRange(28, 31),
+            Rejects(31, 32),
+            InRange(34, 37),
+            Rejects(37, 40),
+            Done
+        ]
+    );
+
+    search_asserts!(
+        STRESS,
+        'ก',
+        "Forward search for three-byte Thai character",
+        [next_match, next_match, next_match], // next_match() only; interleaving is tested next
+        [InRange(22, 25), InRange(40, 43), Done]
+    );
+
+    search_asserts!(
+        STRESS,
+        'ก',
+        "Forward search for three-byte Thai character; check if next() still works",
+        [next_match, next, next_match, next, next_match],
+        [InRange(22, 25), Rejects(25, 28), InRange(40, 43), Rejects(43, 47), Done]
+    );
+
+    search_asserts!(
+        STRESS,
+        '😁',
+        "Forward search for four-byte emoji",
+        [next_match, next_match, next_match], // next_match() only; interleaving is tested next
+        [InRange(15, 19), InRange(43, 47), Done]
+    );
+
+    search_asserts!(
+        STRESS,
+        '😁',
+        "Forward search for four-byte emoji; check if next() still works",
+        [next_match, next, next_match, next, next_match],
+        [InRange(15, 19), Rejects(19, 22), InRange(43, 47), Rejects(47, 48), Done]
+    );
+
+    search_asserts!(
+        STRESS,
+        'ꁁ',
+        "Forward search for three-byte Yi character with repeated bytes",
+        [next_match, next_match, next_match], // next_match() only; interleaving is tested next
+        [InRange(10, 13), InRange(37, 40), Done]
+    );
+
+    search_asserts!(
+        STRESS,
+        'ꁁ',
+        "Forward search for three-byte Yi character with repeated bytes; check if next() still works",
+        [next_match, next, next_match, next, next_match],
+        [InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(40, 43), Done]
+    );
+}
+
+#[test]
+fn test_reverse_search_shared_bytes() { // reverse counterpart of the forward shared-byte searches
+    search_asserts!(
+        STRESS,
+        'Á',
+        "Reverse search for two-byte Latin character",
+        [next_match_back, next_match_back, next_match_back, next_match_back],
+        [InRange(32, 34), InRange(8, 10), InRange(0, 2), Done]
+    );
+
+    search_asserts!(
+        STRESS,
+        'Á',
+        "Reverse search for two-byte Latin character; check if next_back() still works",
+        [next_match_back, next_back, next_match_back, next_back, next_match_back, next_back],
+        [InRange(32, 34), Rejects(31, 32), InRange(8, 10), Rejects(7, 8), InRange(0, 2), Done]
+    );
+
+    search_asserts!(
+        STRESS,
+        '각',
+        "Reverse search for three-byte Hangul character",
+        [next_match_back, next_back, next_match_back, next_match_back, next_match_back],
+        [InRange(34, 37), Rejects(32, 34), InRange(28, 31), InRange(19, 22), Done]
+    );
+
+    search_asserts!(
+        STRESS,
+        '각',
+        "Reverse search for three-byte Hangul character; check if next_back() still works",
+        [
+            next_match_back,
+            next_back,
+            next_match_back,
+            next_back,
+            next_match_back,
+            next_back,
+            next_match_back
+        ],
+        [
+            InRange(34, 37),
+            Rejects(32, 34),
+            InRange(28, 31),
+            Rejects(25, 28),
+            InRange(19, 22),
+            Rejects(15, 19),
+            Done
+        ]
+    );
+
+    search_asserts!(
+        STRESS,
+        'ก',
+        "Reverse search for three-byte Thai character",
+        [next_match_back, next_match_back, next_match_back], // no next_back(); that case is next
+        [InRange(40, 43), InRange(22, 25), Done]
+    );
+
+    search_asserts!(
+        STRESS,
+        'ก',
+        "Reverse search for three-byte Thai character; check if next_back() still works",
+        [next_match_back, next_back, next_match_back, next_back, next_match_back],
+        [InRange(40, 43), Rejects(37, 40), InRange(22, 25), Rejects(19, 22), Done]
+    );
+
+    search_asserts!(
+        STRESS,
+        '😁',
+        "Reverse search for four-byte emoji",
+        [next_match_back, next_match_back, next_match_back], // no next_back(); that case is next
+        [InRange(43, 47), InRange(15, 19), Done]
+    );
+
+    search_asserts!(
+        STRESS,
+        '😁',
+        "Reverse search for four-byte emoji; check if next_back() still works",
+        [next_match_back, next_back, next_match_back, next_back, next_match_back],
+        [InRange(43, 47), Rejects(40, 43), InRange(15, 19), Rejects(14, 15), Done]
+    );
+
+    search_asserts!(
+        STRESS,
+        'ꁁ',
+        "Reverse search for three-byte Yi character with repeated bytes",
+        [next_match_back, next_match_back, next_match_back], // no next_back(); that case is next
+        [InRange(37, 40), InRange(10, 13), Done]
+    );
+
+    search_asserts!(
+        STRESS,
+        'ꁁ',
+        "Reverse search for three-byte Yi character with repeated bytes; check if next_back() still works",
+        [next_match_back, next_back, next_match_back, next_back, next_match_back],
+        [InRange(37, 40), Rejects(34, 37), InRange(10, 13), Rejects(8, 10), Done]
+    );
+}
+
+#[test]
+fn double_ended_regression_test() {
+    // https://github.com/rust-lang/rust/issues/47175
+    // Ensures that double-ended searching converges (ends in Done) instead of repeating a match
+    search_asserts!(
+        "abcdeabcdeabcde",
+        'a',
+        "alternating double ended search",
+        [next_match, next_match_back, next_match, next_match_back],
+        [InRange(0, 1), InRange(10, 11), InRange(5, 6), Done]
+    );
+    search_asserts!(
+        "abcdeabcdeabcde",
+        'a',
+        "triple double ended search for a",
+        [next_match, next_match_back, next_match_back, next_match_back],
+        [InRange(0, 1), InRange(10, 11), InRange(5, 6), Done]
+    );
+    search_asserts!(
+        "abcdeabcdeabcde",
+        'd',
+        "triple double ended search for d",
+        [next_match, next_match_back, next_match_back, next_match_back],
+        [InRange(3, 4), InRange(13, 14), InRange(8, 9), Done]
+    );
+    search_asserts!(
+        STRESS,
+        'Á',
+        "Double ended search for two-byte Latin character",
+        [next_match, next_match_back, next_match, next_match_back],
+        [InRange(0, 2), InRange(32, 34), InRange(8, 10), Done]
+    );
+    search_asserts!(
+        STRESS,
+        '각',
+        "Reverse double ended search for three-byte Hangul character",
+        [next_match_back, next_back, next_match, next, next_match_back, next_match],
+        [InRange(34, 37), Rejects(32, 34), InRange(19, 22), Rejects(22, 25), InRange(28, 31), Done]
+    );
+    search_asserts!(
+        STRESS,
+        'ก',
+        "Double ended search for three-byte Thai character",
+        [next_match, next_back, next, next_match_back, next_match],
+        [InRange(22, 25), Rejects(47, 48), Rejects(25, 28), InRange(40, 43), Done]
+    );
+    search_asserts!(
+        STRESS,
+        '😁',
+        "Double ended search for four-byte emoji",
+        [next_match_back, next, next_match, next_back, next_match],
+        [InRange(43, 47), Rejects(0, 2), InRange(15, 19), Rejects(40, 43), Done]
+    );
+    search_asserts!(
+        STRESS,
+        'ꁁ',
+        "Double ended search for three-byte Yi character with repeated bytes",
+        [next_match, next, next_match_back, next_back, next_match],
+        [InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(34, 37), Done]
+    );
+}