about summary refs log tree commit diff
diff options
context:
space:
mode:
author Manish Goregaokar <manishsmail@gmail.com> 2017-12-18 03:48:07 -0800
committer Manish Goregaokar <manishsmail@gmail.com> 2017-12-18 10:18:01 -0800
commit 9b92a4419d6a76a9de6d56adb3084d97e3e31d20 (patch)
tree 12140f58b03c45f467d13606d632ba019613479a
parent bc5535557662fb7851d80ff1538b5518af921571 (diff)
download rust-9b92a4419d6a76a9de6d56adb3084d97e3e31d20.tar.gz
rust-9b92a4419d6a76a9de6d56adb3084d97e3e31d20.zip
Add stresstests for shared bytes for pattern API
-rw-r--r-- src/libcore/tests/pattern.rs 154
1 file changed, 153 insertions, 1 deletion
diff --git a/src/libcore/tests/pattern.rs b/src/libcore/tests/pattern.rs
index bb0a618f6b8..7fe274a79ed 100644
--- a/src/libcore/tests/pattern.rs
+++ b/src/libcore/tests/pattern.rs
@@ -21,7 +21,7 @@ enum Step {
     Done
 }
 
-use Step::*;
+use self::Step::*;
 
 impl From<SearchStep> for Step {
     fn from(x: SearchStep) -> Self {
@@ -42,6 +42,12 @@ impl From<Option<(usize, usize)>> for Step {
     }
 }
 
+// XXXManishearth: these tests focus on single-character searching (CharSearcher)
+// and on next()/next_match(), not next_reject(). This is because
+// the memchr changes make next_match() for single chars complex, but next_reject()
+// continues to use next() under the hood. We should add more test cases for all
+// of these, as well as tests for StrSearcher and higher-level tests for str::find() (etc.)
+
 #[test]
 fn test_simple_iteration() {
     search_asserts! ("abcdeabcd", 'a', "forward iteration for ASCII string",
@@ -98,3 +104,149 @@ fn test_simple_search() {
     );
 }
 
+// Á, 각, ก, 😁 all end in 0x81
+// 🁀, ᘀ do not end in 0x81 but contain the byte
+// ꁁ has 0x81 as its second and third bytes.
+//
+// The memchr-using implementations of next_match
+// and next_match_back temporarily violate
+// the property that the search is always on a Unicode boundary,
+// which is fine as long as this never reaches next() or next_back().
+// So we test if next() is correct after each next_match() as well.
+const STRESS: &str = "Áa🁀bÁꁁfg😁각กᘀ각aÁ각ꁁก😁a";
+
+#[test]
+fn test_stress_indices() {
+    // This isn't really a test; it documents the byte indices of each character in the stress-test string.
+
+    search_asserts!(STRESS, 'x', "Indices of characters in stress test",
+        [next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next, next],
+        [Rejects(0, 2), // Á
+         Rejects(2, 3), // a
+         Rejects(3, 7), // 🁀
+         Rejects(7, 8), // b
+         Rejects(8, 10), // Á
+         Rejects(10, 13), // ꁁ
+         Rejects(13, 14), // f
+         Rejects(14, 15), // g
+         Rejects(15, 19), // 😁
+         Rejects(19, 22), // 각
+         Rejects(22, 25), // ก
+         Rejects(25, 28), // ᘀ
+         Rejects(28, 31), // 각
+         Rejects(31, 32), // a
+         Rejects(32, 34), // Á
+         Rejects(34, 37), // 각
+         Rejects(37, 40), // ꁁ
+         Rejects(40, 43), // ก
+         Rejects(43, 47), // 😁
+         Rejects(47, 48), // a
+         Done]
+    );
+}
+
+#[test]
+fn test_forward_search_shared_bytes() {
+    search_asserts!(STRESS, 'Á', "Forward search for two-byte Latin character",
+        [next_match,    next_match,     next_match,      next_match],
+        [InRange(0, 2), InRange(8, 10), InRange(32, 34), Done]
+    );
+
+    search_asserts!(STRESS, 'Á', "Forward search for two-byte Latin character; check if next() still works",
+        [next_match,    next,          next_match,     next,             next_match,     next,            next_match],
+        [InRange(0, 2), Rejects(2, 3), InRange(8, 10), Rejects(10, 13), InRange(32, 34), Rejects(34, 37), Done]
+    );
+
+    search_asserts!(STRESS, '각', "Forward search for three-byte Hangul character",
+        [next_match,      next,            next_match,      next_match,      next_match],
+        [InRange(19, 22), Rejects(22, 25), InRange(28, 31), InRange(34, 37), Done]
+    );
+
+    search_asserts!(STRESS, '각', "Forward search for three-byte Hangul character; check if next() still works",
+        [next_match,      next,            next_match,      next,            next_match,      next,            next_match],
+        [InRange(19, 22), Rejects(22, 25), InRange(28, 31), Rejects(31, 32), InRange(34, 37), Rejects(37, 40), Done]
+    );
+
+    search_asserts!(STRESS, 'ก', "Forward search for three-byte Thai character",
+        [next_match,      next,            next_match,      next,            next_match],
+        [InRange(22, 25), Rejects(25, 28), InRange(40, 43), Rejects(43, 47), Done]
+    );
+
+    search_asserts!(STRESS, 'ก', "Forward search for three-byte Thai character; check if next() still works",
+        [next_match,      next,            next_match,      next,            next_match],
+        [InRange(22, 25), Rejects(25, 28), InRange(40, 43), Rejects(43, 47), Done]
+    );
+
+    search_asserts!(STRESS, '😁', "Forward search for four-byte emoji",
+        [next_match,      next,            next_match,      next,            next_match],
+        [InRange(15, 19), Rejects(19, 22), InRange(43, 47), Rejects(47, 48), Done]
+    );
+
+    search_asserts!(STRESS, '😁', "Forward search for four-byte emoji; check if next() still works",
+        [next_match,      next,            next_match,      next,            next_match],
+        [InRange(15, 19), Rejects(19, 22), InRange(43, 47), Rejects(47, 48), Done]
+    );
+
+    search_asserts!(STRESS, 'ꁁ', "Forward search for three-byte Yi character with repeated bytes",
+        [next_match,      next,            next_match,      next,            next_match],
+        [InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(40, 43), Done]
+    );
+
+    search_asserts!(STRESS, 'ꁁ', "Forward search for three-byte Yi character with repeated bytes; check if next() still works",
+        [next_match,      next,            next_match,      next,            next_match],
+        [InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(40, 43), Done]
+    );
+}
+
+#[test]
+fn test_reverse_search_shared_bytes() {
+    search_asserts!(STRESS, 'Á', "Reverse search for two-byte Latin character",
+        [next_match_back, next_match_back, next_match_back, next_match_back],
+        [InRange(32, 34), InRange(8, 10),  InRange(0, 2),   Done]
+    );
+
+    search_asserts!(STRESS, 'Á', "Reverse search for two-byte Latin character; check if next_back() still works",
+        [next_match_back, next_back,       next_match_back, next_back,     next_match_back, next_back],
+        [InRange(32, 34), Rejects(31, 32), InRange(8, 10),  Rejects(7, 8), InRange(0, 2),   Done]
+    );
+
+    search_asserts!(STRESS, '각', "Reverse search for three-byte Hangul character",
+        [next_match_back, next_back,        next_match_back, next_match_back, next_match_back],
+        [InRange(34, 37), Rejects(32, 34), InRange(28, 31),  InRange(19, 22), Done]
+    );
+
+    search_asserts!(STRESS, '각', "Reverse search for three-byte Hangul character; check if next_back() still works",
+        [next_match_back, next_back,       next_match_back, next_back,       next_match_back, next_back,       next_match_back],
+        [InRange(34, 37), Rejects(32, 34), InRange(28, 31), Rejects(25, 28), InRange(19, 22), Rejects(15, 19), Done]
+    );
+
+    search_asserts!(STRESS, 'ก', "Reverse search for three-byte Thai character",
+        [next_match_back, next_back,       next_match_back, next_back,       next_match_back],
+        [InRange(40, 43), Rejects(37, 40), InRange(22, 25), Rejects(19, 22), Done]
+    );
+
+    search_asserts!(STRESS, 'ก', "Reverse search for three-byte Thai character; check if next_back() still works",
+        [next_match_back, next_back,       next_match_back, next_back,       next_match_back],
+        [InRange(40, 43), Rejects(37, 40), InRange(22, 25), Rejects(19, 22), Done]
+    );
+
+    search_asserts!(STRESS, '😁', "Reverse search for four-byte emoji",
+        [next_match_back, next_back,       next_match_back, next_back,       next_match_back],
+        [InRange(43, 47), Rejects(40, 43), InRange(15, 19), Rejects(14, 15), Done]
+    );
+
+    search_asserts!(STRESS, '😁', "Reverse search for four-byte emoji; check if next_back() still works",
+        [next_match_back, next_back,       next_match_back, next_back,       next_match_back],
+        [InRange(43, 47), Rejects(40, 43), InRange(15, 19), Rejects(14, 15), Done]
+    );
+
+    search_asserts!(STRESS, 'ꁁ', "Reverse search for three-byte Yi character with repeated bytes",
+        [next_match_back, next_back,       next_match_back, next_back,      next_match_back],
+        [InRange(37, 40), Rejects(34, 37), InRange(10, 13), Rejects(8, 10), Done]
+    );
+
+    search_asserts!(STRESS, 'ꁁ', "Reverse search for three-byte Yi character with repeated bytes; check if next_back() still works",
+        [next_match_back, next_back,       next_match_back, next_back,      next_match_back],
+        [InRange(37, 40), Rejects(34, 37), InRange(10, 13), Rejects(8, 10), Done]
+    );
+}