about summary refs log tree commit diff
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2014-08-23 18:00:59 +0000
committerbors <bors@rust-lang.org>2014-08-23 18:00:59 +0000
commit03fd90be459650160a4edefbc78588a938db2f8c (patch)
treeaba5af2841c7862a894de3bff3e65ef7206336d6
parent3e3209ab4a7e256730d7c6733f2d800a5dbfdaef (diff)
parent9a43492f59bfc38ed819e361c3cf99aa7b972e15 (diff)
downloadrust-03fd90be459650160a4edefbc78588a938db2f8c.tar.gz
rust-03fd90be459650160a4edefbc78588a938db2f8c.zip
auto merge of #16612 : nham/rust/twoway_searcher_fix, r=alexcrichton
There is a check in TwoWaySearcher::new to determine whether the needle is periodic. This is needed because during searching when a match fails, we cannot advance the position by the entire length of the needle when it is periodic, but can only advance by the length of the period.

The reason "bananas".contains("nana") (and similar searches) were returning false was because the periodicity check was wrong.

Closes #16589

Also, thanks to @Gankro, who came up with many buggy examples.
-rw-r--r--src/libcore/str.rs14
-rw-r--r--src/libcoretest/str.rs20
2 files changed, 33 insertions, 1 deletions
diff --git a/src/libcore/str.rs b/src/libcore/str.rs
index 076eb8bbe6a..859a651d14c 100644
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -419,6 +419,8 @@ struct TwoWaySearcher {
     memory: uint
 }
 
+// This is the Two-Way search algorithm, which was introduced in the paper:
+// Crochemore, M., Perrin, D., 1991, Two-way string-matching, Journal of the ACM 38(3):651-675.
 impl TwoWaySearcher {
     fn new(needle: &[u8]) -> TwoWaySearcher {
         let (critPos1, period1) = TwoWaySearcher::maximal_suffix(needle, false);
@@ -437,7 +439,14 @@ impl TwoWaySearcher {
         let byteset = needle.iter()
                             .fold(0, |a, &b| (1 << ((b & 0x3f) as uint)) | a);
 
-        if needle.slice_to(critPos) == needle.slice_from(needle.len() - critPos) {
+
+        // The logic here (calculating critPos and period, the final if statement to see which
+        // period to use for the TwoWaySearcher) is essentially an implementation of the
+        // "small-period" function from the paper (p. 670)
+        //
+        // In the paper they check whether `needle.slice_to(critPos)` is a suffix of
+        // `needle.slice(critPos, critPos + period)`, which is precisely what this does
+        if needle.slice_to(critPos) == needle.slice(period, period + critPos) {
             TwoWaySearcher {
                 critPos: critPos,
                 period: period,
@@ -508,6 +517,9 @@ impl TwoWaySearcher {
         }
     }
 
+    // returns (i, p) where i is the "critical position", the starting index of
+    // of maximal suffix, and p is the period of the suffix
+    // see p. 668 of the paper
     #[inline]
     fn maximal_suffix(arr: &[u8], reversed: bool) -> (uint, uint) {
         let mut left = -1; // Corresponds to i in the paper
diff --git a/src/libcoretest/str.rs b/src/libcoretest/str.rs
index bac8d509b13..be2275dcd4a 100644
--- a/src/libcoretest/str.rs
+++ b/src/libcoretest/str.rs
@@ -8,7 +8,27 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
+fn check_contains_all_substrings(s: &str) {
+    assert!(s.contains(""));
+    for i in range(0, s.len()) {
+        for j in range(i+1, s.len() + 1) {
+            assert!(s.contains(s.slice(i, j)));
+        }
+    }
+}
+
 #[test]
 fn strslice_issue_16589() {
     assert!("bananas".contains("nana"));
+
+    // prior to the fix for #16589, x.contains("abcdabcd") returned false
+    // test all substrings for good measure
+    check_contains_all_substrings("012345678901234567890123456789bcdabcdabcd");
+}
+
+
+#[test]
+fn test_strslice_contains() {
+    let x = "There are moments, Jeeves, when one asks oneself, 'Do trousers matter?'";
+    check_contains_all_substrings(x);
 }