about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMarvin Löbel <loebel.marvin@gmail.com>2015-02-17 23:47:08 +0100
committerMarvin Löbel <loebel.marvin@gmail.com>2015-02-20 00:58:07 +0100
commita641996796f0ab11021671c0ce70a3c975bb4e37 (patch)
tree5724986b305b79e56e25603688f8f62a37e5838d
parentc1de0a0f9ea9863407363ce31bb698e9988215ee (diff)
downloadrust-a641996796f0ab11021671c0ce70a3c975bb4e37.tar.gz
rust-a641996796f0ab11021671c0ce70a3c975bb4e37.zip
Fix tidy and rebase fallout
Added a few bugfixes and additional testcases
-rw-r--r--src/libcollections/str.rs33
-rw-r--r--src/libcore/str/mod.rs15
-rw-r--r--src/libcore/str/pattern.rs27
-rw-r--r--src/libcoretest/str.rs146
4 files changed, 171 insertions, 50 deletions
diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs
index d355421039e..e86cf462cab 100644
--- a/src/libcollections/str.rs
+++ b/src/libcollections/str.rs
@@ -2894,22 +2894,6 @@ mod bench {
     }
 
     #[bench]
-    fn split_unicode_not_ascii(b: &mut Bencher) {
-        struct NotAscii(char);
-        impl CharEq for NotAscii {
-            fn matches(&mut self, c: char) -> bool {
-                let NotAscii(cc) = *self;
-                cc == c
-            }
-            fn only_ascii(&self) -> bool { false }
-        }
-        let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
-
-        b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
-    }
-
-
-    #[bench]
     fn split_ascii(b: &mut Bencher) {
         let s = "Mary had a little lamb, Little lamb, little-lamb.";
         let len = s.split(' ').count();
@@ -2918,23 +2902,6 @@ mod bench {
     }
 
     #[bench]
-    fn split_not_ascii(b: &mut Bencher) {
-        struct NotAscii(char);
-        impl CharEq for NotAscii {
-            #[inline]
-            fn matches(&mut self, c: char) -> bool {
-                let NotAscii(cc) = *self;
-                cc == c
-            }
-            fn only_ascii(&self) -> bool { false }
-        }
-        let s = "Mary had a little lamb, Little lamb, little-lamb.";
-        let len = s.split(' ').count();
-
-        b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
-    }
-
-    #[bench]
     fn split_extern_fn(b: &mut Bencher) {
         let s = "Mary had a little lamb, Little lamb, little-lamb.";
         let len = s.split(' ').count();
diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs
index a9308302033..820ad4d8586 100644
--- a/src/libcore/str/mod.rs
+++ b/src/libcore/str/mod.rs
@@ -156,7 +156,6 @@ impl FromStr for bool {
 
 /// An error returned when parsing a `bool` from a string fails.
 #[derive(Debug, Clone, PartialEq)]
-#[allow(missing_copy_implementations)]
 #[stable(feature = "rust1", since = "1.0.0")]
 pub struct ParseBoolError { _priv: () }
 
@@ -235,7 +234,7 @@ pub unsafe fn from_utf8_unchecked<'a>(v: &'a [u8]) -> &'a str {
 pub unsafe fn from_c_str(s: *const i8) -> &'static str {
     let s = s as *const u8;
     let mut len = 0;
-    while *s.offset(len as int) != 0 {
+    while *s.offset(len as isize) != 0 {
         len += 1;
     }
     let v: &'static [u8] = ::mem::transmute(Slice { data: s, len: len });
@@ -258,7 +257,7 @@ impl CharEq for char {
     fn matches(&mut self, c: char) -> bool { *self == c }
 
     #[inline]
-    fn only_ascii(&self) -> bool { (*self as usize) < 128 }
+    fn only_ascii(&self) -> bool { (*self as u32) < 128 }
 }
 
 impl<F> CharEq for F where F: FnMut(char) -> bool {
@@ -764,7 +763,8 @@ impl TwoWaySearcher {
     // How far we can jump when we encounter a mismatch is all based on the fact
     // that (u, v) is a critical factorization for the needle.
     #[inline]
-    fn next(&mut self, haystack: &[u8], needle: &[u8], long_period: bool) -> Option<(usize, usize)> {
+    fn next(&mut self, haystack: &[u8], needle: &[u8], long_period: bool)
+    -> Option<(usize, usize)> {
         'search: loop {
             // Check that we have room to search in
             if self.position + needle.len() > haystack.len() {
@@ -955,6 +955,7 @@ Section: Comparing strings
 /// to compare &[u8] byte slices that are not necessarily valid UTF-8.
 #[inline]
 fn eq_slice_(a: &str, b: &str) -> bool {
+    // NOTE: In theory n should be libc::size_t and not usize, but libc is not available here
     #[allow(improper_ctypes)]
     extern { fn memcmp(s1: *const i8, s2: *const i8, n: usize) -> i32; }
     a.len() == b.len() && unsafe {
@@ -1489,7 +1490,7 @@ impl StrExt for str {
     fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
     where P::Searcher: DoubleEndedSearcher<'a> {
         let mut i = 0;
-        let mut j = self.len();
+        let mut j = 0;
         let mut matcher = pat.into_searcher(self);
         if let Some((a, b)) = matcher.next_reject() {
             i = a;
@@ -1507,7 +1508,7 @@ impl StrExt for str {
 
     #[inline]
     fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
-        let mut i = 0;
+        let mut i = self.len();
         let mut matcher = pat.into_searcher(self);
         if let Some((a, _)) = matcher.next_reject() {
             i = a;
@@ -1521,7 +1522,7 @@ impl StrExt for str {
     #[inline]
     fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
     where P::Searcher: ReverseSearcher<'a> {
-        let mut j = self.len();
+        let mut j = 0;
         let mut matcher = pat.into_searcher(self);
         if let Some((_, b)) = matcher.next_reject_back() {
             j = b;
diff --git a/src/libcore/str/pattern.rs b/src/libcore/str/pattern.rs
index 501fc27b376..9cd5510db37 100644
--- a/src/libcore/str/pattern.rs
+++ b/src/libcore/str/pattern.rs
@@ -58,6 +58,7 @@ pub trait Pattern<'a>: Sized {
 
 // Searcher
 
+#[derive(Copy, Clone, Eq, PartialEq, Debug)]
 pub enum SearchStep {
     Match(usize, usize),
     Reject(usize, usize),
@@ -190,7 +191,7 @@ impl<'a, C: CharEq> DoubleEndedSearcher<'a> for CharEqSearcher<'a, C> {}
 
 // Impl for &str
 
-// TODO: Optimize the naive implementation here
+// Todo: Optimize the naive implementation here
 
 #[derive(Clone)]
 pub struct StrSearcher<'a, 'b> {
@@ -235,13 +236,16 @@ unsafe impl<'a, 'b> Searcher<'a> for StrSearcher<'a, 'b>  {
         },
         |m: &mut StrSearcher| {
             // Forward step for nonempty needle
-            let possible_match = &m.haystack[m.start .. m.start + m.needle.len()];
+            // Compare if bytes are equal
+            let possible_match = &m.haystack.as_bytes()[m.start .. m.start + m.needle.len()];
             let current_start = m.start;
-            if possible_match == m.needle {
+            if possible_match == m.needle.as_bytes() {
                 m.start += m.needle.len();
                 SearchStep::Match(current_start, m.start)
             } else {
-                m.start += possible_match.chars().next().unwrap().len_utf8();
+                // Skip a char
+                let haystack_suffix = &m.haystack[m.start..];
+                m.start += haystack_suffix.chars().next().unwrap().len_utf8();
                 SearchStep::Reject(current_start, m.start)
             }
         })
@@ -262,13 +266,16 @@ unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b>  {
         },
         |m: &mut StrSearcher| {
             // Backward step for nonempty needle
-            let possible_match = &m.haystack[m.end - m.needle.len() .. m.end];
+            // Compare if bytes are equal
+            let possible_match = &m.haystack.as_bytes()[m.end - m.needle.len() .. m.end];
             let current_end = m.end;
-            if possible_match == m.needle {
+            if possible_match == m.needle.as_bytes() {
                 m.end -= m.needle.len();
                 SearchStep::Match(m.end, current_end)
             } else {
-                m.end -= possible_match.chars().rev().next().unwrap().len_utf8();
+                // Skip a char
+                let haystack_prefix = &m.haystack[..m.end];
+                m.end -= haystack_prefix.chars().rev().next().unwrap().len_utf8();
                 SearchStep::Reject(m.end, current_end)
             }
         })
@@ -290,6 +297,9 @@ where F: FnOnce(&mut StrSearcher) -> SearchStep,
     } else if m.start + m.needle.len() <= m.end {
         // Case for needle != ""
         g(&mut m)
+    } else if m.start < m.end {
+        m.done = true;
+        SearchStep::Reject(m.start, m.end)
     } else {
         m.done = true;
         SearchStep::Done
@@ -352,7 +362,8 @@ impl<'a, F> Pattern<'a> for F where F: FnMut(char) -> bool {
 
 use ops::Deref;
 
-impl<'a, 'b, P: 'b + ?Sized, T: Deref<Target = P> + ?Sized> Pattern<'a> for &'b T where &'b P: Pattern<'a> {
+impl<'a, 'b, P: 'b + ?Sized, T: Deref<Target = P> + ?Sized> Pattern<'a> for &'b T
+where &'b P: Pattern<'a> {
     type Searcher =   <&'b P as Pattern<'a>>::Searcher;
     associated_items!(<&'b P as Pattern<'a>>::Searcher,
                       s, (&**s));
diff --git a/src/libcoretest/str.rs b/src/libcoretest/str.rs
index acd8cc42c72..beb746d25b6 100644
--- a/src/libcoretest/str.rs
+++ b/src/libcoretest/str.rs
@@ -1,4 +1,4 @@
-// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
 // http://rust-lang.org/COPYRIGHT.
 //
@@ -139,8 +139,150 @@ fn test_utf16_code_units() {
                vec![0xE9, 0xD83D, 0xDCA9])
 }
 
+#[test]
+fn starts_with_in_unicode() {
+    assert!(!"├── Cargo.toml".starts_with("# "));
+}
 
-// rm x86_64-unknown-linux-gnu/stage1/test/coretesttest-x86_64-unknown-linux-gnu; env PLEASE_BENCH=1 make check-stage1-coretest TESTNAME=str::bench
+#[test]
+fn starts_short_long() {
+    assert!(!"".starts_with("##"));
+    assert!(!"##".starts_with("####"));
+    assert!("####".starts_with("##"));
+    assert!(!"##ä".starts_with("####"));
+    assert!("####ä".starts_with("##"));
+    assert!(!"##".starts_with("####ä"));
+    assert!("##ä##".starts_with("##ä"));
+
+    assert!("".starts_with(""));
+    assert!("ä".starts_with(""));
+    assert!("#ä".starts_with(""));
+    assert!("##ä".starts_with(""));
+    assert!("ä###".starts_with(""));
+    assert!("#ä##".starts_with(""));
+    assert!("##ä#".starts_with(""));
+}
+
+#[test]
+fn contains_weird_cases() {
+    assert!("* \t".contains_char(' '));
+    assert!(!"* \t".contains_char('?'));
+    assert!(!"* \t".contains_char('\u{1F4A9}'));
+}
+
+#[test]
+fn trim_ws() {
+    assert_eq!(" \t  a \t  ".trim_left_matches(|c: char| c.is_whitespace()),
+                    "a \t  ");
+    assert_eq!(" \t  a \t  ".trim_right_matches(|c: char| c.is_whitespace()),
+               " \t  a");
+    assert_eq!(" \t  a \t  ".trim_matches(|c: char| c.is_whitespace()),
+                    "a");
+    assert_eq!(" \t   \t  ".trim_left_matches(|c: char| c.is_whitespace()),
+                         "");
+    assert_eq!(" \t   \t  ".trim_right_matches(|c: char| c.is_whitespace()),
+               "");
+    assert_eq!(" \t   \t  ".trim_matches(|c: char| c.is_whitespace()),
+               "");
+}
+
+mod pattern {
+    use std::str::Pattern;
+    use std::str::{Searcher, ReverseSearcher, DoubleEndedSearcher};
+    use std::str::SearchStep::{self, Match, Reject, Done};
+
+    macro_rules! make_test {
+        ($name:ident, $p:expr, $h:expr, [$($e:expr,)*]) => {
+            mod $name {
+                use std::str::Pattern;
+                use std::str::{Searcher, ReverseSearcher, DoubleEndedSearcher};
+                use std::str::SearchStep::{self, Match, Reject, Done};
+                use super::{cmp_search_to_vec};
+                #[test]
+                fn fwd() {
+                    cmp_search_to_vec(false, $p, $h, vec![$($e),*]);
+                }
+                #[test]
+                fn bwd() {
+                    cmp_search_to_vec(true, $p, $h, vec![$($e),*]);
+                }
+            }
+        }
+    }
+
+    fn cmp_search_to_vec<'a, P: Pattern<'a>>(rev: bool, pat: P, haystack: &'a str,
+                                             right: Vec<SearchStep>)
+    where P::Searcher: ReverseSearcher<'a>
+    {
+        let mut searcher = pat.into_searcher(haystack);
+        let mut v = vec![];
+        loop {
+            match if !rev {searcher.next()} else {searcher.next_back()} {
+                Match(a, b) => v.push(Match(a, b)),
+                Reject(a, b) => v.push(Reject(a, b)),
+                Done => break,
+            }
+        }
+        if rev {
+            v.reverse();
+        }
+        assert_eq!(v, right);
+    }
+
+    make_test!(str_searcher_ascii_haystack, "bb", "abbcbbd", [
+        Reject(0, 1),
+        Match (1, 3),
+        Reject(3, 4),
+        Match (4, 6),
+        Reject(6, 7),
+    ]);
+    make_test!(str_searcher_empty_needle_ascii_haystack, "", "abbcbbd", [
+        Match(0, 0),
+        Match(1, 1),
+        Match(2, 2),
+        Match(3, 3),
+        Match(4, 4),
+        Match(5, 5),
+        Match(6, 6),
+        Match(7, 7),
+    ]);
+    make_test!(str_searcher_mulibyte_haystack, " ", "├──", [
+        Reject(0, 3),
+        Reject(3, 6),
+        Reject(6, 9),
+    ]);
+    make_test!(str_searcher_empty_needle_mulibyte_haystack, "", "├──", [
+        Match(0, 0),
+        Match(3, 3),
+        Match(6, 6),
+        Match(9, 9),
+    ]);
+    make_test!(str_searcher_empty_needle_empty_haystack, "", "", [
+        Match(0, 0),
+    ]);
+    make_test!(str_searcher_nonempty_needle_empty_haystack, "├", "", [
+    ]);
+    make_test!(char_searcher_ascii_haystack, 'b', "abbcbbd", [
+        Reject(0, 1),
+        Match (1, 2),
+        Match (2, 3),
+        Reject(3, 4),
+        Match (4, 5),
+        Match (5, 6),
+        Reject(6, 7),
+    ]);
+    make_test!(char_searcher_mulibyte_haystack, ' ', "├──", [
+        Reject(0, 3),
+        Reject(3, 6),
+        Reject(6, 9),
+    ]);
+    make_test!(char_searcher_short_haystack, '\u{1F4A9}', "* \t", [
+        Reject(0, 1),
+        Reject(1, 2),
+        Reject(2, 3),
+    ]);
+
+}
 
 mod bench {
     macro_rules! make_test_inner {