Added string pattern traits and basic implementantions

author: Marvin Löbel <loebel.marvin@gmail.com> 2014-12-30 21:54:17 +0100
committer: Marvin Löbel <loebel.marvin@gmail.com> 2015-02-20 00:32:59 +0100
commit: 54f0bead8158eaf948c93d1cae93b60978937417 (patch)
tree: f00cfcfa18db7183e05d3d441d0bcaf1489b79c0
parent: d68eb3d24843d2e269989563d45ceda920391fe0 (diff)
download: rust-54f0bead8158eaf948c93d1cae93b60978937417.tar.gz
rust-54f0bead8158eaf948c93d1cae93b60978937417.zip
3 files changed, 289 insertions, 126 deletions
diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs
index eec997b9f10..fb0c4c4f34f 100644
--- a/src/libcore/str/mod.rs
+++ b/src/libcore/str/mod.rs
@@ -36,12 +36,16 @@ use result::Result::{self, Ok, Err};
 use slice::{self, SliceExt};
 use usize;
 
+pub use self::pattern::{Pattern, Matcher, ReverseMatcher, DoubleEndedMatcher};
+
+mod pattern;
+
 macro_rules! delegate_iter {
     (exact $te:ty : $ti:ty) => {
         delegate_iter!{$te : $ti}
         impl<'a> ExactSizeIterator for $ti {
             #[inline]
-            fn len(&self) -> usize {
+            fn len(&self) -> uint {
                 self.0.len()
             }
         }
@@ -56,7 +60,7 @@ macro_rules! delegate_iter {
                 self.0.next()
             }
             #[inline]
-            fn size_hint(&self) -> (usize, Option<usize>) {
+            fn size_hint(&self) -> (uint, Option<uint>) {
                 self.0.size_hint()
             }
         }
@@ -78,7 +82,7 @@ macro_rules! delegate_iter {
                 self.0.next()
             }
             #[inline]
-            fn size_hint(&self) -> (usize, Option<usize>) {
+            fn size_hint(&self) -> (uint, Option<uint>) {
                 self.0.size_hint()
             }
         }
@@ -100,7 +104,7 @@ macro_rules! delegate_iter {
                 self.0.next()
             }
             #[inline]
-            fn size_hint(&self) -> (usize, Option<usize>) {
+            fn size_hint(&self) -> (uint, Option<uint>) {
                 self.0.size_hint()
             }
         }
@@ -149,6 +153,7 @@ impl FromStr for bool {
 
 /// An error returned when parsing a `bool` from a string fails.
 #[derive(Debug, Clone, PartialEq)]
+#[allow(missing_copy_implementations)]
 #[stable(feature = "rust1", since = "1.0.0")]
 pub struct ParseBoolError { _priv: () }
 
@@ -178,7 +183,7 @@ pub enum Utf8Error {
     /// The offset is guaranteed to be in bounds of the slice in question, and
     /// the byte at the specified offset was the first invalid byte in the
     /// sequence detected.
-    InvalidByte(usize),
+    InvalidByte(uint),
 
     /// The byte slice was invalid because more bytes were needed but no more
     /// bytes were available.
@@ -227,7 +232,7 @@ pub unsafe fn from_utf8_unchecked<'a>(v: &'a [u8]) -> &'a str {
 pub unsafe fn from_c_str(s: *const i8) -> &'static str {
     let s = s as *const u8;
     let mut len = 0;
-    while *s.offset(len as isize) != 0 {
+    while *s.offset(len as int) != 0 {
         len += 1;
     }
     let v: &'static [u8] = ::mem::transmute(Slice { data: s, len: len });
@@ -250,7 +255,7 @@ impl CharEq for char {
     fn matches(&mut self, c: char) -> bool { *self == c }
 
     #[inline]
-    fn only_ascii(&self) -> bool { (*self as u32) < 128 }
+    fn only_ascii(&self) -> bool { (*self as uint) < 128 }
 }
 
 impl<F> CharEq for F where F: FnMut(char) -> bool {
@@ -383,7 +388,7 @@ impl<'a> Iterator for Chars<'a> {
     }
 
     #[inline]
-    fn size_hint(&self) -> (usize, Option<usize>) {
+    fn size_hint(&self) -> (uint, Option<uint>) {
         let (len, _) = self.iter.size_hint();
         (len.saturating_add(3) / 4, Some(len))
     }
@@ -428,16 +433,16 @@ impl<'a> DoubleEndedIterator for Chars<'a> {
 #[derive(Clone)]
 #[stable(feature = "rust1", since = "1.0.0")]
 pub struct CharIndices<'a> {
-    front_offset: usize,
+    front_offset: uint,
     iter: Chars<'a>,
 }
 
 #[stable(feature = "rust1", since = "1.0.0")]
 impl<'a> Iterator for CharIndices<'a> {
-    type Item = (usize, char);
+    type Item = (uint, char);
 
     #[inline]
-    fn next(&mut self) -> Option<(usize, char)> {
+    fn next(&mut self) -> Option<(uint, char)> {
         let (pre_len, _) = self.iter.iter.size_hint();
         match self.iter.next() {
             None => None,
@@ -451,7 +456,7 @@ impl<'a> Iterator for CharIndices<'a> {
     }
 
     #[inline]
-    fn size_hint(&self) -> (usize, Option<usize>) {
+    fn size_hint(&self) -> (uint, Option<uint>) {
         self.iter.size_hint()
     }
 }
@@ -459,7 +464,7 @@ impl<'a> Iterator for CharIndices<'a> {
 #[stable(feature = "rust1", since = "1.0.0")]
 impl<'a> DoubleEndedIterator for CharIndices<'a> {
     #[inline]
-    fn next_back(&mut self) -> Option<(usize, char)> {
+    fn next_back(&mut self) -> Option<(uint, char)> {
         match self.iter.next_back() {
             None => None,
             Some(ch) => {
@@ -512,7 +517,7 @@ struct CharSplits<'a, Sep> {
 struct CharSplitsN<'a, Sep> {
     iter: CharSplits<'a, Sep>,
     /// The number of splits remaining
-    count: usize,
+    count: uint,
     invert: bool,
 }
 
@@ -636,7 +641,7 @@ impl<'a, Sep: CharEq> Iterator for CharSplitsN<'a, Sep> {
 /// within a larger string using naive search
 #[derive(Clone)]
 struct NaiveSearcher {
-    position: usize
+    position: uint
 }
 
 impl NaiveSearcher {
@@ -644,7 +649,7 @@ impl NaiveSearcher {
         NaiveSearcher { position: 0 }
     }
 
-    fn next(&mut self, haystack: &[u8], needle: &[u8]) -> Option<(usize, usize)> {
+    fn next(&mut self, haystack: &[u8], needle: &[u8]) -> Option<(uint, uint)> {
         while self.position + needle.len() <= haystack.len() {
             if &haystack[self.position .. self.position + needle.len()] == needle {
                 let match_pos = self.position;
@@ -663,13 +668,13 @@ impl NaiveSearcher {
 #[derive(Clone)]
 struct TwoWaySearcher {
     // constants
-    crit_pos: usize,
-    period: usize,
+    crit_pos: uint,
+    period: uint,
     byteset: u64,
 
     // variables
-    position: usize,
-    memory: usize
+    position: uint,
+    memory: uint
 }
 
 /*
@@ -756,7 +761,7 @@ impl TwoWaySearcher {
 
         // This isn't in the original algorithm, as far as I'm aware.
         let byteset = needle.iter()
-                            .fold(0, |a, &b| (1 << ((b & 0x3f) as usize)) | a);
+                            .fold(0, |a, &b| (1 << ((b & 0x3f) as uint)) | a);
 
         // A particularly readable explanation of what's going on here can be found
         // in Crochemore and Rytter's book "Text Algorithms", ch 13. Specifically
@@ -794,8 +799,7 @@ impl TwoWaySearcher {
     // How far we can jump when we encounter a mismatch is all based on the fact
     // that (u, v) is a critical factorization for the needle.
     #[inline]
-    fn next(&mut self, haystack: &[u8], needle: &[u8], long_period: bool)
-            -> Option<(usize, usize)> {
+    fn next(&mut self, haystack: &[u8], needle: &[u8], long_period: bool) -> Option<(uint, uint)> {
         'search: loop {
             // Check that we have room to search in
             if self.position + needle.len() > haystack.len() {
@@ -805,7 +809,7 @@ impl TwoWaySearcher {
             // Quickly skip by large portions unrelated to our substring
             if (self.byteset >>
                     ((haystack[self.position + needle.len() - 1] & 0x3f)
-                     as usize)) & 1 == 0 {
+                     as uint)) & 1 == 0 {
                 self.position += needle.len();
                 if !long_period {
                     self.memory = 0;
@@ -852,7 +856,7 @@ impl TwoWaySearcher {
     // Specifically, returns (i, p), where i is the starting index of v in some
     // critical factorization (u, v) and p = period(v)
     #[inline]
-    fn maximal_suffix(arr: &[u8], reversed: bool) -> (usize, usize) {
+    fn maximal_suffix(arr: &[u8], reversed: bool) -> (uint, uint) {
         let mut left = -1; // Corresponds to i in the paper
         let mut right = 0; // Corresponds to j in the paper
         let mut offset = 1; // Corresponds to k in the paper
@@ -897,6 +901,7 @@ impl TwoWaySearcher {
 /// within a larger string using a dynamically chosen search algorithm
 #[derive(Clone)]
 enum Searcher {
+    EmptyNeedle { pos: usize, done: bool },
     Naive(NaiveSearcher),
     TwoWay(TwoWaySearcher),
     TwoWayLong(TwoWaySearcher)
@@ -904,11 +909,16 @@ enum Searcher {
 
 impl Searcher {
     fn new(haystack: &[u8], needle: &[u8]) -> Searcher {
+        if needle.len() == 0 {
+            Searcher::EmptyNeedle {
+                pos: 0,
+                done: false
+            }
         // FIXME: Tune this.
         // FIXME(#16715): This unsigned integer addition will probably not
         // overflow because that would mean that the memory almost solely
         // consists of the needle. Needs #16715 to be formally fixed.
-        if needle.len() + 20 > haystack.len() {
+        } else if needle.len() + 20 > haystack.len() {
             Naive(NaiveSearcher::new())
         } else {
             let searcher = TwoWaySearcher::new(needle);
@@ -938,23 +948,37 @@ pub struct MatchIndices<'a> {
 #[unstable(feature = "core", reason = "type may be removed")]
 pub struct SplitStr<'a> {
     it: MatchIndices<'a>,
-    last_end: usize,
+    last_end: uint,
     finished: bool
 }
 
 #[stable(feature = "rust1", since = "1.0.0")]
 impl<'a> Iterator for MatchIndices<'a> {
-    type Item = (usize, usize);
+    type Item = (uint, uint);
 
     #[inline]
-    fn next(&mut self) -> Option<(usize, usize)> {
+    fn next(&mut self) -> Option<(uint, uint)> {
         match self.searcher {
             Naive(ref mut searcher)
                 => searcher.next(self.haystack.as_bytes(), self.needle.as_bytes()),
             TwoWay(ref mut searcher)
                 => searcher.next(self.haystack.as_bytes(), self.needle.as_bytes(), false),
             TwoWayLong(ref mut searcher)
-                => searcher.next(self.haystack.as_bytes(), self.needle.as_bytes(), true)
+                => searcher.next(self.haystack.as_bytes(), self.needle.as_bytes(), true),
+            Searcher::EmptyNeedle { ref mut pos, ref mut done } => {
+                if !*done {
+                    let r = Some((*pos, *pos));
+                    if *pos == self.haystack.len() {
+                        *done = true;
+                    } else {
+                        use char::CharExt;
+                        *pos += self.haystack.char_at(*pos).len_utf8();
+                    }
+                    r
+                } else {
+                    None
+                }
+            }
         }
     }
 }
@@ -994,7 +1018,7 @@ Section: Comparing strings
 fn eq_slice_(a: &str, b: &str) -> bool {
     // NOTE: In theory n should be libc::size_t and not usize, but libc is not available here
     #[allow(improper_ctypes)]
-    extern { fn memcmp(s1: *const i8, s2: *const i8, n: usize) -> i32; }
+    extern { fn memcmp(s1: *const i8, s2: *const i8, n: uint) -> i32; }
     a.len() == b.len() && unsafe {
         memcmp(a.as_ptr() as *const i8,
                b.as_ptr() as *const i8,
@@ -1051,7 +1075,7 @@ fn run_utf8_validation_iterator(iter: &mut slice::Iter<u8>)
         // ASCII characters are always valid, so only large
         // bytes need more examination.
         if first >= 128 {
-            let w = UTF8_CHAR_WIDTH[first as usize] as usize;
+            let w = UTF8_CHAR_WIDTH[first as uint] as uint;
             let second = next!();
             // 2-byte encoding is for codepoints  \u{0080} to  \u{07ff}
             //        first  C2 80        last DF BF
@@ -1126,7 +1150,7 @@ pub struct CharRange {
     /// Current `char`
     pub ch: char,
     /// Index of the first byte of the next `char`
-    pub next: usize,
+    pub next: uint,
 }
 
 /// Mask of the value bits of a continuation byte
@@ -1211,10 +1235,10 @@ mod traits {
     /// // &s[3 .. 100];
     /// ```
     #[stable(feature = "rust1", since = "1.0.0")]
-    impl ops::Index<ops::Range<usize>> for str {
+    impl ops::Index<ops::Range<uint>> for str {
         type Output = str;
         #[inline]
-        fn index(&self, index: &ops::Range<usize>) -> &str {
+        fn index(&self, index: &ops::Range<uint>) -> &str {
             // is_char_boundary checks that the index is in [0, .len()]
             if index.start <= index.end &&
                self.is_char_boundary(index.start) &&
@@ -1234,10 +1258,10 @@ mod traits {
     /// Panics when `end` does not point to a valid character, or is
     /// out of bounds.
     #[stable(feature = "rust1", since = "1.0.0")]
-    impl ops::Index<ops::RangeTo<usize>> for str {
+    impl ops::Index<ops::RangeTo<uint>> for str {
         type Output = str;
         #[inline]
-        fn index(&self, index: &ops::RangeTo<usize>) -> &str {
+        fn index(&self, index: &ops::RangeTo<uint>) -> &str {
             // is_char_boundary checks that the index is in [0, .len()]
             if self.is_char_boundary(index.end) {
                 unsafe { self.slice_unchecked(0, index.end) }
@@ -1254,10 +1278,10 @@ mod traits {
     /// Panics when `begin` does not point to a valid character, or is
     /// out of bounds.
     #[stable(feature = "rust1", since = "1.0.0")]
-    impl ops::Index<ops::RangeFrom<usize>> for str {
+    impl ops::Index<ops::RangeFrom<uint>> for str {
         type Output = str;
         #[inline]
-        fn index(&self, index: &ops::RangeFrom<usize>) -> &str {
+        fn index(&self, index: &ops::RangeFrom<uint>) -> &str {
             // is_char_boundary checks that the index is in [0, .len()]
             if self.is_char_boundary(index.start) {
                 unsafe { self.slice_unchecked(index.start, self.len()) }
@@ -1328,46 +1352,49 @@ pub trait StrExt {
     // NB there are no docs here are they're all located on the StrExt trait in
     // libcollections, not here.
 
-    fn contains(&self, pat: &str) -> bool;
-    fn contains_char<P: CharEq>(&self, pat: P) -> bool;
+    fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
+    fn contains_char<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
     fn chars<'a>(&'a self) -> Chars<'a>;
     fn bytes<'a>(&'a self) -> Bytes<'a>;
     fn char_indices<'a>(&'a self) -> CharIndices<'a>;
     fn split<'a, P: CharEq>(&'a self, pat: P) -> Split<'a, P>;
-    fn splitn<'a, P: CharEq>(&'a self, count: usize, pat: P) -> SplitN<'a, P>;
+    fn splitn<'a, P: CharEq>(&'a self, count: uint, pat: P) -> SplitN<'a, P>;
     fn split_terminator<'a, P: CharEq>(&'a self, pat: P) -> SplitTerminator<'a, P>;
-    fn rsplitn<'a, P: CharEq>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>;
+    fn rsplitn<'a, P: CharEq>(&'a self, count: uint, pat: P) -> RSplitN<'a, P>;
     fn match_indices<'a>(&'a self, sep: &'a str) -> MatchIndices<'a>;
     fn split_str<'a>(&'a self, pat: &'a str) -> SplitStr<'a>;
     fn lines<'a>(&'a self) -> Lines<'a>;
     fn lines_any<'a>(&'a self) -> LinesAny<'a>;
-    fn char_len(&self) -> usize;
-    fn slice_chars<'a>(&'a self, begin: usize, end: usize) -> &'a str;
-    unsafe fn slice_unchecked<'a>(&'a self, begin: usize, end: usize) -> &'a str;
+    fn char_len(&self) -> uint;
+    fn slice_chars<'a>(&'a self, begin: uint, end: uint) -> &'a str;
+    unsafe fn slice_unchecked<'a>(&'a self, begin: uint, end: uint) -> &'a str;
     fn starts_with(&self, pat: &str) -> bool;
     fn ends_with(&self, pat: &str) -> bool;
-    fn trim_matches<'a, P: CharEq>(&'a self, pat: P) -> &'a str;
-    fn trim_left_matches<'a, P: CharEq>(&'a self, pat: P) -> &'a str;
-    fn trim_right_matches<'a, P: CharEq>(&'a self, pat: P) -> &'a str;
-    fn is_char_boundary(&self, index: usize) -> bool;
-    fn char_range_at(&self, start: usize) -> CharRange;
-    fn char_range_at_reverse(&self, start: usize) -> CharRange;
-    fn char_at(&self, i: usize) -> char;
-    fn char_at_reverse(&self, i: usize) -> char;
+    fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
+        where P::Matcher: DoubleEndedMatcher<'a>;
+    fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str;
+    fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
+        where P::Matcher: ReverseMatcher<'a>;
+    fn is_char_boundary(&self, index: uint) -> bool;
+    fn char_range_at(&self, start: uint) -> CharRange;
+    fn char_range_at_reverse(&self, start: uint) -> CharRange;
+    fn char_at(&self, i: uint) -> char;
+    fn char_at_reverse(&self, i: uint) -> char;
     fn as_bytes<'a>(&'a self) -> &'a [u8];
-    fn find<P: CharEq>(&self, pat: P) -> Option<usize>;
-    fn rfind<P: CharEq>(&self, pat: P) -> Option<usize>;
-    fn find_str(&self, pat: &str) -> Option<usize>;
+    fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<uint>;
+    fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<uint>
+        where P::Matcher: ReverseMatcher<'a>;
+    fn find_str(&self, pat: &str) -> Option<uint>;
     fn slice_shift_char<'a>(&'a self) -> Option<(char, &'a str)>;
-    fn subslice_offset(&self, inner: &str) -> usize;
+    fn subslice_offset(&self, inner: &str) -> uint;
     fn as_ptr(&self) -> *const u8;
-    fn len(&self) -> usize;
+    fn len(&self) -> uint;
     fn is_empty(&self) -> bool;
     fn parse<T: FromStr>(&self) -> Result<T, T::Err>;
 }
 
 #[inline(never)]
-fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
+fn slice_error_fail(s: &str, begin: uint, end: uint) -> ! {
     assert!(begin <= end);
     panic!("index {} and/or {} in `{}` do not lie on character boundary",
           begin, end, s);
@@ -1375,13 +1402,13 @@ fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
 
 impl StrExt for str {
     #[inline]
-    fn contains(&self, needle: &str) -> bool {
-        self.find_str(needle).is_some()
+    fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
+        pat.is_contained_in(self)
     }
 
     #[inline]
-    fn contains_char<P: CharEq>(&self, pat: P) -> bool {
-        self.find(pat).is_some()
+    fn contains_char<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
+        pat.is_contained_in(self)
     }
 
     #[inline]
@@ -1411,7 +1438,7 @@ impl StrExt for str {
     }
 
     #[inline]
-    fn splitn<P: CharEq>(&self, count: usize, pat: P) -> SplitN<P> {
+    fn splitn<P: CharEq>(&self, count: uint, pat: P) -> SplitN<P> {
         SplitN(CharSplitsN {
             iter: self.split(pat).0,
             count: count,
@@ -1428,7 +1455,7 @@ impl StrExt for str {
     }
 
     #[inline]
-    fn rsplitn<P: CharEq>(&self, count: usize, pat: P) -> RSplitN<P> {
+    fn rsplitn<P: CharEq>(&self, count: uint, pat: P) -> RSplitN<P> {
         RSplitN(CharSplitsN {
             iter: self.split(pat).0,
             count: count,
@@ -1438,7 +1465,6 @@ impl StrExt for str {
 
     #[inline]
     fn match_indices<'a>(&'a self, sep: &'a str) -> MatchIndices<'a> {
-        assert!(!sep.is_empty());
         MatchIndices {
             haystack: self,
             needle: sep,
@@ -1472,9 +1498,9 @@ impl StrExt for str {
     }
 
     #[inline]
-    fn char_len(&self) -> usize { self.chars().count() }
+    fn char_len(&self) -> uint { self.chars().count() }
 
-    fn slice_chars(&self, begin: usize, end: usize) -> &str {
+    fn slice_chars(&self, begin: uint, end: uint) -> &str {
         assert!(begin <= end);
         let mut count = 0;
         let mut begin_byte = None;
@@ -1498,9 +1524,9 @@ impl StrExt for str {
     }
 
     #[inline]
-    unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str {
+    unsafe fn slice_unchecked(&self, begin: uint, end: uint) -> &str {
         mem::transmute(Slice {
-            data: self.as_ptr().offset(begin as isize),
+            data: self.as_ptr().offset(begin as int),
             len: end - begin,
         })
     }
@@ -1518,41 +1544,71 @@ impl StrExt for str {
     }
 
     #[inline]
-    fn trim_matches<P: CharEq>(&self, mut pat: P) -> &str {
-        let cur = match self.find(|c: char| !pat.matches(c)) {
-            None => "",
-            Some(i) => unsafe { self.slice_unchecked(i, self.len()) }
-        };
-        match cur.rfind(|c: char| !pat.matches(c)) {
-            None => "",
-            Some(i) => {
-                let right = cur.char_range_at(i).next;
-                unsafe { cur.slice_unchecked(0, right) }
+    fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
+    where P::Matcher: DoubleEndedMatcher<'a> {
+        let mut i = 0;
+        let mut matcher = pat.into_matcher(self);
+        let mut possible_end_match = None;
+        while let Some((a, b)) = Matcher::next(&mut matcher) {
+            if a == i {
+                i = b;
+            } else {
+                possible_end_match = Some((a, b));
+                break;
+            }
+        }
+        let mut j = self.len();
+        while let Some((a, b)) = ReverseMatcher::next_back(&mut matcher)
+                .or_else(|| possible_end_match.take()) {
+            if b == j {
+                j = a;
+            } else {
+                break;
             }
         }
+        unsafe {
+            // Matcher is known to return valid indices
+            self.slice_unchecked(i, j)
+        }
     }
 
     #[inline]
-    fn trim_left_matches<P: CharEq>(&self, mut pat: P) -> &str {
-        match self.find(|c: char| !pat.matches(c)) {
-            None => "",
-            Some(first) => unsafe { self.slice_unchecked(first, self.len()) }
+    fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &str {
+        let mut i = 0;
+        let mut matcher = pat.into_matcher(self);
+        while let Some((a, b)) = Matcher::next(&mut matcher) {
+            if a == i {
+                i = b;
+            } else {
+                break;
+            }
+        }
+        unsafe {
+            // Matcher is known to return valid indices
+            self.slice_unchecked(i, self.len())
         }
     }
 
     #[inline]
-    fn trim_right_matches<P: CharEq>(&self, mut pat: P) -> &str {
-        match self.rfind(|c: char| !pat.matches(c)) {
-            None => "",
-            Some(last) => {
-                let next = self.char_range_at(last).next;
-                unsafe { self.slice_unchecked(0, next) }
+    fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &str
+    where P::Matcher: ReverseMatcher<'a> {
+        let mut i = self.len();
+        let mut matcher = pat.into_matcher(self);
+        while let Some((a, b)) = ReverseMatcher::next_back(&mut matcher) {
+            if b == i {
+                i = a;
+            } else {
+                break;
             }
         }
+        unsafe {
+            // Matcher is known to return valid indices
+            self.slice_unchecked(0, i)
+        }
     }
 
     #[inline]
-    fn is_char_boundary(&self, index: usize) -> bool {
+    fn is_char_boundary(&self, index: uint) -> bool {
         if index == self.len() { return true; }
         match self.as_bytes().get(index) {
             None => false,
@@ -1561,13 +1617,13 @@ impl StrExt for str {
     }
 
     #[inline]
-    fn char_range_at(&self, i: usize) -> CharRange {
+    fn char_range_at(&self, i: uint) -> CharRange {
         let (c, n) = char_range_at_raw(self.as_bytes(), i);
         CharRange { ch: unsafe { mem::transmute(c) }, next: n }
     }
 
     #[inline]
-    fn char_range_at_reverse(&self, start: usize) -> CharRange {
+    fn char_range_at_reverse(&self, start: uint) -> CharRange {
         let mut prev = start;
 
         prev = prev.saturating_sub(1);
@@ -1576,14 +1632,14 @@ impl StrExt for str {
         }
 
         // Multibyte case is a fn to allow char_range_at_reverse to inline cleanly
-        fn multibyte_char_range_at_reverse(s: &str, mut i: usize) -> CharRange {
+        fn multibyte_char_range_at_reverse(s: &str, mut i: uint) -> CharRange {
             // while there is a previous byte == 10......
             while i > 0 && s.as_bytes()[i] & !CONT_MASK == TAG_CONT_U8 {
                 i -= 1;
             }
 
             let mut val = s.as_bytes()[i] as u32;
-            let w = UTF8_CHAR_WIDTH[val as usize] as usize;
+            let w = UTF8_CHAR_WIDTH[val as uint] as uint;
             assert!((w != 0));
 
             val = utf8_first_byte!(val, w);
@@ -1598,12 +1654,12 @@ impl StrExt for str {
     }
 
     #[inline]
-    fn char_at(&self, i: usize) -> char {
+    fn char_at(&self, i: uint) -> char {
         self.char_range_at(i).ch
     }
 
     #[inline]
-    fn char_at_reverse(&self, i: usize) -> char {
+    fn char_at_reverse(&self, i: uint) -> char {
         self.char_range_at_reverse(i).ch
     }
 
@@ -1612,29 +1668,16 @@ impl StrExt for str {
         unsafe { mem::transmute(self) }
     }
 
-    fn find<P: CharEq>(&self, mut pat: P) -> Option<usize> {
-        if pat.only_ascii() {
-            self.bytes().position(|b| pat.matches(b as char))
-        } else {
-            for (index, c) in self.char_indices() {
-                if pat.matches(c) { return Some(index); }
-            }
-            None
-        }
+    fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<uint> {
+        Matcher::next(&mut pat.into_matcher(self)).map(|(i, _)| i)
     }
 
-    fn rfind<P: CharEq>(&self, mut pat: P) -> Option<usize> {
-        if pat.only_ascii() {
-            self.bytes().rposition(|b| pat.matches(b as char))
-        } else {
-            for (index, c) in self.char_indices().rev() {
-                if pat.matches(c) { return Some(index); }
-            }
-            None
-        }
+    fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<uint>
+    where P::Matcher: ReverseMatcher<'a> {
+        ReverseMatcher::next_back(&mut pat.into_matcher(self)).map(|(i, _)| i)
     }
 
-    fn find_str(&self, needle: &str) -> Option<usize> {
+    fn find_str(&self, needle: &str) -> Option<uint> {
         if needle.is_empty() {
             Some(0)
         } else {
@@ -1655,10 +1698,10 @@ impl StrExt for str {
         }
     }
 
-    fn subslice_offset(&self, inner: &str) -> usize {
-        let a_start = self.as_ptr() as usize;
+    fn subslice_offset(&self, inner: &str) -> uint {
+        let a_start = self.as_ptr() as uint;
         let a_end = a_start + self.len();
-        let b_start = inner.as_ptr() as usize;
+        let b_start = inner.as_ptr() as uint;
         let b_end = b_start + inner.len();
 
         assert!(a_start <= b_start);
@@ -1672,7 +1715,7 @@ impl StrExt for str {
     }
 
     #[inline]
-    fn len(&self) -> usize { self.repr().len }
+    fn len(&self) -> uint { self.repr().len }
 
     #[inline]
     fn is_empty(&self) -> bool { self.len() == 0 }
@@ -1685,15 +1728,15 @@ impl StrExt for str {
 /// index of the next code point.
 #[inline]
 #[unstable(feature = "core")]
-pub fn char_range_at_raw(bytes: &[u8], i: usize) -> (u32, usize) {
+pub fn char_range_at_raw(bytes: &[u8], i: uint) -> (u32, usize) {
     if bytes[i] < 128u8 {
         return (bytes[i] as u32, i + 1);
     }
 
     // Multibyte case is a fn to allow char_range_at to inline cleanly
-    fn multibyte_char_range_at(bytes: &[u8], i: usize) -> (u32, usize) {
+    fn multibyte_char_range_at(bytes: &[u8], i: uint) -> (u32, usize) {
         let mut val = bytes[i] as u32;
-        let w = UTF8_CHAR_WIDTH[val as usize] as usize;
+        let w = UTF8_CHAR_WIDTH[val as uint] as uint;
         assert!((w != 0));
 
         val = utf8_first_byte!(val, w);
@@ -1720,7 +1763,7 @@ impl<'a> Iterator for Lines<'a> {
     #[inline]
     fn next(&mut self) -> Option<&'a str> { self.inner.next() }
     #[inline]
-    fn size_hint(&self) -> (usize, Option<usize>) { self.inner.size_hint() }
+    fn size_hint(&self) -> (uint, Option<uint>) { self.inner.size_hint() }
 }
 
 #[stable(feature = "rust1", since = "1.0.0")]
@@ -1736,7 +1779,7 @@ impl<'a> Iterator for LinesAny<'a> {
     #[inline]
     fn next(&mut self) -> Option<&'a str> { self.inner.next() }
     #[inline]
-    fn size_hint(&self) -> (usize, Option<usize>) { self.inner.size_hint() }
+    fn size_hint(&self) -> (uint, Option<uint>) { self.inner.size_hint() }
 }
 
 #[stable(feature = "rust1", since = "1.0.0")]
diff --git a/src/libcore/str/pattern.rs b/src/libcore/str/pattern.rs
new file mode 100644
index 00000000000..13b0d1df45e
--- /dev/null
+++ b/src/libcore/str/pattern.rs
@@ -0,0 +1,113 @@
+// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![allow(missing_docs)]
+
+use prelude::*;
+use super::CharEq;
+
+// Pattern
+
+pub trait Pattern<'a>: Sized {
+    type Matcher: Matcher<'a>;
+    fn into_matcher(self, haystack: &'a str) -> Self::Matcher;
+
+    #[inline]
+    fn is_contained_in(self, haystack: &'a str) -> bool {
+        Matcher::next(&mut self.into_matcher(haystack)).is_some()
+    }
+}
+
+// Matcher
+
+pub unsafe trait Matcher<'a> {
+    fn haystack(&self) -> &'a str;
+    fn next(&mut self) -> Option<(usize, usize)>;
+}
+
+pub unsafe trait ReverseMatcher<'a>: Matcher<'a> {
+    fn next_back(&mut self) -> Option<(usize, usize)>;
+}
+
+pub trait DoubleEndedMatcher<'a>: ReverseMatcher<'a> {}
+
+// Impl for CharEq
+
+struct CharEqMatcher<'a, C>(C, &'a str, super::CharIndices<'a>);
+
+impl<'a, C: CharEq> Pattern<'a> for C {
+    type Matcher = CharEqMatcher<'a, C>;
+
+    #[inline]
+    fn into_matcher(self, haystack: &'a str) -> CharEqMatcher<'a, C> {
+        CharEqMatcher(self, haystack, haystack.char_indices())
+    }
+}
+
+unsafe impl<'a, C: CharEq> Matcher<'a> for CharEqMatcher<'a, C> {
+    #[inline]
+    fn haystack(&self) -> &'a str {
+        self.1
+    }
+
+    #[inline]
+    fn next(&mut self) -> Option<(usize, usize)> {
+        while let Some((i, c)) = self.2.next() {
+            if self.0.matches(c) {
+                return Some((i, i + c.len_utf8()));
+            }
+        }
+        None
+    }
+}
+
+unsafe impl<'a, C: CharEq> ReverseMatcher<'a> for CharEqMatcher<'a, C> {
+    #[inline]
+    fn next_back(&mut self) -> Option<(usize, usize)> {
+        while let Some((i, c)) = self.2.next_back() {
+            if self.0.matches(c) {
+                return Some((i, i + c.len_utf8()));
+            }
+        }
+        None
+    }
+}
+
+impl<'a, C: CharEq> DoubleEndedMatcher<'a> for CharEqMatcher<'a, C> {}
+
+// Impl for &str
+
+struct StrMatcher<'a>(super::MatchIndices<'a>);
+
+impl<'a> Pattern<'a> for &'a str {
+    type Matcher = StrMatcher<'a>;
+
+    #[inline]
+    fn into_matcher(self, haystack: &'a str) -> StrMatcher<'a> {
+        let mi = super::MatchIndices {
+            haystack: haystack,
+            needle: self,
+            searcher: super::Searcher::new(haystack.as_bytes(), self.as_bytes())
+        };
+        StrMatcher(mi)
+    }
+}
+
+unsafe impl<'a> Matcher<'a> for StrMatcher<'a>  {
+    #[inline]
+    fn haystack(&self) -> &'a str {
+        self.0.haystack
+    }
+
+    #[inline]
+    fn next(&mut self) -> Option<(usize, usize)> {
+        self.0.next()
+    }
+}
diff --git a/src/libcoretest/str.rs b/src/libcoretest/str.rs
index 308c5282a92..ddbec47eeff 100644
--- a/src/libcoretest/str.rs
+++ b/src/libcoretest/str.rs
@@ -9,6 +9,13 @@
 // except according to those terms.
 
 #[test]
+fn test_empty_match_indices() {
+    let data = "aä中!";
+    let vec: Vec<_> = data.match_indices("").collect();
+    assert_eq!(vec, vec![(0, 0), (1, 1), (3, 3), (6, 6), (7, 7)]);
+}
+
+#[test]
 fn test_bool_from_str() {
     assert_eq!("true".parse().ok(), Some(true));
     assert_eq!("false".parse().ok(), Some(false));
author	Marvin Löbel <loebel.marvin@gmail.com>	2014-12-30 21:54:17 +0100
committer	Marvin Löbel <loebel.marvin@gmail.com>	2015-02-20 00:32:59 +0100
commit	54f0bead8158eaf948c93d1cae93b60978937417 (patch)
tree	f00cfcfa18db7183e05d3d441d0bcaf1489b79c0
parent	d68eb3d24843d2e269989563d45ceda920391fe0 (diff)
download	rust-54f0bead8158eaf948c93d1cae93b60978937417.tar.gz rust-54f0bead8158eaf948c93d1cae93b60978937417.zip