about summary refs log tree commit diff
path: root/src/libcore/str/pattern.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/libcore/str/pattern.rs')
-rw-r--r--src/libcore/str/pattern.rs257
1 files changed, 219 insertions, 38 deletions
diff --git a/src/libcore/str/pattern.rs b/src/libcore/str/pattern.rs
index 077c4c8f7b4..2b77d877cf4 100644
--- a/src/libcore/str/pattern.rs
+++ b/src/libcore/str/pattern.rs
@@ -16,99 +16,280 @@ use super::CharEq;
 // Pattern
 
 pub trait Pattern<'a>: Sized {
-    type Matcher: Matcher<'a>;
-    fn into_matcher(self, haystack: &'a str) -> Self::Matcher;
+    type Searcher: Searcher<'a>;
+    fn into_matcher(self, haystack: &'a str) -> Self::Searcher;
 
     #[inline]
     fn is_contained_in(self, haystack: &'a str) -> bool {
-        Matcher::next(&mut self.into_matcher(haystack)).is_some()
+        self.into_matcher(haystack).next_match().is_some()
+    }
+
+    #[inline]
+    fn match_starts_at(self, haystack: &'a str, idx: usize) -> bool {
+        let mut matcher = self.into_matcher(haystack);
+        loop {
+            match matcher.next() {
+                SearchStep::Match(i, _) if i == idx => return true,
+                SearchStep::Match(i, _)
+                | SearchStep::Reject(i, _) if i >= idx => break,
+                SearchStep::Done => break,
+                _ => continue,
+            }
+        }
+        false
+    }
+
+    #[inline]
+    fn match_ends_at(self, haystack: &'a str, idx: usize) -> bool
+    where Self::Searcher: ReverseSearcher<'a> {
+        let mut matcher = self.into_matcher(haystack);
+        loop {
+            match matcher.next_back() {
+                SearchStep::Match(_, j) if idx == j => return true,
+                SearchStep::Match(_, j)
+                | SearchStep::Reject(_, j) if idx >= j => break,
+                SearchStep::Done => break,
+                _ => continue,
+            }
+        }
+        false
     }
 }
 
-// Matcher
+// Searcher
+
+pub enum SearchStep {
+    Match(usize, usize),
+    Reject(usize, usize),
+    Done
+}
 
-pub unsafe trait Matcher<'a> {
+pub unsafe trait Searcher<'a> {
     fn haystack(&self) -> &'a str;
-    fn next(&mut self) -> Option<(usize, usize)>;
+    fn next(&mut self) -> SearchStep;
+    #[inline]
+    fn next_match(&mut self) -> Option<(usize, usize)> {
+        loop {
+            match self.next() {
+                SearchStep::Match(a, b) => return Some((a, b)),
+                SearchStep::Done => return None,
+                _ => continue,
+            }
+        }
+    }
+    #[inline]
+    fn next_reject(&mut self) -> Option<(usize, usize)>{
+        loop {
+            match self.next() {
+                SearchStep::Reject(a, b) => return Some((a, b)),
+                SearchStep::Done => return None,
+                _ => continue,
+            }
+        }
+    }
 }
 
-pub unsafe trait ReverseMatcher<'a>: Matcher<'a> {
-    fn next_back(&mut self) -> Option<(usize, usize)>;
+pub unsafe trait ReverseSearcher<'a>: Searcher<'a> {
+    fn next_back(&mut self) -> SearchStep;
+    #[inline]
+    fn next_match_back(&mut self) -> Option<(usize, usize)>{
+        loop {
+            match self.next_back() {
+                SearchStep::Match(a, b) => return Some((a, b)),
+                SearchStep::Done => return None,
+                _ => continue,
+            }
+        }
+    }
+    #[inline]
+    fn next_reject_back(&mut self) -> Option<(usize, usize)>{
+        loop {
+            match self.next_back() {
+                SearchStep::Reject(a, b) => return Some((a, b)),
+                SearchStep::Done => return None,
+                _ => continue,
+            }
+        }
+    }
 }
 
-pub trait DoubleEndedMatcher<'a>: ReverseMatcher<'a> {}
+pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {}
 
 // Impl for CharEq
 
-struct CharEqMatcher<'a, C>(C, &'a str, super::CharIndices<'a>);
+pub struct CharEqSearcher<'a, C> {
+    char_eq: C,
+    haystack: &'a str,
+    char_indices: super::CharIndices<'a>,
+    #[allow(dead_code)]
+    ascii_only: bool,
+}
 
 impl<'a, C: CharEq> Pattern<'a> for C {
-    type Matcher = CharEqMatcher<'a, C>;
+    type Searcher = CharEqSearcher<'a, C>;
 
     #[inline]
-    fn into_matcher(self, haystack: &'a str) -> CharEqMatcher<'a, C> {
-        CharEqMatcher(self, haystack, haystack.char_indices())
+    fn into_matcher(self, haystack: &'a str) -> CharEqSearcher<'a, C> {
+        CharEqSearcher {
+            ascii_only: self.only_ascii(),
+            haystack: haystack,
+            char_eq: self,
+            char_indices: haystack.char_indices(),
+        }
     }
 }
 
-unsafe impl<'a, C: CharEq> Matcher<'a> for CharEqMatcher<'a, C> {
+unsafe impl<'a, C: CharEq> Searcher<'a> for CharEqSearcher<'a, C> {
     #[inline]
     fn haystack(&self) -> &'a str {
-        self.1
+        self.haystack
     }
 
     #[inline]
-    fn next(&mut self) -> Option<(usize, usize)> {
-        while let Some((i, c)) = self.2.next() {
-            if self.0.matches(c) {
-                return Some((i, i + c.len_utf8()));
+    fn next(&mut self) -> SearchStep {
+        let s = &mut self.char_indices;
+        // Compare lengths of the internal byte slice iterator
+        // to find length of current char
+        let (pre_len, _) = s.iter.iter.size_hint();
+        if let Some((i, c)) = s.next() {
+            let (len, _) = s.iter.iter.size_hint();
+            let char_len = pre_len - len;
+            if self.char_eq.matches(c) {
+                return SearchStep::Match(i, i + char_len);
+            } else {
+                return SearchStep::Reject(i, i + char_len);
             }
         }
-        None
+        SearchStep::Done
     }
 }
 
-unsafe impl<'a, C: CharEq> ReverseMatcher<'a> for CharEqMatcher<'a, C> {
+unsafe impl<'a, C: CharEq> ReverseSearcher<'a> for CharEqSearcher<'a, C> {
     #[inline]
-    fn next_back(&mut self) -> Option<(usize, usize)> {
-        while let Some((i, c)) = self.2.next_back() {
-            if self.0.matches(c) {
-                return Some((i, i + c.len_utf8()));
+    fn next_back(&mut self) -> SearchStep {
+        let s = &mut self.char_indices;
+        // Compare lengths of the internal byte slice iterator
+        // to find length of current char
+        let (pre_len, _) = s.iter.iter.size_hint();
+        if let Some((i, c)) = s.next_back() {
+            let (len, _) = s.iter.iter.size_hint();
+            let char_len = pre_len - len;
+            if self.char_eq.matches(c) {
+                return SearchStep::Match(i, i + char_len);
+            } else {
+                return SearchStep::Reject(i, i + char_len);
             }
         }
-        None
+        SearchStep::Done
     }
 }
 
-impl<'a, C: CharEq> DoubleEndedMatcher<'a> for CharEqMatcher<'a, C> {}
+impl<'a, C: CharEq> DoubleEndedSearcher<'a> for CharEqSearcher<'a, C> {}
 
 // Impl for &str
 
+// TODO: Optimize the naive implementation here
+
 #[derive(Clone)]
-pub struct StrMatcher<'a, 'b>(super::OldMatchIndices<'a, 'b>);
+pub struct StrSearcher<'a, 'b> {
+    haystack: &'a str,
+    needle: &'b str,
+    start: usize,
+    end: usize,
+    done: bool,
+}
 
 impl<'a, 'b> Pattern<'a> for &'b str {
-    type Matcher = StrMatcher<'a, 'b>;
+    type Searcher = StrSearcher<'a, 'b>;
 
     #[inline]
-    fn into_matcher(self, haystack: &'a str) -> StrMatcher<'a, 'b> {
-        let mi = super::OldMatchIndices {
+    fn into_matcher(self, haystack: &'a str) -> StrSearcher<'a, 'b> {
+        StrSearcher {
             haystack: haystack,
             needle: self,
-            searcher: super::Searcher::new(haystack.as_bytes(), self.as_bytes())
-        };
-        StrMatcher(mi)
+            start: 0,
+            end: haystack.len(),
+            done: false,
+        }
     }
 }
 
-unsafe impl<'a, 'b> Matcher<'a> for StrMatcher<'a, 'b>  {
+unsafe impl<'a, 'b> Searcher<'a> for StrSearcher<'a, 'b>  {
     #[inline]
     fn haystack(&self) -> &'a str {
-        self.0.haystack
+        self.haystack
     }
 
     #[inline]
-    fn next(&mut self) -> Option<(usize, usize)> {
-        self.0.next()
+    fn next(&mut self) -> SearchStep {
+        str_search_step(self,
+        |m: &mut StrSearcher| {
+            // Forward step for empty needle
+            let current_start = m.start;
+            if !m.done {
+                m.start = m.haystack.char_range_at(current_start).next;
+            }
+            SearchStep::Match(current_start, current_start)
+        },
+        |m: &mut StrSearcher| {
+            // Forward step for nonempty needle
+            let possible_match = &m.haystack[m.start .. m.start + m.needle.len()];
+            let current_start = m.start;
+            if possible_match == m.needle {
+                m.start += m.needle.len();
+                SearchStep::Match(current_start, m.start)
+            } else {
+                m.start += possible_match.chars().next().unwrap().len_utf8();
+                SearchStep::Reject(current_start, m.start)
+            }
+        })
+    }
+}
+
+unsafe impl<'a, 'b> ReverseSearcher<'a> for StrSearcher<'a, 'b>  {
+    #[inline]
+    fn next_back(&mut self) -> SearchStep {
+        str_search_step(self,
+        |m: &mut StrSearcher| {
+            // Backward step for empty needle
+            let current_end = m.end;
+            if !m.done {
+                m.end = m.haystack.char_range_at_reverse(current_end).next;
+            }
+            SearchStep::Match(current_end, current_end)
+        },
+        |m: &mut StrSearcher| {
+            // Backward step for nonempty needle
+            let possible_match = &m.haystack[m.end - m.needle.len() .. m.end];
+            let current_end = m.end;
+            if possible_match == m.needle {
+                m.end -= m.needle.len();
+                SearchStep::Match(m.end, current_end)
+            } else {
+                m.end -= possible_match.chars().rev().next().unwrap().len_utf8();
+                SearchStep::Reject(m.end, current_end)
+            }
+        })
+    }
+}
+
+fn str_search_step<F, G>(mut m: &mut StrSearcher, f: F, g: G) -> SearchStep
+where F: FnOnce(&mut StrSearcher) -> SearchStep,
+      G: FnOnce(&mut StrSearcher) -> SearchStep
+{
+    if m.done {
+        SearchStep::Done
+    } else if m.needle.len() == 0 && m.start <= m.end {
+        // Case for needle == ""
+        if m.start == m.end {
+            m.done = true;
+        }
+        f(&mut m)
+    } else if m.start + m.needle.len() <= m.end {
+        // Case for needle != ""
+        g(&mut m)
+    } else {
+        m.done = true;
+        SearchStep::Done
     }
 }