std::str: Use iterators instead of while loops for CharSplitIterator

Embed an iterator in the CharSplitIterator struct, and combine that with the former bool `only_ascii`; so use an enum instead.
author: blake2-ppc <blake2-ppc> 2013-08-19 15:34:48 +0200
committer: blake2-ppc <blake2-ppc> 2013-08-19 16:11:45 +0200
commit: 8fe83028870ac6ac48e99a38d2992bedc26ec0d7 (patch)
tree: 821bd65530ba050829b9e7da1abd79bef75d7f4a /src
parent: 30ab96b27229000d3754e7dee64fc431b5105150 (diff)
download: rust-8fe83028870ac6ac48e99a38d2992bedc26ec0d7.tar.gz
rust-8fe83028870ac6ac48e99a38d2992bedc26ec0d7.zip
1 files changed, 45 insertions, 33 deletions
diff --git a/src/libstd/str.rs b/src/libstd/str.rs
index 7fde1c9f03c..d8f723d9c78 100644
--- a/src/libstd/str.rs
+++ b/src/libstd/str.rs
@@ -23,7 +23,7 @@ use clone::Clone;
 use container::{Container, Mutable};
 use iter::Times;
 use iterator::{Iterator, FromIterator, Extendable};
-use iterator::{Filter, AdditiveIterator, Map};
+use iterator::{Filter, AdditiveIterator, Map, Enumerate};
 use iterator::{Invert, DoubleEndedIterator};
 use libc;
 use num::{Saturating, Zero};
@@ -359,9 +359,18 @@ pub type ByteIterator<'self> =
 /// Use with the `std::iterator` module.
 pub type ByteRevIterator<'self> = Invert<ByteIterator<'self>>;
 
+/// An iterator over byte index and either &u8 or char
+#[deriving(Clone)]
+enum OffsetIterator<'self> {
+    // use ByteIterator here when it can be cloned
+    ByteOffset(Enumerate<vec::VecIterator<'self, u8>>),
+    CharOffset(CharOffsetIterator<'self>),
+}
+
 /// An iterator over the substrings of a string, separated by `sep`.
 #[deriving(Clone)]
 pub struct CharSplitIterator<'self,Sep> {
+    priv iter: OffsetIterator<'self>,
     priv string: &'self str,
     priv position: uint,
     priv sep: Sep,
@@ -370,7 +379,6 @@ pub struct CharSplitIterator<'self,Sep> {
     /// Whether an empty string at the end is allowed
     priv allow_trailing_empty: bool,
     priv finished: bool,
-    priv only_ascii: bool
 }
 
 /// An iterator over the words of a string, separated by an sequence of whitespace
@@ -386,39 +394,39 @@ impl<'self, Sep: CharEq> Iterator<&'self str> for CharSplitIterator<'self, Sep>
     fn next(&mut self) -> Option<&'self str> {
         if self.finished { return None }
 
-        let l = self.string.len();
         let start = self.position;
-
-        if self.only_ascii {
-            // this gives a *huge* speed up for splitting on ASCII
-            // characters (e.g. '\n' or ' ')
-            while self.position < l && self.count > 0 {
-                let byte = self.string[self.position];
-
-                if self.sep.matches(byte as char) {
-                    let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
-                    self.position += 1;
-                    self.count -= 1;
-                    return Some(slice);
-                }
-                self.position += 1;
-            }
-        } else {
-            while self.position < l && self.count > 0 {
-                let CharRange {ch, next} = self.string.char_range_at(self.position);
-
-                if self.sep.matches(ch) {
-                    let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
-                    self.position = next;
-                    self.count -= 1;
-                    return Some(slice);
-                }
-                self.position = next;
+        let len = self.string.len();
+
+        if self.count > 0 {
+            match self.iter {
+                // this gives a *huge* speed up for splitting on ASCII
+                // characters (e.g. '\n' or ' ')
+                ByteOffset(ref mut iter) =>
+                    for (idx, &byte) in *iter {
+                        if self.sep.matches(byte as char) {
+                            self.position = idx + 1;
+                            self.count -= 1;
+                            return Some(unsafe {
+                                raw::slice_bytes(self.string, start, idx)
+                            })
+                        }
+                    },
+                CharOffset(ref mut iter) =>
+                    for (idx, ch) in *iter {
+                        if self.sep.matches(ch) {
+                            // skip over the separator
+                            self.position = self.string.char_range_at(idx).next;
+                            self.count -= 1;
+                            return Some(unsafe {
+                                raw::slice_bytes(self.string, start, idx)
+                            })
+                        }
+                    },
             }
         }
         self.finished = true;
-        if self.allow_trailing_empty || start < l {
-            Some(unsafe { raw::slice_bytes(self.string, start, l) })
+        if self.allow_trailing_empty || start < len {
+            Some(unsafe { raw::slice_bytes(self.string, start, len) })
         } else {
             None
         }
@@ -1327,15 +1335,19 @@ impl<'self> StrSlice<'self> for &'self str {
     #[inline]
     fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_empty: bool)
         -> CharSplitIterator<'self, Sep> {
-        let only_ascii = sep.only_ascii();
+        let iter = if sep.only_ascii() {
+            ByteOffset(self.as_bytes().iter().enumerate())
+        } else {
+            CharOffset(self.char_offset_iter())
+        };
         CharSplitIterator {
+            iter: iter,
             string: *self,
             position: 0,
             sep: sep,
             count: count,
             allow_trailing_empty: allow_trailing_empty,
             finished: false,
-            only_ascii: only_ascii
         }
     }
author	blake2-ppc <blake2-ppc>	2013-08-19 15:34:48 +0200
committer	blake2-ppc <blake2-ppc>	2013-08-19 16:11:45 +0200
commit	8fe83028870ac6ac48e99a38d2992bedc26ec0d7 (patch)
tree	821bd65530ba050829b9e7da1abd79bef75d7f4a /src
parent	30ab96b27229000d3754e7dee64fc431b5105150 (diff)
download	rust-8fe83028870ac6ac48e99a38d2992bedc26ec0d7.tar.gz rust-8fe83028870ac6ac48e99a38d2992bedc26ec0d7.zip