about summary refs log tree commit diff
diff options
context:
space:
mode:
authorPatrick Walton <pcwalton@mimiga.net>2011-12-15 16:56:33 -0800
committerPatrick Walton <pcwalton@mimiga.net>2011-12-15 17:39:53 -0800
commitfd1dd76977f808ec40796d6ca66e4bf98de6de9c (patch)
tree8cea66fdfdd74af92cd74fb472f82854af538c01
parent1f8f6054d283f7a2d5e5277f422463bfaecbe139 (diff)
downloadrust-fd1dd76977f808ec40796d6ca66e4bf98de6de9c.tar.gz
rust-fd1dd76977f808ec40796d6ca66e4bf98de6de9c.zip
stdlib: Add a str::split_str() to split on a delimiter string of any length
-rw-r--r--src/libcore/str.rs46
-rw-r--r--src/test/stdtest/str.rs14
2 files changed, 55 insertions, 5 deletions
diff --git a/src/libcore/str.rs b/src/libcore/str.rs
index 31c3516b862..8a532fcc753 100644
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -6,11 +6,11 @@ String manipulation.
 
 export eq, lteq, hash, is_empty, is_not_empty, is_whitespace, byte_len,
        byte_len_range, index,
-       rindex, find, starts_with, ends_with, substr, slice, split, concat,
-       connect, to_upper, replace, char_slice, trim_left, trim_right, trim,
-       unshift_char, shift_char, pop_char, push_char, is_utf8, from_chars,
-       to_chars, char_len, char_len_range, char_at, bytes, is_ascii,
-       shift_byte, pop_byte,
+       rindex, find, starts_with, ends_with, substr, slice, split, split_str,
+       concat, connect, to_upper, replace, char_slice, trim_left, trim_right,
+       trim, unshift_char, shift_char, pop_char, push_char, is_utf8,
+       from_chars, to_chars, char_len, char_len_range, char_at, bytes,
+       is_ascii, shift_byte, pop_byte,
        unsafe_from_byte, unsafe_from_bytes, from_char, char_range_at,
        str_from_cstr, sbuf, as_buf, push_byte, utf8_char_width, safe_slice,
        contains, iter_chars, loop_chars, loop_chars_sub,
@@ -745,6 +745,42 @@ fn split(s: str, sep: u8) -> [str] {
 }
 
 /*
+Function: split_str
+
+Splits a string at each occurrence of the given separator string. Empty
+leading fields are suppressed, and empty trailing fields are preserved.
+Bytes that match only a proper prefix of the separator are ordinary field
+content and are kept in the field they occur in.
+
+Failure:
+
+Fails if `sep` is the empty string.
+
+Returns:
+
+A vector containing all the strings between each occurrence of the separator.
+*/
+fn split_str(s: str, sep: str) -> [str] {
+    let s_len = byte_len(s), sep_len = byte_len(sep);
+    assert sep_len > 0u;
+    let v: [str] = [], accum = "", leading = true;
+    // `pending` is true when the previous step consumed a whole separator
+    // and the field that preceded it (held in `accum`) is not yet emitted.
+    let i = 0u, pending = false;
+    while i < s_len {
+        // Did we match the entire separator on the previous step? Emit the
+        // field before it, unless only separators have been seen so far.
+        if pending {
+            if !leading { v += [accum]; }
+            accum = "";
+            pending = false;
+        }
+
+        // Count how many bytes of `sep` match `s` starting at offset `i`.
+        let matched = 0u;
+        while matched < sep_len && i + matched < s_len
+              && s[i + matched] == sep[matched] {
+            matched += 1u;
+        }
+
+        if matched == sep_len {
+            pending = true;
+            i += sep_len;
+        } else {
+            // No complete separator starts here, so this byte is field
+            // content. Advancing by a single byte (rather than by `matched`)
+            // keeps partially matched separator bytes in the field and
+            // still finds a separator that begins inside the failed match.
+            accum += unsafe_from_byte(s[i]);
+            leading = false;
+            i += 1u;
+        }
+    }
+
+    // Emit the last field only when it is non-empty; a string that ends
+    // with a separator contributes one empty trailing field instead.
+    if byte_len(accum) > 0u { v += [accum]; }
+    if pending { v += [""]; }
+
+    ret v;
+}
+
+/*
 Function: concat
 
 Concatenate a vector of strings
diff --git a/src/test/stdtest/str.rs b/src/test/stdtest/str.rs
index d540f32ea51..b5199048364 100644
--- a/src/test/stdtest/str.rs
+++ b/src/test/stdtest/str.rs
@@ -60,6 +60,20 @@ fn test_split() {
 }
 
 #[test]
+fn test_split_str() {
+    // Splits `text` on `delim` and asserts that the field at position
+    // `idx` of the result is equal to `expected`.
+    fn check_field(text: str, delim: str, idx: int, expected: str) {
+        let fields = str::split_str(text, delim);
+        assert str::eq(fields[idx], expected);
+    }
+
+    // Separators only between fields.
+    check_field("abc::hello::there", "::", 0, "abc");
+    check_field("abc::hello::there", "::", 1, "hello");
+    check_field("abc::hello::there", "::", 2, "there");
+
+    // A leading separator does not produce an empty first field.
+    check_field("::hello::there", "::", 0, "hello");
+
+    // A trailing separator produces an empty last field.
+    check_field("hello::there::", "::", 2, "");
+    check_field("::hello::there::", "::", 2, "");
+}
+
+#[test]
 fn test_find() {
     fn t(haystack: str, needle: str, i: int) {
         let j: int = str::find(haystack, needle);