about summary refs log tree commit diff
diff options
context:
space:
mode:
author Kevin Cantu <me@kevincantu.org> 2012-02-01 20:31:01 -0800
committer Brian Anderson <banderson@mozilla.com> 2012-02-07 16:25:35 -0800
commit a3f5626ad1b5bc47ceddfb0d600cf6fd8a6dad8c (patch)
tree 9615505c4b94fd5e84128a40b21d5917e91a75bb
parent 159aebc28bdd3e7667cb269d64dee844699dc3b0 (diff)
download rust-a3f5626ad1b5bc47ceddfb0d600cf6fd8a6dad8c.tar.gz
rust-a3f5626ad1b5bc47ceddfb0d600cf6fd8a6dad8c.zip
String split renaming:
* Renamed str::split -> str::split_byte
* Renamed str::splitn -> str::splitn_byte
* Renamed str::split_func -> str::split
* Renamed str::splitn_chars_iter -> str::splitn_char_iter
  (str::split_char keeps its name)
* Renamed str::split_chars_iter -> str::split_char_iter
* Added u8::is_ascii
* Fixed the behavior of str::split_str, so that it matches split_char
  and split (i.e. ["", "XXX", "YYY", ""] == split_str(".XXX.YYY.", "."))
* Fixed str::split_byte and str::splitn_byte so that they handle
  splitting UTF-8 strings on a given UTF-8/ASCII byte and also handle ""
  as the others do
-rw-r--r--  src/cargo/pgp.rs  2
-rw-r--r--  src/comp/back/link.rs  5
-rw-r--r--  src/comp/back/rpath.rs  4
-rw-r--r--  src/comp/metadata/cstore.rs  2
-rw-r--r--  src/compiletest/runtest.rs  8
-rw-r--r--  src/libcore/str.rs  293
-rw-r--r--  src/libcore/u8.rs  3
-rw-r--r--  src/libstd/fs.rs  13
-rw-r--r--  src/libstd/net.rs  3
-rw-r--r--  src/test/bench/sudoku.rs  3
10 files changed, 192 insertions, 144 deletions
diff --git a/src/cargo/pgp.rs b/src/cargo/pgp.rs
index 928ff88b58d..b22a225e46b 100644
--- a/src/cargo/pgp.rs
+++ b/src/cargo/pgp.rs
@@ -94,7 +94,7 @@ fn verify(root: str, data: str, sig: str, keyfp: str) -> bool {
     let p = gpg(["--homedir", path, "--with-fingerprint", "--verify", sig,
                  data]);
     let res = "Primary key fingerprint: " + keyfp;
-    for line in str::split(p.err, '\n' as u8) {
+    for line in str::split_byte(p.err, '\n' as u8) {
         if line == res {
             ret true;
         }
diff --git a/src/comp/back/link.rs b/src/comp/back/link.rs
index f2efb9cdc6c..68810fcd81b 100644
--- a/src/comp/back/link.rs
+++ b/src/comp/back/link.rs
@@ -443,7 +443,8 @@ fn build_link_meta(sess: session, c: ast::crate, output: str,
               none {
                 let name =
                     {
-                        let os = str::split(fs::basename(output), '.' as u8);
+                        let os = str::split_byte(
+                                   fs::basename(output), '.' as u8);
                         if (vec::len(os) < 2u) {
                             sess.fatal(#fmt("Output file name %s doesn't\
                               appear to have an extension", output));
@@ -578,7 +579,7 @@ fn link_binary(sess: session,
             } else { ret filename; }
         };
         fn rmext(filename: str) -> str {
-            let parts = str::split(filename, '.' as u8);
+            let parts = str::split_byte(filename, '.' as u8);
             vec::pop(parts);
             ret str::connect(parts, ".");
         }
diff --git a/src/comp/back/rpath.rs b/src/comp/back/rpath.rs
index a2030e10fab..27eb9933275 100644
--- a/src/comp/back/rpath.rs
+++ b/src/comp/back/rpath.rs
@@ -128,8 +128,8 @@ fn get_relative_to(abs1: fs::path, abs2: fs::path) -> fs::path {
            abs1, abs2);
     let normal1 = fs::normalize(abs1);
     let normal2 = fs::normalize(abs2);
-    let split1 = str::split(normal1, os_fs::path_sep as u8);
-    let split2 = str::split(normal2, os_fs::path_sep as u8);
+    let split1 = str::split_byte(normal1, os_fs::path_sep as u8);
+    let split2 = str::split_byte(normal2, os_fs::path_sep as u8);
     let len1 = vec::len(split1);
     let len2 = vec::len(split2);
     assert len1 > 0u;
diff --git a/src/comp/metadata/cstore.rs b/src/comp/metadata/cstore.rs
index 2201388897b..c6e44bfed05 100644
--- a/src/comp/metadata/cstore.rs
+++ b/src/comp/metadata/cstore.rs
@@ -120,7 +120,7 @@ fn get_used_libraries(cstore: cstore) -> [str] {
 }
 
 fn add_used_link_args(cstore: cstore, args: str) {
-    p(cstore).used_link_args += str::split(args, ' ' as u8);
+    p(cstore).used_link_args += str::split_byte(args, ' ' as u8);
 }
 
 fn get_used_link_args(cstore: cstore) -> [str] {
diff --git a/src/compiletest/runtest.rs b/src/compiletest/runtest.rs
index a3920ccb184..375625fc794 100644
--- a/src/compiletest/runtest.rs
+++ b/src/compiletest/runtest.rs
@@ -199,7 +199,7 @@ fn check_error_patterns(props: test_props,
 
     let next_err_idx = 0u;
     let next_err_pat = props.error_patterns[next_err_idx];
-    for line: str in str::split(procres.stdout, '\n' as u8) {
+    for line: str in str::split_byte(procres.stdout, '\n' as u8) {
         if str::find(line, next_err_pat) > 0 {
             #debug("found error pattern %s", next_err_pat);
             next_err_idx += 1u;
@@ -246,7 +246,7 @@ fn check_expected_errors(expected_errors: [errors::expected_error],
     //    filename:line1:col1: line2:col2: *warning:* msg
     // where line1:col1: is the starting point, line2:col2:
     // is the ending point, and * represents ANSI color codes.
-    for line: str in str::split(procres.stdout, '\n' as u8) {
+    for line: str in str::split_byte(procres.stdout, '\n' as u8) {
         let was_expected = false;
         vec::iteri(expected_errors) {|i, ee|
             if !found_flags[i] {
@@ -349,7 +349,7 @@ fn split_maybe_args(argstr: option<str>) -> [str] {
     }
 
     alt argstr {
-      option::some(s) { rm_whitespace(str::split(s, ' ' as u8)) }
+      option::some(s) { rm_whitespace(str::split_byte(s, ' ' as u8)) }
       option::none { [] }
     }
 }
@@ -411,7 +411,7 @@ fn output_base_name(config: config, testfile: str) -> str {
     let base = config.build_base;
     let filename =
         {
-            let parts = str::split(fs::basename(testfile), '.' as u8);
+            let parts = str::split_byte(fs::basename(testfile), '.' as u8);
             parts = vec::slice(parts, 0u, vec::len(parts) - 1u);
             str::connect(parts, ".")
         };
diff --git a/src/libcore/str.rs b/src/libcore/str.rs
index 88b9ff5d283..16cc0fddf2b 100644
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -38,10 +38,10 @@ export
    chars,
    substr,
    slice,
+   split_byte,
+   splitn_byte,
    split,
-   splitn,
    split_str,
-   split_func,
    split_char,
    lines,
    lines_any,
@@ -63,8 +63,8 @@ export
    map,
    bytes_iter,
    chars_iter,
-   split_chars_iter,
-   splitn_chars_iter,
+   split_char_iter,
+   splitn_char_iter,
    words_iter,
    lines_iter,
 
@@ -397,8 +397,6 @@ fn bytes(s: str) -> [u8] unsafe {
 Function: chars
 
 Convert a string to a vector of characters
-
-FIXME: rename to 'chars'
 */
 fn chars(s: str) -> [char] {
     let buf: [char] = [];
@@ -446,108 +444,109 @@ fn slice(s: str, begin: uint, end: uint) -> str {
     from_chars(vec::slice(chars(s), begin, end))
 }
 
-/*
-Function: split
-
-Split a string at each occurance of a given separator
-
-Returns:
+// Function: split_byte
+//
+// Splits a string into substrings at each occurrence of a given byte
+//
+// The byte must be a valid UTF-8/ASCII byte
+fn split_byte(ss: str, sep: u8) -> [str] unsafe {
+    // still safe if we only split on an ASCII byte
+    assert u8::is_ascii(sep);
+
+    let vv = [];
+    let start = 0u, current = 0u;
+
+    str::bytes_iter(ss) {|cc|
+        if sep == cc {
+            vec::push(vv, str::unsafe::slice_bytes(ss, start, current));
+            start = current + 1u;
+        }
+        current += 1u;
+    }
 
-A vector containing all the strings between each occurance of the separator
+    vec::push(vv, str::unsafe::slice_bytes(ss, start, current));
+    ret vv;
+}
 
-FIXME: should be renamed to split_byte
-*/
-fn split(s: str, sep: u8) -> [str] {
-    let v: [str] = [];
-    let accum: str = "";
-    let ends_with_sep: bool = false;
-    for c: u8 in s {
-        if c == sep {
-            v += [accum];
-            accum = "";
-            ends_with_sep = true;
-        } else { accum += from_byte(c); ends_with_sep = false; }
+// Function: splitn_byte
+//
+// Splits a string into substrings at each occurrence of a given byte
+// up to 'count' times
+//
+// The byte must be a valid UTF-8/ASCII byte
+fn splitn_byte(ss: str, sep: u8, count: uint) -> [str] unsafe {
+    // still safe if we only split on an ASCII byte
+    assert u8::is_ascii(sep);
+
+    let vv = [];
+    let start = 0u, current = 0u, len = byte_len(ss);
+    let splits_done = 0u;
+
+    while splits_done < count && current < len {
+        if sep == ss[current] {
+            vec::push(vv, str::unsafe::slice_bytes(ss, start, current));
+            start = current + 1u;
+            splits_done += 1u;
+        }
+        current += 1u;
     }
-    if byte_len(accum) != 0u || ends_with_sep { v += [accum]; }
-    ret v;
+
+    vec::push(vv, str::unsafe::slice_bytes(ss, start, len));
+    ret vv;
 }
 
 /*
-Function: splitn
-
-Split a string at each occurance of a given separator up to count times.
+Function: split_str
 
-Returns:
+Splits a string into a vector of the substrings separated by a given string
 
-A vector containing all the strings between each occurance of the separator
+Note that this has recently been changed.  For example:
+>  assert ["", "XXX", "YYY", ""] == split_str(".XXX.YYY.", ".")
 
-FIXME: rename to 'splitn_char'
+FIXME: Boyer-Moore variation
 */
-fn splitn(s: str, sep: u8, count: uint) -> [str] {
-    let v = [];
-    let accum = "";
-    let n = count;
-    let ends_with_sep: bool = false;
-    for c in s {
-        if n > 0u && c == sep {
-            n -= 1u;
-            v += [accum];
-            accum = "";
-            ends_with_sep = true;
-        } else { accum += from_byte(c); ends_with_sep = false; }
-    }
-    if byte_len(accum) != 0u || ends_with_sep { v += [accum]; }
-    ret v;
-}
-
-/*
-Function: split_str
+fn split_str(ss: str, sep: str) -> [str] unsafe {
+    // unsafe is justified: we are splitting
+    // UTF-8 with UTF-8, so the results will be OK
 
-Splits a string at each occurrence of the given separator string. Empty
-leading fields are suppressed, and empty trailing fields are preserved.
+    let sep_len = str::byte_len(sep);
+    assert sep_len > 0u;
+    let vv = [];
+    let start = 0u, start_match = 0u, current = 0u, matching = 0u;
 
-Returns:
+    str::bytes_iter(ss) {|cc|
+        if sep[matching] == cc {
+            matching += 1u;
+        } else {
+            start_match += 1u;
+        }
 
-A vector containing all the strings between each occurrence of the separator.
+        if matching == sep_len {
+            // found a separator
+            // push whatever is before it, including ""
+            vec::push(vv, str::unsafe::slice_bytes(ss, start, start_match));
 
-FIXME: should behave like split and split_char:
-         assert ["", "XXX", "YYY", ""] == split_str(".XXX.YYY.", ".");
-*/
-fn split_str(s: str, sep: str) -> [str] {
-    assert byte_len(sep) > 0u;
-    let v: [str] = [], accum = [], sep_match = 0u, leading = true;
-    for c: u8 in s {
-        // Did we match the entire separator?
-        if sep_match == byte_len(sep) {
-            if !leading { vec::push(v, from_bytes(accum)); }
-            accum = [];
-            sep_match = 0u;
+            // reset cursors and counters
+            start = current + 1u;
+            start_match = current + 1u;
+            matching = 0u;
         }
 
-        if c == sep[sep_match] {
-            sep_match += 1u;
-        } else {
-            sep_match = 0u;
-            vec::push(accum, c);
-            leading = false;
-        }
+        current += 1u;
     }
 
-    if vec::len(accum) > 0u { vec::push(v, from_bytes(accum)); }
-    if sep_match == byte_len(sep) { vec::push(v, ""); }
-
-    ret v;
+    // whether we have a "", or something meaningful, push it
+    vec::push(vv, str::unsafe::slice_bytes(ss, start, current));
+    ret vv;
 }
 
 /*
-Function: split_func
+Function: split
 
-Splits a string into substrings using a function
+Splits a string into substrings using a character function
 (unicode safe)
-
-FIXME: rename to 'split'
 */
-fn split_func(ss: str, sepfn: fn(cc: char)->bool) -> [str] {
+fn split(ss: str, sepfn: fn(cc: char)->bool) -> [str] {
     let vv: [str] = [];
     let accum: str = "";
     let ends_with_sep: bool = false;
@@ -573,9 +572,11 @@ fn split_func(ss: str, sepfn: fn(cc: char)->bool) -> [str] {
 Function: split_char
 
 Splits a string into a vector of the substrings separated by a given character
+
+FIXME: also add  splitn_char
 */
 fn split_char(ss: str, cc: char) -> [str] {
-   split_func(ss, {|kk| kk == cc})
+   split(ss, {|kk| kk == cc})
 }
 
 /*
@@ -585,7 +586,7 @@ Splits a string into a vector of the substrings
 separated by LF ('\n')
 */
 fn lines(ss: str) -> [str] {
-    split_func(ss, {|cc| cc == '\n'})
+    split(ss, {|cc| cc == '\n'})
 }
 
 /*
@@ -605,7 +606,7 @@ Splits a string into a vector of the substrings
 separated by whitespace
 */
 fn words(ss: str) -> [str] {
-    ret vec::filter( split_func(ss, {|cc| char::is_whitespace(cc)}),
+    ret vec::filter( split(ss, {|cc| char::is_whitespace(cc)}),
                      {|w| 0u < str::char_len(w)});
 }
 
@@ -794,25 +795,25 @@ fn chars_iter(s: str, it: fn(char)) {
 }
 
 /*
-Function: split_chars_iter
+Function: split_char_iter
 
 Apply a function to each substring after splitting
 by character
 */
-fn split_chars_iter(ss: str, cc: char, ff: fn(&&str)) {
+fn split_char_iter(ss: str, cc: char, ff: fn(&&str)) {
    vec::iter(split_char(ss, cc), ff)
 }
 
 /*
-Function: splitn_chars_iter
+Function: splitn_char_iter
 
 Apply a function to each substring after splitting
 by character, up to nn times
 
 FIXME: make this use chars when splitn/splitn_char is fixed
 */
-fn splitn_chars_iter(ss: str, sep: u8, count: uint, ff: fn(&&str)) {
-   vec::iter(splitn(ss, sep, count), ff)
+fn splitn_char_iter(ss: str, sep: u8, count: uint, ff: fn(&&str)) unsafe {
+   vec::iter(splitn_byte(ss, sep, count), ff)
 }
 
 /*
@@ -880,7 +881,7 @@ Returns:
 
 The index of the first occurance of `needle`, or -1 if not found.
 
-FIXME: UTF-8?
+FIXME: UTF-8
 */
 fn find(haystack: str, needle: str) -> int {
     let haystack_len: int = byte_len(haystack) as int;
@@ -960,12 +961,10 @@ Section: String properties
 Function: is_ascii
 
 Determines if a string contains only ASCII characters
-
-FIXME: possibly implement using char::is_ascii when it exists
 */
 fn is_ascii(s: str) -> bool {
     let i: uint = byte_len(s);
-    while i > 0u { i -= 1u; if s[i] & 128u8 != 0u8 { ret false; } }
+    while i > 0u { i -= 1u; if !u8::is_ascii(s[i]) { ret false; } }
     ret true;
 }
 
@@ -997,7 +996,7 @@ Function: byte_len
 
 Returns the length in bytes of a string
 
-FIXME: rename to 'len_bytes'?
+FIXME: rename to 'len_bytes'
 */
 pure fn byte_len(s: str) -> uint unsafe {
     let v: [u8] = ::unsafe::reinterpret_cast(s);
@@ -1013,7 +1012,7 @@ Function: char_len
 
 Count the number of unicode characters in a string
 
-FIXME: rename to 'len_chars'?
+FIXME: rename to 'len_chars'
 */
 fn char_len(s: str) -> uint {
     ret char_len_range(s, 0u, byte_len(s));
@@ -1315,7 +1314,6 @@ fn reserve(&ss: str, nn: uint) {
 // These functions may create invalid UTF-8 strings and eat your baby.
 mod unsafe {
    export
-      // UNSAFE
       from_bytes,
       from_byte,
       slice_bytes,
@@ -1339,7 +1337,7 @@ mod unsafe {
    unsafe fn from_byte(u: u8) -> str { unsafe::from_bytes([u]) }
 
    /*
-   Function: slice
+   Function: slice_bytes
 
    Takes a bytewise (not UTF-8) slice from a string.
    Returns the substring from [`begin`..`end`).
@@ -1374,7 +1372,6 @@ mod unsafe {
        assert (end <= byte_len(s));
        ret slice_bytes(s, begin, end);
    }
-
 }
 
 
@@ -1418,25 +1415,39 @@ mod tests {
     }
 
     #[test]
-    fn test_split() {
+    fn test_split_byte() {
         fn t(s: str, c: char, u: [str]) {
-            log(debug, "split: " + s);
-            let v = split(s, c as u8);
-            #debug("split to: ");
+            log(debug, "split_byte: " + s);
+            let v = split_byte(s, c as u8);
+            #debug("split_byte to: ");
             log(debug, v);
             assert (vec::all2(v, u, { |a,b| a == b }));
         }
         t("abc.hello.there", '.', ["abc", "hello", "there"]);
         t(".hello.there", '.', ["", "hello", "there"]);
         t("...hello.there.", '.', ["", "", "", "hello", "there", ""]);
+
+        assert ["", "", "", "hello", "there", ""]
+            == split_byte("...hello.there.", '.' as u8);
+
+        assert [""] == split_byte("", 'z' as u8);
+        assert ["",""] == split_byte("z", 'z' as u8);
+        assert ["ok"] == split_byte("ok", 'z' as u8);
     }
 
     #[test]
-    fn test_splitn() {
+    fn test_split_byte_2() {
+        let data = "ประเทศไทย中华Việt Nam";
+        assert ["ประเทศไทย中华", "iệt Nam"]
+            == split_byte(data, 'V' as u8);
+    }
+
+    #[test]
+    fn test_splitn_byte() {
         fn t(s: str, c: char, n: uint, u: [str]) {
-            log(debug, "splitn: " + s);
-            let v = splitn(s, c as u8, n);
-            #debug("split to: ");
+            log(debug, "splitn_byte: " + s);
+            let v = splitn_byte(s, c as u8, n);
+            #debug("split_byte to: ");
             log(debug, v);
             #debug("comparing vs. ");
             log(debug, u);
@@ -1450,6 +1461,20 @@ mod tests {
         t(".hello.there", '.', 1u, ["", "hello.there"]);
         t("...hello.there.", '.', 3u, ["", "", "", "hello.there."]);
         t("...hello.there.", '.', 5u, ["", "", "", "hello", "there", ""]);
+
+        assert [""] == splitn_byte("", 'z' as u8, 5u);
+        assert ["",""] == splitn_byte("z", 'z' as u8, 5u);
+        assert ["ok"] == splitn_byte("ok", 'z' as u8, 5u);
+        assert ["z"] == splitn_byte("z", 'z' as u8, 0u);
+        assert ["w.x.y"] == splitn_byte("w.x.y", '.' as u8, 0u);
+        assert ["w","x.y"] == splitn_byte("w.x.y", '.' as u8, 1u);
+    }
+
+    #[test]
+    fn test_splitn_byte_2() {
+        let data = "ประเทศไทย中华Việt Nam";
+        assert ["ประเทศไทย中华", "iệt Nam"]
+            == splitn_byte(data, 'V' as u8, 1u);
     }
 
     #[test]
@@ -1459,34 +1484,48 @@ mod tests {
             assert eq(v[i], k);
         }
 
-        //FIXME: should behave like split and split_char:
-        //assert ["", "XXX", "YYY", ""] == split_str(".XXX.YYY.", ".");
-
         t("abc::hello::there", "::", 0, "abc");
         t("abc::hello::there", "::", 1, "hello");
         t("abc::hello::there", "::", 2, "there");
-        t("::hello::there", "::", 0, "hello");
+        t("::hello::there", "::", 0, "");
         t("hello::there::", "::", 2, "");
-        t("::hello::there::", "::", 2, "");
-        t("ประเทศไทย中华Việt Nam", "中华", 0, "ประเทศไทย");
-        t("ประเทศไทย中华Việt Nam", "中华", 1, "Việt Nam");
+        t("::hello::there::", "::", 3, "");
+
+        let data = "ประเทศไทย中华Việt Nam";
+        assert ["ประเทศไทย", "Việt Nam"]
+            == split_str (data, "中华");
+
+        assert ["", "XXX", "YYY", ""]
+            == split_str("zzXXXzzYYYzz", "zz");
+
+        assert ["zz", "zYYYz"]
+            == split_str("zzXXXzYYYz", "XXX");
+
+
+        assert ["", "XXX", "YYY", ""] == split_str(".XXX.YYY.", ".");
+        assert [""] == split_str("", ".");
+        assert ["",""] == split_str("zz", "zz");
+        assert ["ok"] == split_str("ok", "z");
+        assert ["","z"] == split_str("zzz", "zz");
+        assert ["","","z"] == split_str("zzzzz", "zz");
     }
 
+
     #[test]
-    fn test_split_func () {
+    fn test_split () {
         let data = "ประเทศไทย中华Việt Nam";
         assert ["ประเทศไทย中", "Việt Nam"]
-            == split_func (data, {|cc| cc == '华'});
+            == split (data, {|cc| cc == '华'});
 
         assert ["", "", "XXX", "YYY", ""]
-            == split_func("zzXXXzYYYz", char::is_lowercase);
+            == split("zzXXXzYYYz", char::is_lowercase);
 
         assert ["zz", "", "", "z", "", "", "z"]
-            == split_func("zzXXXzYYYz", char::is_uppercase);
+            == split("zzXXXzYYYz", char::is_uppercase);
 
-        assert ["",""] == split_func("z", {|cc| cc == 'z'});
-        assert [""] == split_func("", {|cc| cc == 'z'});
-        assert ["ok"] == split_func("ok", {|cc| cc == 'z'});
+        assert ["",""] == split("z", {|cc| cc == 'z'});
+        assert [""] == split("", {|cc| cc == 'z'});
+        assert ["ok"] == split("ok", {|cc| cc == 'z'});
     }
 
     #[test]
@@ -1891,12 +1930,12 @@ mod tests {
     }
 
     #[test]
-    fn test_split_chars_iter() {
+    fn test_split_char_iter() {
         let data = "\nMary had a little lamb\nLittle lamb\n";
 
         let ii = 0;
 
-        split_chars_iter(data, ' ') {|xx|
+        split_char_iter(data, ' ') {|xx|
             alt ii {
               0 { assert "\nMary" == xx; }
               1 { assert "had"    == xx; }
@@ -1909,12 +1948,12 @@ mod tests {
     }
 
     #[test]
-    fn test_splitn_chars_iter() {
+    fn test_splitn_char_iter() {
         let data = "\nMary had a little lamb\nLittle lamb\n";
 
         let ii = 0;
 
-        splitn_chars_iter(data, ' ' as u8, 2u) {|xx|
+        splitn_char_iter(data, ' ' as u8, 2u) {|xx|
             alt ii {
               0 { assert "\nMary" == xx; }
               1 { assert "had"    == xx; }
diff --git a/src/libcore/u8.rs b/src/libcore/u8.rs
index b025751020b..399f5654367 100644
--- a/src/libcore/u8.rs
+++ b/src/libcore/u8.rs
@@ -49,6 +49,9 @@ pure fn ge(x: u8, y: u8) -> bool { ret x >= y; }
 /* Predicate: gt */
 pure fn gt(x: u8, y: u8) -> bool { ret x > y; }
 
+/* Predicate: is_ascii */
+pure fn is_ascii(x: u8) -> bool { ret 0u8 == x & 128u8; }
+
 /*
 Function: range
 
diff --git a/src/libstd/fs.rs b/src/libstd/fs.rs
index 75bde25afb8..ef0ff7e6b57 100644
--- a/src/libstd/fs.rs
+++ b/src/libstd/fs.rs
@@ -252,12 +252,16 @@ the first element of the returned vector will be the drive letter
 followed by a colon.
 */
 fn split(p: path) -> [path] {
-    let split1 = str::split(p, os_fs::path_sep as u8);
+    // FIXME: use UTF-8 safe str, and/or various other string formats
+    let split1 = str::split_byte(p, os_fs::path_sep as u8);
     let split2 = [];
     for s in split1 {
-        split2 += str::split(s, os_fs::alt_path_sep as u8);
+        split2 += str::split_byte(s, os_fs::alt_path_sep as u8);
     }
-    ret split2;
+
+    // filter out ""
+    let split3 = vec::filter(split2, {|seg| "" != seg});
+    ret split3;
 }
 
 /*
@@ -270,9 +274,10 @@ path includes directory components then they are included in the filename part
 of the result pair.
 */
 fn splitext(p: path) -> (str, str) {
+    // FIXME: use UTF-8 safe str, and/or various other string formats
     if str::is_empty(p) { ("", "") }
     else {
-        let parts = str::split(p, '.' as u8);
+        let parts = str::split_byte(p, '.' as u8);
         if vec::len(parts) > 1u {
             let base = str::connect(vec::init(parts), ".");
             let ext = "." + option::get(vec::last(parts));
diff --git a/src/libstd/net.rs b/src/libstd/net.rs
index 706221d95c2..68f8f29748a 100644
--- a/src/libstd/net.rs
+++ b/src/libstd/net.rs
@@ -49,7 +49,8 @@ Failure:
 String must be a valid IPv4 address
 */
 fn parse_addr(ip: str) -> ip_addr {
-    let parts = vec::map(str::split(ip, "."[0]), {|s| uint::from_str(s) });
+    let parts = vec::map(str::split_byte(ip, "."[0]),
+                         {|s| uint::from_str(s) });
     if vec::len(parts) != 4u { fail "Too many dots in IP address"; }
     for i in parts { if i > 255u { fail "Invalid IP Address part."; } }
     ipv4(parts[0] as u8, parts[1] as u8, parts[2] as u8, parts[3] as u8)
diff --git a/src/test/bench/sudoku.rs b/src/test/bench/sudoku.rs
index 7a068eb51b2..61a083f1f53 100644
--- a/src/test/bench/sudoku.rs
+++ b/src/test/bench/sudoku.rs
@@ -33,8 +33,7 @@ fn read_grid(f: io::reader) -> grid_t {
 
     let g = vec::init_fn(10u, {|_i| vec::init_elt_mut(10u, 0 as u8) });
     while !f.eof() {
-        // FIXME: replace with unicode compliant call
-        let comps = str::split(str::trim(f.read_line()), ',' as u8);
+        let comps = str::split_byte(str::trim(f.read_line()), ',' as u8);
         if vec::len(comps) >= 3u {
             let row     = uint::from_str(comps[0]) as u8;
             let col     = uint::from_str(comps[1]) as u8;