about summary refs log tree commit diff
diff options
context:
space:
mode:
author Marvin Löbel <loebel.marvin@gmail.com> 2013-09-09 01:45:14 +0200
committer Marvin Löbel <loebel.marvin@gmail.com> 2013-09-09 15:07:22 +0200
commit 3a3934244c53cfe3024431cec934b2e2901d50d0 (patch)
tree d6982996f9bf6dbba425fd38fa3e8fdd1ec3c65e
parent fd49f6dce11033496a87d08d66114a86b2d85d59 (diff)
download rust-3a3934244c53cfe3024431cec934b2e2901d50d0.tar.gz
rust-3a3934244c53cfe3024431cec934b2e2901d50d0.zip
Some work on std::ascii: Marked unsafe function unsafe, added moving implementations
-rw-r--r-- src/libstd/str/ascii.rs 97
1 files changed, 77 insertions, 20 deletions
diff --git a/src/libstd/str/ascii.rs b/src/libstd/str/ascii.rs
index 2dd93feef05..ec2d7566177 100644
--- a/src/libstd/str/ascii.rs
+++ b/src/libstd/str/ascii.rs
@@ -16,9 +16,8 @@ use str::StrSlice;
 use str::OwnedStr;
 use container::Container;
 use cast;
-use ptr;
 use iter::Iterator;
-use vec::{CopyableVector, ImmutableVector};
+use vec::{CopyableVector, ImmutableVector, MutableVector};
 use to_bytes::IterBytes;
 use option::{Some, None};
 
@@ -61,7 +60,7 @@ impl Ascii {
 impl ToStr for Ascii {
     #[inline]
     fn to_str(&self) -> ~str {
-        // self.chr is allways a valid utf8 byte, no need for the check
+        // self.chr is always a valid utf8 byte, no need for the check
         unsafe { str::raw::from_byte(self.chr) }
     }
 }
@@ -253,16 +252,29 @@ impl ToBytesConsume for ~[Ascii] {
     }
 }
 
-/// Extension methods for ASCII-subset only operations on strings
-pub trait StrAsciiExt {
+/// Extension methods for ASCII-subset only operations on owned strings
+pub trait OwnedStrAsciiExt {
     /// Convert the string to ASCII upper case:
     /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
     /// but non-ASCII letters are unchanged.
-    fn to_ascii_upper(&self) -> ~str;
+    fn into_ascii_upper(self) -> ~str;
 
     /// Convert the string to ASCII lower case:
     /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
     /// but non-ASCII letters are unchanged.
+    fn into_ascii_lower(self) -> ~str;
+}
+
+/// Extension methods for ASCII-subset only operations on string slices
+pub trait StrAsciiExt {
+    /// Makes a copy of the string in ASCII upper case:
+    /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
+    /// but non-ASCII letters are unchanged.
+    fn to_ascii_upper(&self) -> ~str;
+
+    /// Makes a copy of the string in ASCII lower case:
+    /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z',
+    /// but non-ASCII letters are unchanged.
     fn to_ascii_lower(&self) -> ~str;
 
     /// Check that two strings are an ASCII case-insensitive match.
@@ -274,12 +286,12 @@ pub trait StrAsciiExt {
 impl<'self> StrAsciiExt for &'self str {
     #[inline]
     fn to_ascii_upper(&self) -> ~str {
-        map_bytes(*self, ASCII_UPPER_MAP)
+        unsafe { str_copy_map_bytes(*self, ASCII_UPPER_MAP) }
     }
 
     #[inline]
     fn to_ascii_lower(&self) -> ~str {
-        map_bytes(*self, ASCII_LOWER_MAP)
+        unsafe { str_copy_map_bytes(*self, ASCII_LOWER_MAP) }
     }
 
     #[inline]
@@ -289,20 +301,34 @@ impl<'self> StrAsciiExt for &'self str {
     }
 }
 
+impl OwnedStrAsciiExt for ~str {
+    #[inline]
+    fn into_ascii_upper(self) -> ~str {
+        unsafe { str_map_bytes(self, ASCII_UPPER_MAP) }
+    }
+
+    #[inline]
+    fn into_ascii_lower(self) -> ~str {
+        unsafe { str_map_bytes(self, ASCII_LOWER_MAP) }
+    }
+}
+
 #[inline]
-fn map_bytes(string: &str, map: &'static [u8]) -> ~str {
-    let len = string.len();
-    let mut result = str::with_capacity(len);
-    unsafe {
-        do result.as_mut_buf |mut buf, _| {
-            for c in string.as_bytes().iter() {
-                *buf = map[*c];
-                buf = ptr::mut_offset(buf, 1)
-            }
-        }
-        str::raw::set_len(&mut result, len);
+unsafe fn str_map_bytes(string: ~str, map: &'static [u8]) -> ~str {
+    let mut bytes = string.into_bytes();
+
+    for b in bytes.mut_iter() {
+        *b = map[*b];
     }
-    result
+
+    str::raw::from_utf8_owned(bytes)
+}
+
+#[inline]
+unsafe fn str_copy_map_bytes(string: &str, map: &'static [u8]) -> ~str {
+    let bytes = string.byte_iter().map(|b| map[b]).to_owned_vec();
+
+    str::raw::from_utf8_owned(bytes)
 }
 
 static ASCII_LOWER_MAP: &'static [u8] = &[
@@ -495,6 +521,37 @@ mod tests {
     }
 
     #[test]
+    fn test_into_ascii_upper() {
+        assert_eq!((~"url()URL()uRl()ürl").into_ascii_upper(), ~"URL()URL()URL()üRL");
+        assert_eq!((~"hıKß").into_ascii_upper(), ~"HıKß");
+
+        let mut i = 0;
+        while i <= 500 {
+            let upper = if 'a' as u32 <= i && i <= 'z' as u32 { i + 'A' as u32 - 'a' as u32 }
+                        else { i };
+            assert_eq!(from_char(from_u32(i).unwrap()).into_ascii_upper(),
+                       from_char(from_u32(upper).unwrap()))
+            i += 1;
+        }
+    }
+
+    #[test]
+    fn test_into_ascii_lower() {
+        assert_eq!((~"url()URL()uRl()Ürl").into_ascii_lower(), ~"url()url()url()Ürl");
+        // Dotted capital I, Kelvin sign, Sharp S.
+        assert_eq!((~"HİKß").into_ascii_lower(), ~"hİKß");
+
+        let mut i = 0;
+        while i <= 500 {
+            let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 }
+                        else { i };
+            assert_eq!(from_char(from_u32(i).unwrap()).into_ascii_lower(),
+                       from_char(from_u32(lower).unwrap()))
+            i += 1;
+        }
+    }
+
+    #[test]
     fn test_eq_ignore_ascii_case() {
         assert!("url()URL()uRl()Ürl".eq_ignore_ascii_case("url()url()url()Ürl"));
         assert!(!"Ürl".eq_ignore_ascii_case("ürl"));