about summary refs log tree commit diff
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2014-06-30 19:26:35 +0000
committer bors <bors@rust-lang.org> 2014-06-30 19:26:35 +0000
commit a345c543344b8ff4f3aeecb816856101443bf907 (patch)
tree 3b17729e65b86e277270cf316892f7e65443018d
parent 94343da1bdf4de84d0ece90d920400697ad7e143 (diff)
parent 3d84b4be3d7691026993d5c733bc26cc637e7c50 (diff)
download rust-a345c543344b8ff4f3aeecb816856101443bf907.tar.gz
download rust-a345c543344b8ff4f3aeecb816856101443bf907.zip
auto merge of #14613 : schmee/rust/utf16-iterator, r=huonw
Closes #14358.

~~The tests are not yet moved to `utf16_units`, so this probably won't compile. I'm submitting this PR anyway so it can be reviewed and since it was mentioned in #14611.~~ EDIT: Tests now use `utf16_units`.

This deprecates `.to_utf16`. `x.to_utf16()` should be replaced by either `x.utf16_units().collect::<Vec<u16>>()` (the type annotation may be optional), or just `x.utf16_units()` directly, if it can be used in an iterator context.

[breaking-change]

cc @huonw
-rw-r--r-- src/libcollections/str.rs 21
-rw-r--r-- src/libcore/str.rs 46
-rw-r--r-- src/libnative/io/c_win32.rs 4
-rw-r--r-- src/libnative/io/file_win32.rs 2
-rw-r--r-- src/libnative/io/process.rs 14
-rw-r--r-- src/librustdoc/flock.rs 3
-rw-r--r-- src/libstd/dynamic_lib.rs 7
-rw-r--r-- src/libstd/os.rs 14
8 files changed, 84 insertions, 27 deletions
diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs
index b5424d1683f..10309adef20 100644
--- a/src/libcollections/str.rs
+++ b/src/libcollections/str.rs
@@ -803,15 +803,9 @@ pub trait StrAllocating: Str {
     }
 
     /// Converts to a vector of `u16` encoded as UTF-16.
+    #[deprecated = "use `utf16_units` instead"]
     fn to_utf16(&self) -> Vec<u16> {
-        let me = self.as_slice();
-        let mut u = Vec::new();
-        for ch in me.chars() {
-            let mut buf = [0u16, ..2];
-            let n = ch.encode_utf16(buf /* as mut slice! */);
-            u.push_all(buf.slice_to(n));
-        }
-        u
+        self.as_slice().utf16_units().collect::<Vec<u16>>()
     }
 
     /// Given a string, make a new string with repeated copies of it.
@@ -1619,14 +1613,17 @@ mod tests {
 
         for p in pairs.iter() {
             let (s, u) = (*p).clone();
+            let s_as_utf16 = s.as_slice().utf16_units().collect::<Vec<u16>>();
+            let u_as_string = from_utf16(u.as_slice()).unwrap();
+
             assert!(is_utf16(u.as_slice()));
-            assert_eq!(s.to_utf16(), u);
+            assert_eq!(s_as_utf16, u);
 
-            assert_eq!(from_utf16(u.as_slice()).unwrap(), s);
+            assert_eq!(u_as_string, s);
             assert_eq!(from_utf16_lossy(u.as_slice()), s);
 
-            assert_eq!(from_utf16(s.to_utf16().as_slice()).unwrap(), s);
-            assert_eq!(from_utf16(u.as_slice()).unwrap().to_utf16(), u);
+            assert_eq!(from_utf16(s_as_utf16.as_slice()).unwrap(), s);
+            assert_eq!(u_as_string.as_slice().utf16_units().collect::<Vec<u16>>(), u);
         }
     }
 
diff --git a/src/libcore/str.rs b/src/libcore/str.rs
index de23e04393b..b336c57efa0 100644
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -16,6 +16,7 @@
 
 use mem;
 use char;
+use char::Char;
 use clone::Clone;
 use cmp;
 use cmp::{PartialEq, Eq};
@@ -24,7 +25,7 @@ use default::Default;
 use iter::{Filter, Map, Iterator};
 use iter::{DoubleEndedIterator, ExactSize};
 use iter::range;
-use num::Saturating;
+use num::{CheckedMul, Saturating};
 use option::{None, Option, Some};
 use raw::Repr;
 use slice::ImmutableVector;
@@ -557,6 +558,41 @@ impl<'a> Iterator<&'a str> for StrSplits<'a> {
     }
 }
 
+/// External iterator for a string's UTF-16 code units.
+/// Use with the `std::iter` module.
+#[deriving(Clone)]
+pub struct Utf16CodeUnits<'a> {
+    chars: Chars<'a>,
+    extra: u16
+}
+
+impl<'a> Iterator<u16> for Utf16CodeUnits<'a> {
+    #[inline]
+    fn next(&mut self) -> Option<u16> {
+        if self.extra != 0 {
+            let tmp = self.extra;
+            self.extra = 0;
+            return Some(tmp);
+        }
+
+        let mut buf = [0u16, ..2];
+        self.chars.next().map(|ch| {
+            let n = ch.encode_utf16(buf /* as mut slice! */);
+            if n == 2 { self.extra = buf[1]; }
+            buf[0]
+        })
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (uint, Option<uint>) {
+        let (low, high) = self.chars.size_hint();
+        // every char encodes to either one or two u16 code units,
+        // so this iterator is between 1 and 2 times as
+        // long as the underlying iterator.
+        (low, high.and_then(|n| n.checked_mul(&2)))
+    }
+}
+
 /*
 Section: Comparing strings
 */
@@ -1609,6 +1645,9 @@ pub trait StrSlice<'a> {
     /// and that it is not reallocated (e.g. by pushing to the
     /// string).
     fn as_ptr(&self) -> *const u8;
+
+    /// Return an iterator of `u16` over the string encoded as UTF-16.
+    fn utf16_units(&self) -> Utf16CodeUnits<'a>;
 }
 
 impl<'a> StrSlice<'a> for &'a str {
@@ -1957,6 +1996,11 @@ impl<'a> StrSlice<'a> for &'a str {
     fn as_ptr(&self) -> *const u8 {
         self.repr().data
     }
+
+    #[inline]
+    fn utf16_units(&self) -> Utf16CodeUnits<'a> {
+        Utf16CodeUnits{ chars: self.chars(), extra: 0}
+    }
 }
 
 impl<'a> Default for &'a str {
diff --git a/src/libnative/io/c_win32.rs b/src/libnative/io/c_win32.rs
index 7e58102d241..802526c9196 100644
--- a/src/libnative/io/c_win32.rs
+++ b/src/libnative/io/c_win32.rs
@@ -70,6 +70,7 @@ extern "system" {
 
 pub mod compat {
     use std::intrinsics::{atomic_store_relaxed, transmute};
+    use std::iter::Iterator;
     use libc::types::os::arch::extra::{LPCWSTR, HMODULE, LPCSTR, LPVOID};
 
     extern "system" {
@@ -82,7 +83,8 @@ pub mod compat {
     // layer (after it's loaded) shouldn't be any slower than a regular DLL
     // call.
     unsafe fn store_func(ptr: *mut uint, module: &str, symbol: &str, fallback: uint) {
-        let module = module.to_utf16().append_one(0);
+        let module: Vec<u16> = module.utf16_units().collect();
+        let module = module.append_one(0);
         symbol.with_c_str(|symbol| {
             let handle = GetModuleHandleW(module.as_ptr());
             let func: uint = transmute(GetProcAddress(handle, symbol));
diff --git a/src/libnative/io/file_win32.rs b/src/libnative/io/file_win32.rs
index 3195fa4f2d4..98553603313 100644
--- a/src/libnative/io/file_win32.rs
+++ b/src/libnative/io/file_win32.rs
@@ -255,7 +255,7 @@ impl Drop for Inner {
 
 pub fn to_utf16(s: &CString) -> IoResult<Vec<u16>> {
     match s.as_str() {
-        Some(s) => Ok(s.to_utf16().append_one(0)),
+        Some(s) => Ok(s.utf16_units().collect::<Vec<u16>>().append_one(0)),
         None => Err(IoError {
             code: libc::ERROR_INVALID_NAME as uint,
             extra: 0,
diff --git a/src/libnative/io/process.rs b/src/libnative/io/process.rs
index b1c0d9a1506..6fab73115cf 100644
--- a/src/libnative/io/process.rs
+++ b/src/libnative/io/process.rs
@@ -294,6 +294,8 @@ fn spawn_process_os(cfg: ProcessConfig,
     use libc::funcs::extra::msvcrt::get_osfhandle;
 
     use std::mem;
+    use std::iter::Iterator;
+    use std::str::StrSlice;
 
     if cfg.gid.is_some() || cfg.uid.is_some() {
         return Err(IoError {
@@ -328,7 +330,8 @@ fn spawn_process_os(cfg: ProcessConfig,
                         lpSecurityDescriptor: ptr::mut_null(),
                         bInheritHandle: 1,
                     };
-                    let filename = "NUL".to_utf16().append_one(0);
+                    let filename: Vec<u16> = "NUL".utf16_units().collect();
+                    let filename = filename.append_one(0);
                     *slot = libc::CreateFileW(filename.as_ptr(),
                                               access,
                                               libc::FILE_SHARE_READ |
@@ -371,7 +374,8 @@ fn spawn_process_os(cfg: ProcessConfig,
 
         with_envp(cfg.env, |envp| {
             with_dirp(cfg.cwd, |dirp| {
-                let mut cmd_str = cmd_str.to_utf16().append_one(0);
+                let mut cmd_str: Vec<u16> = cmd_str.as_slice().utf16_units().collect();
+                cmd_str = cmd_str.append_one(0);
                 let created = CreateProcessW(ptr::null(),
                                              cmd_str.as_mut_ptr(),
                                              ptr::mut_null(),
@@ -770,7 +774,7 @@ fn with_envp<T>(env: Option<&[(CString, CString)]>, cb: |*mut c_void| -> T) -> T
                 let kv = format!("{}={}",
                                  pair.ref0().as_str().unwrap(),
                                  pair.ref1().as_str().unwrap());
-                blk.push_all(kv.to_utf16().as_slice());
+                blk.extend(kv.as_slice().utf16_units());
                 blk.push(0);
             }
 
@@ -788,7 +792,9 @@ fn with_dirp<T>(d: Option<&CString>, cb: |*const u16| -> T) -> T {
       Some(dir) => {
           let dir_str = dir.as_str()
                            .expect("expected workingdirectory to be utf-8 encoded");
-          let dir_str = dir_str.to_utf16().append_one(0);
+          let dir_str: Vec<u16> = dir_str.utf16_units().collect();
+          let dir_str = dir_str.append_one(0);
+
           cb(dir_str.as_ptr())
       },
       None => cb(ptr::null())
diff --git a/src/librustdoc/flock.rs b/src/librustdoc/flock.rs
index cb2ebd15b39..f07c0163676 100644
--- a/src/librustdoc/flock.rs
+++ b/src/librustdoc/flock.rs
@@ -162,7 +162,8 @@ mod imp {
 
     impl Lock {
         pub fn new(p: &Path) -> Lock {
-            let p_16 = p.as_str().unwrap().to_utf16().append_one(0);
+            let p_16: Vec<u16> = p.as_str().unwrap().utf16_units().collect();
+            let p_16 = p_16.append_one(0);
             let handle = unsafe {
                 libc::CreateFileW(p_16.as_ptr(),
                                   libc::FILE_GENERIC_READ |
diff --git a/src/libstd/dynamic_lib.rs b/src/libstd/dynamic_lib.rs
index ec2cc67a60a..728875ce260 100644
--- a/src/libstd/dynamic_lib.rs
+++ b/src/libstd/dynamic_lib.rs
@@ -281,19 +281,22 @@ pub mod dl {
 #[cfg(target_os = "win32")]
 pub mod dl {
     use c_str::ToCStr;
+    use iter::Iterator;
     use libc;
     use os;
     use ptr;
     use result::{Ok, Err, Result};
-    use str::StrAllocating;
+    use str::StrSlice;
     use str;
     use string::String;
+    use vec::Vec;
 
     pub unsafe fn open_external<T: ToCStr>(filename: T) -> *mut u8 {
         // Windows expects Unicode data
         let filename_cstr = filename.to_c_str();
         let filename_str = str::from_utf8(filename_cstr.as_bytes_no_nul()).unwrap();
-        let filename_str = filename_str.to_utf16().append_one(0);
+        let filename_str: Vec<u16> = filename_str.utf16_units().collect();
+        let filename_str = filename_str.append_one(0);
         LoadLibraryW(filename_str.as_ptr() as *const libc::c_void) as *mut u8
     }
 
diff --git a/src/libstd/os.rs b/src/libstd/os.rs
index e0ed8cf667b..5201a811791 100644
--- a/src/libstd/os.rs
+++ b/src/libstd/os.rs
@@ -365,7 +365,8 @@ pub fn getenv(n: &str) -> Option<String> {
     unsafe {
         with_env_lock(|| {
             use os::win32::{fill_utf16_buf_and_decode};
-            let n = n.to_utf16().append_one(0);
+            let n: Vec<u16> = n.utf16_units().collect();
+            let n = n.append_one(0);
             fill_utf16_buf_and_decode(|buf, sz| {
                 libc::GetEnvironmentVariableW(n.as_ptr(), buf, sz)
             })
@@ -411,8 +412,10 @@ pub fn setenv(n: &str, v: &str) {
 
     #[cfg(windows)]
     fn _setenv(n: &str, v: &str) {
-        let n = n.to_utf16().append_one(0);
-        let v = v.to_utf16().append_one(0);
+        let n: Vec<u16> = n.utf16_units().collect();
+        let n = n.append_one(0);
+        let v: Vec<u16> = v.utf16_units().collect();
+        let v = v.append_one(0);
         unsafe {
             with_env_lock(|| {
                 libc::SetEnvironmentVariableW(n.as_ptr(), v.as_ptr());
@@ -437,7 +440,8 @@ pub fn unsetenv(n: &str) {
 
     #[cfg(windows)]
     fn _unsetenv(n: &str) {
-        let n = n.to_utf16().append_one(0);
+        let n: Vec<u16> = n.utf16_units().collect();
+        let n = n.append_one(0);
         unsafe {
             with_env_lock(|| {
                 libc::SetEnvironmentVariableW(n.as_ptr(), ptr::null());
@@ -804,7 +808,7 @@ pub fn change_dir(p: &Path) -> bool {
     #[cfg(windows)]
     fn chdir(p: &Path) -> bool {
         let p = match p.as_str() {
-            Some(s) => s.to_utf16().append_one(0),
+            Some(s) => s.utf16_units().collect::<Vec<u16>>().append_one(0),
             None => return false,
         };
         unsafe {