about summary refs log tree commit diff
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2014-11-21 14:21:48 +0000
committer bors <bors@rust-lang.org> 2014-11-21 14:21:48 +0000
commit 9efa23e9c0d98d47612eb6cf40fdf405f9c98b65 (patch)
tree 7bb764af5e6dacf3173643be5c4461ab98dc5b8c
parent 47c1d437c93872e6776570beac839f2587782f94 (diff)
parent dff48a99d6bf9d2c4828f187bfa2c9ba382fe791 (diff)
download rust-9efa23e9c0d98d47612eb6cf40fdf405f9c98b65.tar.gz
rust-9efa23e9c0d98d47612eb6cf40fdf405f9c98b65.zip
auto merge of #19042 : SimonSapin/rust/generic-utf16-encoder, r=alexcrichton
This allows encoding to UTF-16 from any iterator of `char`s, not only from UTF-8 `str` data, e.g. from a `[char]` UTF-32 string.

This might help with servo/servo#4023
-rw-r--r-- src/libcollections/str.rs 3
-rw-r--r-- src/libcore/str.rs 28
-rw-r--r-- src/libcoretest/str.rs 7
3 files changed, 34 insertions, 4 deletions
diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs
index a7df5f4644a..9c93669b5ac 100644
--- a/src/libcollections/str.rs
+++ b/src/libcollections/str.rs
@@ -74,7 +74,8 @@ use vec::Vec;
 pub use core::str::{from_utf8, CharEq, Chars, CharOffsets};
 pub use core::str::{Bytes, CharSplits};
 pub use core::str::{CharSplitsN, AnyLines, MatchIndices, StrSplits};
-pub use core::str::{Utf16CodeUnits, eq_slice, is_utf8, is_utf16, Utf16Items};
+pub use core::str::{Utf16Encoder, Utf16CodeUnits};
+pub use core::str::{eq_slice, is_utf8, is_utf16, Utf16Items};
 pub use core::str::{Utf16Item, ScalarValue, LoneSurrogate, utf16_items};
 pub use core::str::{truncate_utf16_at_nul, utf8_char_width, CharRange};
 pub use core::str::{FromStr, from_str};
diff --git a/src/libcore/str.rs b/src/libcore/str.rs
index 24f26b15f27..68e490ecb19 100644
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -762,12 +762,34 @@ impl<'a> Iterator<&'a str> for StrSplits<'a> {
 /// Use with the `std::iter` module.
 #[deriving(Clone)]
 pub struct Utf16CodeUnits<'a> {
-    chars: Chars<'a>,
-    extra: u16
+    encoder: Utf16Encoder<Chars<'a>>
 }
 
 impl<'a> Iterator<u16> for Utf16CodeUnits<'a> {
     #[inline]
+    fn next(&mut self) -> Option<u16> { self.encoder.next() }
+
+    #[inline]
+    fn size_hint(&self) -> (uint, Option<uint>) { self.encoder.size_hint() }
+}
+
+
+/// Iterator adaptor for encoding `char`s to UTF-16.
+#[deriving(Clone)]
+pub struct Utf16Encoder<I> {
+    chars: I,
+    extra: u16
+}
+
+impl<I> Utf16Encoder<I> {
+    /// Create an UTF-16 encoder from any `char` iterator.
+    pub fn new(chars: I) -> Utf16Encoder<I> where I: Iterator<char> {
+        Utf16Encoder { chars: chars, extra: 0 }
+    }
+}
+
+impl<I> Iterator<u16> for Utf16Encoder<I> where I: Iterator<char> {
+    #[inline]
     fn next(&mut self) -> Option<u16> {
         if self.extra != 0 {
             let tmp = self.extra;
@@ -2225,7 +2247,7 @@ impl StrPrelude for str {
 
     #[inline]
     fn utf16_units(&self) -> Utf16CodeUnits {
-        Utf16CodeUnits{ chars: self.chars(), extra: 0}
+        Utf16CodeUnits { encoder: Utf16Encoder::new(self.chars()) }
     }
 
     #[inline]
diff --git a/src/libcoretest/str.rs b/src/libcoretest/str.rs
index 5f44fd807cc..9574aeb3762 100644
--- a/src/libcoretest/str.rs
+++ b/src/libcoretest/str.rs
@@ -114,3 +114,10 @@ fn test_rev_split_char_iterator_no_trailing() {
     split.reverse();
     assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
 }
+
+#[test]
+fn test_utf16_code_units() {
+    use core::str::Utf16Encoder;
+    assert_eq!(Utf16Encoder::new(vec!['é', '\U0001F4A9'].into_iter()).collect::<Vec<u16>>(),
+               vec![0xE9, 0xD83D, 0xDCA9])
+}