Auto merge of #35755 - SimonSapin:char_convert, r=alexcrichton

Implement std::convert traits for char This is motivated by avoiding the `as` operator, which sometimes silently truncates, and instead use conversions that are explicitly lossless and infallible. I’m less certain that `From<u8> for char` should be implemented: while it matches an existing behavior of `as`, it’s not necessarily the right thing to use for non-ASCII bytes. It effectively decodes bytes as ISO/IEC 8859-1 (since Unicode designed its first 256 code points to be compatible with that encoding), but that is not apparent in the API name.
author: bors <bors@rust-lang.org> 2016-09-01 02:53:28 -0700
committer: GitHub <noreply@github.com> 2016-09-01 02:53:28 -0700
commit: b2799a56a1631ee351e4687ee649808e28cccda1 (patch)
tree: ee8fd765766cc38f38e8dd02782412c1d2ee189d /src/libcore
parent: 3135b7877a66e72ac4833588eb8654aa155876e8 (diff)
parent: f040208d533e1d6d9ee0e0408ee74e26e14d1284 (diff)
download: rust-b2799a56a1631ee351e4687ee649808e28cccda1.tar.gz
rust-b2799a56a1631ee351e4687ee649808e28cccda1.zip
1 files changed, 63 insertions, 6 deletions
diff --git a/src/libcore/char.rs b/src/libcore/char.rs
index 4677f0b523f..ad492c81bd3 100644
--- a/src/libcore/char.rs
+++ b/src/libcore/char.rs
@@ -16,6 +16,8 @@
 #![stable(feature = "core_char", since = "1.2.0")]
 
 use char_private::is_printable;
+use convert::TryFrom;
+use fmt;
 use iter::FusedIterator;
 use mem::transmute;
 
@@ -122,12 +124,7 @@ pub const MAX: char = '\u{10ffff}';
 #[inline]
 #[stable(feature = "rust1", since = "1.0.0")]
 pub fn from_u32(i: u32) -> Option<char> {
-    // catch out-of-bounds and surrogates
-    if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) {
-        None
-    } else {
-        Some(unsafe { from_u32_unchecked(i) })
-    }
+    char::try_from(i).ok()
 }
 
 /// Converts a `u32` to a `char`, ignoring validity.
@@ -175,6 +172,66 @@ pub unsafe fn from_u32_unchecked(i: u32) -> char {
     transmute(i)
 }
 
+#[stable(feature = "char_convert", since = "1.13.0")]
+impl From<char> for u32 {
+    #[inline]
+    fn from(c: char) -> Self {
+        c as u32
+    }
+}
+
+/// Maps a byte in 0x00...0xFF to a `char` whose code point has the same value, in U+0000 to U+00FF.
+///
+/// Unicode is designed such that this effectively decodes bytes
+/// with the character encoding that IANA calls ISO-8859-1.
+/// This encoding is compatible with ASCII.
+///
+/// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hypen),
+/// which leaves some "blanks", byte values that are not assigned to any character.
+/// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes.
+///
+/// Note that this is *also* different from Windows-1252 a.k.a. code page 1252,
+/// which is a superset ISO/IEC 8859-1 that assigns some (not all!) blanks
+/// to punctuation and various Latin characters.
+///
+/// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/)
+/// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases
+/// for a superset of Windows-1252 that fills the remaining blanks with corresponding
+/// C0 and C1 control codes.
+#[stable(feature = "char_convert", since = "1.13.0")]
+impl From<u8> for char {
+    #[inline]
+    fn from(i: u8) -> Self {
+        i as char
+    }
+}
+
+#[unstable(feature = "try_from", issue = "33417")]
+impl TryFrom<u32> for char {
+    type Err = CharTryFromError;
+
+    #[inline]
+    fn try_from(i: u32) -> Result<Self, Self::Err> {
+        if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) {
+            Err(CharTryFromError(()))
+        } else {
+            Ok(unsafe { from_u32_unchecked(i) })
+        }
+    }
+}
+
+/// The error type returned when a conversion from u32 to char fails.
+#[unstable(feature = "try_from", issue = "33417")]
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub struct CharTryFromError(());
+
+#[unstable(feature = "try_from", issue = "33417")]
+impl fmt::Display for CharTryFromError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        "converted integer out of range for `char`".fmt(f)
+    }
+}
+
 /// Converts a digit in the given radix to a `char`.
 ///
 /// A 'radix' here is sometimes also called a 'base'. A radix of two
author	bors <bors@rust-lang.org>	2016-09-01 02:53:28 -0700
committer	GitHub <noreply@github.com>	2016-09-01 02:53:28 -0700
commit	b2799a56a1631ee351e4687ee649808e28cccda1 (patch)
tree	ee8fd765766cc38f38e8dd02782412c1d2ee189d /src/libcore
parent	3135b7877a66e72ac4833588eb8654aa155876e8 (diff)
parent	f040208d533e1d6d9ee0e0408ee74e26e14d1284 (diff)
download	rust-b2799a56a1631ee351e4687ee649808e28cccda1.tar.gz rust-b2799a56a1631ee351e4687ee649808e28cccda1.zip