about summary refs log tree commit diff
diff options
context:
space:
mode:
author Simon Sapin <simon.sapin@exyr.org> 2016-08-17 17:41:33 +0200
committer Simon Sapin <simon.sapin@exyr.org> 2016-08-29 17:34:02 +0200
commit 41d0a89e3ad99a9fdf700ea7d15750fe1cbfab14 (patch)
tree b837e9f2c8401f2a2ef300d2a2973e43231b69c3
parent 86dde9bbda92c02c61282b5df5d38338a798ef3b (diff)
download rust-41d0a89e3ad99a9fdf700ea7d15750fe1cbfab14.tar.gz
rust-41d0a89e3ad99a9fdf700ea7d15750fe1cbfab14.zip
Implement From<char> for u32, and From<u8> for char
These fit with other From implementations between integer types.

This helps the coding style of avoiding the 'as' operator that sometimes
silently truncates, and signals that these specific conversions are
lossless and infallible.
-rw-r--r-- src/libcore/char.rs 34
-rw-r--r-- src/libcoretest/char.rs 8
2 files changed, 42 insertions, 0 deletions
diff --git a/src/libcore/char.rs b/src/libcore/char.rs
index 4677f0b523f..47a8678d608 100644
--- a/src/libcore/char.rs
+++ b/src/libcore/char.rs
@@ -175,6 +175,40 @@ pub unsafe fn from_u32_unchecked(i: u32) -> char {
     transmute(i)
 }
 
+#[stable(feature = "char_convert", since = "1.13.0")]
+impl From<char> for u32 {
+    #[inline]
+    fn from(c: char) -> Self {
+        c as u32
+    }
+}
+
+/// Maps a byte in 0x00...0xFF to a `char` whose code point has the same value, in U+0000 to U+00FF.
+///
+/// Unicode is designed such that this effectively decodes bytes
+/// with the character encoding that IANA calls ISO-8859-1.
+/// This encoding is compatible with ASCII.
+///
+/// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hyphen),
+/// which leaves some "blanks", byte values that are not assigned to any character.
+/// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes.
+///
+/// Note that this is *also* different from Windows-1252 a.k.a. code page 1252,
+/// which is a superset of ISO/IEC 8859-1 that assigns some (not all!) blanks
+/// to punctuation and various Latin characters.
+///
+/// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/)
+/// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases
+/// for a superset of Windows-1252 that fills the remaining blanks with corresponding
+/// C0 and C1 control codes.
+#[stable(feature = "char_convert", since = "1.13.0")]
+impl From<u8> for char {
+    #[inline]
+    fn from(i: u8) -> Self {
+        i as char
+    }
+}
+
 /// Converts a digit in the given radix to a `char`.
 ///
 /// A 'radix' here is sometimes also called a 'base'. A radix of two
diff --git a/src/libcoretest/char.rs b/src/libcoretest/char.rs
index 333503d7389..92a2b23d242 100644
--- a/src/libcoretest/char.rs
+++ b/src/libcoretest/char.rs
@@ -11,6 +11,14 @@
 use std::char;
 
 #[test]
+fn test_convert() {
+    assert_eq!(u32::from('a'), 0x61);
+    assert_eq!(char::from(b'\0'), '\0');
+    assert_eq!(char::from(b'a'), 'a');
+    assert_eq!(char::from(b'\xFF'), '\u{FF}');
+}
+
+#[test]
 fn test_is_lowercase() {
     assert!('a'.is_lowercase());
     assert!('ö'.is_lowercase());