about summary refs log tree commit diff
path: root/src/libstd
diff options
context:
space:
mode:
authorCorey Farwell <coreyf@rwell.org>2017-08-17 00:36:30 -0400
committerCorey Farwell <coreyf@rwell.org>2017-08-17 00:36:30 -0400
commit1065ad418e9693a8bbd4592237f858bc862d2482 (patch)
treefe09363e8ab872dddfae36d016fe6385511bafd2 /src/libstd
parentc774c9591959edc9aa098e5ff7fc4be2e4a12c09 (diff)
downloadrust-1065ad418e9693a8bbd4592237f858bc862d2482.tar.gz
rust-1065ad418e9693a8bbd4592237f858bc862d2482.zip
Minor rewrite of char primitive unicode intro.
Opened primarily to address #36998.
Diffstat (limited to 'src/libstd')
-rw-r--r--src/libstd/primitive_docs.rs31
1 files changed, 18 insertions, 13 deletions
diff --git a/src/libstd/primitive_docs.rs b/src/libstd/primitive_docs.rs
index c52899db437..6746754ebc3 100644
--- a/src/libstd/primitive_docs.rs
+++ b/src/libstd/primitive_docs.rs
@@ -103,26 +103,31 @@ mod prim_bool { }
 /// [`String`]: string/struct.String.html
 ///
 /// As always, remember that a human intuition for 'character' may not map to
-/// Unicode's definitions. For example, emoji symbols such as '❤️' can be more
-/// than one Unicode code point; this ❤️ in particular is two:
+/// Unicode's definitions. For example, despite looking similar, the 'é'
+/// character is one Unicode code point while 'é' is two Unicode code points:
 ///
 /// ```
-/// let s = String::from("❤️");
+/// let mut chars = "é".chars();
+/// // U+00e9: 'latin small letter e with acute'
+/// assert_eq!(Some('\u{00e9}'), chars.next());
+/// assert_eq!(None, chars.next());
 ///
-/// // we get two chars out of a single ❤️
-/// let mut iter = s.chars();
-/// assert_eq!(Some('\u{2764}'), iter.next());
-/// assert_eq!(Some('\u{fe0f}'), iter.next());
-/// assert_eq!(None, iter.next());
+/// let mut chars = "é".chars();
+/// // U+0065: 'latin small letter e'
+/// assert_eq!(Some('\u{0065}'), chars.next());
+/// // U+0301: 'combining acute accent'
+/// assert_eq!(Some('\u{0301}'), chars.next());
+/// assert_eq!(None, chars.next());
 /// ```
 ///
-/// This means it won't fit into a `char`. Trying to create a literal with
-/// `let heart = '❤️';` gives an error:
+/// This means that the contents of the first string above _will_ fit into a
+/// `char` while the contents of the second string _will not_. Trying to create
+/// a `char` literal with the contents of the second string gives an error:
 ///
 /// ```text
-/// error: character literal may only contain one codepoint: '❤
-/// let heart = '❤️';
-///             ^~
+/// error: character literal may only contain one codepoint: 'é'
+/// let c = 'é';
+///         ^^^^
 /// ```
 ///
 /// Another implication of the 4-byte fixed size of a `char` is that