about summary refs log tree commit diff
path: root/src/libstd
diff options
context:
space:
mode:
authorSteve Klabnik <steve@steveklabnik.com>2016-02-17 18:14:36 -0500
committerSteve Klabnik <steve@steveklabnik.com>2016-02-17 18:14:36 -0500
commit96c8a67a17e4158bd1781ca19ff00d3317050515 (patch)
treea843fba153737a83d1e424d6446553561da5ba9e /src/libstd
parent27ef9df824e2d5b0e22b48dfcfc19906e1ea35ca (diff)
parent071b4b6f7b162e92711677846dcfb1736f3b9440 (diff)
downloadrust-96c8a67a17e4158bd1781ca19ff00d3317050515.tar.gz
rust-96c8a67a17e4158bd1781ca19ff00d3317050515.zip
Rollup merge of #31695 - oconnor663:chardocs, r=alexcrichton
Previously the docs suggested that '❤️' doesn't fit in a char because
it's 6 bytes. But that's misleading. 'a̚' also doesn't fit in a char,
even though it's only 3 bytes. The important thing is the number of code
points, not the number of bytes. Clarify the primitive char docs around
this.
Diffstat (limited to 'src/libstd')
-rw-r--r--src/libstd/primitive_docs.rs32
1 files changed, 11 insertions, 21 deletions
diff --git a/src/libstd/primitive_docs.rs b/src/libstd/primitive_docs.rs
index ad93fe0094a..b840e51873e 100644
--- a/src/libstd/primitive_docs.rs
+++ b/src/libstd/primitive_docs.rs
@@ -50,18 +50,21 @@ mod prim_bool { }
 /// [`String`]: string/struct.String.html
 ///
 /// As always, remember that a human intuition for 'character' may not map to
-/// Unicode's definitions. For example, emoji symbols such as '❤️' are more than
-/// one byte; ❤️ in particular is six:
+/// Unicode's definitions. For example, emoji symbols such as '❤️' can be more
+/// than one Unicode code point; this ❤️ in particular is two:
 ///
 /// ```
 /// let s = String::from("❤️");
 ///
-/// // six bytes times one byte for each element
-/// assert_eq!(6, s.len() * std::mem::size_of::<u8>());
+/// // we get two chars out of a single ❤️
+/// let mut iter = s.chars();
+/// assert_eq!(Some('\u{2764}'), iter.next());
+/// assert_eq!(Some('\u{fe0f}'), iter.next());
+/// assert_eq!(None, iter.next());
 /// ```
 ///
-/// This also means it won't fit into a `char`, and so trying to create a
-/// literal with `let heart = '❤️';` gives an error:
+/// This means it won't fit into a `char`. Trying to create a literal with
+/// `let heart = '❤️';` gives an error:
 ///
 /// ```text
 /// error: character literal may only contain one codepoint: '❤
@@ -69,8 +72,8 @@ mod prim_bool { }
 ///             ^~
 /// ```
 ///
-/// Another implication of this is that if you want to do per-`char`acter
-/// processing, it can end up using a lot more memory:
+/// Another implication of the 4-byte fixed size of a `char`, is that
+/// per-`char`acter processing can end up using a lot more memory:
 ///
 /// ```
 /// let s = String::from("love: ❤️");
@@ -79,19 +82,6 @@ mod prim_bool { }
 /// assert_eq!(12, s.len() * std::mem::size_of::<u8>());
 /// assert_eq!(32, v.len() * std::mem::size_of::<char>());
 /// ```
-///
-/// Or may give you results you may not expect:
-///
-/// ```
-/// let s = String::from("❤️");
-///
-/// let mut iter = s.chars();
-///
-/// // we get two chars out of a single ❤️
-/// assert_eq!(Some('\u{2764}'), iter.next());
-/// assert_eq!(Some('\u{fe0f}'), iter.next());
-/// assert_eq!(None, iter.next());
-/// ```
 mod prim_char { }
 
 #[doc(primitive = "unit")]