about summary refs log tree commit diff
diff options
context:
space:
mode:
author Ralf Jung <post@ralfj.de> 2020-05-29 21:58:32 +0200
committer GitHub <noreply@github.com> 2020-05-29 21:58:32 +0200
commit b965196ce0bb0b484336f1f2ba3a6a72cbfb4f76 (patch)
tree bb77385894d39b2154c8c6732e95c82d6f17900d
parent de561a9d3d8d06969abc4f6851fd532f584ff52c (diff)
parent cd6a8cae2aa07bd456a1816196e3c9aa2fcb72d6 (diff)
download rust-b965196ce0bb0b484336f1f2ba3a6a72cbfb4f76.tar.gz
rust-b965196ce0bb0b484336f1f2ba3a6a72cbfb4f76.zip
Rollup merge of #72413 - CAD97:char-range, r=dtolnay
impl Step for char (make Range*<char> iterable)

[[irlo thread]](https://internals.rust-lang.org/t/mini-rfc-make-range-char-work/12392?u=cad97) [[godbolt asm example]](https://rust.godbolt.org/z/fdveKo)

Add an implementation of the `Step` trait for `char`, which has the effect of making `RangeInclusive<char>` (and the other range types) iterable.

I've written the surrogate range bounds as literal magic numbers here, rather than e.g. a `const SURROGATE_RANGE = 0xD800..0xE000`, because these numbers already appear as magic numbers elsewhere and no constants exist for them yet. These files definitely aren't where surrogate range constants should live.

`ExactSizeIterator` is not implemented because `0x10FFFF` does not fit in a `usize` on 16-bit targets (where `usize == u16`). However, given we already provide some `ExactSizeIterator` impls that are not correct on 16-bit targets, we might still want to consider providing it for `Range`[`Inclusive`]`<char>`, as it is definitely _very_ convenient. (At the very least, we want to make sure `.count()` doesn't bother iterating the range.)

The second commit in this PR changes a call to `Step::forward` to use `Step::forward_unchecked` in `RangeInclusive::next`. This is because without this patch, iteration over all codepoints (`'\0'..=char::MAX`) does not successfully optimize out the panicking branch. This was mentioned in the PR that updated `Step` to its current design, but was deemed not yet necessary as it did not impact codegen for integral types.

More of the calls to `Step` methods in the `Range*` implementations should probably be checked to see whether they can use the `_unchecked` versions as (if) we open up `Step` to being implemented on more types.

---

cc @rust-lang/libs, this is insta-stable and a fairly significant addition to `Range*`'s capabilities; this is the first instance of a noncontinuous domain being iterable with `Range` (or, well, anything other than primitive integers). I don't think this needs a full RFC, but it should definitely get some decent eyes on it.
-rw-r--r-- src/libcore/iter/range.rs 74
-rw-r--r-- src/libcore/tests/iter.rs 12
2 files changed, 85 insertions, 1 deletions
diff --git a/src/libcore/iter/range.rs b/src/libcore/iter/range.rs
index 388a5548a31..57e3e8084dd 100644
--- a/src/libcore/iter/range.rs
+++ b/src/libcore/iter/range.rs
@@ -1,3 +1,4 @@
+use crate::char;
 use crate::convert::TryFrom;
 use crate::mem;
 use crate::ops::{self, Add, Sub, Try};
@@ -400,6 +401,73 @@ step_integer_impls! {
     wider than usize: [u32 i32], [u64 i64], [u128 i128];
 }
 
+#[unstable(feature = "step_trait", reason = "recently redesigned", issue = "42168")]
+unsafe impl Step for char {
+    #[inline]
+    fn steps_between(&start: &char, &end: &char) -> Option<usize> {
+        let start = start as u32;
+        let end = end as u32;
+        if start <= end {
+            let count = end - start;
+            if start < 0xD800 && 0xE000 <= end {
+                usize::try_from(count - 0x800).ok()
+            } else {
+                usize::try_from(count).ok()
+            }
+        } else {
+            None
+        }
+    }
+
+    #[inline]
+    fn forward_checked(start: char, count: usize) -> Option<char> {
+        let start = start as u32;
+        let mut res = Step::forward_checked(start, count)?;
+        if start < 0xD800 && 0xD800 <= res {
+            res = Step::forward_checked(res, 0x800)?;
+        }
+        if res <= char::MAX as u32 {
+            // SAFETY: res is a valid unicode scalar
+            // (below 0x110000 and not in 0xD800..0xE000)
+            Some(unsafe { char::from_u32_unchecked(res) })
+        } else {
+            None
+        }
+    }
+
+    #[inline]
+    fn backward_checked(start: char, count: usize) -> Option<char> {
+        let start = start as u32;
+        let mut res = Step::backward_checked(start, count)?;
+        if start >= 0xE000 && 0xE000 > res {
+            res = Step::backward_checked(res, 0x800)?;
+        }
+        // SAFETY: res is a valid unicode scalar
+        // (below 0x110000 and not in 0xD800..0xE000)
+        Some(unsafe { char::from_u32_unchecked(res) })
+    }
+
+    #[inline]
+    unsafe fn forward_unchecked(start: char, count: usize) -> char {
+        let start = start as u32;
+        let mut res = Step::forward_unchecked(start, count);
+        if start < 0xD800 && 0xD800 <= res {
+            res = Step::forward_unchecked(res, 0x800);
+        }
+        char::from_u32_unchecked(res)
+    }
+
+    #[inline]
+    unsafe fn backward_unchecked(start: char, count: usize) -> char {
+        let start = start as u32;
+        let mut res = Step::backward_unchecked(start, count);
+        if start >= 0xE000 && 0xE000 > res {
+            res = Step::backward_unchecked(res, 0x800);
+        }
+        char::from_u32_unchecked(res)
+    }
+}
+
 macro_rules! range_exact_iter_impl {
     ($($t:ty)*) => ($(
         #[stable(feature = "rust1", since = "1.0.0")]
@@ -582,7 +650,11 @@ impl<A: Step> Iterator for ops::RangeInclusive<A> {
         }
         let is_iterating = self.start < self.end;
         Some(if is_iterating {
-            let n = Step::forward(self.start.clone(), 1);
+            // SAFETY: just checked precondition
+            // We use the unchecked version here, because
+            // otherwise `for _ in '\0'..=char::MAX`
+            // does not successfully remove panicking code.
+            let n = unsafe { Step::forward_unchecked(self.start.clone(), 1) };
             mem::replace(&mut self.start, n)
         } else {
             self.exhausted = true;
diff --git a/src/libcore/tests/iter.rs b/src/libcore/tests/iter.rs
index 52cf068f0a5..c5d636ac8da 100644
--- a/src/libcore/tests/iter.rs
+++ b/src/libcore/tests/iter.rs
@@ -1933,6 +1933,18 @@ fn test_range() {
 }
 
 #[test]
+fn test_char_range() {
+    use std::char;
+    assert!(('\0'..=char::MAX).eq((0..=char::MAX as u32).filter_map(char::from_u32)));
+    assert!(('\0'..=char::MAX).rev().eq((0..=char::MAX as u32).filter_map(char::from_u32).rev()));
+
+    assert_eq!(('\u{D7FF}'..='\u{E000}').count(), 2);
+    assert_eq!(('\u{D7FF}'..='\u{E000}').size_hint(), (2, Some(2)));
+    assert_eq!(('\u{D7FF}'..'\u{E000}').count(), 1);
+    assert_eq!(('\u{D7FF}'..'\u{E000}').size_hint(), (1, Some(1)));
+}
+
+#[test]
 fn test_range_exhaustion() {
     let mut r = 10..10;
     assert!(r.is_empty());