diff options
| author | Ralf Jung <post@ralfj.de> | 2020-05-29 21:58:32 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-05-29 21:58:32 +0200 |
| commit | b965196ce0bb0b484336f1f2ba3a6a72cbfb4f76 (patch) | |
| tree | bb77385894d39b2154c8c6732e95c82d6f17900d | |
| parent | de561a9d3d8d06969abc4f6851fd532f584ff52c (diff) | |
| parent | cd6a8cae2aa07bd456a1816196e3c9aa2fcb72d6 (diff) | |
| download | rust-b965196ce0bb0b484336f1f2ba3a6a72cbfb4f76.tar.gz rust-b965196ce0bb0b484336f1f2ba3a6a72cbfb4f76.zip | |
Rollup merge of #72413 - CAD97:char-range, r=dtolnay
impl Step for char (make Range*<char> iterable) [[irlo thread]](https://internals.rust-lang.org/t/mini-rfc-make-range-char-work/12392?u=cad97) [[godbolt asm example]](https://rust.godbolt.org/z/fdveKo) Add an implementation of the `Step` trait for `char`, which has the effect of making `RangeInclusive<char>` (and the other range types) iterable. I've used the surrogate range magic numbers as magic numbers here rather than e.g. a `const SURROGATE_RANGE = 0xD800..0xE000` because these numbers appear to be used as magic numbers elsewhere and there doesn't exist constants for them yet. These files definitely aren't where surrogate range constants should live. `ExactSizeIterator` is not implemented because `0x10FFFF` is bigger than fits in a `usize == u16`. However, given we already provide some `ExactSizeIterator` that are not correct on 16 bit targets, we might still want to consider providing it for `Range`[`Inclusive`]`<char>`, as it is definitely _very_ convenient. (At the very least, we want to make sure `.count()` doesn't bother iterating the range.) The second commit in this PR changes a call to `Step::forward` to use `Step::forward_unchecked` in `RangeInclusive::next`. This is because without this patch, iteration over all codepoints (`'\0'..=char::MAX`) does not successfully optimize out the panicking branch. This was mentioned in the PR that updated `Step` to its current design, but was deemed not yet necessary as it did not impact codegen for integral types. More of `Range*`'s implementations' calls to `Step` methods will probably want to see if they can use the `_unchecked` version as (if) we open up `Step` to being implemented on more types. --- cc @rust-lang/libs, this is insta-stable and a fairly significant addition to `Range*`'s capabilities; this is the first instance of a noncontinuous domain being iterable with `Range` (or, well, anything other than primitive integers). I don't think this needs a full RFC, but it should definitely get some decent eyes on it.
| -rw-r--r-- | src/libcore/iter/range.rs | 74 | ||||
| -rw-r--r-- | src/libcore/tests/iter.rs | 12 |
2 files changed, 85 insertions, 1 deletions
diff --git a/src/libcore/iter/range.rs b/src/libcore/iter/range.rs index 388a5548a31..57e3e8084dd 100644 --- a/src/libcore/iter/range.rs +++ b/src/libcore/iter/range.rs @@ -1,3 +1,4 @@ +use crate::char; use crate::convert::TryFrom; use crate::mem; use crate::ops::{self, Add, Sub, Try}; @@ -400,6 +401,73 @@ step_integer_impls! { wider than usize: [u32 i32], [u64 i64], [u128 i128]; } +#[unstable(feature = "step_trait", reason = "recently redesigned", issue = "42168")] +unsafe impl Step for char { + #[inline] + fn steps_between(&start: &char, &end: &char) -> Option<usize> { + let start = start as u32; + let end = end as u32; + if start <= end { + let count = end - start; + if start < 0xD800 && 0xE000 <= end { + usize::try_from(count - 0x800).ok() + } else { + usize::try_from(count).ok() + } + } else { + None + } + } + + #[inline] + fn forward_checked(start: char, count: usize) -> Option<char> { + let start = start as u32; + let mut res = Step::forward_checked(start, count)?; + if start < 0xD800 && 0xD800 <= res { + res = Step::forward_checked(res, 0x800)?; + } + if res <= char::MAX as u32 { + // SAFETY: res is a valid unicode scalar + // (below 0x110000 and not in 0xD800..0xE000) + Some(unsafe { char::from_u32_unchecked(res) }) + } else { + None + } + } + + #[inline] + fn backward_checked(start: char, count: usize) -> Option<char> { + let start = start as u32; + let mut res = Step::backward_checked(start, count)?; + if start >= 0xE000 && 0xE000 > res { + res = Step::backward_checked(res, 0x800)?; + } + // SAFETY: res is a valid unicode scalar + // (below 0x110000 and not in 0xD800..0xE000) + Some(unsafe { char::from_u32_unchecked(res) }) + } + + #[inline] + unsafe fn forward_unchecked(start: char, count: usize) -> char { + let start = start as u32; + let mut res = Step::forward_unchecked(start, count); + if start < 0xD800 && 0xD800 <= res { + res = Step::forward_unchecked(res, 0x800); + } + char::from_u32_unchecked(res) + } + + #[inline] + unsafe fn backward_unchecked(start: char, count: usize) -> char { + let start = start as u32; + let mut res = Step::backward_unchecked(start, count); + if start >= 0xE000 && 0xE000 > res { + res = Step::backward_unchecked(res, 0x800); + } + char::from_u32_unchecked(res) + } +} + macro_rules! range_exact_iter_impl { ($($t:ty)*) => ($( #[stable(feature = "rust1", since = "1.0.0")] @@ -582,7 +650,11 @@ impl<A: Step> Iterator for ops::RangeInclusive<A> { } let is_iterating = self.start < self.end; Some(if is_iterating { - let n = Step::forward(self.start.clone(), 1); + // SAFETY: just checked precondition + // We use the unchecked version here, because + // otherwise `for _ in '\0'..=char::MAX` + // does not successfully remove panicking code. + let n = unsafe { Step::forward_unchecked(self.start.clone(), 1) }; mem::replace(&mut self.start, n) } else { self.exhausted = true; diff --git a/src/libcore/tests/iter.rs b/src/libcore/tests/iter.rs index 52cf068f0a5..c5d636ac8da 100644 --- a/src/libcore/tests/iter.rs +++ b/src/libcore/tests/iter.rs @@ -1933,6 +1933,18 @@ fn test_range() { } #[test] +fn test_char_range() { + use std::char; + assert!(('\0'..=char::MAX).eq((0..=char::MAX as u32).filter_map(char::from_u32))); + assert!(('\0'..=char::MAX).rev().eq((0..=char::MAX as u32).filter_map(char::from_u32).rev())); + + assert_eq!(('\u{D7FF}'..='\u{E000}').count(), 2); + assert_eq!(('\u{D7FF}'..='\u{E000}').size_hint(), (2, Some(2))); + assert_eq!(('\u{D7FF}'..'\u{E000}').count(), 1); + assert_eq!(('\u{D7FF}'..'\u{E000}').size_hint(), (1, Some(1))); +} + +#[test] fn test_range_exhaustion() { let mut r = 10..10; assert!(r.is_empty()); |
