diff options
| author | Maybe Waffle <waffle.lapkin@gmail.com> | 2022-01-27 00:25:17 +0300 |
|---|---|---|
| committer | Maybe Waffle <waffle.lapkin@gmail.com> | 2022-01-27 00:30:33 +0300 |
| commit | cd4245d318b04c8b44aed7e682c49b0507086d6c (patch) | |
| tree | 8b306ca96c0a2b56866d1c2913136e4c22a6da87 | |
| parent | a7f375789bab1a4e4a291c963081a8ca7d2b6bd7 (diff) | |
| download | rust-cd4245d318b04c8b44aed7e682c49b0507086d6c.tar.gz rust-cd4245d318b04c8b44aed7e682c49b0507086d6c.zip | |
Make char::DecodeUtf16::size_hint more precise
The new implementation takes into account the contents of `self.buf` and rounds the lower bound up instead of down.
| -rw-r--r-- | library/core/src/char/decode.rs | 18 |
1 files changed, 15 insertions, 3 deletions
```diff
diff --git a/library/core/src/char/decode.rs b/library/core/src/char/decode.rs
index 5dd8c5ef789..f3fef85ef1d 100644
--- a/library/core/src/char/decode.rs
+++ b/library/core/src/char/decode.rs
@@ -120,9 +120,21 @@ impl<I: Iterator<Item = u16>> Iterator for DecodeUtf16<I> {
     #[inline]
     fn size_hint(&self) -> (usize, Option<usize>) {
         let (low, high) = self.iter.size_hint();
-        // we could be entirely valid surrogates (2 elements per
-        // char), or entirely non-surrogates (1 element per char)
-        (low / 2, high)
+
+        // `self.buf` will never contain the first part of a surrogate,
+        // so the presence of `buf == Some(...)` always means +1
+        // on lower and upper bound.
+        let addition_from_buf = self.buf.is_some() as usize;
+
+        // `self.iter` could contain entirely valid surrogates (2 elements per
+        // char), or entirely non-surrogates (1 element per char).
+        //
+        // On odd lower bound, at least one element must stay unpaired
+        // (with other elements from `self.iter`), so we round up.
+        let low = low.div_ceil(2) + addition_from_buf;
+        let high = high.and_then(|h| h.checked_add(addition_from_buf));
+
+        (low, high)
     }
 }
 
```
