diff options
| author | Alex Crichton <alex@alexcrichton.com> | 2015-01-02 09:22:42 -0800 |
|---|---|---|
| committer | Alex Crichton <alex@alexcrichton.com> | 2015-01-02 09:22:42 -0800 |
| commit | c5b9ffdee62c7d6b031f3198ceb675f0cdb5745d (patch) | |
| tree | 6d7252d9d115bab64d7f27400ba430860caca8ef | |
| parent | e80b9811a6b0327fa0ec5b288a97135e414699d4 (diff) | |
| parent | 8aeefbbfdd1c25b63bf0c7aa4b7bf56047d4a65a (diff) | |
| download | rust-c5b9ffdee62c7d6b031f3198ceb675f0cdb5745d.tar.gz rust-c5b9ffdee62c7d6b031f3198ceb675f0cdb5745d.zip | |
rollup merge of #20391: daramos/utf8_lossy
Prior to 9bae6ec828fdc7f87838ee008cccef90e31b9f84, `from_utf8_lossy` had a minor optimization in place that avoided having to loop from the beginning of the input slice. Recently, 4908017d59da8694b9ceaf743baf1163c1e19086 implemented `Utf8Error::InvalidByte`, which makes this optimization possible again.
| -rw-r--r-- | src/libcollections/string.rs | 10 |
1 files changed, 7 insertions, 3 deletions
diff --git a/src/libcollections/string.rs b/src/libcollections/string.rs index 900f23bb9d1..f631af6f642 100644 --- a/src/libcollections/string.rs +++ b/src/libcollections/string.rs @@ -143,14 +143,18 @@ impl String { /// ``` #[stable] pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> CowString<'a> { + let mut i = 0; match str::from_utf8(v) { Ok(s) => return Cow::Borrowed(s), - Err(..) => {} + Err(e) => { + if let Utf8Error::InvalidByte(firstbad) = e { + i = firstbad; + } + } } static TAG_CONT_U8: u8 = 128u8; static REPLACEMENT: &'static [u8] = b"\xEF\xBF\xBD"; // U+FFFD in UTF-8 - let mut i = 0; let total = v.len(); fn unsafe_get(xs: &[u8], i: uint) -> u8 { unsafe { *xs.get_unchecked(i) } @@ -174,7 +178,7 @@ impl String { // subseqidx is the index of the first byte of the subsequence we're looking at. // It's used to copy a bunch of contiguous good codepoints at once instead of copying // them one by one. - let mut subseqidx = 0; + let mut subseqidx = i; while i < total { let i_ = i; |
