diff options
| author | dan@daramos.com <dan@daramos.com> | 2015-01-01 01:22:43 -0500 |
|---|---|---|
| committer | dan@daramos.com <dan@daramos.com> | 2015-01-01 01:22:43 -0500 |
| commit | 8aeefbbfdd1c25b63bf0c7aa4b7bf56047d4a65a (patch) | |
| tree | 343fce9462d2d11b804d1efa593afc0516a8c72a /src/libcollections | |
| parent | 7d4f4876d65bddf101784230c0347adcb01e5c21 (diff) | |
| download | rust-8aeefbbfdd1c25b63bf0c7aa4b7bf56047d4a65a.tar.gz rust-8aeefbbfdd1c25b63bf0c7aa4b7bf56047d4a65a.zip | |
Reimplement a minor optimization in String::from_utf8_lossy that avoids having to loop over the slice from the beginning.
Diffstat (limited to 'src/libcollections')
| -rw-r--r-- | src/libcollections/string.rs | 10 |
1 files changed, 7 insertions, 3 deletions
~~~diff
diff --git a/src/libcollections/string.rs b/src/libcollections/string.rs
index f703ff99660..e3f0860d613 100644
--- a/src/libcollections/string.rs
+++ b/src/libcollections/string.rs
@@ -141,14 +141,18 @@ impl String {
     /// ```
     #[stable]
     pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> CowString<'a> {
+        let mut i = 0;
         match str::from_utf8(v) {
             Ok(s) => return Cow::Borrowed(s),
-            Err(..) => {}
+            Err(e) => {
+                if let Utf8Error::InvalidByte(firstbad) = e {
+                    i = firstbad;
+                }
+            }
         }
 
         static TAG_CONT_U8: u8 = 128u8;
         static REPLACEMENT: &'static [u8] = b"\xEF\xBF\xBD"; // U+FFFD in UTF-8
-        let mut i = 0;
         let total = v.len();
         fn unsafe_get(xs: &[u8], i: uint) -> u8 {
             unsafe { *xs.get_unchecked(i) }
@@ -172,7 +176,7 @@ impl String {
 
         // subseqidx is the index of the first byte of the subsequence we're looking at.
         // It's used to copy a bunch of contiguous good codepoints at once instead of copying
         // them one by one.
-        let mut subseqidx = 0;
+        let mut subseqidx = i;
         while i < total {
             let i_ = i;
~~~
