about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Alex Crichton <alex@alexcrichton.com> 2015-01-02 09:22:42 -0800
committer: Alex Crichton <alex@alexcrichton.com> 2015-01-02 09:22:42 -0800
commit: c5b9ffdee62c7d6b031f3198ceb675f0cdb5745d (patch)
tree: 6d7252d9d115bab64d7f27400ba430860caca8ef
parent: e80b9811a6b0327fa0ec5b288a97135e414699d4 (diff)
parent: 8aeefbbfdd1c25b63bf0c7aa4b7bf56047d4a65a (diff)
download: rust-c5b9ffdee62c7d6b031f3198ceb675f0cdb5745d.tar.gz
download: rust-c5b9ffdee62c7d6b031f3198ceb675f0cdb5745d.zip
rollup merge of #20391: daramos/utf8_lossy
Prior to commit 9bae6ec828fdc7f87838ee008cccef90e31b9f84, `from_utf8_lossy` had a minor optimization in place that avoided having to loop from the beginning of the input slice.
Recently, commit 4908017d59da8694b9ceaf743baf1163c1e19086 implemented `Utf8Error::InvalidByte`, which makes this optimization possible again.
-rw-r--r-- src/libcollections/string.rs 10
1 files changed, 7 insertions, 3 deletions
diff --git a/src/libcollections/string.rs b/src/libcollections/string.rs
index 900f23bb9d1..f631af6f642 100644
--- a/src/libcollections/string.rs
+++ b/src/libcollections/string.rs
@@ -143,14 +143,18 @@ impl String {
     /// ```
     #[stable]
     pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> CowString<'a> {
+        let mut i = 0;
         match str::from_utf8(v) {
             Ok(s) => return Cow::Borrowed(s),
-            Err(..) => {}
+            Err(e) => {
+                if let Utf8Error::InvalidByte(firstbad) = e {
+                    i = firstbad;
+                }
+            }
         }
 
         static TAG_CONT_U8: u8 = 128u8;
         static REPLACEMENT: &'static [u8] = b"\xEF\xBF\xBD"; // U+FFFD in UTF-8
-        let mut i = 0;
         let total = v.len();
         fn unsafe_get(xs: &[u8], i: uint) -> u8 {
             unsafe { *xs.get_unchecked(i) }
@@ -174,7 +178,7 @@ impl String {
         // subseqidx is the index of the first byte of the subsequence we're looking at.
         // It's used to copy a bunch of contiguous good codepoints at once instead of copying
         // them one by one.
-        let mut subseqidx = 0;
+        let mut subseqidx = i;
 
         while i < total {
             let i_ = i;