about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/libcollections/lib.rs 1
-rw-r--r-- src/libcollections/string.rs 16
-rw-r--r-- src/libcollectionstest/str.rs 2
-rw-r--r-- src/libcollectionstest/string.rs 1
-rw-r--r-- src/libcore/str/mod.rs 39
-rw-r--r-- src/libstd/error.rs 5
6 files changed, 26 insertions, 38 deletions
diff --git a/src/libcollections/lib.rs b/src/libcollections/lib.rs
index 7658611d809..5179b04f882 100644
--- a/src/libcollections/lib.rs
+++ b/src/libcollections/lib.rs
@@ -40,6 +40,7 @@
 #![feature(str_char)]
 #![feature(slice_patterns)]
 #![feature(debug_builders)]
+#![feature(utf8_error)]
 #![cfg_attr(test, feature(rand, rustc_private, test, hash, collections))]
 #![cfg_attr(test, allow(deprecated))] // rand
 
diff --git a/src/libcollections/string.rs b/src/libcollections/string.rs
index 441d0f2c5df..9c9f2d628b8 100644
--- a/src/libcollections/string.rs
+++ b/src/libcollections/string.rs
@@ -132,7 +132,7 @@ impl String {
     ///
     /// let invalid_vec = vec![240, 144, 128];
     /// let s = String::from_utf8(invalid_vec).err().unwrap();
-    /// assert_eq!(s.utf8_error(), Utf8Error::TooShort);
+    /// let err = s.utf8_error();
     /// assert_eq!(s.into_bytes(), [240, 144, 128]);
     /// ```
     #[inline]
@@ -156,14 +156,10 @@ impl String {
     /// ```
     #[stable(feature = "rust1", since = "1.0.0")]
     pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> Cow<'a, str> {
-        let mut i = 0;
+        let mut i;
         match str::from_utf8(v) {
             Ok(s) => return Cow::Borrowed(s),
-            Err(e) => {
-                if let Utf8Error::InvalidByte(firstbad) = e {
-                    i = firstbad;
-                }
-            }
+            Err(e) => i = e.valid_up_to(),
         }
 
         const TAG_CONT_U8: u8 = 128;
@@ -188,9 +184,9 @@ impl String {
             };
         }
 
-        // subseqidx is the index of the first byte of the subsequence we're looking at.
-        // It's used to copy a bunch of contiguous good codepoints at once instead of copying
-        // them one by one.
+        // subseqidx is the index of the first byte of the subsequence we're
+        // looking at.  It's used to copy a bunch of contiguous good codepoints
+        // at once instead of copying them one by one.
         let mut subseqidx = i;
 
         while i < total {
diff --git a/src/libcollectionstest/str.rs b/src/libcollectionstest/str.rs
index 15f15900e78..cacafab4e3c 100644
--- a/src/libcollectionstest/str.rs
+++ b/src/libcollectionstest/str.rs
@@ -1502,7 +1502,7 @@ fn test_str_from_utf8() {
     assert_eq!(from_utf8(xs), Ok("ศไทย中华Việt Nam"));
 
     let xs = b"hello\xFF";
-    assert_eq!(from_utf8(xs), Err(Utf8Error::TooShort));
+    assert!(from_utf8(xs).is_err());
 }
 
 #[test]
diff --git a/src/libcollectionstest/string.rs b/src/libcollectionstest/string.rs
index 5d6aa8ac0dc..3184f842e9a 100644
--- a/src/libcollectionstest/string.rs
+++ b/src/libcollectionstest/string.rs
@@ -45,7 +45,6 @@ fn test_from_utf8() {
 
     let xs = b"hello\xFF".to_vec();
     let err = String::from_utf8(xs).err().unwrap();
-    assert_eq!(err.utf8_error(), Utf8Error::TooShort);
     assert_eq!(err.into_bytes(), b"hello\xff".to_vec());
 }
 
diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs
index 9bc760b56ec..fc623f21167 100644
--- a/src/libcore/str/mod.rs
+++ b/src/libcore/str/mod.rs
@@ -106,19 +106,19 @@ Section: Creating a string
 
 /// Errors which can occur when attempting to interpret a byte slice as a `str`.
 #[derive(Copy, Eq, PartialEq, Clone, Debug)]
-#[unstable(feature = "core",
-           reason = "error enumeration recently added and definitions may be refined")]
-pub enum Utf8Error {
-    /// An invalid byte was detected at the byte offset given.
-    ///
-    /// The offset is guaranteed to be in bounds of the slice in question, and
-    /// the byte at the specified offset was the first invalid byte in the
-    /// sequence detected.
-    InvalidByte(usize),
+#[stable(feature = "rust1", since = "1.0.0")]
+pub struct Utf8Error {
+    valid_up_to: usize,
+}
 
-    /// The byte slice was invalid because more bytes were needed but no more
-    /// bytes were available.
-    TooShort,
+impl Utf8Error {
+    /// Returns the index in the given string up to which valid UTF-8 was
+    /// verified.
+    ///
+    /// Starting at the index provided, but not necessarily at it precisely, an
+    /// invalid UTF-8 encoding sequence was found.
+    #[unstable(feature = "utf8_error", reason = "method just added")]
+    pub fn valid_up_to(&self) -> usize { self.valid_up_to }
 }
 
 /// Converts a slice of bytes to a string slice without performing any
@@ -147,14 +147,7 @@ pub unsafe fn from_utf8_unchecked<'a>(v: &'a [u8]) -> &'a str {
 #[stable(feature = "rust1", since = "1.0.0")]
 impl fmt::Display for Utf8Error {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match *self {
-            Utf8Error::InvalidByte(n) => {
-                write!(f, "invalid utf-8: invalid byte at index {}", n)
-            }
-            Utf8Error::TooShort => {
-                write!(f, "invalid utf-8: byte slice too short")
-            }
-        }
+        write!(f, "invalid utf-8: invalid byte near index {}", self.valid_up_to)
     }
 }
 
@@ -1218,14 +1211,16 @@ fn run_utf8_validation_iterator(iter: &mut slice::Iter<u8>)
         // restore the iterator we had at the start of this codepoint.
         macro_rules! err { () => {{
             *iter = old.clone();
-            return Err(Utf8Error::InvalidByte(whole.len() - iter.as_slice().len()))
+            return Err(Utf8Error {
+                valid_up_to: whole.len() - iter.as_slice().len()
+            })
         }}}
 
         macro_rules! next { () => {
             match iter.next() {
                 Some(a) => *a,
                 // we needed data, but there was none: error!
-                None => return Err(Utf8Error::TooShort),
+                None => err!(),
             }
         }}
 
diff --git a/src/libstd/error.rs b/src/libstd/error.rs
index c9babeb3230..96087bf1183 100644
--- a/src/libstd/error.rs
+++ b/src/libstd/error.rs
@@ -122,10 +122,7 @@ impl Error for str::ParseBoolError {
 #[stable(feature = "rust1", since = "1.0.0")]
 impl Error for str::Utf8Error {
     fn description(&self) -> &str {
-        match *self {
-            str::Utf8Error::TooShort => "invalid utf-8: not enough bytes",
-            str::Utf8Error::InvalidByte(..) => "invalid utf-8: corrupt contents",
-        }
+        "invalid utf-8: corrupt contents"
     }
 }