about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
authorAlex Crichton <alex@alexcrichton.com>2015-04-14 10:55:41 -0700
committerAlex Crichton <alex@alexcrichton.com>2015-04-14 10:55:41 -0700
commit34603b0c19f134f446dc3180a02212b7839f316a (patch)
treeb5455b3f215a58c4c8bda114cec79a5b44e26949 /src
parent88a145ea36967f8afb920af4e096113271432fc5 (diff)
parentf329030b095aa30ce29be0c3459615d85506747b (diff)
downloadrust-34603b0c19f134f446dc3180a02212b7839f316a.tar.gz
rust-34603b0c19f134f446dc3180a02212b7839f316a.zip
rollup merge of #24310: alexcrichton/stabilize-utf8-error
The meaning of each variant of this enum was somewhat ambiguous and it's unclear
that we wouldn't even want to add more enumeration values in the future. As a
result this error has been altered to instead become an opaque structure.
Learning about the "first invalid byte index" is still an unstable feature, but
the type itself is now stable.
Diffstat (limited to 'src')
-rw-r--r--src/libcollections/lib.rs1
-rw-r--r--src/libcollections/string.rs16
-rw-r--r--src/libcollectionstest/str.rs2
-rw-r--r--src/libcollectionstest/string.rs1
-rw-r--r--src/libcore/str/mod.rs39
-rw-r--r--src/libstd/error.rs5
6 files changed, 26 insertions, 38 deletions
diff --git a/src/libcollections/lib.rs b/src/libcollections/lib.rs
index 7658611d809..5179b04f882 100644
--- a/src/libcollections/lib.rs
+++ b/src/libcollections/lib.rs
@@ -40,6 +40,7 @@
 #![feature(str_char)]
 #![feature(slice_patterns)]
 #![feature(debug_builders)]
+#![feature(utf8_error)]
 #![cfg_attr(test, feature(rand, rustc_private, test, hash, collections))]
 #![cfg_attr(test, allow(deprecated))] // rand
 
diff --git a/src/libcollections/string.rs b/src/libcollections/string.rs
index 441d0f2c5df..9c9f2d628b8 100644
--- a/src/libcollections/string.rs
+++ b/src/libcollections/string.rs
@@ -132,7 +132,7 @@ impl String {
     ///
     /// let invalid_vec = vec![240, 144, 128];
     /// let s = String::from_utf8(invalid_vec).err().unwrap();
-    /// assert_eq!(s.utf8_error(), Utf8Error::TooShort);
+    /// let err = s.utf8_error();
     /// assert_eq!(s.into_bytes(), [240, 144, 128]);
     /// ```
     #[inline]
@@ -156,14 +156,10 @@ impl String {
     /// ```
     #[stable(feature = "rust1", since = "1.0.0")]
     pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> Cow<'a, str> {
-        let mut i = 0;
+        let mut i;
         match str::from_utf8(v) {
             Ok(s) => return Cow::Borrowed(s),
-            Err(e) => {
-                if let Utf8Error::InvalidByte(firstbad) = e {
-                    i = firstbad;
-                }
-            }
+            Err(e) => i = e.valid_up_to(),
         }
 
         const TAG_CONT_U8: u8 = 128;
@@ -188,9 +184,9 @@ impl String {
             };
         }
 
-        // subseqidx is the index of the first byte of the subsequence we're looking at.
-        // It's used to copy a bunch of contiguous good codepoints at once instead of copying
-        // them one by one.
+        // subseqidx is the index of the first byte of the subsequence we're
+        // looking at.  It's used to copy a bunch of contiguous good codepoints
+        // at once instead of copying them one by one.
         let mut subseqidx = i;
 
         while i < total {
diff --git a/src/libcollectionstest/str.rs b/src/libcollectionstest/str.rs
index 15f15900e78..cacafab4e3c 100644
--- a/src/libcollectionstest/str.rs
+++ b/src/libcollectionstest/str.rs
@@ -1502,7 +1502,7 @@ fn test_str_from_utf8() {
     assert_eq!(from_utf8(xs), Ok("ศไทย中华Việt Nam"));
 
     let xs = b"hello\xFF";
-    assert_eq!(from_utf8(xs), Err(Utf8Error::TooShort));
+    assert!(from_utf8(xs).is_err());
 }
 
 #[test]
diff --git a/src/libcollectionstest/string.rs b/src/libcollectionstest/string.rs
index 5d6aa8ac0dc..3184f842e9a 100644
--- a/src/libcollectionstest/string.rs
+++ b/src/libcollectionstest/string.rs
@@ -45,7 +45,6 @@ fn test_from_utf8() {
 
     let xs = b"hello\xFF".to_vec();
     let err = String::from_utf8(xs).err().unwrap();
-    assert_eq!(err.utf8_error(), Utf8Error::TooShort);
     assert_eq!(err.into_bytes(), b"hello\xff".to_vec());
 }
 
diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs
index 9bc760b56ec..fc623f21167 100644
--- a/src/libcore/str/mod.rs
+++ b/src/libcore/str/mod.rs
@@ -106,19 +106,19 @@ Section: Creating a string
 
 /// Errors which can occur when attempting to interpret a byte slice as a `str`.
 #[derive(Copy, Eq, PartialEq, Clone, Debug)]
-#[unstable(feature = "core",
-           reason = "error enumeration recently added and definitions may be refined")]
-pub enum Utf8Error {
-    /// An invalid byte was detected at the byte offset given.
-    ///
-    /// The offset is guaranteed to be in bounds of the slice in question, and
-    /// the byte at the specified offset was the first invalid byte in the
-    /// sequence detected.
-    InvalidByte(usize),
+#[stable(feature = "rust1", since = "1.0.0")]
+pub struct Utf8Error {
+    valid_up_to: usize,
+}
 
-    /// The byte slice was invalid because more bytes were needed but no more
-    /// bytes were available.
-    TooShort,
+impl Utf8Error {
+    /// Returns the index in the given string up to which valid UTF-8 was
+    /// verified.
+    ///
+    /// Starting at the index provided, but not necessarily at it precisely, an
+    /// invalid UTF-8 encoding sequence was found.
+    #[unstable(feature = "utf8_error", reason = "method just added")]
+    pub fn valid_up_to(&self) -> usize { self.valid_up_to }
 }
 
 /// Converts a slice of bytes to a string slice without performing any
@@ -147,14 +147,7 @@ pub unsafe fn from_utf8_unchecked<'a>(v: &'a [u8]) -> &'a str {
 #[stable(feature = "rust1", since = "1.0.0")]
 impl fmt::Display for Utf8Error {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match *self {
-            Utf8Error::InvalidByte(n) => {
-                write!(f, "invalid utf-8: invalid byte at index {}", n)
-            }
-            Utf8Error::TooShort => {
-                write!(f, "invalid utf-8: byte slice too short")
-            }
-        }
+        write!(f, "invalid utf-8: invalid byte near index {}", self.valid_up_to)
     }
 }
 
@@ -1218,14 +1211,16 @@ fn run_utf8_validation_iterator(iter: &mut slice::Iter<u8>)
         // restore the iterator we had at the start of this codepoint.
         macro_rules! err { () => {{
             *iter = old.clone();
-            return Err(Utf8Error::InvalidByte(whole.len() - iter.as_slice().len()))
+            return Err(Utf8Error {
+                valid_up_to: whole.len() - iter.as_slice().len()
+            })
         }}}
 
         macro_rules! next { () => {
             match iter.next() {
                 Some(a) => *a,
                 // we needed data, but there was none: error!
-                None => return Err(Utf8Error::TooShort),
+                None => err!(),
             }
         }}
 
diff --git a/src/libstd/error.rs b/src/libstd/error.rs
index c9babeb3230..96087bf1183 100644
--- a/src/libstd/error.rs
+++ b/src/libstd/error.rs
@@ -122,10 +122,7 @@ impl Error for str::ParseBoolError {
 #[stable(feature = "rust1", since = "1.0.0")]
 impl Error for str::Utf8Error {
     fn description(&self) -> &str {
-        match *self {
-            str::Utf8Error::TooShort => "invalid utf-8: not enough bytes",
-            str::Utf8Error::InvalidByte(..) => "invalid utf-8: corrupt contents",
-        }
+        "invalid utf-8: corrupt contents"
     }
 }