From f87d4a15a82a76e7510629173c366d084f2c02ca Mon Sep 17 00:00:00 2001
From: Simon Sapin <simon.sapin@exyr.org>
Date: Thu, 5 Apr 2018 15:55:28 +0200
Subject: Move Utf8Lossy decoder to libcore

---
 src/libstd_unicode/Cargo.toml     |   4 -
 src/libstd_unicode/lib.rs         |   1 -
 src/libstd_unicode/lossy.rs       | 213 --------------------------------------
 src/libstd_unicode/tests/lib.rs   |  15 ---
 src/libstd_unicode/tests/lossy.rs |  91 ----------------
 5 files changed, 324 deletions(-)
 delete mode 100644 src/libstd_unicode/lossy.rs
 delete mode 100644 src/libstd_unicode/tests/lib.rs
 delete mode 100644 src/libstd_unicode/tests/lossy.rs

(limited to 'src/libstd_unicode')
diff --git a/src/libstd_unicode/Cargo.toml b/src/libstd_unicode/Cargo.toml
index 283070a0e2c..b1c55c2e4b6 100644
--- a/src/libstd_unicode/Cargo.toml
+++ b/src/libstd_unicode/Cargo.toml
@@ -9,10 +9,6 @@ path = "lib.rs"
 test = false
 bench = false
 
-[[test]]
-name = "std_unicode_tests"
-path = "tests/lib.rs"
-
 [dependencies]
 core = { path = "../libcore" }
 compiler_builtins = { path = "../rustc/compiler_builtins_shim" }
diff --git a/src/libstd_unicode/lib.rs b/src/libstd_unicode/lib.rs
index cf8c101a2f9..106a2c0f0c5 100644
--- a/src/libstd_unicode/lib.rs
+++ b/src/libstd_unicode/lib.rs
@@ -45,7 +45,6 @@ mod tables;
 mod u_str;
 mod version;
 pub mod char;
-pub mod lossy;
 
 #[allow(deprecated)]
 pub mod str {
diff --git a/src/libstd_unicode/lossy.rs b/src/libstd_unicode/lossy.rs
deleted file mode 100644
index cc8e93308a5..00000000000
--- a/src/libstd_unicode/lossy.rs
+++ /dev/null
@@ -1,213 +0,0 @@
-// Copyright 2012-2017 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-use core::str as core_str;
-use core::fmt;
-use core::fmt::Write;
-use char;
-use core::mem;
-
-
-/// Lossy UTF-8 string.
-#[unstable(feature = "str_internals", issue = "0")]
-pub struct Utf8Lossy {
-    bytes: [u8]
-}
-
-impl Utf8Lossy {
-    pub fn from_str(s: &str) -> &Utf8Lossy {
-        Utf8Lossy::from_bytes(s.as_bytes())
-    }
-
-    pub fn from_bytes(bytes: &[u8]) -> &Utf8Lossy {
-        unsafe { mem::transmute(bytes) }
-    }
-
-    pub fn chunks(&self) -> Utf8LossyChunksIter {
-        Utf8LossyChunksIter { source: &self.bytes }
-    }
-}
-
-
-/// Iterator over lossy UTF-8 string
-#[unstable(feature = "str_internals", issue = "0")]
-#[allow(missing_debug_implementations)]
-pub struct Utf8LossyChunksIter<'a> {
-    source: &'a [u8],
-}
-
-#[unstable(feature = "str_internals", issue = "0")]
-#[derive(PartialEq, Eq, Debug)]
-pub struct Utf8LossyChunk<'a> {
-    /// Sequence of valid chars.
-    /// Can be empty between broken UTF-8 chars.
-    pub valid: &'a str,
-    /// Single broken char, empty if none.
-    /// Empty iff iterator item is last.
-    pub broken: &'a [u8],
-}
-
-impl<'a> Iterator for Utf8LossyChunksIter<'a> {
-    type Item = Utf8LossyChunk<'a>;
-
-    fn next(&mut self) -> Option<Utf8LossyChunk<'a>> {
-        if self.source.len() == 0 {
-            return None;
-        }
-
-        const TAG_CONT_U8: u8 = 128;
-        fn unsafe_get(xs: &[u8], i: usize) -> u8 {
-            unsafe { *xs.get_unchecked(i) }
-        }
-        fn safe_get(xs: &[u8], i: usize) -> u8 {
-            if i >= xs.len() { 0 } else { unsafe_get(xs, i) }
-        }
-
-        let mut i = 0;
-        while i < self.source.len() {
-            let i_ = i;
-
-            let byte = unsafe_get(self.source, i);
-            i += 1;
-
-            if byte < 128 {
-
-            } else {
-                let w = core_str::utf8_char_width(byte);
-
-                macro_rules! error { () => ({
-                    unsafe {
-                        let r = Utf8LossyChunk {
-                            valid: core_str::from_utf8_unchecked(&self.source[0..i_]),
-                            broken: &self.source[i_..i],
-                        };
-                        self.source = &self.source[i..];
-                        return Some(r);
-                    }
-                })}
-
-                match w {
-                    2 => {
-                        if safe_get(self.source, i) & 192 != TAG_CONT_U8 {
-                            error!();
-                        }
-                        i += 1;
-                    }
-                    3 => {
-                        match (byte, safe_get(self.source, i)) {
-                            (0xE0, 0xA0 ... 0xBF) => (),
-                            (0xE1 ... 0xEC, 0x80 ... 0xBF) => (),
-                            (0xED, 0x80 ... 0x9F) => (),
-                            (0xEE ... 0xEF, 0x80 ... 0xBF) => (),
-                            _ => {
-                                error!();
-                            }
-                        }
-                        i += 1;
-                        if safe_get(self.source, i) & 192 != TAG_CONT_U8 {
-                            error!();
-                        }
-                        i += 1;
-                    }
-                    4 => {
-                        match (byte, safe_get(self.source, i)) {
-                            (0xF0, 0x90 ... 0xBF) => (),
-                            (0xF1 ... 0xF3, 0x80 ... 0xBF) => (),
-                            (0xF4, 0x80 ... 0x8F) => (),
-                            _ => {
-                                error!();
-                            }
-                        }
-                        i += 1;
-                        if safe_get(self.source, i) & 192 != TAG_CONT_U8 {
-                            error!();
-                        }
-                        i += 1;
-                        if safe_get(self.source, i) & 192 != TAG_CONT_U8 {
-                            error!();
-                        }
-                        i += 1;
-                    }
-                    _ => {
-                        error!();
-                    }
-                }
-            }
-        }
-
-        let r = Utf8LossyChunk {
-            valid: unsafe { core_str::from_utf8_unchecked(self.source) },
-            broken: &[],
-        };
-        self.source = &[];
-        return Some(r);
-    }
-}
-
-
-impl fmt::Display for Utf8Lossy {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        // If we're the empty string then our iterator won't actually yield
-        // anything, so perform the formatting manually
-        if self.bytes.len() == 0 {
-            return "".fmt(f)
-        }
-
-        for Utf8LossyChunk { valid, broken } in self.chunks() {
-            // If we successfully decoded the whole chunk as a valid string then
-            // we can return a direct formatting of the string which will also
-            // respect various formatting flags if possible.
-            if valid.len() == self.bytes.len() {
-                assert!(broken.is_empty());
-                return valid.fmt(f)
-            }
-
-            f.write_str(valid)?;
-            if !broken.is_empty() {
-                f.write_char(char::REPLACEMENT_CHARACTER)?;
-            }
-        }
-        Ok(())
-    }
-}
-
-impl fmt::Debug for Utf8Lossy {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        f.write_char('"')?;
-
-        for Utf8LossyChunk { valid, broken } in self.chunks() {
-
-            // Valid part.
-            // Here we partially parse UTF-8 again which is suboptimal.
-            {
-                let mut from = 0;
-                for (i, c) in valid.char_indices() {
-                    let esc = c.escape_debug();
-                    // If char needs escaping, flush backlog so far and write, else skip
-                    if esc.len() != 1 {
-                        f.write_str(&valid[from..i])?;
-                        for c in esc {
-                            f.write_char(c)?;
-                        }
-                        from = i + c.len_utf8();
-                    }
-                }
-                f.write_str(&valid[from..])?;
-            }
-
-            // Broken parts of string as hex escape.
-            for &b in broken {
-                write!(f, "\\x{:02x}", b)?;
-            }
-        }
-
-        f.write_char('"')
-    }
-}
diff --git a/src/libstd_unicode/tests/lib.rs b/src/libstd_unicode/tests/lib.rs
deleted file mode 100644
index 9535ec18763..00000000000
--- a/src/libstd_unicode/tests/lib.rs
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright 2012-2017 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-#![feature(str_internals, unicode)]
-
-extern crate std_unicode;
-
-mod lossy;
diff --git a/src/libstd_unicode/tests/lossy.rs b/src/libstd_unicode/tests/lossy.rs
deleted file mode 100644
index e05d0668556..00000000000
--- a/src/libstd_unicode/tests/lossy.rs
+++ /dev/null
@@ -1,91 +0,0 @@
-// Copyright 2012-2017 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-use std_unicode::lossy::*;
-
-#[test]
-fn chunks() {
-    let mut iter = Utf8Lossy::from_bytes(b"hello").chunks();
-    assert_eq!(Some(Utf8LossyChunk { valid: "hello", broken: b"", }), iter.next());
-    assert_eq!(None, iter.next());
-
-    let mut iter = Utf8Lossy::from_bytes("ศไทย中华Việt Nam".as_bytes()).chunks();
-    assert_eq!(Some(Utf8LossyChunk { valid: "ศไทย中华Việt Nam", broken: b"", }), iter.next());
-    assert_eq!(None, iter.next());
-
-    let mut iter = Utf8Lossy::from_bytes(b"Hello\xC2 There\xFF Goodbye").chunks();
-    assert_eq!(Some(Utf8LossyChunk { valid: "Hello", broken: b"\xC2", }), iter.next());
-    assert_eq!(Some(Utf8LossyChunk { valid: " There", broken: b"\xFF", }), iter.next());
-    assert_eq!(Some(Utf8LossyChunk { valid: " Goodbye", broken: b"", }), iter.next());
-    assert_eq!(None, iter.next());
-
-    let mut iter = Utf8Lossy::from_bytes(b"Hello\xC0\x80 There\xE6\x83 Goodbye").chunks();
-    assert_eq!(Some(Utf8LossyChunk { valid: "Hello", broken: b"\xC0", }), iter.next());
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\x80", }), iter.next());
-    assert_eq!(Some(Utf8LossyChunk { valid: " There", broken: b"\xE6\x83", }), iter.next());
-    assert_eq!(Some(Utf8LossyChunk { valid: " Goodbye", broken: b"", }), iter.next());
-    assert_eq!(None, iter.next());
-
-    let mut iter = Utf8Lossy::from_bytes(b"\xF5foo\xF5\x80bar").chunks();
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\xF5", }), iter.next());
-    assert_eq!(Some(Utf8LossyChunk { valid: "foo", broken: b"\xF5", }), iter.next());
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\x80", }), iter.next());
-    assert_eq!(Some(Utf8LossyChunk { valid: "bar", broken: b"", }), iter.next());
-    assert_eq!(None, iter.next());
-
-    let mut iter = Utf8Lossy::from_bytes(b"\xF1foo\xF1\x80bar\xF1\x80\x80baz").chunks();
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\xF1", }), iter.next());
-    assert_eq!(Some(Utf8LossyChunk { valid: "foo", broken: b"\xF1\x80", }), iter.next());
-    assert_eq!(Some(Utf8LossyChunk { valid: "bar", broken: b"\xF1\x80\x80", }), iter.next());
-    assert_eq!(Some(Utf8LossyChunk { valid: "baz", broken: b"", }), iter.next());
-    assert_eq!(None, iter.next());
-
-    let mut iter = Utf8Lossy::from_bytes(b"\xF4foo\xF4\x80bar\xF4\xBFbaz").chunks();
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\xF4", }), iter.next());
-    assert_eq!(Some(Utf8LossyChunk { valid: "foo", broken: b"\xF4\x80", }), iter.next());
-    assert_eq!(Some(Utf8LossyChunk { valid: "bar", broken: b"\xF4", }), iter.next());
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\xBF", }), iter.next());
-    assert_eq!(Some(Utf8LossyChunk { valid: "baz", broken: b"", }), iter.next());
-    assert_eq!(None, iter.next());
-
-    let mut iter = Utf8Lossy::from_bytes(b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar").chunks();
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\xF0", }), iter.next());
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\x80", }), iter.next());
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\x80", }), iter.next());
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\x80", }), iter.next());
-    assert_eq!(Some(Utf8LossyChunk { valid: "foo\u{10000}bar", broken: b"", }), iter.next());
-    assert_eq!(None, iter.next());
-
-    // surrogates
-    let mut iter = Utf8Lossy::from_bytes(b"\xED\xA0\x80foo\xED\xBF\xBFbar").chunks();
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\xED", }), iter.next());
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\xA0", }), iter.next());
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\x80", }), iter.next());
-    assert_eq!(Some(Utf8LossyChunk { valid: "foo", broken: b"\xED", }), iter.next());
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\xBF", }), iter.next());
-    assert_eq!(Some(Utf8LossyChunk { valid: "", broken: b"\xBF", }), iter.next());
-    assert_eq!(Some(Utf8LossyChunk { valid: "bar", broken: b"", }), iter.next());
-    assert_eq!(None, iter.next());
-}
-
-#[test]
-fn display() {
-    assert_eq!(
-        "Hello\u{FFFD}\u{FFFD} There\u{FFFD} Goodbye",
-        &format!("{}", Utf8Lossy::from_bytes(b"Hello\xC0\x80 There\xE6\x83 Goodbye")));
-}
-
-#[test]
-fn debug() {
-    assert_eq!(
-        "\"Hello\\xc0\\x80 There\\xe6\\x83 Goodbye\\u{10d4ea}\"",
-        &format!("{:?}", Utf8Lossy::from_bytes(
-            b"Hello\xC0\x80 There\xE6\x83 Goodbye\xf4\x8d\x93\xaa")));
-}
-- 
cgit 1.4.1-3-g733a5