about summary refs log tree commit diff
diff options
context:
space:
mode:
author 1011X <1011XXXXX@gmail.com> 2018-03-04 13:33:34 -0500
committer 1011X <1011XXXXX@gmail.com> 2018-03-04 13:33:34 -0500
commit 4e4c1b5b325c4c474426a7e3c346c316fbc644f1 (patch)
tree 99b1de26df5d39e3396fa38efee8582d2f9577d9
parent 3b8bd530b0ec0dc7538c12799468867662f818cc (diff)
download rust-4e4c1b5b325c4c474426a7e3c346c316fbc644f1.tar.gz
rust-4e4c1b5b325c4c474426a7e3c346c316fbc644f1.zip
Added `ascii` module to core
-rw-r--r-- src/libcore/ascii.rs 499
1 files changed, 499 insertions, 0 deletions
diff --git a/src/libcore/ascii.rs b/src/libcore/ascii.rs
new file mode 100644
index 00000000000..e7bee41ac34
--- /dev/null
+++ b/src/libcore/ascii.rs
@@ -0,0 +1,499 @@
+// Copyright 2013-2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! Operations on ASCII strings and characters.
+//!
+//! Most string operations in Rust act on UTF-8 strings. However, at times it
+//! makes more sense to only consider the ASCII character set for a specific
+//! operation.
+//!
+//! The [`escape_ascii`] function provides an iterator over the bytes of an
+//! escaped version of the byte given.
+//!
+//! [`escape_ascii`]: fn.escape_ascii.html
+
+#![stable(feature = "rust1", since = "1.0.0")]
+
+use fmt;
+use ops::Range;
+use iter::FusedIterator;
+
+/// An iterator over the escaped version of a byte.
+///
+/// This `struct` is created by the [`escape_ascii`] function. See its
+/// documentation for more.
+///
+/// [`escape_ascii`]: fn.escape_ascii.html
+#[unstable(feature = "core_ascii", issue = "46409")]
+pub struct EscapeDefault {
+    range: Range<usize>,
+    data: [u8; 4],
+}
+
+/// Returns an iterator that produces an escaped version of a `u8`.
+///
+/// The default is chosen with a bias toward producing literals that are
+/// legal in a variety of languages, including C++11 and similar C-family
+/// languages. The exact rules are:
+///
+/// * Tab is escaped as `\t`.
+/// * Carriage return is escaped as `\r`.
+/// * Line feed is escaped as `\n`.
+/// * Single quote is escaped as `\'`.
+/// * Double quote is escaped as `\"`.
+/// * Backslash is escaped as `\\`.
+/// * Any character in the 'printable ASCII' range `0x20` .. `0x7e`
+///   inclusive is not escaped.
+/// * Any other byte is given a hex escape of the form `\xNN`.
+/// * Unicode escapes are never generated by this function.
+///
+/// # Examples
+///
+/// ```
+/// let escaped = ascii::escape_ascii(b'0').next().unwrap();
+/// assert_eq!(b'0', escaped);
+///
+/// let mut escaped = ascii::escape_ascii(b'\t');
+///
+/// assert_eq!(b'\\', escaped.next().unwrap());
+/// assert_eq!(b't', escaped.next().unwrap());
+///
+/// let mut escaped = ascii::escape_ascii(b'\r');
+///
+/// assert_eq!(b'\\', escaped.next().unwrap());
+/// assert_eq!(b'r', escaped.next().unwrap());
+///
+/// let mut escaped = ascii::escape_ascii(b'\n');
+///
+/// assert_eq!(b'\\', escaped.next().unwrap());
+/// assert_eq!(b'n', escaped.next().unwrap());
+///
+/// let mut escaped = ascii::escape_ascii(b'\'');
+///
+/// assert_eq!(b'\\', escaped.next().unwrap());
+/// assert_eq!(b'\'', escaped.next().unwrap());
+///
+/// let mut escaped = ascii::escape_ascii(b'"');
+///
+/// assert_eq!(b'\\', escaped.next().unwrap());
+/// assert_eq!(b'"', escaped.next().unwrap());
+///
+/// let mut escaped = ascii::escape_ascii(b'\\');
+///
+/// assert_eq!(b'\\', escaped.next().unwrap());
+/// assert_eq!(b'\\', escaped.next().unwrap());
+///
+/// let mut escaped = ascii::escape_ascii(b'\x9d');
+///
+/// assert_eq!(b'\\', escaped.next().unwrap());
+/// assert_eq!(b'x', escaped.next().unwrap());
+/// assert_eq!(b'9', escaped.next().unwrap());
+/// assert_eq!(b'd', escaped.next().unwrap());
+/// ```
+#[unstable(feature = "core_ascii", issue = "46409")]
+pub fn escape_ascii(c: u8) -> EscapeDefault {
+    let (data, len) = match c {
+        b'\t' => ([b'\\', b't', 0, 0], 2),
+        b'\r' => ([b'\\', b'r', 0, 0], 2),
+        b'\n' => ([b'\\', b'n', 0, 0], 2),
+        b'\\' => ([b'\\', b'\\', 0, 0], 2),
+        b'\'' => ([b'\\', b'\'', 0, 0], 2),
+        b'"' => ([b'\\', b'"', 0, 0], 2),
+        b'\x20' ... b'\x7e' => ([c, 0, 0, 0], 1),
+        _ => ([b'\\', b'x', hexify(c >> 4), hexify(c & 0xf)], 4),
+    };
+
+    return EscapeDefault { range: 0..len, data };
+
+    fn hexify(b: u8) -> u8 {
+        match b {
+            0 ... 9 => b'0' + b,
+            _ => b'a' + b - 10,
+        }
+    }
+}
+
+#[stable(feature = "rust1", since = "1.0.0")]
+impl Iterator for EscapeDefault {
+    type Item = u8;
+    fn next(&mut self) -> Option<u8> { self.range.next().map(|i| self.data[i]) }
+    fn size_hint(&self) -> (usize, Option<usize>) { self.range.size_hint() }
+}
+#[stable(feature = "rust1", since = "1.0.0")]
+impl DoubleEndedIterator for EscapeDefault {
+    fn next_back(&mut self) -> Option<u8> {
+        self.range.next_back().map(|i| self.data[i])
+    }
+}
+#[stable(feature = "rust1", since = "1.0.0")]
+impl ExactSizeIterator for EscapeDefault {}
+#[unstable(feature = "fused", issue = "35602")]
+impl FusedIterator for EscapeDefault {}
+
+#[stable(feature = "std_debug", since = "1.16.0")]
+impl fmt::Debug for EscapeDefault {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.pad("EscapeDefault { .. }")
+    }
+}
+
+
+#[cfg(test)]
+mod tests {
+    use char::from_u32;
+
+    #[test]
+    fn test_is_ascii() {
+        assert!(b"".is_ascii());
+        assert!(b"banana\0\x7F".is_ascii());
+        assert!(b"banana\0\x7F".iter().all(|b| b.is_ascii()));
+        assert!(!b"Vi\xe1\xbb\x87t Nam".is_ascii());
+        assert!(!b"Vi\xe1\xbb\x87t Nam".iter().all(|b| b.is_ascii()));
+        assert!(!b"\xe1\xbb\x87".iter().any(|b| b.is_ascii()));
+
+        assert!("".is_ascii());
+        assert!("banana\0\u{7F}".is_ascii());
+        assert!("banana\0\u{7F}".chars().all(|c| c.is_ascii()));
+        assert!(!"ประเทศไทย中华Việt Nam".chars().all(|c| c.is_ascii()));
+        assert!(!"ประเทศไทย中华ệ ".chars().any(|c| c.is_ascii()));
+    }
+
+    #[test]
+    fn test_to_ascii_uppercase() {
+        assert_eq!("url()URL()uRl()ürl".to_ascii_uppercase(), "URL()URL()URL()üRL");
+        assert_eq!("hıKß".to_ascii_uppercase(), "HıKß");
+
+        for i in 0..501 {
+            let upper = if 'a' as u32 <= i && i <= 'z' as u32 { i + 'A' as u32 - 'a' as u32 }
+                        else { i };
+            assert_eq!((from_u32(i).unwrap()).to_string().to_ascii_uppercase(),
+                       (from_u32(upper).unwrap()).to_string());
+        }
+    }
+
+    #[test]
+    fn test_to_ascii_lowercase() {
+        assert_eq!("url()URL()uRl()Ürl".to_ascii_lowercase(), "url()url()url()Ürl");
+        // Dotted capital I, Kelvin sign, Sharp S.
+        assert_eq!("HİKß".to_ascii_lowercase(), "hİKß");
+
+        for i in 0..501 {
+            let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 }
+                        else { i };
+            assert_eq!((from_u32(i).unwrap()).to_string().to_ascii_lowercase(),
+                       (from_u32(lower).unwrap()).to_string());
+        }
+    }
+
+    #[test]
+    fn test_make_ascii_lower_case() {
+        macro_rules! test {
+            ($from: expr, $to: expr) => {
+                {
+                    let mut x = $from;
+                    x.make_ascii_lowercase();
+                    assert_eq!(x, $to);
+                }
+            }
+        }
+        test!(b'A', b'a');
+        test!(b'a', b'a');
+        test!(b'!', b'!');
+        test!('A', 'a');
+        test!('À', 'À');
+        test!('a', 'a');
+        test!('!', '!');
+        test!(b"H\xc3\x89".to_vec(), b"h\xc3\x89");
+        test!("HİKß".to_string(), "hİKß");
+    }
+
+
+    #[test]
+    fn test_make_ascii_upper_case() {
+        macro_rules! test {
+            ($from: expr, $to: expr) => {
+                {
+                    let mut x = $from;
+                    x.make_ascii_uppercase();
+                    assert_eq!(x, $to);
+                }
+            }
+        }
+        test!(b'a', b'A');
+        test!(b'A', b'A');
+        test!(b'!', b'!');
+        test!('a', 'A');
+        test!('à', 'à');
+        test!('A', 'A');
+        test!('!', '!');
+        test!(b"h\xc3\xa9".to_vec(), b"H\xc3\xa9");
+        test!("hıKß".to_string(), "HıKß");
+
+        let mut x = "Hello".to_string();
+        x[..3].make_ascii_uppercase();  // Test IndexMut on String.
+        assert_eq!(x, "HELlo")
+    }
+
+    #[test]
+    fn test_eq_ignore_ascii_case() {
+        assert!("url()URL()uRl()Ürl".eq_ignore_ascii_case("url()url()url()Ürl"));
+        assert!(!"Ürl".eq_ignore_ascii_case("ürl"));
+        // Dotted capital I, Kelvin sign, Sharp S.
+        assert!("HİKß".eq_ignore_ascii_case("hİKß"));
+        assert!(!"İ".eq_ignore_ascii_case("i"));
+        assert!(!"K".eq_ignore_ascii_case("k"));
+        assert!(!"ß".eq_ignore_ascii_case("s"));
+
+        for i in 0..501 {
+            let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 }
+                        else { i };
+            assert!((from_u32(i).unwrap()).to_string().eq_ignore_ascii_case(
+                    &from_u32(lower).unwrap().to_string()));
+        }
+    }
+
+    #[test]
+    fn inference_works() {
+        let x = "a".to_string();
+        x.eq_ignore_ascii_case("A");
+    }
+
+    // Shorthands used by the is_ascii_* tests.
+    macro_rules! assert_all {
+        ($what:ident, $($str:tt),+) => {{
+            $(
+                for b in $str.chars() {
+                    if !b.$what() {
+                        panic!("expected {}({}) but it isn't",
+                               stringify!($what), b);
+                    }
+                }
+                for b in $str.as_bytes().iter() {
+                    if !b.$what() {
+                        panic!("expected {}(0x{:02x})) but it isn't",
+                               stringify!($what), b);
+                    }
+                }
+                assert!($str.$what());
+                assert!($str.as_bytes().$what());
+            )+
+        }};
+        ($what:ident, $($str:tt),+,) => (assert_all!($what,$($str),+))
+    }
+    macro_rules! assert_none {
+        ($what:ident, $($str:tt),+) => {{
+            $(
+                for b in $str.chars() {
+                    if b.$what() {
+                        panic!("expected not-{}({}) but it is",
+                               stringify!($what), b);
+                    }
+                }
+                for b in $str.as_bytes().iter() {
+                    if b.$what() {
+                        panic!("expected not-{}(0x{:02x})) but it is",
+                               stringify!($what), b);
+                    }
+                }
+            )*
+        }};
+        ($what:ident, $($str:tt),+,) => (assert_none!($what,$($str),+))
+    }
+
+    #[test]
+    fn test_is_ascii_alphabetic() {
+        assert_all!(is_ascii_alphabetic,
+            "",
+            "abcdefghijklmnopqrstuvwxyz",
+            "ABCDEFGHIJKLMNOQPRSTUVWXYZ",
+        );
+        assert_none!(is_ascii_alphabetic,
+            "0123456789",
+            "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+            " \t\n\x0c\r",
+            "\x00\x01\x02\x03\x04\x05\x06\x07",
+            "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+            "\x10\x11\x12\x13\x14\x15\x16\x17",
+            "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
+            "\x7f",
+        );
+    }
+
+    #[test]
+    fn test_is_ascii_uppercase() {
+        assert_all!(is_ascii_uppercase,
+            "",
+            "ABCDEFGHIJKLMNOQPRSTUVWXYZ",
+        );
+        assert_none!(is_ascii_uppercase,
+            "abcdefghijklmnopqrstuvwxyz",
+            "0123456789",
+            "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+            " \t\n\x0c\r",
+            "\x00\x01\x02\x03\x04\x05\x06\x07",
+            "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+            "\x10\x11\x12\x13\x14\x15\x16\x17",
+            "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
+            "\x7f",
+        );
+    }
+
+    #[test]
+    fn test_is_ascii_lowercase() {
+        assert_all!(is_ascii_lowercase,
+            "abcdefghijklmnopqrstuvwxyz",
+        );
+        assert_none!(is_ascii_lowercase,
+            "ABCDEFGHIJKLMNOQPRSTUVWXYZ",
+            "0123456789",
+            "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+            " \t\n\x0c\r",
+            "\x00\x01\x02\x03\x04\x05\x06\x07",
+            "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+            "\x10\x11\x12\x13\x14\x15\x16\x17",
+            "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
+            "\x7f",
+        );
+    }
+
+    #[test]
+    fn test_is_ascii_alphanumeric() {
+        assert_all!(is_ascii_alphanumeric,
+            "",
+            "abcdefghijklmnopqrstuvwxyz",
+            "ABCDEFGHIJKLMNOQPRSTUVWXYZ",
+            "0123456789",
+        );
+        assert_none!(is_ascii_alphanumeric,
+            "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+            " \t\n\x0c\r",
+            "\x00\x01\x02\x03\x04\x05\x06\x07",
+            "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+            "\x10\x11\x12\x13\x14\x15\x16\x17",
+            "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
+            "\x7f",
+        );
+    }
+
+    #[test]
+    fn test_is_ascii_digit() {
+        assert_all!(is_ascii_digit,
+            "",
+            "0123456789",
+        );
+        assert_none!(is_ascii_digit,
+            "abcdefghijklmnopqrstuvwxyz",
+            "ABCDEFGHIJKLMNOQPRSTUVWXYZ",
+            "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+            " \t\n\x0c\r",
+            "\x00\x01\x02\x03\x04\x05\x06\x07",
+            "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+            "\x10\x11\x12\x13\x14\x15\x16\x17",
+            "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
+            "\x7f",
+        );
+    }
+
+    #[test]
+    fn test_is_ascii_hexdigit() {
+        assert_all!(is_ascii_hexdigit,
+            "",
+            "0123456789",
+            "abcdefABCDEF",
+        );
+        assert_none!(is_ascii_hexdigit,
+            "ghijklmnopqrstuvwxyz",
+            "GHIJKLMNOQPRSTUVWXYZ",
+            "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+            " \t\n\x0c\r",
+            "\x00\x01\x02\x03\x04\x05\x06\x07",
+            "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+            "\x10\x11\x12\x13\x14\x15\x16\x17",
+            "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
+            "\x7f",
+        );
+    }
+
+    #[test]
+    fn test_is_ascii_punctuation() {
+        assert_all!(is_ascii_punctuation,
+            "",
+            "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+        );
+        assert_none!(is_ascii_punctuation,
+            "abcdefghijklmnopqrstuvwxyz",
+            "ABCDEFGHIJKLMNOQPRSTUVWXYZ",
+            "0123456789",
+            " \t\n\x0c\r",
+            "\x00\x01\x02\x03\x04\x05\x06\x07",
+            "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+            "\x10\x11\x12\x13\x14\x15\x16\x17",
+            "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
+            "\x7f",
+        );
+    }
+
+    #[test]
+    fn test_is_ascii_graphic() {
+        assert_all!(is_ascii_graphic,
+            "",
+            "abcdefghijklmnopqrstuvwxyz",
+            "ABCDEFGHIJKLMNOQPRSTUVWXYZ",
+            "0123456789",
+            "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+        );
+        assert_none!(is_ascii_graphic,
+            " \t\n\x0c\r",
+            "\x00\x01\x02\x03\x04\x05\x06\x07",
+            "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+            "\x10\x11\x12\x13\x14\x15\x16\x17",
+            "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
+            "\x7f",
+        );
+    }
+
+    #[test]
+    fn test_is_ascii_whitespace() {
+        assert_all!(is_ascii_whitespace,
+            "",
+            " \t\n\x0c\r",
+        );
+        assert_none!(is_ascii_whitespace,
+            "abcdefghijklmnopqrstuvwxyz",
+            "ABCDEFGHIJKLMNOQPRSTUVWXYZ",
+            "0123456789",
+            "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+            "\x00\x01\x02\x03\x04\x05\x06\x07",
+            "\x08\x0b\x0e\x0f",
+            "\x10\x11\x12\x13\x14\x15\x16\x17",
+            "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
+            "\x7f",
+        );
+    }
+
+    #[test]
+    fn test_is_ascii_control() {
+        assert_all!(is_ascii_control,
+            "",
+            "\x00\x01\x02\x03\x04\x05\x06\x07",
+            "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+            "\x10\x11\x12\x13\x14\x15\x16\x17",
+            "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
+            "\x7f",
+        );
+        assert_none!(is_ascii_control,
+            "abcdefghijklmnopqrstuvwxyz",
+            "ABCDEFGHIJKLMNOQPRSTUVWXYZ",
+            "0123456789",
+            "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
+            " ",
+        );
+    }
+}