diff options
| author | 1011X <1011XXXXX@gmail.com> | 2018-03-04 13:33:34 -0500 |
|---|---|---|
| committer | 1011X <1011XXXXX@gmail.com> | 2018-03-04 13:33:34 -0500 |
| commit | 4e4c1b5b325c4c474426a7e3c346c316fbc644f1 (patch) | |
| tree | 99b1de26df5d39e3396fa38efee8582d2f9577d9 | |
| parent | 3b8bd530b0ec0dc7538c12799468867662f818cc (diff) | |
| download | rust-4e4c1b5b325c4c474426a7e3c346c316fbc644f1.tar.gz rust-4e4c1b5b325c4c474426a7e3c346c316fbc644f1.zip | |
Added `ascii` module to core
| -rw-r--r-- | src/libcore/ascii.rs | 499 |
1 files changed, 499 insertions, 0 deletions
diff --git a/src/libcore/ascii.rs b/src/libcore/ascii.rs new file mode 100644 index 00000000000..e7bee41ac34 --- /dev/null +++ b/src/libcore/ascii.rs @@ -0,0 +1,499 @@ +// Copyright 2013-2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Operations on ASCII strings and characters. +//! +//! Most string operations in Rust act on UTF-8 strings. However, at times it +//! makes more sense to only consider the ASCII character set for a specific +//! operation. +//! +//! The [`escape_default`] function provides an iterator over the bytes of an +//! escaped version of the character given. +//! +//! [`escape_default`]: fn.escape_default.html + +#![stable(feature = "rust1", since = "1.0.0")] + +use fmt; +use ops::Range; +use iter::FusedIterator; + +/// An iterator over the escaped version of a byte. +/// +/// This `struct` is created by the [`escape_default`] function. See its +/// documentation for more. +/// +/// [`escape_default`]: fn.escape_default.html +#[unstable(feature = "core_ascii", issue = "46409")] +pub struct EscapeDefault { + range: Range<usize>, + data: [u8; 4], +} + +/// Returns an iterator that produces an escaped version of a `u8`. +/// +/// The default is chosen with a bias toward producing literals that are +/// legal in a variety of languages, including C++11 and similar C-family +/// languages. The exact rules are: +/// +/// * Tab is escaped as `\t`. +/// * Carriage return is escaped as `\r`. +/// * Line feed is escaped as `\n`. +/// * Single quote is escaped as `\'`. +/// * Double quote is escaped as `\"`. +/// * Backslash is escaped as `\\`. +/// * Any character in the 'printable ASCII' range `0x20` .. `0x7e` +/// inclusive is not escaped. +/// * Any other chars are given hex escapes of the form '\xNN'. +/// * Unicode escapes are never generated by this function. +/// +/// # Examples +/// +/// ``` +/// let escaped = ascii::escape_default(b'0').next().unwrap(); +/// assert_eq!(b'0', escaped); +/// +/// let mut escaped = ascii::escape_default(b'\t'); +/// +/// assert_eq!(b'\\', escaped.next().unwrap()); +/// assert_eq!(b't', escaped.next().unwrap()); +/// +/// let mut escaped = ascii::escape_default(b'\r'); +/// +/// assert_eq!(b'\\', escaped.next().unwrap()); +/// assert_eq!(b'r', escaped.next().unwrap()); +/// +/// let mut escaped = ascii::escape_default(b'\n'); +/// +/// assert_eq!(b'\\', escaped.next().unwrap()); +/// assert_eq!(b'n', escaped.next().unwrap()); +/// +/// let mut escaped = ascii::escape_default(b'\''); +/// +/// assert_eq!(b'\\', escaped.next().unwrap()); +/// assert_eq!(b'\'', escaped.next().unwrap()); +/// +/// let mut escaped = ascii::escape_default(b'"'); +/// +/// assert_eq!(b'\\', escaped.next().unwrap()); +/// assert_eq!(b'"', escaped.next().unwrap()); +/// +/// let mut escaped = ascii::escape_default(b'\\'); +/// +/// assert_eq!(b'\\', escaped.next().unwrap()); +/// assert_eq!(b'\\', escaped.next().unwrap()); +/// +/// let mut escaped = ascii::escape_default(b'\x9d'); +/// +/// assert_eq!(b'\\', escaped.next().unwrap()); +/// assert_eq!(b'x', escaped.next().unwrap()); +/// assert_eq!(b'9', escaped.next().unwrap()); +/// assert_eq!(b'd', escaped.next().unwrap()); +/// ``` +#[unstable(feature = "core_ascii", issue = "46409")] +pub fn escape_ascii(c: u8) -> EscapeDefault { + let (data, len) = match c { + b'\t' => ([b'\\', b't', 0, 0], 2), + b'\r' => ([b'\\', b'r', 0, 0], 2), + b'\n' => ([b'\\', b'n', 0, 0], 2), + b'\\' => ([b'\\', b'\\', 0, 0], 2), + b'\'' => ([b'\\', b'\'', 0, 0], 2), + b'"' => ([b'\\', b'"', 0, 0], 2), + b'\x20' ... b'\x7e' => ([c, 0, 0, 0], 1), + _ => ([b'\\', b'x', hexify(c >> 4), hexify(c & 0xf)], 4), + }; + + return EscapeDefault { range: 0..len, data }; + + fn hexify(b: u8) -> u8 { + match b { + 0 ... 9 => b'0' + b, + _ => b'a' + b - 10, + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Iterator for EscapeDefault { + type Item = u8; + fn next(&mut self) -> Option<u8> { self.range.next().map(|i| self.data[i]) } + fn size_hint(&self) -> (usize, Option<usize>) { self.range.size_hint() } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl DoubleEndedIterator for EscapeDefault { + fn next_back(&mut self) -> Option<u8> { + self.range.next_back().map(|i| self.data[i]) + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl ExactSizeIterator for EscapeDefault {} +#[unstable(feature = "fused", issue = "35602")] +impl FusedIterator for EscapeDefault {} + +#[stable(feature = "std_debug", since = "1.16.0")] +impl fmt::Debug for EscapeDefault { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.pad("EscapeDefault { .. }") + } +} + + +#[cfg(test)] +mod tests { + use char::from_u32; + + #[test] + fn test_is_ascii() { + assert!(b"".is_ascii()); + assert!(b"banana\0\x7F".is_ascii()); + assert!(b"banana\0\x7F".iter().all(|b| b.is_ascii())); + assert!(!b"Vi\xe1\xbb\x87t Nam".is_ascii()); + assert!(!b"Vi\xe1\xbb\x87t Nam".iter().all(|b| b.is_ascii())); + assert!(!b"\xe1\xbb\x87".iter().any(|b| b.is_ascii())); + + assert!("".is_ascii()); + assert!("banana\0\u{7F}".is_ascii()); + assert!("banana\0\u{7F}".chars().all(|c| c.is_ascii())); + assert!(!"ประเทศไทย中华Việt Nam".chars().all(|c| c.is_ascii())); + assert!(!"ประเทศไทย中华ệ ".chars().any(|c| c.is_ascii())); + } + + #[test] + fn test_to_ascii_uppercase() { + assert_eq!("url()URL()uRl()ürl".to_ascii_uppercase(), "URL()URL()URL()üRL"); + assert_eq!("hıKß".to_ascii_uppercase(), "HıKß"); + + for i in 0..501 { + let upper = if 'a' as u32 <= i && i <= 'z' as u32 { i + 'A' as u32 - 'a' as u32 } + else { i }; + assert_eq!((from_u32(i).unwrap()).to_string().to_ascii_uppercase(), + (from_u32(upper).unwrap()).to_string()); + } + } + + #[test] + fn test_to_ascii_lowercase() { + assert_eq!("url()URL()uRl()Ürl".to_ascii_lowercase(), "url()url()url()Ürl"); + // Dotted capital I, Kelvin sign, Sharp S. + assert_eq!("HİKß".to_ascii_lowercase(), "hİKß"); + + for i in 0..501 { + let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 } + else { i }; + assert_eq!((from_u32(i).unwrap()).to_string().to_ascii_lowercase(), + (from_u32(lower).unwrap()).to_string()); + } + } + + #[test] + fn test_make_ascii_lower_case() { + macro_rules! test { + ($from: expr, $to: expr) => { + { + let mut x = $from; + x.make_ascii_lowercase(); + assert_eq!(x, $to); + } + } + } + test!(b'A', b'a'); + test!(b'a', b'a'); + test!(b'!', b'!'); + test!('A', 'a'); + test!('À', 'À'); + test!('a', 'a'); + test!('!', '!'); + test!(b"H\xc3\x89".to_vec(), b"h\xc3\x89"); + test!("HİKß".to_string(), "hİKß"); + } + + + #[test] + fn test_make_ascii_upper_case() { + macro_rules! test { + ($from: expr, $to: expr) => { + { + let mut x = $from; + x.make_ascii_uppercase(); + assert_eq!(x, $to); + } + } + } + test!(b'a', b'A'); + test!(b'A', b'A'); + test!(b'!', b'!'); + test!('a', 'A'); + test!('à', 'à'); + test!('A', 'A'); + test!('!', '!'); + test!(b"h\xc3\xa9".to_vec(), b"H\xc3\xa9"); + test!("hıKß".to_string(), "HıKß"); + + let mut x = "Hello".to_string(); + x[..3].make_ascii_uppercase(); // Test IndexMut on String. + assert_eq!(x, "HELlo") + } + + #[test] + fn test_eq_ignore_ascii_case() { + assert!("url()URL()uRl()Ürl".eq_ignore_ascii_case("url()url()url()Ürl")); + assert!(!"Ürl".eq_ignore_ascii_case("ürl")); + // Dotted capital I, Kelvin sign, Sharp S. + assert!("HİKß".eq_ignore_ascii_case("hİKß")); + assert!(!"İ".eq_ignore_ascii_case("i")); + assert!(!"K".eq_ignore_ascii_case("k")); + assert!(!"ß".eq_ignore_ascii_case("s")); + + for i in 0..501 { + let lower = if 'A' as u32 <= i && i <= 'Z' as u32 { i + 'a' as u32 - 'A' as u32 } + else { i }; + assert!((from_u32(i).unwrap()).to_string().eq_ignore_ascii_case( + &from_u32(lower).unwrap().to_string())); + } + } + + #[test] + fn inference_works() { + let x = "a".to_string(); + x.eq_ignore_ascii_case("A"); + } + + // Shorthands used by the is_ascii_* tests. + macro_rules! assert_all { + ($what:ident, $($str:tt),+) => {{ + $( + for b in $str.chars() { + if !b.$what() { + panic!("expected {}({}) but it isn't", + stringify!($what), b); + } + } + for b in $str.as_bytes().iter() { + if !b.$what() { + panic!("expected {}(0x{:02x})) but it isn't", + stringify!($what), b); + } + } + assert!($str.$what()); + assert!($str.as_bytes().$what()); + )+ + }}; + ($what:ident, $($str:tt),+,) => (assert_all!($what,$($str),+)) + } + macro_rules! assert_none { + ($what:ident, $($str:tt),+) => {{ + $( + for b in $str.chars() { + if b.$what() { + panic!("expected not-{}({}) but it is", + stringify!($what), b); + } + } + for b in $str.as_bytes().iter() { + if b.$what() { + panic!("expected not-{}(0x{:02x})) but it is", + stringify!($what), b); + } + } + )* + }}; + ($what:ident, $($str:tt),+,) => (assert_none!($what,$($str),+)) + } + + #[test] + fn test_is_ascii_alphabetic() { + assert_all!(is_ascii_alphabetic, + "", + "abcdefghijklmnopqrstuvwxyz", + "ABCDEFGHIJKLMNOQPRSTUVWXYZ", + ); + assert_none!(is_ascii_alphabetic, + "0123456789", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + " \t\n\x0c\r", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + } + + #[test] + fn test_is_ascii_uppercase() { + assert_all!(is_ascii_uppercase, + "", + "ABCDEFGHIJKLMNOQPRSTUVWXYZ", + ); + assert_none!(is_ascii_uppercase, + "abcdefghijklmnopqrstuvwxyz", + "0123456789", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + " \t\n\x0c\r", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + } + + #[test] + fn test_is_ascii_lowercase() { + assert_all!(is_ascii_lowercase, + "abcdefghijklmnopqrstuvwxyz", + ); + assert_none!(is_ascii_lowercase, + "ABCDEFGHIJKLMNOQPRSTUVWXYZ", + "0123456789", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + " \t\n\x0c\r", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + } + + #[test] + fn test_is_ascii_alphanumeric() { + assert_all!(is_ascii_alphanumeric, + "", + "abcdefghijklmnopqrstuvwxyz", + "ABCDEFGHIJKLMNOQPRSTUVWXYZ", + "0123456789", + ); + assert_none!(is_ascii_alphanumeric, + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + " \t\n\x0c\r", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + } + + #[test] + fn test_is_ascii_digit() { + assert_all!(is_ascii_digit, + "", + "0123456789", + ); + assert_none!(is_ascii_digit, + "abcdefghijklmnopqrstuvwxyz", + "ABCDEFGHIJKLMNOQPRSTUVWXYZ", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + " \t\n\x0c\r", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + } + + #[test] + fn test_is_ascii_hexdigit() { + assert_all!(is_ascii_hexdigit, + "", + "0123456789", + "abcdefABCDEF", + ); + assert_none!(is_ascii_hexdigit, + "ghijklmnopqrstuvwxyz", + "GHIJKLMNOQPRSTUVWXYZ", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + " \t\n\x0c\r", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + } + + #[test] + fn test_is_ascii_punctuation() { + assert_all!(is_ascii_punctuation, + "", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + ); + assert_none!(is_ascii_punctuation, + "abcdefghijklmnopqrstuvwxyz", + "ABCDEFGHIJKLMNOQPRSTUVWXYZ", + "0123456789", + " \t\n\x0c\r", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + } + + #[test] + fn test_is_ascii_graphic() { + assert_all!(is_ascii_graphic, + "", + "abcdefghijklmnopqrstuvwxyz", + "ABCDEFGHIJKLMNOQPRSTUVWXYZ", + "0123456789", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + ); + assert_none!(is_ascii_graphic, + " \t\n\x0c\r", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + } + + #[test] + fn test_is_ascii_whitespace() { + assert_all!(is_ascii_whitespace, + "", + " \t\n\x0c\r", + ); + assert_none!(is_ascii_whitespace, + "abcdefghijklmnopqrstuvwxyz", + "ABCDEFGHIJKLMNOQPRSTUVWXYZ", + "0123456789", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x0b\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + } + + #[test] + fn test_is_ascii_control() { + assert_all!(is_ascii_control, + "", + "\x00\x01\x02\x03\x04\x05\x06\x07", + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + "\x10\x11\x12\x13\x14\x15\x16\x17", + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + "\x7f", + ); + assert_none!(is_ascii_control, + "abcdefghijklmnopqrstuvwxyz", + "ABCDEFGHIJKLMNOQPRSTUVWXYZ", + "0123456789", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + " ", + ); + } +} |
