diff options
| author | bors <bors@rust-lang.org> | 2015-02-22 22:45:46 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2015-02-22 22:45:46 +0000 |
| commit | 67eb38ee4cfd7b28f8498b5b6492da172768dcb9 (patch) | |
| tree | 358263816246c0e05a4ba458cadb266307a96172 /src/libcoretest | |
| parent | dcc6ce2c772cb851ac35cbc2ddafcae9bf2fa9fd (diff) | |
| parent | c8dd2d066d7b25246d2b940b7c161b8b67608b74 (diff) | |
| download | rust-67eb38ee4cfd7b28f8498b5b6492da172768dcb9.tar.gz rust-67eb38ee4cfd7b28f8498b5b6492da172768dcb9.zip | |
Auto merge of #22466 - Kimundi:str_pattern_ai_safe, r=aturon
This is not a complete implementation of the RFC: - only existing methods got updated, no new ones added - doc comments are not extensive enough yet - optimizations got lost and need to be reimplemented See https://github.com/rust-lang/rfcs/pull/528 Technically a [breaking-change]
Diffstat (limited to 'src/libcoretest')
| -rw-r--r-- | src/libcoretest/str.rs | 268 |
1 files changed, 267 insertions, 1 deletions
diff --git a/src/libcoretest/str.rs b/src/libcoretest/str.rs index 375564c39bb..beb746d25b6 100644 --- a/src/libcoretest/str.rs +++ b/src/libcoretest/str.rs @@ -1,4 +1,4 @@ -// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT // file at the top-level directory of this distribution and at // http://rust-lang.org/COPYRIGHT. // @@ -9,6 +9,23 @@ // except according to those terms. #[test] +fn test_pattern_deref_forward() { + let data = "aabcdaa"; + assert!(data.contains("bcd")); + assert!(data.contains(&"bcd")); + assert!(data.contains(&&"bcd")); + assert!(data.contains(&"bcd".to_string())); + assert!(data.contains(&&"bcd".to_string())); +} + +#[test] +fn test_empty_match_indices() { + let data = "aä中!"; + let vec: Vec<_> = data.match_indices("").collect(); + assert_eq!(vec, vec![(0, 0), (1, 1), (3, 3), (6, 6), (7, 7)]); +} + +#[test] fn test_bool_from_str() { assert_eq!("true".parse().ok(), Some(true)); assert_eq!("false".parse().ok(), Some(false)); @@ -121,3 +138,252 @@ fn test_utf16_code_units() { assert_eq!(Utf16Encoder::new(vec!['é', '\u{1F4A9}'].into_iter()).collect::<Vec<u16>>(), vec![0xE9, 0xD83D, 0xDCA9]) } + +#[test] +fn starts_with_in_unicode() { + assert!(!"├── Cargo.toml".starts_with("# ")); +} + +#[test] +fn starts_short_long() { + assert!(!"".starts_with("##")); + assert!(!"##".starts_with("####")); + assert!("####".starts_with("##")); + assert!(!"##ä".starts_with("####")); + assert!("####ä".starts_with("##")); + assert!(!"##".starts_with("####ä")); + assert!("##ä##".starts_with("##ä")); + + assert!("".starts_with("")); + assert!("ä".starts_with("")); + assert!("#ä".starts_with("")); + assert!("##ä".starts_with("")); + assert!("ä###".starts_with("")); + assert!("#ä##".starts_with("")); + assert!("##ä#".starts_with("")); +} + +#[test] +fn contains_weird_cases() { + assert!("* \t".contains_char(' ')); + assert!(!"* \t".contains_char('?')); + assert!(!"* \t".contains_char('\u{1F4A9}')); +} + +#[test] +fn trim_ws() { + assert_eq!(" \t a \t ".trim_left_matches(|c: char| c.is_whitespace()), + "a \t "); + assert_eq!(" \t a \t ".trim_right_matches(|c: char| c.is_whitespace()), + " \t a"); + assert_eq!(" \t a \t ".trim_matches(|c: char| c.is_whitespace()), + "a"); + assert_eq!(" \t \t ".trim_left_matches(|c: char| c.is_whitespace()), + ""); + assert_eq!(" \t \t ".trim_right_matches(|c: char| c.is_whitespace()), + ""); + assert_eq!(" \t \t ".trim_matches(|c: char| c.is_whitespace()), + ""); +} + +mod pattern { + use std::str::Pattern; + use std::str::{Searcher, ReverseSearcher, DoubleEndedSearcher}; + use std::str::SearchStep::{self, Match, Reject, Done}; + + macro_rules! make_test { + ($name:ident, $p:expr, $h:expr, [$($e:expr,)*]) => { + mod $name { + use std::str::Pattern; + use std::str::{Searcher, ReverseSearcher, DoubleEndedSearcher}; + use std::str::SearchStep::{self, Match, Reject, Done}; + use super::{cmp_search_to_vec}; + #[test] + fn fwd() { + cmp_search_to_vec(false, $p, $h, vec![$($e),*]); + } + #[test] + fn bwd() { + cmp_search_to_vec(true, $p, $h, vec![$($e),*]); + } + } + } + } + + fn cmp_search_to_vec<'a, P: Pattern<'a>>(rev: bool, pat: P, haystack: &'a str, + right: Vec<SearchStep>) + where P::Searcher: ReverseSearcher<'a> + { + let mut searcher = pat.into_searcher(haystack); + let mut v = vec![]; + loop { + match if !rev {searcher.next()} else {searcher.next_back()} { + Match(a, b) => v.push(Match(a, b)), + Reject(a, b) => v.push(Reject(a, b)), + Done => break, + } + } + if rev { + v.reverse(); + } + assert_eq!(v, right); + } + + make_test!(str_searcher_ascii_haystack, "bb", "abbcbbd", [ + Reject(0, 1), + Match (1, 3), + Reject(3, 4), + Match (4, 6), + Reject(6, 7), + ]); + make_test!(str_searcher_empty_needle_ascii_haystack, "", "abbcbbd", [ + Match(0, 0), + Match(1, 1), + Match(2, 2), + Match(3, 3), + Match(4, 4), + Match(5, 5), + Match(6, 6), + Match(7, 7), + ]); + make_test!(str_searcher_mulibyte_haystack, " ", "├──", [ + Reject(0, 3), + Reject(3, 6), + Reject(6, 9), + ]); + make_test!(str_searcher_empty_needle_mulibyte_haystack, "", "├──", [ + Match(0, 0), + Match(3, 3), + Match(6, 6), + Match(9, 9), + ]); + make_test!(str_searcher_empty_needle_empty_haystack, "", "", [ + Match(0, 0), + ]); + make_test!(str_searcher_nonempty_needle_empty_haystack, "├", "", [ + ]); + make_test!(char_searcher_ascii_haystack, 'b', "abbcbbd", [ + Reject(0, 1), + Match (1, 2), + Match (2, 3), + Reject(3, 4), + Match (4, 5), + Match (5, 6), + Reject(6, 7), + ]); + make_test!(char_searcher_mulibyte_haystack, ' ', "├──", [ + Reject(0, 3), + Reject(3, 6), + Reject(6, 9), + ]); + make_test!(char_searcher_short_haystack, '\u{1F4A9}', "* \t", [ + Reject(0, 1), + Reject(1, 2), + Reject(2, 3), + ]); + +} + +mod bench { + macro_rules! make_test_inner { + ($s:ident, $code:expr, $name:ident, $str:expr) => { + #[bench] + fn $name(bencher: &mut Bencher) { + let mut $s = $str; + black_box(&mut $s); + bencher.iter(|| $code); + } + } + } + + macro_rules! make_test { + ($name:ident, $s:ident, $code:expr) => { + mod $name { + use test::Bencher; + use test::black_box; + + // Short strings: 65 bytes each + make_test_inner!($s, $code, short_ascii, + "Mary had a little lamb, Little lamb Mary had a littl lamb, lamb!"); + make_test_inner!($s, $code, short_mixed, + "ศไทย中华Việt Nam; Mary had a little lamb, Little lam!"); + make_test_inner!($s, $code, short_pile_of_poo, + "💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩!"); + make_test_inner!($s, $code, long_lorem_ipsum,"\ +Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \ +ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \ +eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \ +sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \ +tempus vel, gravida nec quam. + +In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \ +sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \ +diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \ +lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \ +eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \ +interdum. Curabitur ut nisi justo. + +Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \ +mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \ +lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \ +est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \ +felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \ +ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \ +feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \ +Aliquam sit amet placerat lorem. + +Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \ +mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \ +Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \ +lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \ +suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \ +cursus accumsan. + +Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \ +feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \ +vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \ +leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \ +malesuada sollicitudin quam eu fermentum!"); + } + } + } + + make_test!(chars_count, s, s.chars().count()); + + make_test!(contains_bang_str, s, s.contains("!")); + make_test!(contains_bang_char, s, s.contains_char('!')); + + make_test!(match_indices_a_str, s, s.match_indices("a").count()); + + make_test!(split_str_a_str, s, s.split_str("a").count()); + + make_test!(trim_ascii_char, s, { + use std::ascii::AsciiExt; + s.trim_matches(|c: char| c.is_ascii()) + }); + make_test!(trim_left_ascii_char, s, { + use std::ascii::AsciiExt; + s.trim_left_matches(|c: char| c.is_ascii()) + }); + make_test!(trim_right_ascii_char, s, { + use std::ascii::AsciiExt; + s.trim_right_matches(|c: char| c.is_ascii()) + }); + + make_test!(find_underscore_char, s, s.find('_')); + make_test!(rfind_underscore_char, s, s.rfind('_')); + make_test!(find_underscore_str, s, s.find_str("_")); + + make_test!(find_zzz_char, s, s.find('\u{1F4A4}')); + make_test!(rfind_zzz_char, s, s.rfind('\u{1F4A4}')); + make_test!(find_zzz_str, s, s.find_str("\u{1F4A4}")); + + make_test!(split_space_char, s, s.split(' ').count()); + make_test!(split_terminator_space_char, s, s.split_terminator(' ').count()); + + make_test!(splitn_space_char, s, s.splitn(10, ' ').count()); + make_test!(rsplitn_space_char, s, s.rsplitn(10, ' ').count()); + + make_test!(split_str_space_str, s, s.split_str(" ").count()); + make_test!(split_str_ad_str, s, s.split_str("ad").count()); +} |
