diff options
| author | Alex Crichton <alex@alexcrichton.com> | 2018-05-10 11:35:17 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2018-05-10 11:35:17 -0500 |
| commit | cff1a263c9e7744df286d2518e0c6ca3191dc681 (patch) | |
| tree | 2d39f779a1fba6ca894db351eac6242edae37f92 /src/liballoc | |
| parent | ecd9898b60b601f69113c64b77650a09d7678edf (diff) | |
| parent | f1d7b453fed6acefc68f90752922b37c6e3ac7a4 (diff) | |
| download | rust-cff1a263c9e7744df286d2518e0c6ca3191dc681.tar.gz rust-cff1a263c9e7744df286d2518e0c6ca3191dc681.zip | |
Rollup merge of #50010 - ExpHP:slice-bounds, r=alexcrichton
Give SliceIndex impls a test suite of girth befitting the implementation (and fix a UTF8 boundary check)
So one day I was writing something in my codebase that basically amounted to `impl SliceIndex for (Bound<usize>, Bound<usize>)`, and I said to myself:
*Boy, gee, golly! I never realized bounds checking was so tricky!*
At some point when I had around 60 lines of tests for it, I decided to go see how the standard library does it to see if I missed any edge cases. ...That's when I discovered that libcore only had about 40 lines of tests for slicing altogether, and none of them even used `..=`.
---
This PR includes:
* **Literally the first appearance of the word `get_unchecked_mut` in any directory named `test` or `tests`.**
* Likewise the first appearance of `get_mut` used with _any type of range argument_ in these directories.
* Tests for the panics on overflow with `..=`.
* I wanted to test on `[(); usize::MAX]` as well but that takes linear time in debug mode </3
* A horrible and ugly test-generating macro for the `should_panic` tests that increases the DRYness by a single order of magnitude (which IMO wasn't enough, but I didn't want to go any further and risk making the tests inaccessible to next guy).
* Same stuff for str!
* Actually, the existing `str` tests were pretty good. I just helped filled in the holes.
* [A fix for the bug it caught](https://github.com/rust-lang/rust/issues/50002). (only one ~~sadly~~)
Diffstat (limited to 'src/liballoc')
| -rw-r--r-- | src/liballoc/tests/str.rs | 489 |
1 files changed, 355 insertions, 134 deletions
diff --git a/src/liballoc/tests/str.rs b/src/liballoc/tests/str.rs index a03b61ec97e..2edd41a70b9 100644 --- a/src/liballoc/tests/str.rs +++ b/src/liballoc/tests/str.rs @@ -291,113 +291,378 @@ fn test_replace_pattern() { assert_eq!(data.replace(|c| c == 'γ', "😺😺😺"), "abcdαβ😺😺😺δabcdαβ😺😺😺δ"); } -#[test] -fn test_slice() { - assert_eq!("ab", &"abc"[0..2]); - assert_eq!("bc", &"abc"[1..3]); - assert_eq!("", &"abc"[1..1]); - assert_eq!("\u{65e5}", &"\u{65e5}\u{672c}"[0..3]); +// The current implementation of SliceIndex fails to handle methods +// orthogonally from range types; therefore, it is worth testing +// all of the indexing operations on each input. +mod slice_index { + // Test a slicing operation **that should succeed,** + // testing it on all of the indexing methods. + // + // This is not suitable for testing failure on invalid inputs. + macro_rules! assert_range_eq { + ($s:expr, $range:expr, $expected:expr) + => { + let mut s: String = $s.to_owned(); + let mut expected: String = $expected.to_owned(); + { + let s: &str = &s; + let expected: &str = &expected; + + assert_eq!(&s[$range], expected, "(in assertion for: index)"); + assert_eq!(s.get($range), Some(expected), "(in assertion for: get)"); + unsafe { + assert_eq!( + s.get_unchecked($range), expected, + "(in assertion for: get_unchecked)", + ); + } + } + { + let s: &mut str = &mut s; + let expected: &mut str = &mut expected; + + assert_eq!( + &mut s[$range], expected, + "(in assertion for: index_mut)", + ); + assert_eq!( + s.get_mut($range), Some(&mut expected[..]), + "(in assertion for: get_mut)", + ); + unsafe { + assert_eq!( + s.get_unchecked_mut($range), expected, + "(in assertion for: get_unchecked_mut)", + ); + } + } + } + } - let data = "ประเทศไทย中华"; - assert_eq!("ป", &data[0..3]); - assert_eq!("ร", &data[3..6]); - assert_eq!("", &data[3..3]); - assert_eq!("华", &data[30..33]); + // Make sure the macro can actually detect bugs, + // because if it can't, then what are we even doing here? + // + // (Be aware this only demonstrates the ability to detect bugs + // in the FIRST method that panics, as the macro is not designed + // to be used in `should_panic`) + #[test] + #[should_panic(expected = "out of bounds")] + fn assert_range_eq_can_fail_by_panic() { + assert_range_eq!("abc", 0..5, "abc"); + } - fn a_million_letter_x() -> String { - let mut i = 0; - let mut rs = String::new(); - while i < 100000 { - rs.push_str("华华华华华华华华华华"); - i += 1; + // (Be aware this only demonstrates the ability to detect bugs + // in the FIRST method it calls, as the macro is not designed + // to be used in `should_panic`) + #[test] + #[should_panic(expected = "==")] + fn assert_range_eq_can_fail_by_inequality() { + assert_range_eq!("abc", 0..2, "abc"); + } + + // Generates test cases for bad index operations. + // + // This generates `should_panic` test cases for Index/IndexMut + // and `None` test cases for get/get_mut. + macro_rules! panic_cases { + ($( + in mod $case_name:ident { + data: $data:expr; + + // optional: + // + // a similar input for which DATA[input] succeeds, and the corresponding + // output str. This helps validate "critical points" where an input range + // straddles the boundary between valid and invalid. + // (such as the input `len..len`, which is just barely valid) + $( + good: data[$good:expr] == $output:expr; + )* + + bad: data[$bad:expr]; + message: $expect_msg:expr; // must be a literal + } + )*) => {$( + mod $case_name { + #[test] + fn pass() { + let mut v: String = $data.into(); + + $( assert_range_eq!(v, $good, $output); )* + + { + let v: &str = &v; + assert_eq!(v.get($bad), None, "(in None assertion for get)"); + } + + { + let v: &mut str = &mut v; + assert_eq!(v.get_mut($bad), None, "(in None assertion for get_mut)"); + } + } + + #[test] + #[should_panic(expected = $expect_msg)] + fn index_fail() { + let v: String = $data.into(); + let v: &str = &v; + let _v = &v[$bad]; + } + + #[test] + #[should_panic(expected = $expect_msg)] + fn index_mut_fail() { + let mut v: String = $data.into(); + let v: &mut str = &mut v; + let _v = &mut v[$bad]; + } + } + )*}; + } + + #[test] + fn simple_ascii() { + assert_range_eq!("abc", .., "abc"); + + assert_range_eq!("abc", 0..2, "ab"); + assert_range_eq!("abc", 0..=1, "ab"); + assert_range_eq!("abc", ..2, "ab"); + assert_range_eq!("abc", ..=1, "ab"); + + assert_range_eq!("abc", 1..3, "bc"); + assert_range_eq!("abc", 1..=2, "bc"); + assert_range_eq!("abc", 1..1, ""); + assert_range_eq!("abc", 1..=0, ""); + } + + #[test] + fn simple_unicode() { + // 日本 + assert_range_eq!("\u{65e5}\u{672c}", .., "\u{65e5}\u{672c}"); + + assert_range_eq!("\u{65e5}\u{672c}", 0..3, "\u{65e5}"); + assert_range_eq!("\u{65e5}\u{672c}", 0..=2, "\u{65e5}"); + assert_range_eq!("\u{65e5}\u{672c}", ..3, "\u{65e5}"); + assert_range_eq!("\u{65e5}\u{672c}", ..=2, "\u{65e5}"); + + assert_range_eq!("\u{65e5}\u{672c}", 3..6, "\u{672c}"); + assert_range_eq!("\u{65e5}\u{672c}", 3..=5, "\u{672c}"); + assert_range_eq!("\u{65e5}\u{672c}", 3.., "\u{672c}"); + + let data = "ประเทศไทย中华"; + assert_range_eq!(data, 0..3, "ป"); + assert_range_eq!(data, 3..6, "ร"); + assert_range_eq!(data, 3..3, ""); + assert_range_eq!(data, 30..33, "华"); + + /*0: 中 + 3: 华 + 6: V + 7: i + 8: ệ + 11: t + 12: + 13: N + 14: a + 15: m */ + let ss = "中华Việt Nam"; + assert_range_eq!(ss, 3..6, "华"); + assert_range_eq!(ss, 6..16, "Việt Nam"); + assert_range_eq!(ss, 6..=15, "Việt Nam"); + assert_range_eq!(ss, 6.., "Việt Nam"); + + assert_range_eq!(ss, 0..3, "中"); + assert_range_eq!(ss, 3..7, "华V"); + assert_range_eq!(ss, 3..=6, "华V"); + assert_range_eq!(ss, 3..3, ""); + assert_range_eq!(ss, 3..=2, ""); + } + + #[test] + fn simple_big() { + fn a_million_letter_x() -> String { + let mut i = 0; + let mut rs = String::new(); + while i < 100000 { + rs.push_str("华华华华华华华华华华"); + i += 1; + } + rs } - rs + fn half_a_million_letter_x() -> String { + let mut i = 0; + let mut rs = String::new(); + while i < 100000 { + rs.push_str("华华华华华"); + i += 1; + } + rs + } + let letters = a_million_letter_x(); + assert_range_eq!(letters, 0..3 * 500000, half_a_million_letter_x()); } - fn half_a_million_letter_x() -> String { - let mut i = 0; - let mut rs = String::new(); - while i < 100000 { - rs.push_str("华华华华华"); - i += 1; + + #[test] + #[should_panic] + fn test_slice_fail() { + &"中华Việt Nam"[0..2]; + } + + panic_cases! { + in mod rangefrom_len { + data: "abcdef"; + good: data[6..] == ""; + bad: data[7..]; + message: "out of bounds"; + } + + in mod rangeto_len { + data: "abcdef"; + good: data[..6] == "abcdef"; + bad: data[..7]; + message: "out of bounds"; + } + + in mod rangetoinclusive_len { + data: "abcdef"; + good: data[..=5] == "abcdef"; + bad: data[..=6]; + message: "out of bounds"; + } + + in mod range_len_len { + data: "abcdef"; + good: data[6..6] == ""; + bad: data[7..7]; + message: "out of bounds"; + } + + in mod rangeinclusive_len_len { + data: "abcdef"; + good: data[6..=5] == ""; + bad: data[7..=6]; + message: "out of bounds"; } - rs } - let letters = a_million_letter_x(); - assert_eq!(half_a_million_letter_x(), &letters[0..3 * 500000]); -} -#[test] -fn test_slice_2() { - let ss = "中华Việt Nam"; + panic_cases! { + in mod range_neg_width { + data: "abcdef"; + good: data[4..4] == ""; + bad: data[4..3]; + message: "begin <= end (4 <= 3)"; + } - assert_eq!("华", &ss[3..6]); - assert_eq!("Việt Nam", &ss[6..16]); + in mod rangeinclusive_neg_width { + data: "abcdef"; + good: data[4..=3] == ""; + bad: data[4..=2]; + message: "begin <= end (4 <= 3)"; + } + } - assert_eq!("ab", &"abc"[0..2]); - assert_eq!("bc", &"abc"[1..3]); - assert_eq!("", &"abc"[1..1]); + mod overflow { + panic_cases! { + in mod rangeinclusive { + data: "hello"; + // note: using 0 specifically ensures that the result of overflowing is 0..0, + // so that `get` doesn't simply return None for the wrong reason. + bad: data[0..=usize::max_value()]; + message: "maximum usize"; + } - assert_eq!("中", &ss[0..3]); - assert_eq!("华V", &ss[3..7]); - assert_eq!("", &ss[3..3]); - /*0: 中 - 3: 华 - 6: V - 7: i - 8: ệ - 11: t - 12: - 13: N - 14: a - 15: m */ -} + in mod rangetoinclusive { + data: "hello"; + bad: data[..=usize::max_value()]; + message: "maximum usize"; + } + } + } -#[test] -#[should_panic] -fn test_slice_fail() { - &"中华Việt Nam"[0..2]; -} + mod boundary { + const DATA: &'static str = "abcαβγ"; + + const BAD_START: usize = 4; + const GOOD_START: usize = 3; + const BAD_END: usize = 6; + const GOOD_END: usize = 7; + const BAD_END_INCL: usize = BAD_END - 1; + const GOOD_END_INCL: usize = GOOD_END - 1; + + // it is especially important to test all of the different range types here + // because some of the logic may be duplicated as part of micro-optimizations + // to dodge unicode boundary checks on half-ranges. + panic_cases! { + in mod range_1 { + data: super::DATA; + bad: data[super::BAD_START..super::GOOD_END]; + message: + "byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of"; + } -#[test] -#[should_panic] -fn test_str_slice_rangetoinclusive_max_panics() { - &"hello"[..=usize::max_value()]; -} + in mod range_2 { + data: super::DATA; + bad: data[super::GOOD_START..super::BAD_END]; + message: + "byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of"; + } -#[test] -#[should_panic] -fn test_str_slice_rangeinclusive_max_panics() { - &"hello"[1..=usize::max_value()]; -} + in mod rangefrom { + data: super::DATA; + bad: data[super::BAD_START..]; + message: + "byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of"; + } -#[test] -#[should_panic] -fn test_str_slicemut_rangetoinclusive_max_panics() { - let mut s = "hello".to_owned(); - let s: &mut str = &mut s; - &mut s[..=usize::max_value()]; -} + in mod rangeto { + data: super::DATA; + bad: data[..super::BAD_END]; + message: + "byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of"; + } -#[test] -#[should_panic] -fn test_str_slicemut_rangeinclusive_max_panics() { - let mut s = "hello".to_owned(); - let s: &mut str = &mut s; - &mut s[1..=usize::max_value()]; -} + in mod rangeinclusive_1 { + data: super::DATA; + bad: data[super::BAD_START..=super::GOOD_END_INCL]; + message: + "byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of"; + } -#[test] -fn test_str_get_maxinclusive() { - let mut s = "hello".to_owned(); - { - let s: &str = &s; - assert_eq!(s.get(..=usize::max_value()), None); - assert_eq!(s.get(1..=usize::max_value()), None); + in mod rangeinclusive_2 { + data: super::DATA; + bad: data[super::GOOD_START..=super::BAD_END_INCL]; + message: + "byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of"; + } + + in mod rangetoinclusive { + data: super::DATA; + bad: data[..=super::BAD_END_INCL]; + message: + "byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of"; + } + } } - { - let s: &mut str = &mut s; - assert_eq!(s.get(..=usize::max_value()), None); - assert_eq!(s.get(1..=usize::max_value()), None); + + const LOREM_PARAGRAPH: &'static str = "\ + Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem \ + sit amet dolor ultricies condimentum. Praesent iaculis purus elit, ac malesuada \ + quam malesuada in. Duis sed orci eros. Suspendisse sit amet magna mollis, mollis \ + nunc luctus, imperdiet mi. Integer fringilla non sem ut lacinia. Fusce varius \ + tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec tempus vel, \ + gravida nec quam."; + + // check the panic includes the prefix of the sliced string + #[test] + #[should_panic(expected="byte index 1024 is out of bounds of `Lorem ipsum dolor sit amet")] + fn test_slice_fail_truncated_1() { + &LOREM_PARAGRAPH[..1024]; + } + // check the truncation in the panic message + #[test] + #[should_panic(expected="luctus, im`[...]")] + fn test_slice_fail_truncated_2() { + &LOREM_PARAGRAPH[..1024]; } } @@ -446,50 +711,6 @@ fn test_is_char_boundary() { } } } -const LOREM_PARAGRAPH: &'static str = "\ -Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \ -ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \ -eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \ -sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \ -tempus vel, gravida nec quam."; - -// check the panic includes the prefix of the sliced string -#[test] -#[should_panic(expected="byte index 1024 is out of bounds of `Lorem ipsum dolor sit amet")] -fn test_slice_fail_truncated_1() { - &LOREM_PARAGRAPH[..1024]; -} -// check the truncation in the panic message -#[test] -#[should_panic(expected="luctus, im`[...]")] -fn test_slice_fail_truncated_2() { - &LOREM_PARAGRAPH[..1024]; -} - -#[test] -#[should_panic(expected="byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of")] -fn test_slice_fail_boundary_1() { - &"abcαβγ"[4..]; -} - -#[test] -#[should_panic(expected="byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of")] -fn test_slice_fail_boundary_2() { - &"abcαβγ"[2..6]; -} - -#[test] -fn test_slice_from() { - assert_eq!(&"abcd"[0..], "abcd"); - assert_eq!(&"abcd"[2..], "cd"); - assert_eq!(&"abcd"[4..], ""); -} -#[test] -fn test_slice_to() { - assert_eq!(&"abcd"[..0], ""); - assert_eq!(&"abcd"[..2], "ab"); - assert_eq!(&"abcd"[..4], "abcd"); -} #[test] fn test_trim_left_matches() { |
