Rollup merge of #50010 - ExpHP:slice-bounds, r=alexcrichton

Give SliceIndex impls a test suite of girth befitting the implementation (and fix a UTF8 boundary check) So one day I was writing something in my codebase that basically amounted to `impl SliceIndex for (Bound<usize>, Bound<usize>)`, and I said to myself: *Boy, gee, golly! I never realized bounds checking was so tricky!* At some point when I had around 60 lines of tests for it, I decided to go see how the standard library does it to see if I missed any edge cases. ...That's when I discovered that libcore only had about 40 lines of tests for slicing altogether, and none of them even used `..=`. --- This PR includes: * **Literally the first appearance of the word `get_unchecked_mut` in any directory named `test` or `tests`.** * Likewise the first appearance of `get_mut` used with _any type of range argument_ in these directories. * Tests for the panics on overflow with `..=`. * I wanted to test on `[(); usize::MAX]` as well but that takes linear time in debug mode </3 * A horrible and ugly test-generating macro for the `should_panic` tests that increases the DRYness by a single order of magnitude (which IMO wasn't enough, but I didn't want to go any further and risk making the tests inaccessible to next guy). * Same stuff for str! * Actually, the existing `str` tests were pretty good. I just helped filled in the holes. * [A fix for the bug it caught](https://github.com/rust-lang/rust/issues/50002). (only one ~~sadly~~)
author: Alex Crichton <alex@alexcrichton.com> 2018-05-10 11:35:17 -0500
committer: GitHub <noreply@github.com> 2018-05-10 11:35:17 -0500
commit: cff1a263c9e7744df286d2518e0c6ca3191dc681 (patch)
tree: 2d39f779a1fba6ca894db351eac6242edae37f92 /src/liballoc
parent: ecd9898b60b601f69113c64b77650a09d7678edf (diff)
parent: f1d7b453fed6acefc68f90752922b37c6e3ac7a4 (diff)
download: rust-cff1a263c9e7744df286d2518e0c6ca3191dc681.tar.gz
rust-cff1a263c9e7744df286d2518e0c6ca3191dc681.zip
1 files changed, 355 insertions, 134 deletions
diff --git a/src/liballoc/tests/str.rs b/src/liballoc/tests/str.rs
index a03b61ec97e..2edd41a70b9 100644
--- a/src/liballoc/tests/str.rs
+++ b/src/liballoc/tests/str.rs
@@ -291,113 +291,378 @@ fn test_replace_pattern() {
     assert_eq!(data.replace(|c| c == 'γ', "😺😺😺"), "abcdαβ😺😺😺δabcdαβ😺😺😺δ");
 }
 
-#[test]
-fn test_slice() {
-    assert_eq!("ab", &"abc"[0..2]);
-    assert_eq!("bc", &"abc"[1..3]);
-    assert_eq!("", &"abc"[1..1]);
-    assert_eq!("\u{65e5}", &"\u{65e5}\u{672c}"[0..3]);
+// The current implementation of SliceIndex fails to handle methods
+// orthogonally from range types; therefore, it is worth testing
+// all of the indexing operations on each input.
+mod slice_index {
+    // Test a slicing operation **that should succeed,**
+    // testing it on all of the indexing methods.
+    //
+    // This is not suitable for testing failure on invalid inputs.
+    macro_rules! assert_range_eq {
+        ($s:expr, $range:expr, $expected:expr)
+        => {
+            let mut s: String = $s.to_owned();
+            let mut expected: String = $expected.to_owned();
+            {
+                let s: &str = &s;
+                let expected: &str = &expected;
+
+                assert_eq!(&s[$range], expected, "(in assertion for: index)");
+                assert_eq!(s.get($range), Some(expected), "(in assertion for: get)");
+                unsafe {
+                    assert_eq!(
+                        s.get_unchecked($range), expected,
+                        "(in assertion for: get_unchecked)",
+                    );
+                }
+            }
+            {
+                let s: &mut str = &mut s;
+                let expected: &mut str = &mut expected;
+
+                assert_eq!(
+                    &mut s[$range], expected,
+                    "(in assertion for: index_mut)",
+                );
+                assert_eq!(
+                    s.get_mut($range), Some(&mut expected[..]),
+                    "(in assertion for: get_mut)",
+                );
+                unsafe {
+                    assert_eq!(
+                        s.get_unchecked_mut($range), expected,
+                        "(in assertion for: get_unchecked_mut)",
+                    );
+                }
+            }
+        }
+    }
 
-    let data = "ประเทศไทย中华";
-    assert_eq!("ป", &data[0..3]);
-    assert_eq!("ร", &data[3..6]);
-    assert_eq!("", &data[3..3]);
-    assert_eq!("华", &data[30..33]);
+    // Make sure the macro can actually detect bugs,
+    // because if it can't, then what are we even doing here?
+    //
+    // (Be aware this only demonstrates the ability to detect bugs
+    //  in the FIRST method that panics, as the macro is not designed
+    //  to be used in `should_panic`)
+    #[test]
+    #[should_panic(expected = "out of bounds")]
+    fn assert_range_eq_can_fail_by_panic() {
+        assert_range_eq!("abc", 0..5, "abc");
+    }
 
-    fn a_million_letter_x() -> String {
-        let mut i = 0;
-        let mut rs = String::new();
-        while i < 100000 {
-            rs.push_str("华华华华华华华华华华");
-            i += 1;
+    // (Be aware this only demonstrates the ability to detect bugs
+    //  in the FIRST method it calls, as the macro is not designed
+    //  to be used in `should_panic`)
+    #[test]
+    #[should_panic(expected = "==")]
+    fn assert_range_eq_can_fail_by_inequality() {
+        assert_range_eq!("abc", 0..2, "abc");
+    }
+
+    // Generates test cases for bad index operations.
+    //
+    // This generates `should_panic` test cases for Index/IndexMut
+    // and `None` test cases for get/get_mut.
+    macro_rules! panic_cases {
+        ($(
+            in mod $case_name:ident {
+                data: $data:expr;
+
+                // optional:
+                //
+                // a similar input for which DATA[input] succeeds, and the corresponding
+                // output str. This helps validate "critical points" where an input range
+                // straddles the boundary between valid and invalid.
+                // (such as the input `len..len`, which is just barely valid)
+                $(
+                    good: data[$good:expr] == $output:expr;
+                )*
+
+                bad: data[$bad:expr];
+                message: $expect_msg:expr; // must be a literal
+            }
+        )*) => {$(
+            mod $case_name {
+                #[test]
+                fn pass() {
+                    let mut v: String = $data.into();
+
+                    $( assert_range_eq!(v, $good, $output); )*
+
+                    {
+                        let v: &str = &v;
+                        assert_eq!(v.get($bad), None, "(in None assertion for get)");
+                    }
+
+                    {
+                        let v: &mut str = &mut v;
+                        assert_eq!(v.get_mut($bad), None, "(in None assertion for get_mut)");
+                    }
+                }
+
+                #[test]
+                #[should_panic(expected = $expect_msg)]
+                fn index_fail() {
+                    let v: String = $data.into();
+                    let v: &str = &v;
+                    let _v = &v[$bad];
+                }
+
+                #[test]
+                #[should_panic(expected = $expect_msg)]
+                fn index_mut_fail() {
+                    let mut v: String = $data.into();
+                    let v: &mut str = &mut v;
+                    let _v = &mut v[$bad];
+                }
+            }
+        )*};
+    }
+
+    #[test]
+    fn simple_ascii() {
+        assert_range_eq!("abc", .., "abc");
+
+        assert_range_eq!("abc", 0..2, "ab");
+        assert_range_eq!("abc", 0..=1, "ab");
+        assert_range_eq!("abc", ..2, "ab");
+        assert_range_eq!("abc", ..=1, "ab");
+
+        assert_range_eq!("abc", 1..3, "bc");
+        assert_range_eq!("abc", 1..=2, "bc");
+        assert_range_eq!("abc", 1..1, "");
+        assert_range_eq!("abc", 1..=0, "");
+    }
+
+    #[test]
+    fn simple_unicode() {
+        // 日本
+        assert_range_eq!("\u{65e5}\u{672c}", .., "\u{65e5}\u{672c}");
+
+        assert_range_eq!("\u{65e5}\u{672c}", 0..3, "\u{65e5}");
+        assert_range_eq!("\u{65e5}\u{672c}", 0..=2, "\u{65e5}");
+        assert_range_eq!("\u{65e5}\u{672c}", ..3, "\u{65e5}");
+        assert_range_eq!("\u{65e5}\u{672c}", ..=2, "\u{65e5}");
+
+        assert_range_eq!("\u{65e5}\u{672c}", 3..6, "\u{672c}");
+        assert_range_eq!("\u{65e5}\u{672c}", 3..=5, "\u{672c}");
+        assert_range_eq!("\u{65e5}\u{672c}", 3.., "\u{672c}");
+
+        let data = "ประเทศไทย中华";
+        assert_range_eq!(data, 0..3, "ป");
+        assert_range_eq!(data, 3..6, "ร");
+        assert_range_eq!(data, 3..3, "");
+        assert_range_eq!(data, 30..33, "华");
+
+        /*0: 中
+          3: 华
+          6: V
+          7: i
+          8: ệ
+         11: t
+         12:
+         13: N
+         14: a
+         15: m */
+        let ss = "中华Việt Nam";
+        assert_range_eq!(ss, 3..6, "华");
+        assert_range_eq!(ss, 6..16, "Việt Nam");
+        assert_range_eq!(ss, 6..=15, "Việt Nam");
+        assert_range_eq!(ss, 6.., "Việt Nam");
+
+        assert_range_eq!(ss, 0..3, "中");
+        assert_range_eq!(ss, 3..7, "华V");
+        assert_range_eq!(ss, 3..=6, "华V");
+        assert_range_eq!(ss, 3..3, "");
+        assert_range_eq!(ss, 3..=2, "");
+    }
+
+    #[test]
+    fn simple_big() {
+        fn a_million_letter_x() -> String {
+            let mut i = 0;
+            let mut rs = String::new();
+            while i < 100000 {
+                rs.push_str("华华华华华华华华华华");
+                i += 1;
+            }
+            rs
         }
-        rs
+        fn half_a_million_letter_x() -> String {
+            let mut i = 0;
+            let mut rs = String::new();
+            while i < 100000 {
+                rs.push_str("华华华华华");
+                i += 1;
+            }
+            rs
+        }
+        let letters = a_million_letter_x();
+        assert_range_eq!(letters, 0..3 * 500000, half_a_million_letter_x());
     }
-    fn half_a_million_letter_x() -> String {
-        let mut i = 0;
-        let mut rs = String::new();
-        while i < 100000 {
-            rs.push_str("华华华华华");
-            i += 1;
+
+    #[test]
+    #[should_panic]
+    fn test_slice_fail() {
+        &"中华Việt Nam"[0..2];
+    }
+
+    panic_cases! {
+        in mod rangefrom_len {
+            data: "abcdef";
+            good: data[6..] == "";
+            bad: data[7..];
+            message: "out of bounds";
+        }
+
+        in mod rangeto_len {
+            data: "abcdef";
+            good: data[..6] == "abcdef";
+            bad: data[..7];
+            message: "out of bounds";
+        }
+
+        in mod rangetoinclusive_len {
+            data: "abcdef";
+            good: data[..=5] == "abcdef";
+            bad: data[..=6];
+            message: "out of bounds";
+        }
+
+        in mod range_len_len {
+            data: "abcdef";
+            good: data[6..6] == "";
+            bad: data[7..7];
+            message: "out of bounds";
+        }
+
+        in mod rangeinclusive_len_len {
+            data: "abcdef";
+            good: data[6..=5] == "";
+            bad: data[7..=6];
+            message: "out of bounds";
         }
-        rs
     }
-    let letters = a_million_letter_x();
-    assert_eq!(half_a_million_letter_x(), &letters[0..3 * 500000]);
-}
 
-#[test]
-fn test_slice_2() {
-    let ss = "中华Việt Nam";
+    panic_cases! {
+        in mod range_neg_width {
+            data: "abcdef";
+            good: data[4..4] == "";
+            bad: data[4..3];
+            message: "begin <= end (4 <= 3)";
+        }
 
-    assert_eq!("华", &ss[3..6]);
-    assert_eq!("Việt Nam", &ss[6..16]);
+        in mod rangeinclusive_neg_width {
+            data: "abcdef";
+            good: data[4..=3] == "";
+            bad: data[4..=2];
+            message: "begin <= end (4 <= 3)";
+        }
+    }
 
-    assert_eq!("ab", &"abc"[0..2]);
-    assert_eq!("bc", &"abc"[1..3]);
-    assert_eq!("", &"abc"[1..1]);
+    mod overflow {
+        panic_cases! {
+            in mod rangeinclusive {
+                data: "hello";
+                // note: using 0 specifically ensures that the result of overflowing is 0..0,
+                //       so that `get` doesn't simply return None for the wrong reason.
+                bad: data[0..=usize::max_value()];
+                message: "maximum usize";
+            }
 
-    assert_eq!("中", &ss[0..3]);
-    assert_eq!("华V", &ss[3..7]);
-    assert_eq!("", &ss[3..3]);
-    /*0: 中
-      3: 华
-      6: V
-      7: i
-      8: ệ
-     11: t
-     12:
-     13: N
-     14: a
-     15: m */
-}
+            in mod rangetoinclusive {
+                data: "hello";
+                bad: data[..=usize::max_value()];
+                message: "maximum usize";
+            }
+        }
+    }
 
-#[test]
-#[should_panic]
-fn test_slice_fail() {
-    &"中华Việt Nam"[0..2];
-}
+    mod boundary {
+        const DATA: &'static str = "abcαβγ";
+
+        const BAD_START: usize = 4;
+        const GOOD_START: usize = 3;
+        const BAD_END: usize = 6;
+        const GOOD_END: usize = 7;
+        const BAD_END_INCL: usize = BAD_END - 1;
+        const GOOD_END_INCL: usize = GOOD_END - 1;
+
+        // it is especially important to test all of the different range types here
+        // because some of the logic may be duplicated as part of micro-optimizations
+        // to dodge unicode boundary checks on half-ranges.
+        panic_cases! {
+            in mod range_1 {
+                data: super::DATA;
+                bad: data[super::BAD_START..super::GOOD_END];
+                message:
+                    "byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
+            }
 
-#[test]
-#[should_panic]
-fn test_str_slice_rangetoinclusive_max_panics() {
-    &"hello"[..=usize::max_value()];
-}
+            in mod range_2 {
+                data: super::DATA;
+                bad: data[super::GOOD_START..super::BAD_END];
+                message:
+                    "byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
+            }
 
-#[test]
-#[should_panic]
-fn test_str_slice_rangeinclusive_max_panics() {
-    &"hello"[1..=usize::max_value()];
-}
+            in mod rangefrom {
+                data: super::DATA;
+                bad: data[super::BAD_START..];
+                message:
+                    "byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
+            }
 
-#[test]
-#[should_panic]
-fn test_str_slicemut_rangetoinclusive_max_panics() {
-    let mut s = "hello".to_owned();
-    let s: &mut str = &mut s;
-    &mut s[..=usize::max_value()];
-}
+            in mod rangeto {
+                data: super::DATA;
+                bad: data[..super::BAD_END];
+                message:
+                    "byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
+            }
 
-#[test]
-#[should_panic]
-fn test_str_slicemut_rangeinclusive_max_panics() {
-    let mut s = "hello".to_owned();
-    let s: &mut str = &mut s;
-    &mut s[1..=usize::max_value()];
-}
+            in mod rangeinclusive_1 {
+                data: super::DATA;
+                bad: data[super::BAD_START..=super::GOOD_END_INCL];
+                message:
+                    "byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
+            }
 
-#[test]
-fn test_str_get_maxinclusive() {
-    let mut s = "hello".to_owned();
-    {
-        let s: &str = &s;
-        assert_eq!(s.get(..=usize::max_value()), None);
-        assert_eq!(s.get(1..=usize::max_value()), None);
+            in mod rangeinclusive_2 {
+                data: super::DATA;
+                bad: data[super::GOOD_START..=super::BAD_END_INCL];
+                message:
+                    "byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
+            }
+
+            in mod rangetoinclusive {
+                data: super::DATA;
+                bad: data[..=super::BAD_END_INCL];
+                message:
+                    "byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
+            }
+        }
     }
-    {
-        let s: &mut str = &mut s;
-        assert_eq!(s.get(..=usize::max_value()), None);
-        assert_eq!(s.get(1..=usize::max_value()), None);
+
+    const LOREM_PARAGRAPH: &'static str = "\
+    Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem \
+    sit amet dolor ultricies condimentum. Praesent iaculis purus elit, ac malesuada \
+    quam malesuada in. Duis sed orci eros. Suspendisse sit amet magna mollis, mollis \
+    nunc luctus, imperdiet mi. Integer fringilla non sem ut lacinia. Fusce varius \
+    tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec tempus vel, \
+    gravida nec quam.";
+
+    // check the panic includes the prefix of the sliced string
+    #[test]
+    #[should_panic(expected="byte index 1024 is out of bounds of `Lorem ipsum dolor sit amet")]
+    fn test_slice_fail_truncated_1() {
+        &LOREM_PARAGRAPH[..1024];
+    }
+    // check the truncation in the panic message
+    #[test]
+    #[should_panic(expected="luctus, im`[...]")]
+    fn test_slice_fail_truncated_2() {
+        &LOREM_PARAGRAPH[..1024];
     }
 }
 
@@ -446,50 +711,6 @@ fn test_is_char_boundary() {
         }
     }
 }
-const LOREM_PARAGRAPH: &'static str = "\
-Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
-ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
-eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
-sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
-tempus vel, gravida nec quam.";
-
-// check the panic includes the prefix of the sliced string
-#[test]
-#[should_panic(expected="byte index 1024 is out of bounds of `Lorem ipsum dolor sit amet")]
-fn test_slice_fail_truncated_1() {
-    &LOREM_PARAGRAPH[..1024];
-}
-// check the truncation in the panic message
-#[test]
-#[should_panic(expected="luctus, im`[...]")]
-fn test_slice_fail_truncated_2() {
-    &LOREM_PARAGRAPH[..1024];
-}
-
-#[test]
-#[should_panic(expected="byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of")]
-fn test_slice_fail_boundary_1() {
-    &"abcαβγ"[4..];
-}
-
-#[test]
-#[should_panic(expected="byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of")]
-fn test_slice_fail_boundary_2() {
-    &"abcαβγ"[2..6];
-}
-
-#[test]
-fn test_slice_from() {
-    assert_eq!(&"abcd"[0..], "abcd");
-    assert_eq!(&"abcd"[2..], "cd");
-    assert_eq!(&"abcd"[4..], "");
-}
-#[test]
-fn test_slice_to() {
-    assert_eq!(&"abcd"[..0], "");
-    assert_eq!(&"abcd"[..2], "ab");
-    assert_eq!(&"abcd"[..4], "abcd");
-}
 
 #[test]
 fn test_trim_left_matches() {
author	Alex Crichton <alex@alexcrichton.com>	2018-05-10 11:35:17 -0500
committer	GitHub <noreply@github.com>	2018-05-10 11:35:17 -0500
commit	cff1a263c9e7744df286d2518e0c6ca3191dc681 (patch)
tree	2d39f779a1fba6ca894db351eac6242edae37f92 /src/liballoc
parent	ecd9898b60b601f69113c64b77650a09d7678edf (diff)
parent	f1d7b453fed6acefc68f90752922b37c6e3ac7a4 (diff)
download	rust-cff1a263c9e7744df286d2518e0c6ca3191dc681.tar.gz rust-cff1a263c9e7744df286d2518e0c6ca3191dc681.zip