diff options
Diffstat (limited to 'src/libstd/memchr.rs')
| -rw-r--r-- | src/libstd/memchr.rs | 275 |
1 files changed, 4 insertions, 271 deletions
diff --git a/src/libstd/memchr.rs b/src/libstd/memchr.rs index 03f55f7ad61..7c8c97a6caf 100644 --- a/src/libstd/memchr.rs +++ b/src/libstd/memchr.rs @@ -11,8 +11,6 @@ // Original implementation taken from rust-memchr // Copyright 2015 Andrew Gallant, bluss and Nicolas Koch - - /// A safe interface to `memchr`. /// /// Returns the index corresponding to the first occurrence of `needle` in @@ -32,32 +30,9 @@ /// let haystack = b"the quick brown fox"; /// assert_eq!(memchr(b'k', haystack), Some(8)); /// ``` +#[inline] pub fn memchr(needle: u8, haystack: &[u8]) -> Option<usize> { - // libc memchr - #[cfg(not(target_os = "windows"))] - fn memchr_specific(needle: u8, haystack: &[u8]) -> Option<usize> { - use libc; - - let p = unsafe { - libc::memchr( - haystack.as_ptr() as *const libc::c_void, - needle as libc::c_int, - haystack.len() as libc::size_t) - }; - if p.is_null() { - None - } else { - Some(p as usize - (haystack.as_ptr() as usize)) - } - } - - // use fallback on windows, since it's faster - #[cfg(target_os = "windows")] - fn memchr_specific(needle: u8, haystack: &[u8]) -> Option<usize> { - fallback::memchr(needle, haystack) - } - - memchr_specific(needle, haystack) + ::sys::memchr::memchr(needle, haystack) } /// A safe interface to `memrchr`. @@ -75,251 +50,9 @@ pub fn memchr(needle: u8, haystack: &[u8]) -> Option<usize> { /// let haystack = b"the quick brown fox"; /// assert_eq!(memrchr(b'o', haystack), Some(17)); /// ``` +#[inline] pub fn memrchr(needle: u8, haystack: &[u8]) -> Option<usize> { - - #[cfg(target_os = "linux")] - fn memrchr_specific(needle: u8, haystack: &[u8]) -> Option<usize> { - use libc; - - // GNU's memrchr() will - unlike memchr() - error if haystack is empty. - if haystack.is_empty() {return None} - let p = unsafe { - libc::memrchr( - haystack.as_ptr() as *const libc::c_void, - needle as libc::c_int, - haystack.len() as libc::size_t) - }; - if p.is_null() { - None - } else { - Some(p as usize - (haystack.as_ptr() as usize)) - } - } - - #[cfg(not(target_os = "linux"))] - fn memrchr_specific(needle: u8, haystack: &[u8]) -> Option<usize> { - fallback::memrchr(needle, haystack) - } - - memrchr_specific(needle, haystack) -} - -#[allow(dead_code)] -mod fallback { - use cmp; - use mem; - - const LO_U64: u64 = 0x0101010101010101; - const HI_U64: u64 = 0x8080808080808080; - - // use truncation - const LO_USIZE: usize = LO_U64 as usize; - const HI_USIZE: usize = HI_U64 as usize; - - /// Return `true` if `x` contains any zero byte. - /// - /// From *Matters Computational*, J. Arndt - /// - /// "The idea is to subtract one from each of the bytes and then look for - /// bytes where the borrow propagated all the way to the most significant - /// bit." - #[inline] - fn contains_zero_byte(x: usize) -> bool { - x.wrapping_sub(LO_USIZE) & !x & HI_USIZE != 0 - } - - #[cfg(target_pointer_width = "32")] - #[inline] - fn repeat_byte(b: u8) -> usize { - let mut rep = (b as usize) << 8 | b as usize; - rep = rep << 16 | rep; - rep - } - - #[cfg(target_pointer_width = "64")] - #[inline] - fn repeat_byte(b: u8) -> usize { - let mut rep = (b as usize) << 8 | b as usize; - rep = rep << 16 | rep; - rep = rep << 32 | rep; - rep - } - - /// Return the first index matching the byte `a` in `text`. - pub fn memchr(x: u8, text: &[u8]) -> Option<usize> { - // Scan for a single byte value by reading two `usize` words at a time. - // - // Split `text` in three parts - // - unaligned initial part, before the first word aligned address in text - // - body, scan by 2 words at a time - // - the last remaining part, < 2 word size - let len = text.len(); - let ptr = text.as_ptr(); - let usize_bytes = mem::size_of::<usize>(); - - // search up to an aligned boundary - let align = (ptr as usize) & (usize_bytes- 1); - let mut offset; - if align > 0 { - offset = cmp::min(usize_bytes - align, len); - if let Some(index) = text[..offset].iter().position(|elt| *elt == x) { - return Some(index); - } - } else { - offset = 0; - } - - // search the body of the text - let repeated_x = repeat_byte(x); - - if len >= 2 * usize_bytes { - while offset <= len - 2 * usize_bytes { - unsafe { - let u = *(ptr.offset(offset as isize) as *const usize); - let v = *(ptr.offset((offset + usize_bytes) as isize) as *const usize); - - // break if there is a matching byte - let zu = contains_zero_byte(u ^ repeated_x); - let zv = contains_zero_byte(v ^ repeated_x); - if zu || zv { - break; - } - } - offset += usize_bytes * 2; - } - } - - // find the byte after the point the body loop stopped - text[offset..].iter().position(|elt| *elt == x).map(|i| offset + i) - } - - /// Return the last index matching the byte `a` in `text`. - pub fn memrchr(x: u8, text: &[u8]) -> Option<usize> { - // Scan for a single byte value by reading two `usize` words at a time. - // - // Split `text` in three parts - // - unaligned tail, after the last word aligned address in text - // - body, scan by 2 words at a time - // - the first remaining bytes, < 2 word size - let len = text.len(); - let ptr = text.as_ptr(); - let usize_bytes = mem::size_of::<usize>(); - - // search to an aligned boundary - let end_align = (ptr as usize + len) & (usize_bytes - 1); - let mut offset; - if end_align > 0 { - offset = if end_align >= len { 0 } else { len - end_align }; - if let Some(index) = text[offset..].iter().rposition(|elt| *elt == x) { - return Some(offset + index); - } - } else { - offset = len; - } - - // search the body of the text - let repeated_x = repeat_byte(x); - - while offset >= 2 * usize_bytes { - unsafe { - let u = *(ptr.offset(offset as isize - 2 * usize_bytes as isize) as *const usize); - let v = *(ptr.offset(offset as isize - usize_bytes as isize) as *const usize); - - // break if there is a matching byte - let zu = contains_zero_byte(u ^ repeated_x); - let zv = contains_zero_byte(v ^ repeated_x); - if zu || zv { - break; - } - } - offset -= 2 * usize_bytes; - } - - // find the byte before the point the body loop stopped - text[..offset].iter().rposition(|elt| *elt == x) - } - - // test fallback implementations on all platforms - #[test] - fn matches_one() { - assert_eq!(Some(0), memchr(b'a', b"a")); - } - - #[test] - fn matches_begin() { - assert_eq!(Some(0), memchr(b'a', b"aaaa")); - } - - #[test] - fn matches_end() { - assert_eq!(Some(4), memchr(b'z', b"aaaaz")); - } - - #[test] - fn matches_nul() { - assert_eq!(Some(4), memchr(b'\x00', b"aaaa\x00")); - } - - #[test] - fn matches_past_nul() { - assert_eq!(Some(5), memchr(b'z', b"aaaa\x00z")); - } - - #[test] - fn no_match_empty() { - assert_eq!(None, memchr(b'a', b"")); - } - - #[test] - fn no_match() { - assert_eq!(None, memchr(b'a', b"xyz")); - } - - #[test] - fn matches_one_reversed() { - assert_eq!(Some(0), memrchr(b'a', b"a")); - } - - #[test] - fn matches_begin_reversed() { - assert_eq!(Some(3), memrchr(b'a', b"aaaa")); - } - - #[test] - fn matches_end_reversed() { - assert_eq!(Some(0), memrchr(b'z', b"zaaaa")); - } - - #[test] - fn matches_nul_reversed() { - assert_eq!(Some(4), memrchr(b'\x00', b"aaaa\x00")); - } - - #[test] - fn matches_past_nul_reversed() { - assert_eq!(Some(0), memrchr(b'z', b"z\x00aaaa")); - } - - #[test] - fn no_match_empty_reversed() { - assert_eq!(None, memrchr(b'a', b"")); - } - - #[test] - fn no_match_reversed() { - assert_eq!(None, memrchr(b'a', b"xyz")); - } - - #[test] - fn each_alignment_reversed() { - let mut data = [1u8; 64]; - let needle = 2; - let pos = 40; - data[pos] = needle; - for start in 0..16 { - assert_eq!(Some(pos - start), memrchr(needle, &data[start..])); - } - } + ::sys::memchr::memrchr(needle, haystack) } #[cfg(test)] |
