about summary refs log tree commit diff
diff options
context:
space:
mode:
author: bors <bors@rust-lang.org> 2017-04-16 19:13:52 +0000
committer: bors <bors@rust-lang.org> 2017-04-16 19:13:52 +0000
commit: 7627e3d31dd641ae9042675e9032857f58d0c5d1 (patch)
tree: a9e6a160ae903447feee5c907ac67b1f2281750e
parent: 9af79f52d0ab43e318bdf3aa73d939c4482cc946 (diff)
parent: aad2062073f46f28c6d1269463cc6c19df1e0199 (diff)
download: rust-7627e3d31dd641ae9042675e9032857f58d0c5d1.tar.gz
download: rust-7627e3d31dd641ae9042675e9032857f58d0c5d1.zip
Auto merge of #40409 - mbrubeck:calloc, r=sfackler
Specialize Vec::from_elem to use calloc

Fixes #38723.  This specializes the implementation for `u8` only, but it could be extended to other zeroable types if desired.

I haven't tested this extensively, but I did verify that it gives the expected performance boost for large `vec![0; n]` allocations with both alloc_system and jemalloc, on Linux.  (I have not tested or even built the Windows code.)
-rw-r--r--  src/doc/unstable-book/src/allocator.md            5
-rw-r--r--  src/liballoc/heap.rs                             34
-rw-r--r--  src/liballoc/raw_vec.rs                          17
-rw-r--r--  src/liballoc_jemalloc/lib.rs                     21
-rw-r--r--  src/liballoc_system/lib.rs                       34
-rw-r--r--  src/libcollections/lib.rs                         2
-rw-r--r--  src/libcollections/vec.rs                        72
-rw-r--r--  src/test/run-pass/auxiliary/allocator-dummy.rs    5
8 files changed, 183 insertions, 7 deletions
diff --git a/src/doc/unstable-book/src/allocator.md b/src/doc/unstable-book/src/allocator.md
index 7261641698f..cfcf8e22d70 100644
--- a/src/doc/unstable-book/src/allocator.md
+++ b/src/doc/unstable-book/src/allocator.md
@@ -52,6 +52,11 @@ pub extern fn __rust_allocate(size: usize, _align: usize) -> *mut u8 {
 }
 
 #[no_mangle]
+pub extern fn __rust_allocate_zeroed(size: usize, _align: usize) -> *mut u8 {
+    unsafe { libc::calloc(size as libc::size_t, 1) as *mut u8 }
+}
+
+#[no_mangle]
 pub extern fn __rust_deallocate(ptr: *mut u8, _old_size: usize, _align: usize) {
     unsafe { libc::free(ptr as *mut libc::c_void) }
 }
diff --git a/src/liballoc/heap.rs b/src/liballoc/heap.rs
index 51e6f2f8bd7..08a0b2a6d00 100644
--- a/src/liballoc/heap.rs
+++ b/src/liballoc/heap.rs
@@ -23,6 +23,7 @@ use core::intrinsics::{min_align_of_val, size_of_val};
 extern "C" {
     #[allocator]
     fn __rust_allocate(size: usize, align: usize) -> *mut u8;
+    fn __rust_allocate_zeroed(size: usize, align: usize) -> *mut u8;
     fn __rust_deallocate(ptr: *mut u8, old_size: usize, align: usize);
     fn __rust_reallocate(ptr: *mut u8, old_size: usize, size: usize, align: usize) -> *mut u8;
     fn __rust_reallocate_inplace(ptr: *mut u8,
@@ -59,6 +60,20 @@ pub unsafe fn allocate(size: usize, align: usize) -> *mut u8 {
     __rust_allocate(size, align)
 }
 
+/// Return a pointer to `size` bytes of memory aligned to `align` and
+/// initialized to zeroes.
+///
+/// On failure, return a null pointer.
+///
+/// Behavior is undefined if the requested size is 0 or the alignment is not a
+/// power of 2. The alignment must be no larger than the largest supported page
+/// size on the platform.
+#[inline]
+pub unsafe fn allocate_zeroed(size: usize, align: usize) -> *mut u8 {
+    check_size_and_alignment(size, align);
+    __rust_allocate_zeroed(size, align)
+}
+
 /// Resize the allocation referenced by `ptr` to `size` bytes.
 ///
 /// On failure, return a null pointer and leave the original allocation intact.
@@ -163,6 +178,25 @@ mod tests {
     use heap;
 
     #[test]
+    fn allocate_zeroed() {
+        unsafe {
+            let size = 1024;
+            let ptr = heap::allocate_zeroed(size, 1);
+            if ptr.is_null() {
+                ::oom()
+            }
+
+            let end = ptr.offset(size as isize);
+            let mut i = ptr;
+            while i < end {
+                assert_eq!(*i, 0);
+                i = i.offset(1);
+            }
+            heap::deallocate(ptr, size, 1);
+        }
+    }
+
+    #[test]
     fn basic_reallocate_inplace_noop() {
         unsafe {
             let size = 4000;
diff --git a/src/liballoc/raw_vec.rs b/src/liballoc/raw_vec.rs
index 357a2724e00..6a53d3a9ca5 100644
--- a/src/liballoc/raw_vec.rs
+++ b/src/liballoc/raw_vec.rs
@@ -81,7 +81,18 @@ impl<T> RawVec<T> {
     /// # Aborts
     ///
     /// Aborts on OOM
+    #[inline]
     pub fn with_capacity(cap: usize) -> Self {
+        RawVec::allocate(cap, false)
+    }
+
+    /// Like `with_capacity` but guarantees the buffer is zeroed.
+    #[inline]
+    pub fn with_capacity_zeroed(cap: usize) -> Self {
+        RawVec::allocate(cap, true)
+    }
+
+    fn allocate(cap: usize, zeroed: bool) -> Self {
         unsafe {
             let elem_size = mem::size_of::<T>();
 
@@ -93,7 +104,11 @@ impl<T> RawVec<T> {
                 heap::EMPTY as *mut u8
             } else {
                 let align = mem::align_of::<T>();
-                let ptr = heap::allocate(alloc_size, align);
+                let ptr = if zeroed {
+                    heap::allocate_zeroed(alloc_size, align)
+                } else {
+                    heap::allocate(alloc_size, align)
+                };
                 if ptr.is_null() {
                     oom()
                 }
diff --git a/src/liballoc_jemalloc/lib.rs b/src/liballoc_jemalloc/lib.rs
index 83cc1ef09c2..288531cb5b2 100644
--- a/src/liballoc_jemalloc/lib.rs
+++ b/src/liballoc_jemalloc/lib.rs
@@ -40,6 +40,10 @@ mod imp {
         fn mallocx(size: size_t, flags: c_int) -> *mut c_void;
         #[cfg_attr(any(target_os = "macos", target_os = "android", target_os = "ios",
                        target_os = "dragonfly", target_os = "windows", target_env = "musl"),
+                   link_name = "je_calloc")]
+        fn calloc(size: size_t, flags: c_int) -> *mut c_void;
+        #[cfg_attr(any(target_os = "macos", target_os = "android", target_os = "ios",
+                       target_os = "dragonfly", target_os = "windows", target_env = "musl"),
                    link_name = "je_rallocx")]
         fn rallocx(ptr: *mut c_void, size: size_t, flags: c_int) -> *mut c_void;
         #[cfg_attr(any(target_os = "macos", target_os = "android", target_os = "ios",
@@ -56,6 +60,8 @@ mod imp {
         fn nallocx(size: size_t, flags: c_int) -> size_t;
     }
 
+    const MALLOCX_ZERO: c_int = 0x40;
+
     // The minimum alignment guaranteed by the architecture. This value is used to
     // add fast paths for low alignment values. In practice, the alignment is a
     // constant at the call site and the branch will be optimized out.
@@ -92,6 +98,16 @@ mod imp {
     }
 
     #[no_mangle]
+    pub extern "C" fn __rust_allocate_zeroed(size: usize, align: usize) -> *mut u8 {
+        if align <= MIN_ALIGN {
+            unsafe { calloc(size as size_t, 1) as *mut u8 }
+        } else {
+            let flags = align_to_flags(align) | MALLOCX_ZERO;
+            unsafe { mallocx(size as size_t, flags) as *mut u8 }
+        }
+    }
+
+    #[no_mangle]
     pub extern "C" fn __rust_reallocate(ptr: *mut u8,
                                         _old_size: usize,
                                         size: usize,
@@ -136,6 +152,11 @@ mod imp {
     }
 
     #[no_mangle]
+    pub extern "C" fn __rust_allocate_zeroed(_size: usize, _align: usize) -> *mut u8 {
+        bogus()
+    }
+
+    #[no_mangle]
     pub extern "C" fn __rust_reallocate(_ptr: *mut u8,
                                         _old_size: usize,
                                         _size: usize,
diff --git a/src/liballoc_system/lib.rs b/src/liballoc_system/lib.rs
index de2b75f62b6..6d47c2ff28f 100644
--- a/src/liballoc_system/lib.rs
+++ b/src/liballoc_system/lib.rs
@@ -45,6 +45,11 @@ pub extern "C" fn __rust_allocate(size: usize, align: usize) -> *mut u8 {
 }
 
 #[no_mangle]
+pub extern "C" fn __rust_allocate_zeroed(size: usize, align: usize) -> *mut u8 {
+    unsafe { imp::allocate_zeroed(size, align) }
+}
+
+#[no_mangle]
 pub extern "C" fn __rust_deallocate(ptr: *mut u8, old_size: usize, align: usize) {
     unsafe { imp::deallocate(ptr, old_size, align) }
 }
@@ -121,6 +126,18 @@ mod imp {
         }
     }
 
+    pub unsafe fn allocate_zeroed(size: usize, align: usize) -> *mut u8 {
+        if align <= MIN_ALIGN {
+            libc::calloc(size as libc::size_t, 1) as *mut u8
+        } else {
+            let ptr = aligned_malloc(size, align);
+            if !ptr.is_null() {
+                ptr::write_bytes(ptr, 0, size);
+            }
+            ptr
+        }
+    }
+
     pub unsafe fn reallocate(ptr: *mut u8, old_size: usize, size: usize, align: usize) -> *mut u8 {
         if align <= MIN_ALIGN {
             libc::realloc(ptr as *mut libc::c_void, size as libc::size_t) as *mut u8
@@ -173,6 +190,8 @@ mod imp {
     #[repr(C)]
     struct Header(*mut u8);
 
+
+    const HEAP_ZERO_MEMORY: DWORD = 0x00000008;
     const HEAP_REALLOC_IN_PLACE_ONLY: DWORD = 0x00000010;
 
     unsafe fn get_header<'a>(ptr: *mut u8) -> &'a mut Header {
@@ -185,11 +204,12 @@ mod imp {
         aligned
     }
 
-    pub unsafe fn allocate(size: usize, align: usize) -> *mut u8 {
+    #[inline]
+    unsafe fn allocate_with_flags(size: usize, align: usize, flags: DWORD) -> *mut u8 {
         if align <= MIN_ALIGN {
-            HeapAlloc(GetProcessHeap(), 0, size as SIZE_T) as *mut u8
+            HeapAlloc(GetProcessHeap(), flags, size as SIZE_T) as *mut u8
         } else {
-            let ptr = HeapAlloc(GetProcessHeap(), 0, (size + align) as SIZE_T) as *mut u8;
+            let ptr = HeapAlloc(GetProcessHeap(), flags, (size + align) as SIZE_T) as *mut u8;
             if ptr.is_null() {
                 return ptr;
             }
@@ -197,6 +217,14 @@ mod imp {
         }
     }
 
+    pub unsafe fn allocate(size: usize, align: usize) -> *mut u8 {
+        allocate_with_flags(size, align, 0)
+    }
+
+    pub unsafe fn allocate_zeroed(size: usize, align: usize) -> *mut u8 {
+        allocate_with_flags(size, align, HEAP_ZERO_MEMORY)
+    }
+
     pub unsafe fn reallocate(ptr: *mut u8, _old_size: usize, size: usize, align: usize) -> *mut u8 {
         if align <= MIN_ALIGN {
             HeapReAlloc(GetProcessHeap(), 0, ptr as LPVOID, size as SIZE_T) as *mut u8
diff --git a/src/libcollections/lib.rs b/src/libcollections/lib.rs
index a207087915a..3bea61f6220 100644
--- a/src/libcollections/lib.rs
+++ b/src/libcollections/lib.rs
@@ -35,6 +35,7 @@
 #![feature(box_patterns)]
 #![feature(box_syntax)]
 #![cfg_attr(not(test), feature(char_escape_debug))]
+#![cfg_attr(not(test), feature(core_float))]
 #![feature(core_intrinsics)]
 #![feature(dropck_eyepatch)]
 #![feature(exact_size_is_empty)]
@@ -42,6 +43,7 @@
 #![feature(fused)]
 #![feature(generic_param_attrs)]
 #![feature(heap_api)]
+#![feature(i128_type)]
 #![feature(inclusive_range)]
 #![feature(lang_items)]
 #![feature(manually_drop)]
diff --git a/src/libcollections/vec.rs b/src/libcollections/vec.rs
index 8824185d280..a3c529f3585 100644
--- a/src/libcollections/vec.rs
+++ b/src/libcollections/vec.rs
@@ -77,6 +77,8 @@ use core::hash::{self, Hash};
 use core::intrinsics::{arith_offset, assume};
 use core::iter::{FromIterator, FusedIterator, TrustedLen};
 use core::mem;
+#[cfg(not(test))]
+use core::num::Float;
 use core::ops::{InPlace, Index, IndexMut, Place, Placer};
 use core::ops;
 use core::ptr;
@@ -1370,11 +1372,75 @@ impl<T: PartialEq> Vec<T> {
 #[doc(hidden)]
 #[stable(feature = "rust1", since = "1.0.0")]
 pub fn from_elem<T: Clone>(elem: T, n: usize) -> Vec<T> {
-    let mut v = Vec::with_capacity(n);
-    v.extend_with_element(n, elem);
-    v
+    <T as SpecFromElem>::from_elem(elem, n)
+}
+
+// Specialization trait used for Vec::from_elem
+trait SpecFromElem: Sized {
+    fn from_elem(elem: Self, n: usize) -> Vec<Self>;
 }
 
+impl<T: Clone> SpecFromElem for T {
+    default fn from_elem(elem: Self, n: usize) -> Vec<Self> {
+        let mut v = Vec::with_capacity(n);
+        v.extend_with_element(n, elem);
+        v
+    }
+}
+
+impl SpecFromElem for u8 {
+    #[inline]
+    fn from_elem(elem: u8, n: usize) -> Vec<u8> {
+        if elem == 0 {
+            return Vec {
+                buf: RawVec::with_capacity_zeroed(n),
+                len: n,
+            }
+        }
+        unsafe {
+            let mut v = Vec::with_capacity(n);
+            ptr::write_bytes(v.as_mut_ptr(), elem, n);
+            v.set_len(n);
+            v
+        }
+    }
+}
+
+macro_rules! impl_spec_from_elem {
+    ($t: ty, $is_zero: expr) => {
+        impl SpecFromElem for $t {
+            #[inline]
+            fn from_elem(elem: $t, n: usize) -> Vec<$t> {
+                if $is_zero(elem) {
+                    return Vec {
+                        buf: RawVec::with_capacity_zeroed(n),
+                        len: n,
+                    }
+                }
+                let mut v = Vec::with_capacity(n);
+                v.extend_with_element(n, elem);
+                v
+            }
+        }
+    };
+}
+
+impl_spec_from_elem!(i8, |x| x == 0);
+impl_spec_from_elem!(i16, |x| x == 0);
+impl_spec_from_elem!(i32, |x| x == 0);
+impl_spec_from_elem!(i64, |x| x == 0);
+impl_spec_from_elem!(i128, |x| x == 0);
+impl_spec_from_elem!(isize, |x| x == 0);
+
+impl_spec_from_elem!(u16, |x| x == 0);
+impl_spec_from_elem!(u32, |x| x == 0);
+impl_spec_from_elem!(u64, |x| x == 0);
+impl_spec_from_elem!(u128, |x| x == 0);
+impl_spec_from_elem!(usize, |x| x == 0);
+
+impl_spec_from_elem!(f32, |x: f32| x == 0. && x.is_sign_positive());
+impl_spec_from_elem!(f64, |x: f64| x == 0. && x.is_sign_positive());
+
 ////////////////////////////////////////////////////////////////////////////////
 // Common trait implementations for Vec
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/src/test/run-pass/auxiliary/allocator-dummy.rs b/src/test/run-pass/auxiliary/allocator-dummy.rs
index a1d21db8f4d..1133ace275b 100644
--- a/src/test/run-pass/auxiliary/allocator-dummy.rs
+++ b/src/test/run-pass/auxiliary/allocator-dummy.rs
@@ -28,6 +28,11 @@ pub extern fn __rust_allocate(size: usize, align: usize) -> *mut u8 {
 }
 
 #[no_mangle]
+pub extern fn __rust_allocate_zeroed(size: usize, _align: usize) -> *mut u8 {
+    unsafe { libc::calloc(size as libc::size_t, 1) as *mut u8 }
+}
+
+#[no_mangle]
 pub extern fn __rust_deallocate(ptr: *mut u8, old_size: usize, align: usize) {
     unsafe {
         HITS += 1;