about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2017-08-06 08:09:59 +0000
committer bors <bors@rust-lang.org> 2017-08-06 08:09:59 +0000
commit a9c24fd579cfa08852dca94214caae4b7e6b91c1 (patch)
tree 32126d8fc5afedad4033af072b1df6d5f0e3ae3b /src
parent dd1df35f87beca7b1768b3e8fc3569dd8f9009e5 (diff)
parent 11d6312abd614fca3970902f137225e0437d0a09 (diff)
download rust-a9c24fd579cfa08852dca94214caae4b7e6b91c1.tar.gz
rust-a9c24fd579cfa08852dca94214caae4b7e6b91c1.zip
Auto merge of #43488 - Florob:repeat-opt, r=arielb1
Optimize initialization of arrays using repeat expressions

This PR was inspired by [this thread](https://www.reddit.com/r/rust/comments/6o8ok9/understanding_rust_performances_a_newbie_question/) on Reddit.
It tries to bring array initialization into the same ballpark as `Vec::from_elem()` for unoptimized builds.
For optimized builds this should relieve LLVM of having to figure out that the construct we generate is in fact a `memset()`.

To that end this emits `llvm.memset()` when:
* the array is of integer type and all elements are zero (`Vec::from_elem()` also explicitly optimizes for this case)
* the array elements are byte sized

If the array is zero-sized, initialization is omitted entirely.
Diffstat (limited to 'src')
-rw-r--r-- src/librustc_trans/common.rs 2
-rw-r--r-- src/librustc_trans/mir/rvalue.rs 34
-rw-r--r-- src/librustc_trans/tvec.rs 2
-rw-r--r-- src/test/codegen/slice-init.rs 74
4 files changed, 108 insertions, 4 deletions
diff --git a/src/librustc_trans/common.rs b/src/librustc_trans/common.rs
index 61766a3db2c..0cc499577bb 100644
--- a/src/librustc_trans/common.rs
+++ b/src/librustc_trans/common.rs
@@ -366,7 +366,7 @@ pub fn const_to_uint(v: ValueRef) -> u64 {
     }
 }
 
-fn is_const_integral(v: ValueRef) -> bool {
+pub fn is_const_integral(v: ValueRef) -> bool {
     unsafe {
         !llvm::LLVMIsAConstantInt(v).is_null()
     }
diff --git a/src/librustc_trans/mir/rvalue.rs b/src/librustc_trans/mir/rvalue.rs
index 4bd5091a4f3..a23e1a0684b 100644
--- a/src/librustc_trans/mir/rvalue.rs
+++ b/src/librustc_trans/mir/rvalue.rs
@@ -20,7 +20,7 @@ use base;
 use builder::Builder;
 use callee;
 use common::{self, val_ty, C_bool, C_null, C_uint};
-use common::{C_integral};
+use common::{C_integral, C_i32};
 use adt;
 use machine;
 use monomorphize;
@@ -93,12 +93,42 @@ impl<'a, 'tcx> MirContext<'a, 'tcx> {
             }
 
             mir::Rvalue::Repeat(ref elem, ref count) => {
+                let dest_ty = dest.ty.to_ty(bcx.tcx());
+
+                // No need to initialize memory of a zero-sized slice
+                if common::type_is_zero_size(bcx.ccx, dest_ty) {
+                    return bcx;
+                }
+
                 let tr_elem = self.trans_operand(&bcx, elem);
                 let size = count.as_u64(bcx.tcx().sess.target.uint_type);
                 let size = C_uint(bcx.ccx, size);
                 let base = base::get_dataptr(&bcx, dest.llval);
+                let align = dest.alignment.to_align();
+
+                if let OperandValue::Immediate(v) = tr_elem.val {
+                    // Use llvm.memset.p0i8.* to initialize all zero arrays
+                    if common::is_const_integral(v) && common::const_to_uint(v) == 0 {
+                        let align = align.unwrap_or_else(|| bcx.ccx.align_of(tr_elem.ty));
+                        let align = C_i32(bcx.ccx, align as i32);
+                        let ty = type_of::type_of(bcx.ccx, dest_ty);
+                        let size = machine::llsize_of(bcx.ccx, ty);
+                        let fill = C_integral(Type::i8(bcx.ccx), 0, false);
+                        base::call_memset(&bcx, base, fill, size, align, false);
+                        return bcx;
+                    }
+
+                    // Use llvm.memset.p0i8.* to initialize byte arrays
+                    if common::val_ty(v) == Type::i8(bcx.ccx) {
+                        let align = align.unwrap_or_else(|| bcx.ccx.align_of(tr_elem.ty));
+                        let align = C_i32(bcx.ccx, align as i32);
+                        base::call_memset(&bcx, base, v, size, align, false);
+                        return bcx;
+                    }
+                }
+
                 tvec::slice_for_each(&bcx, base, tr_elem.ty, size, |bcx, llslot, loop_bb| {
-                    self.store_operand(bcx, llslot, dest.alignment.to_align(), tr_elem);
+                    self.store_operand(bcx, llslot, align, tr_elem);
                     bcx.br(loop_bb);
                 })
             }
diff --git a/src/librustc_trans/tvec.rs b/src/librustc_trans/tvec.rs
index 4216a73a8dd..de4d217c735 100644
--- a/src/librustc_trans/tvec.rs
+++ b/src/librustc_trans/tvec.rs
@@ -30,8 +30,8 @@ pub fn slice_for_each<'a, 'tcx, F>(
     };
 
     let body_bcx = bcx.build_sibling_block("slice_loop_body");
-    let next_bcx = bcx.build_sibling_block("slice_loop_next");
     let header_bcx = bcx.build_sibling_block("slice_loop_header");
+    let next_bcx = bcx.build_sibling_block("slice_loop_next");
 
     let start = if zst {
         C_uint(bcx.ccx, 0usize)
diff --git a/src/test/codegen/slice-init.rs b/src/test/codegen/slice-init.rs
new file mode 100644
index 00000000000..569d937c812
--- /dev/null
+++ b/src/test/codegen/slice-init.rs
@@ -0,0 +1,74 @@
+// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// compile-flags: -C no-prepopulate-passes
+
+#![crate_type = "lib"]
+
+// CHECK-LABEL: @zero_sized_elem
+#[no_mangle]
+pub fn zero_sized_elem() {
+    // CHECK-NOT: br label %slice_loop_header{{.*}}
+    // CHECK-NOT: call void @llvm.memset.p0i8
+    let x = [(); 4];
+    drop(&x);
+}
+
+// CHECK-LABEL: @zero_len_array
+#[no_mangle]
+pub fn zero_len_array() {
+    // CHECK-NOT: br label %slice_loop_header{{.*}}
+    // CHECK-NOT: call void @llvm.memset.p0i8
+    let x = [4; 0];
+    drop(&x);
+}
+
+// CHECK-LABEL: @byte_array
+#[no_mangle]
+pub fn byte_array() {
+    // CHECK: call void @llvm.memset.p0i8.i[[WIDTH:[0-9]+]](i8* {{.*}}, i8 7, i[[WIDTH]] 4
+    // CHECK-NOT: br label %slice_loop_header{{.*}}
+    let x = [7u8; 4];
+    drop(&x);
+}
+
+#[allow(dead_code)]
+#[derive(Copy, Clone)]
+enum Init {
+    Loop,
+    Memset,
+}
+
+// CHECK-LABEL: @byte_enum_array
+#[no_mangle]
+pub fn byte_enum_array() {
+    // CHECK: call void @llvm.memset.p0i8.i[[WIDTH:[0-9]+]](i8* {{.*}}, i8 {{.*}}, i[[WIDTH]] 4
+    // CHECK-NOT: br label %slice_loop_header{{.*}}
+    let x = [Init::Memset; 4];
+    drop(&x);
+}
+
+// CHECK-LABEL: @zeroed_integer_array
+#[no_mangle]
+pub fn zeroed_integer_array() {
+    // CHECK: call void @llvm.memset.p0i8.i[[WIDTH:[0-9]+]](i8* {{.*}}, i8 0, i[[WIDTH]] 16
+    // CHECK-NOT: br label %slice_loop_header{{.*}}
+    let x = [0u32; 4];
+    drop(&x);
+}
+
+// CHECK-LABEL: @nonzero_integer_array
+#[no_mangle]
+pub fn nonzero_integer_array() {
+    // CHECK: br label %slice_loop_header{{.*}}
+    // CHECK-NOT: call void @llvm.memset.p0i8
+    let x = [0x1a_2b_3c_4d_u32; 4];
+    drop(&x);
+}