about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Patrick Walton <pcwalton@mimiga.net> 2012-08-26 10:50:06 -0700
committer Patrick Walton <pcwalton@mimiga.net> 2012-08-26 10:50:06 -0700
commit ff9151fa55e4e81c0cbaa7181eb672b2df6b53f6 (patch)
tree d783b788c62c86cc7f326413950c5ce5fba45c75 /src
parent db71ff3eb61bbce3123f62b274100dadf5ca99a6 (diff)
download rust-ff9151fa55e4e81c0cbaa7181eb672b2df6b53f6.tar.gz
download rust-ff9151fa55e4e81c0cbaa7181eb672b2df6b53f6.zip
rustc: Use memset when zeroing allocas out (issue #3025).
Previously, LLVM was generating a ton of byte-by-byte copies,
leading to huge numbers of vregs and bloating the code. Now, using
memset, the code becomes a nice series of SSE moves instead.
Diffstat (limited to 'src')
-rw-r--r-- src/rustc/middle/trans/base.rs 32
1 files changed, 30 insertions, 2 deletions
diff --git a/src/rustc/middle/trans/base.rs b/src/rustc/middle/trans/base.rs
index a254876f697..24aa8f1dec0 100644
--- a/src/rustc/middle/trans/base.rs
+++ b/src/rustc/middle/trans/base.rs
@@ -278,7 +278,7 @@ fn alloca_maybe_zeroed(cx: block, t: TypeRef, zero: bool) -> ValueRef {
     if cx.unreachable { return llvm::LLVMGetUndef(t); }
     let initcx = raw_block(cx.fcx, false, cx.fcx.llstaticallocas);
     let p = Alloca(initcx, t);
-    if zero { Store(initcx, C_null(t), p); }
+    if zero { memzero(initcx, p, t); }
     return p;
 }
 
@@ -287,10 +287,38 @@ fn zero_mem(cx: block, llptr: ValueRef, t: ty::t) -> block {
     let bcx = cx;
     let ccx = cx.ccx();
     let llty = type_of(ccx, t);
-    Store(bcx, C_null(llty), llptr);
+    memzero(bcx, llptr, llty);
     return bcx;
 }
 
+// Zeroes the memory at `llptr`, which holds a value of LLVM type `llty`, by
+// calling the LLVM memset intrinsic. Always prefer this over storing a zero
+// constant: for large data structures such a store makes LLVM drown in vreg
+// allocation and emit awful code (the telltale sign is a long run of
+// `mov [byte ptr foo],0` in the generated code).
+fn memzero(cx: block, llptr: ValueRef, llty: TypeRef) {
+    let _icx = cx.insn_ctxt("memzero");
+    let ccx = cx.ccx();
+
+    // Pick the memset intrinsic variant for the target architecture.
+    let memset_name = match ccx.sess.targ_cfg.arch {
+        session::arch_x86 | session::arch_arm => ~"llvm.memset.p0i8.i32",
+        session::arch_x86_64 => ~"llvm.memset.p0i8.i64"
+    };
+    let memset_fn = ccx.intrinsics.get(memset_name);
+
+    // Arguments, in order: destination as an i8 pointer, fill byte, byte
+    // count, alignment, volatile flag.
+    let dest = PointerCast(cx, llptr, T_ptr(T_i8()));
+    let fill_byte = C_u8(0);
+    let byte_count = IntCast(cx, llsize_of(ccx, llty), ccx.int_type);
+    let alignment = C_i32(1i32);
+    let is_volatile = C_bool(false);
+
+    let args = ~[dest, fill_byte, byte_count, alignment, is_volatile];
+    Call(cx, memset_fn, args);
+}
+
 fn arrayalloca(cx: block, t: TypeRef, v: ValueRef) -> ValueRef {
     let _icx = cx.insn_ctxt("arrayalloca");
     if cx.unreachable { return llvm::LLVMGetUndef(t); }