diff options
| author | Patrick Walton <pcwalton@mimiga.net> | 2012-08-26 10:50:06 -0700 |
|---|---|---|
| committer | Patrick Walton <pcwalton@mimiga.net> | 2012-08-26 10:50:06 -0700 |
| commit | ff9151fa55e4e81c0cbaa7181eb672b2df6b53f6 (patch) | |
| tree | d783b788c62c86cc7f326413950c5ce5fba45c75 /src | |
| parent | db71ff3eb61bbce3123f62b274100dadf5ca99a6 (diff) | |
| download | rust-ff9151fa55e4e81c0cbaa7181eb672b2df6b53f6.tar.gz rust-ff9151fa55e4e81c0cbaa7181eb672b2df6b53f6.zip | |
rustc: Use memset when zeroing allocas out (issue #3025).
Previously, LLVM was generating a ton of byte-by-byte copies, leading to huge numbers of vregs and bloating the code. Now, using memset, the code becomes a nice series of SSE moves instead.
Diffstat (limited to 'src')
| -rw-r--r-- | src/rustc/middle/trans/base.rs | 32 |
1 files changed, 30 insertions, 2 deletions
diff --git a/src/rustc/middle/trans/base.rs b/src/rustc/middle/trans/base.rs index a254876f697..24aa8f1dec0 100644 --- a/src/rustc/middle/trans/base.rs +++ b/src/rustc/middle/trans/base.rs @@ -278,7 +278,7 @@ fn alloca_maybe_zeroed(cx: block, t: TypeRef, zero: bool) -> ValueRef { if cx.unreachable { return llvm::LLVMGetUndef(t); } let initcx = raw_block(cx.fcx, false, cx.fcx.llstaticallocas); let p = Alloca(initcx, t); - if zero { Store(initcx, C_null(t), p); } + if zero { memzero(initcx, p, t); } return p; } @@ -287,10 +287,38 @@ fn zero_mem(cx: block, llptr: ValueRef, t: ty::t) -> block { let bcx = cx; let ccx = cx.ccx(); let llty = type_of(ccx, t); - Store(bcx, C_null(llty), llptr); + memzero(bcx, llptr, llty); return bcx; } +// Always use this function instead of storing a zero constant to the memory +// in question. If you store a zero constant, LLVM will drown in vreg +// allocation for large data structures, and the generated code will be +// awful. (A telltale sign of this is large quantities of +// `mov [byte ptr foo],0` in the generated code.) +fn memzero(cx: block, llptr: ValueRef, llty: TypeRef) { + let _icx = cx.insn_ctxt("memzero"); + let ccx = cx.ccx(); + + let intrinsic_key; + match ccx.sess.targ_cfg.arch { + session::arch_x86 | session::arch_arm => { + intrinsic_key = ~"llvm.memset.p0i8.i32"; + } + session::arch_x86_64 => { + intrinsic_key = ~"llvm.memset.p0i8.i64"; + } + } + + let llintrinsicfn = ccx.intrinsics.get(intrinsic_key); + let llptr = PointerCast(cx, llptr, T_ptr(T_i8())); + let llzeroval = C_u8(0); + let size = IntCast(cx, llsize_of(ccx, llty), ccx.int_type); + let align = C_i32(1i32); + let volatile = C_bool(false); + Call(cx, llintrinsicfn, ~[llptr, llzeroval, size, align, volatile]); +} + fn arrayalloca(cx: block, t: TypeRef, v: ValueRef) -> ValueRef { let _icx = cx.insn_ctxt("arrayalloca"); if cx.unreachable { return llvm::LLVMGetUndef(t); } |
