about summary refs log tree commit diff
diff options
context:
space:
mode:
author bors <bors@rust-lang.org> 2013-05-27 15:56:08 -0700
committer bors <bors@rust-lang.org> 2013-05-27 15:56:08 -0700
commit dbc57584bd4e87f0bd3eed6bced8bbd04d99edcf (patch)
tree 834f7712e7345b319c0662b0488bc25ee53b8654
parent 5d04ee805b96d34e7c5b316270a730fd9a0c537f (diff)
parent e6c04dea0325af808198306e283c17f90d31fc26 (diff)
download rust-dbc57584bd4e87f0bd3eed6bced8bbd04d99edcf.tar.gz
rust-dbc57584bd4e87f0bd3eed6bced8bbd04d99edcf.zip
auto merge of #6724 : thestinger/rust/swap_fast, r=thestinger
Passing higher alignment values gives the optimization passes more freedom, since they can copy in larger chunks. This change results in rustc outputting the same post-optimization IR as clang for swaps and most copies, excluding the lack of information about padding.

Code snippet:

```rust
#[inline(never)]
fn swap<T>(x: &mut T, y: &mut T) {
    util::swap(x, y);
}
```

Original IR (for `int`):

```llvm
define internal fastcc void @_ZN9swap_283417_a71830ca3ed2d65d3_00E(i64*, i64*) #1 {
static_allocas:
  %2 = icmp eq i64* %0, %1
  br i1 %2, label %_ZN4util9swap_283717_a71830ca3ed2d65d3_00E.exit, label %3

; <label>:3                                       ; preds = %static_allocas
  %4 = load i64* %0, align 1
  %5 = load i64* %1, align 1
  store i64 %5, i64* %0, align 1
  store i64 %4, i64* %1, align 1
  br label %_ZN4util9swap_283717_a71830ca3ed2d65d3_00E.exit

_ZN4util9swap_283717_a71830ca3ed2d65d3_00E.exit:  ; preds = %3, %static_allocas
  ret void
}
```

After #6710:

```llvm
define internal fastcc void @_ZN9swap_283017_a71830ca3ed2d65d3_00E(i64* nocapture, i64* nocapture) #1 {
static_allocas:
  %2 = load i64* %0, align 1
  %3 = load i64* %1, align 1
  store i64 %3, i64* %0, align 1
  store i64 %2, i64* %1, align 1
  ret void
}
```

After this change:

```llvm
define internal fastcc void @_ZN9swap_283017_a71830ca3ed2d65d3_00E(i64* nocapture, i64* nocapture) #1 {
static_allocas:
  %2 = load i64* %0, align 8
  %3 = load i64* %1, align 8
  store i64 %3, i64* %0, align 8
  store i64 %2, i64* %1, align 8
  ret void
}
```

Another example:

```rust
#[inline(never)]
fn set<T>(x: &mut T, y: T) {
    *x = y;
}
```

Before, with `(int, int)` (align 1):

```llvm
define internal fastcc void @_ZN8set_282517_8fa972e3f9e451983_00E({ i64, i64 }* nocapture, { i64, i64 }* nocapture) #1 {
static_allocas:
  %2 = bitcast { i64, i64 }* %1 to i8*
  %3 = bitcast { i64, i64 }* %0 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* %2, i64 16, i32 1, i1 false)
  ret void
}
```

After, with `(int, int)` (align 8):

```llvm
define internal fastcc void @_ZN8set_282617_8fa972e3f9e451983_00E({ i64, i64 }* nocapture, { i64, i64 }* nocapture) #1 {
static_allocas:
  %2 = bitcast { i64, i64 }* %1 to i8*
  %3 = bitcast { i64, i64 }* %0 to i8*
  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* %2, i64 16, i32 8, i1 false)
  ret void
}
```
-rw-r--r-- src/librustc/middle/trans/base.rs 17
-rw-r--r-- src/librustc/middle/trans/closure.rs 2
-rw-r--r-- src/librustc/middle/trans/foreign.rs 96
-rw-r--r-- src/librustc/middle/trans/tvec.rs 4
-rw-r--r-- src/librustc/middle/trans/type_use.rs 3
-rw-r--r-- src/librustc/middle/typeck/check/mod.rs 48
-rw-r--r-- src/libstd/cast.rs 24
-rw-r--r-- src/libstd/ptr.rs 84
-rw-r--r-- src/libstd/unstable/intrinsics.rs 33
9 files changed, 231 insertions, 80 deletions
diff --git a/src/librustc/middle/trans/base.rs b/src/librustc/middle/trans/base.rs
index 7826781aa6b..90449e8a17a 100644
--- a/src/librustc/middle/trans/base.rs
+++ b/src/librustc/middle/trans/base.rs
@@ -52,7 +52,7 @@ use middle::trans::foreign;
 use middle::trans::glue;
 use middle::trans::inline;
 use middle::trans::machine;
-use middle::trans::machine::llsize_of;
+use middle::trans::machine::{llalign_of_min, llsize_of};
 use middle::trans::meth;
 use middle::trans::monomorphize;
 use middle::trans::reachable;
@@ -1442,12 +1442,7 @@ pub fn with_cond(bcx: block, val: ValueRef, f: &fn(block) -> block) -> block {
     next_cx
 }
 
-pub fn call_memcpy(cx: block, dst: ValueRef, src: ValueRef,
-                   n_bytes: ValueRef) {
-    // FIXME (Related to #1645, I think?): Provide LLVM with better
-    // alignment information when the alignment is statically known (it must
-    // be nothing more than a constant int, or LLVM complains -- not even a
-    // constant element of a tydesc works).
+pub fn call_memcpy(cx: block, dst: ValueRef, src: ValueRef, n_bytes: ValueRef, align: u32) {
     let _icx = cx.insn_ctxt("call_memcpy");
     let ccx = cx.ccx();
     let key = match ccx.sess.targ_cfg.arch {
@@ -1462,7 +1457,7 @@ pub fn call_memcpy(cx: block, dst: ValueRef, src: ValueRef,
     let src_ptr = PointerCast(cx, src, T_ptr(T_i8()));
     let dst_ptr = PointerCast(cx, dst, T_ptr(T_i8()));
     let size = IntCast(cx, n_bytes, ccx.int_type);
-    let align = C_i32(1i32);
+    let align = C_i32(align as i32);
     let volatile = C_i1(false);
     Call(cx, memcpy, [dst_ptr, src_ptr, size, align, volatile]);
 }
@@ -1471,8 +1466,10 @@ pub fn memcpy_ty(bcx: block, dst: ValueRef, src: ValueRef, t: ty::t) {
     let _icx = bcx.insn_ctxt("memcpy_ty");
     let ccx = bcx.ccx();
     if ty::type_is_structural(t) {
-        let llsz = llsize_of(ccx, type_of::type_of(ccx, t));
-        call_memcpy(bcx, dst, src, llsz);
+        let llty = type_of::type_of(ccx, t);
+        let llsz = llsize_of(ccx, llty);
+        let llalign = llalign_of_min(ccx, llty);
+        call_memcpy(bcx, dst, src, llsz, llalign as u32);
     } else {
         Store(bcx, Load(bcx, src), dst);
     }
diff --git a/src/librustc/middle/trans/closure.rs b/src/librustc/middle/trans/closure.rs
index 57df27ef199..335dd47b78b 100644
--- a/src/librustc/middle/trans/closure.rs
+++ b/src/librustc/middle/trans/closure.rs
@@ -521,7 +521,7 @@ pub fn make_opaque_cbox_take_glue(
             [opaque_tydesc, sz],
             expr::SaveIn(rval));
         let cbox_out = PointerCast(bcx, Load(bcx, rval), llopaquecboxty);
-        call_memcpy(bcx, cbox_out, cbox_in, sz);
+        call_memcpy(bcx, cbox_out, cbox_in, sz, 1);
         Store(bcx, cbox_out, cboxptr);
 
         // Take the (deeply cloned) type descriptor
diff --git a/src/librustc/middle/trans/foreign.rs b/src/librustc/middle/trans/foreign.rs
index 79b88a9ba81..b0560c40277 100644
--- a/src/librustc/middle/trans/foreign.rs
+++ b/src/librustc/middle/trans/foreign.rs
@@ -787,7 +787,7 @@ pub fn trans_intrinsic(ccx: @CrateContext,
                 let llsrcptr = PointerCast(bcx, llsrcptr, T_ptr(T_i8()));
 
                 let llsize = llsize_of(ccx, llintype);
-                call_memcpy(bcx, lldestptr, llsrcptr, llsize);
+                call_memcpy(bcx, lldestptr, llsrcptr, llsize, 1);
             }
         }
         ~"needs_drop" => {
@@ -846,44 +846,82 @@ pub fn trans_intrinsic(ccx: @CrateContext,
             Store(bcx, morestack_addr, fcx.llretptr.get());
         }
         ~"memcpy32" => {
-            let dst_ptr = get_param(decl, first_real_arg);
-            let src_ptr = get_param(decl, first_real_arg + 1);
-            let size = get_param(decl, first_real_arg + 2);
-            let align = C_i32(1);
+            let tp_ty = substs.tys[0];
+            let lltp_ty = type_of::type_of(ccx, tp_ty);
+            let align = C_i32(machine::llalign_of_min(ccx, lltp_ty) as i32);
+            let size = C_i32(machine::llsize_of_real(ccx, lltp_ty) as i32);
+
+            let dst_ptr = PointerCast(bcx, get_param(decl, first_real_arg), T_ptr(T_i8()));
+            let src_ptr = PointerCast(bcx, get_param(decl, first_real_arg + 1), T_ptr(T_i8()));
+            let count = get_param(decl, first_real_arg + 2);
             let volatile = C_i1(false);
-            let llfn = *bcx.ccx().intrinsics.get(
-                &~"llvm.memcpy.p0i8.p0i8.i32");
-            Call(bcx, llfn, [dst_ptr, src_ptr, size, align, volatile]);
+            let llfn = *bcx.ccx().intrinsics.get(&~"llvm.memcpy.p0i8.p0i8.i32");
+            Call(bcx, llfn, [dst_ptr, src_ptr, Mul(bcx, size, count), align, volatile]);
         }
         ~"memcpy64" => {
-            let dst_ptr = get_param(decl, first_real_arg);
-            let src_ptr = get_param(decl, first_real_arg + 1);
-            let size = get_param(decl, first_real_arg + 2);
-            let align = C_i32(1);
+            let tp_ty = substs.tys[0];
+            let lltp_ty = type_of::type_of(ccx, tp_ty);
+            let align = C_i32(machine::llalign_of_min(ccx, lltp_ty) as i32);
+            let size = C_i64(machine::llsize_of_real(ccx, lltp_ty) as i64);
+
+            let dst_ptr = PointerCast(bcx, get_param(decl, first_real_arg), T_ptr(T_i8()));
+            let src_ptr = PointerCast(bcx, get_param(decl, first_real_arg + 1), T_ptr(T_i8()));
+            let count = get_param(decl, first_real_arg + 2);
             let volatile = C_i1(false);
-            let llfn = *bcx.ccx().intrinsics.get(
-                &~"llvm.memcpy.p0i8.p0i8.i64");
-            Call(bcx, llfn, [dst_ptr, src_ptr, size, align, volatile]);
+            let llfn = *bcx.ccx().intrinsics.get(&~"llvm.memcpy.p0i8.p0i8.i64");
+            Call(bcx, llfn, [dst_ptr, src_ptr, Mul(bcx, size, count), align, volatile]);
         }
         ~"memmove32" => {
-            let dst_ptr = get_param(decl, first_real_arg);
-            let src_ptr = get_param(decl, first_real_arg + 1);
-            let size = get_param(decl, first_real_arg + 2);
-            let align = C_i32(1);
+            let tp_ty = substs.tys[0];
+            let lltp_ty = type_of::type_of(ccx, tp_ty);
+            let align = C_i32(machine::llalign_of_min(ccx, lltp_ty) as i32);
+            let size = C_i32(machine::llsize_of_real(ccx, lltp_ty) as i32);
+
+            let dst_ptr = PointerCast(bcx, get_param(decl, first_real_arg), T_ptr(T_i8()));
+            let src_ptr = PointerCast(bcx, get_param(decl, first_real_arg + 1), T_ptr(T_i8()));
+            let count = get_param(decl, first_real_arg + 2);
             let volatile = C_i1(false);
-            let llfn = *bcx.ccx().intrinsics.get(
-                &~"llvm.memmove.p0i8.p0i8.i32");
-            Call(bcx, llfn, [dst_ptr, src_ptr, size, align, volatile]);
+            let llfn = *bcx.ccx().intrinsics.get(&~"llvm.memmove.p0i8.p0i8.i32");
+            Call(bcx, llfn, [dst_ptr, src_ptr, Mul(bcx, size, count), align, volatile]);
         }
         ~"memmove64" => {
-            let dst_ptr = get_param(decl, first_real_arg);
-            let src_ptr = get_param(decl, first_real_arg + 1);
-            let size = get_param(decl, first_real_arg + 2);
-            let align = C_i32(1);
+            let tp_ty = substs.tys[0];
+            let lltp_ty = type_of::type_of(ccx, tp_ty);
+            let align = C_i32(machine::llalign_of_min(ccx, lltp_ty) as i32);
+            let size = C_i64(machine::llsize_of_real(ccx, lltp_ty) as i64);
+
+            let dst_ptr = PointerCast(bcx, get_param(decl, first_real_arg), T_ptr(T_i8()));
+            let src_ptr = PointerCast(bcx, get_param(decl, first_real_arg + 1), T_ptr(T_i8()));
+            let count = get_param(decl, first_real_arg + 2);
+            let volatile = C_i1(false);
+            let llfn = *bcx.ccx().intrinsics.get(&~"llvm.memmove.p0i8.p0i8.i64");
+            Call(bcx, llfn, [dst_ptr, src_ptr, Mul(bcx, size, count), align, volatile]);
+        }
+        ~"memset32" => {
+            let tp_ty = substs.tys[0];
+            let lltp_ty = type_of::type_of(ccx, tp_ty);
+            let align = C_i32(machine::llalign_of_min(ccx, lltp_ty) as i32);
+            let size = C_i32(machine::llsize_of_real(ccx, lltp_ty) as i32);
+
+            let dst_ptr = PointerCast(bcx, get_param(decl, first_real_arg), T_ptr(T_i8()));
+            let val = get_param(decl, first_real_arg + 1);
+            let count = get_param(decl, first_real_arg + 2);
+            let volatile = C_i1(false);
+            let llfn = *bcx.ccx().intrinsics.get(&~"llvm.memset.p0i8.i32");
+            Call(bcx, llfn, [dst_ptr, val, Mul(bcx, size, count), align, volatile]);
+        }
+        ~"memset64" => {
+            let tp_ty = substs.tys[0];
+            let lltp_ty = type_of::type_of(ccx, tp_ty);
+            let align = C_i32(machine::llalign_of_min(ccx, lltp_ty) as i32);
+            let size = C_i64(machine::llsize_of_real(ccx, lltp_ty) as i64);
+
+            let dst_ptr = PointerCast(bcx, get_param(decl, first_real_arg), T_ptr(T_i8()));
+            let val = get_param(decl, first_real_arg + 1);
+            let count = get_param(decl, first_real_arg + 2);
             let volatile = C_i1(false);
-            let llfn = *bcx.ccx().intrinsics.get(
-                &~"llvm.memmove.p0i8.p0i8.i64");
-            Call(bcx, llfn, [dst_ptr, src_ptr, size, align, volatile]);
+            let llfn = *bcx.ccx().intrinsics.get(&~"llvm.memset.p0i8.i64");
+            Call(bcx, llfn, [dst_ptr, val, Mul(bcx, size, count), align, volatile]);
         }
         ~"sqrtf32" => {
             let x = get_param(decl, first_real_arg);
diff --git a/src/librustc/middle/trans/tvec.rs b/src/librustc/middle/trans/tvec.rs
index 29e026189db..278a7355892 100644
--- a/src/librustc/middle/trans/tvec.rs
+++ b/src/librustc/middle/trans/tvec.rs
@@ -125,7 +125,7 @@ pub fn duplicate_uniq(bcx: block, vptr: ValueRef, vec_ty: ty::t) -> Result {
 
     let data_ptr = get_dataptr(bcx, get_bodyptr(bcx, vptr));
     let new_data_ptr = get_dataptr(bcx, get_bodyptr(bcx, newptr));
-    base::call_memcpy(bcx, new_data_ptr, data_ptr, fill);
+    base::call_memcpy(bcx, new_data_ptr, data_ptr, fill, 1);
 
     let bcx = if ty::type_needs_drop(bcx.tcx(), unit_ty) {
         iter_vec_raw(bcx, new_data_ptr, vec_ty, fill, glue::take_ty)
@@ -370,7 +370,7 @@ pub fn write_content(bcx: block,
                     let bytes = s.len() + 1; // copy null-terminator too
                     let llbytes = C_uint(bcx.ccx(), bytes);
                     let llcstr = C_cstr(bcx.ccx(), s);
-                    base::call_memcpy(bcx, lldest, llcstr, llbytes);
+                    base::call_memcpy(bcx, lldest, llcstr, llbytes, 1);
                     return bcx;
                 }
             }
diff --git a/src/librustc/middle/trans/type_use.rs b/src/librustc/middle/trans/type_use.rs
index d4c34a3ace5..ceb229c79bd 100644
--- a/src/librustc/middle/trans/type_use.rs
+++ b/src/librustc/middle/trans/type_use.rs
@@ -135,7 +135,8 @@ pub fn type_uses_for(ccx: @CrateContext, fn_id: def_id, n_tps: uint)
                 ~"visit_tydesc"  | ~"forget" | ~"frame_address" |
                 ~"morestack_addr" => 0,
 
-                ~"memcpy32" | ~"memcpy64" | ~"memmove32" | ~"memmove64" => 0,
+                ~"memcpy32" | ~"memcpy64" | ~"memmove32" | ~"memmove64" |
+                ~"memset32" | ~"memset64" => use_repr,
 
                 ~"sqrtf32" | ~"sqrtf64" | ~"powif32" | ~"powif64" |
                 ~"sinf32"  | ~"sinf64"  | ~"cosf32"  | ~"cosf64"  |
diff --git a/src/librustc/middle/typeck/check/mod.rs b/src/librustc/middle/typeck/check/mod.rs
index ec76e74ae8f..e700b8760fd 100644
--- a/src/librustc/middle/typeck/check/mod.rs
+++ b/src/librustc/middle/typeck/check/mod.rs
@@ -3538,14 +3538,14 @@ pub fn check_intrinsic_type(ccx: @mut CrateCtxt, it: @ast::foreign_item) {
           (0u, ~[], ty::mk_nil_ptr(ccx.tcx))
         }
         ~"memcpy32" => {
-          (0,
+          (1,
            ~[
               ty::mk_ptr(tcx, ty::mt {
-                  ty: ty::mk_u8(),
+                  ty: param(ccx, 0),
                   mutbl: ast::m_mutbl
               }),
               ty::mk_ptr(tcx, ty::mt {
-                  ty: ty::mk_u8(),
+                  ty: param(ccx, 0),
                   mutbl: ast::m_imm
               }),
               ty::mk_u32()
@@ -3553,14 +3553,14 @@ pub fn check_intrinsic_type(ccx: @mut CrateCtxt, it: @ast::foreign_item) {
            ty::mk_nil())
         }
         ~"memcpy64" => {
-          (0,
+          (1,
            ~[
               ty::mk_ptr(tcx, ty::mt {
-                  ty: ty::mk_u8(),
+                  ty: param(ccx, 0),
                   mutbl: ast::m_mutbl
               }),
               ty::mk_ptr(tcx, ty::mt {
-                  ty: ty::mk_u8(),
+                  ty: param(ccx, 0),
                   mutbl: ast::m_imm
               }),
               ty::mk_u64()
@@ -3568,14 +3568,14 @@ pub fn check_intrinsic_type(ccx: @mut CrateCtxt, it: @ast::foreign_item) {
            ty::mk_nil())
         }
         ~"memmove32" => {
-          (0,
+          (1,
            ~[
               ty::mk_ptr(tcx, ty::mt {
-                  ty: ty::mk_u8(),
+                  ty: param(ccx, 0),
                   mutbl: ast::m_mutbl
               }),
               ty::mk_ptr(tcx, ty::mt {
-                  ty: ty::mk_u8(),
+                  ty: param(ccx, 0),
                   mutbl: ast::m_imm
               }),
               ty::mk_u32()
@@ -3583,20 +3583,44 @@ pub fn check_intrinsic_type(ccx: @mut CrateCtxt, it: @ast::foreign_item) {
            ty::mk_nil())
         }
         ~"memmove64" => {
-          (0,
+          (1,
            ~[
               ty::mk_ptr(tcx, ty::mt {
-                  ty: ty::mk_u8(),
+                  ty: param(ccx, 0),
                   mutbl: ast::m_mutbl
               }),
               ty::mk_ptr(tcx, ty::mt {
-                  ty: ty::mk_u8(),
+                  ty: param(ccx, 0),
                   mutbl: ast::m_imm
               }),
               ty::mk_u64()
            ],
            ty::mk_nil())
         }
+        ~"memset32" => {
+          (1,
+           ~[
+              ty::mk_ptr(tcx, ty::mt {
+                  ty: param(ccx, 0),
+                  mutbl: ast::m_mutbl
+              }),
+              ty::mk_u8(),
+              ty::mk_u32()
+           ],
+           ty::mk_nil())
+        }
+        ~"memset64" => {
+          (1,
+           ~[
+              ty::mk_ptr(tcx, ty::mt {
+                  ty: param(ccx, 0),
+                  mutbl: ast::m_mutbl
+              }),
+              ty::mk_u8(),
+              ty::mk_u64()
+           ],
+           ty::mk_nil())
+        }
         ~"sqrtf32" => (0, ~[ ty::mk_f32() ], ty::mk_f32()),
         ~"sqrtf64" => (0, ~[ ty::mk_f64() ], ty::mk_f64()),
         ~"powif32" => {
diff --git a/src/libstd/cast.rs b/src/libstd/cast.rs
index cde22afd34a..30ad41f0ca2 100644
--- a/src/libstd/cast.rs
+++ b/src/libstd/cast.rs
@@ -14,8 +14,9 @@ use sys;
 use unstable::intrinsics;
 
 /// Casts the value at `src` to U. The two types must have the same length.
+#[cfg(stage0)]
 pub unsafe fn transmute_copy<T, U>(src: &T) -> U {
-    let mut dest: U = intrinsics::init();
+    let mut dest: U = intrinsics::uninit();
     {
         let dest_ptr: *mut u8 = transmute(&mut dest);
         let src_ptr: *u8 = transmute(src);
@@ -26,6 +27,26 @@ pub unsafe fn transmute_copy<T, U>(src: &T) -> U {
     dest
 }
 
+#[cfg(target_word_size = "32", not(stage0))]
+#[inline(always)]
+pub unsafe fn transmute_copy<T, U>(src: &T) -> U {
+    let mut dest: U = intrinsics::uninit();
+    let dest_ptr: *mut u8 = transmute(&mut dest);
+    let src_ptr: *u8 = transmute(src);
+    intrinsics::memcpy32(dest_ptr, src_ptr, sys::size_of::<U>() as u32);
+    dest
+}
+
+#[cfg(target_word_size = "64", not(stage0))]
+#[inline(always)]
+pub unsafe fn transmute_copy<T, U>(src: &T) -> U {
+    let mut dest: U = intrinsics::uninit();
+    let dest_ptr: *mut u8 = transmute(&mut dest);
+    let src_ptr: *u8 = transmute(src);
+    intrinsics::memcpy64(dest_ptr, src_ptr, sys::size_of::<U>() as u64);
+    dest
+}
+
 /**
  * Move a thing into the void
  *
@@ -43,6 +64,7 @@ pub unsafe fn forget<T>(thing: T) { intrinsics::forget(thing); }
  * and/or reinterpret_cast when such calls would otherwise scramble a box's
  * reference count
  */
+#[inline(always)]
 pub unsafe fn bump_box_refcount<T>(t: @T) { forget(t); }
 
 /**
diff --git a/src/libstd/ptr.rs b/src/libstd/ptr.rs
index d1c0ffe7953..309129b7f13 100644
--- a/src/libstd/ptr.rs
+++ b/src/libstd/ptr.rs
@@ -19,6 +19,7 @@ use sys;
 #[cfg(not(test))] use cmp::{Eq, Ord};
 use uint;
 
+#[cfg(stage0)]
 pub mod libc_ {
     use libc::c_void;
     use libc;
@@ -27,12 +28,6 @@ pub mod libc_ {
     #[abi = "cdecl"]
     pub extern {
         #[rust_stack]
-        unsafe fn memmove(dest: *mut c_void,
-                          src: *const c_void,
-                          n: libc::size_t)
-                       -> *c_void;
-
-        #[rust_stack]
         unsafe fn memset(dest: *mut c_void,
                          c: libc::c_int,
                          len: libc::size_t)
@@ -97,15 +92,28 @@ pub fn is_not_null<T>(ptr: *const T) -> bool { !is_null(ptr) }
  * and destination may overlap.
  */
 #[inline(always)]
-#[cfg(target_word_size = "32")]
+#[cfg(target_word_size = "32", stage0)]
 pub unsafe fn copy_memory<T>(dst: *mut T, src: *const T, count: uint) {
     use unstable::intrinsics::memmove32;
     let n = count * sys::size_of::<T>();
     memmove32(dst as *mut u8, src as *u8, n as u32);
 }
 
+/**
+ * Copies data from one location to another
+ *
+ * Copies `count` elements (not bytes) from `src` to `dst`. The source
+ * and destination may overlap.
+ */
 #[inline(always)]
-#[cfg(target_word_size = "64")]
+#[cfg(target_word_size = "32", not(stage0))]
+pub unsafe fn copy_memory<T>(dst: *mut T, src: *const T, count: uint) {
+    use unstable::intrinsics::memmove32;
+    memmove32(dst, src as *T, count as u32);
+}
+
+#[inline(always)]
+#[cfg(target_word_size = "64", stage0)]
 pub unsafe fn copy_memory<T>(dst: *mut T, src: *const T, count: uint) {
     use unstable::intrinsics::memmove64;
     let n = count * sys::size_of::<T>();
@@ -113,33 +121,63 @@ pub unsafe fn copy_memory<T>(dst: *mut T, src: *const T, count: uint) {
 }
 
 #[inline(always)]
-#[cfg(target_word_size = "32")]
+#[cfg(target_word_size = "64", not(stage0))]
+pub unsafe fn copy_memory<T>(dst: *mut T, src: *const T, count: uint) {
+    use unstable::intrinsics::memmove64;
+    memmove64(dst, src as *T, count as u64);
+}
+
+#[inline(always)]
+#[cfg(target_word_size = "32", stage0)]
+pub unsafe fn copy_nonoverlapping_memory<T>(dst: *mut T, src: *const T, count: uint) {
+    use unstable::intrinsics::memmove32;
+    let n = count * sys::size_of::<T>();
+    memmove32(dst as *mut u8, src as *u8, n as u32);
+}
+
+#[inline(always)]
+#[cfg(target_word_size = "32", not(stage0))]
 pub unsafe fn copy_nonoverlapping_memory<T>(dst: *mut T, src: *const T, count: uint) {
-    #[cfg(stage0)]
-    use memcpy32 = unstable::intrinsics::memmove32;
-    #[cfg(not(stage0))]
     use unstable::intrinsics::memcpy32;
+    memcpy32(dst, src as *T, count as u32);
+}
+
+#[inline(always)]
+#[cfg(target_word_size = "64", stage0)]
+pub unsafe fn copy_nonoverlapping_memory<T>(dst: *mut T, src: *const T, count: uint) {
+    use unstable::intrinsics::memmove64;
     let n = count * sys::size_of::<T>();
-    memcpy32(dst as *mut u8, src as *u8, n as u32);
+    memmove64(dst as *mut u8, src as *u8, n as u64);
 }
 
 #[inline(always)]
-#[cfg(target_word_size = "64")]
+#[cfg(target_word_size = "64", not(stage0))]
 pub unsafe fn copy_nonoverlapping_memory<T>(dst: *mut T, src: *const T, count: uint) {
-    #[cfg(stage0)]
-    use memcpy64 = unstable::intrinsics::memmove64;
-    #[cfg(not(stage0))]
     use unstable::intrinsics::memcpy64;
-    let n = count * sys::size_of::<T>();
-    memcpy64(dst as *mut u8, src as *u8, n as u64);
+    memcpy64(dst, src as *T, count as u64);
 }
 
 #[inline(always)]
+#[cfg(stage0)]
 pub unsafe fn set_memory<T>(dst: *mut T, c: int, count: uint) {
     let n = count * sys::size_of::<T>();
     libc_::memset(dst as *mut c_void, c as libc::c_int, n as size_t);
 }
 
+#[inline(always)]
+#[cfg(target_word_size = "32", not(stage0))]
+pub unsafe fn set_memory<T>(dst: *mut T, c: u8, count: uint) {
+    use unstable::intrinsics::memset32;
+    memset32(dst, c, count as u32);
+}
+
+#[inline(always)]
+#[cfg(target_word_size = "64", not(stage0))]
+pub unsafe fn set_memory<T>(dst: *mut T, c: u8, count: uint) {
+    use unstable::intrinsics::memset64;
+    memset64(dst, c, count as u64);
+}
+
 /**
   Transform a region pointer - &T - to an unsafe pointer - *T.
   This is safe, but is implemented with an unsafe block due to
@@ -581,4 +619,12 @@ pub mod ptr_tests {
             });
         }
     }
+
+    #[test]
+    fn test_set_memory() {
+        let mut xs = [0u8, ..20];
+        let ptr = vec::raw::to_mut_ptr(xs);
+        unsafe { set_memory(ptr, 5u8, xs.len()); }
+        assert_eq!(xs, [5u8, ..20]);
+    }
 }
diff --git a/src/libstd/unstable/intrinsics.rs b/src/libstd/unstable/intrinsics.rs
index 521708621fc..908c5e23ab0 100644
--- a/src/libstd/unstable/intrinsics.rs
+++ b/src/libstd/unstable/intrinsics.rs
@@ -16,7 +16,7 @@ The corresponding definitions are in librustc/middle/trans/foreign.rs.
 
 The atomic intrinsics provide common atomic operations on machine
 words, with multiple possible memory orderings. They obey the same
-semantics as C++0x. See the LLVM documentation on [[atomics]].
+semantics as C++11. See the LLVM documentation on [[atomics]].
 
 [atomics]: http://llvm.org/docs/Atomics.html
 
@@ -31,6 +31,7 @@ A quick refresher on memory ordering:
   with atomic types and is equivalent to Java's `volatile`.
 
 */
+
 #[abi = "rust-intrinsic"]
 pub extern "rust-intrinsic" {
 
@@ -127,18 +128,40 @@ pub extern "rust-intrinsic" {
     /// Get the address of the `__morestack` stack growth function.
     pub fn morestack_addr() -> *();
 
-    /// Equivalent to the `llvm.memcpy.p0i8.0i8.i32` intrinsic.
+    /// Equivalent to the `llvm.memcpy.p0i8.0i8.i32` intrinsic, with a size of
+    /// `count` * `size_of::<T>()` and an alignment of `min_align_of::<T>()`
     #[cfg(not(stage0))]
-    pub fn memcpy32(dst: *mut u8, src: *u8, size: u32);
-    /// Equivalent to the `llvm.memcpy.p0i8.0i8.i64` intrinsic.
+    pub fn memcpy32<T>(dst: *mut T, src: *T, count: u32);
+    /// Equivalent to the `llvm.memcpy.p0i8.0i8.i64` intrinsic, with a size of
+    /// `count` * `size_of::<T>()` and an alignment of `min_align_of::<T>()`
     #[cfg(not(stage0))]
-    pub fn memcpy64(dst: *mut u8, src: *u8, size: u64);
+    pub fn memcpy64<T>(dst: *mut T, src: *T, count: u64);
 
     /// Equivalent to the `llvm.memmove.p0i8.0i8.i32` intrinsic.
+    #[cfg(stage0)]
     pub fn memmove32(dst: *mut u8, src: *u8, size: u32);
     /// Equivalent to the `llvm.memmove.p0i8.0i8.i64` intrinsic.
+    #[cfg(stage0)]
     pub fn memmove64(dst: *mut u8, src: *u8, size: u64);
 
+    /// Equivalent to the `llvm.memmove.p0i8.0i8.i32` intrinsic, with a size of
+    /// `count` * `size_of::<T>()` and an alignment of `min_align_of::<T>()`
+    #[cfg(not(stage0))]
+    pub fn memmove32<T>(dst: *mut T, src: *T, count: u32);
+    /// Equivalent to the `llvm.memmove.p0i8.0i8.i64` intrinsic, with a size of
+    /// `count` * `size_of::<T>()` and an alignment of `min_align_of::<T>()`
+    #[cfg(not(stage0))]
+    pub fn memmove64<T>(dst: *mut T, src: *T, count: u64);
+
+    /// Equivalent to the `llvm.memset.p0i8.i32` intrinsic, with a size of
+    /// `count` * `size_of::<T>()` and an alignment of `min_align_of::<T>()`
+    #[cfg(not(stage0))]
+    pub fn memset32<T>(dst: *mut T, val: u8, count: u32);
+    /// Equivalent to the `llvm.memset.p0i8.i64` intrinsic, with a size of
+    /// `count` * `size_of::<T>()` and an alignment of `min_align_of::<T>()`
+    #[cfg(not(stage0))]
+    pub fn memset64<T>(dst: *mut T, val: u8, count: u64);
+
     pub fn sqrtf32(x: f32) -> f32;
     pub fn sqrtf64(x: f64) -> f64;