diff options
| -rw-r--r-- | src/libcore/intrinsics.rs | 30 | ||||
| -rw-r--r-- | src/libcore/ptr.rs | 259 | ||||
| -rw-r--r-- | src/libcore/slice/mod.rs | 22 | ||||
| -rw-r--r-- | src/librustc/middle/lang_items.rs | 3 | ||||
| -rw-r--r-- | src/librustc_codegen_llvm/intrinsic.rs | 32 | ||||
| -rw-r--r-- | src/librustc_typeck/check/intrinsic.rs | 3 | ||||
| -rw-r--r-- | src/test/run-pass/align-offset-sign.rs | 82 |
7 files changed, 363 insertions, 68 deletions
diff --git a/src/libcore/intrinsics.rs b/src/libcore/intrinsics.rs index 5ec6cb6c710..510a5bb3df7 100644 --- a/src/libcore/intrinsics.rs +++ b/src/libcore/intrinsics.rs @@ -1463,15 +1463,26 @@ extern "rust-intrinsic" { /// source as well as std's catch implementation. pub fn try(f: fn(*mut u8), data: *mut u8, local_ptr: *mut u8) -> i32; - /// Computes the byte offset that needs to be applied to `ptr` in order to - /// make it aligned to `align`. - /// If it is not possible to align `ptr`, the implementation returns + #[cfg(stage0)] + /// docs my friends, its friday! + pub fn align_offset(ptr: *const (), align: usize) -> usize; + + /// Computes the offset that needs to be applied to the pointer in order to make it aligned to + /// `align`. + /// + /// If it is not possible to align the pointer, the implementation returns /// `usize::max_value()`. /// - /// There are no guarantees whatsover that offsetting the pointer will not - /// overflow or go beyond the allocation that `ptr` points into. - /// It is up to the caller to ensure that the returned offset is correct - /// in all terms other than alignment. + /// The offset is expressed in number of `T` elements, and not bytes. The value returned can be + /// used with the `offset` or `offset_to` methods. + /// + /// There are no guarantees whatsover that offsetting the pointer will not overflow or go + /// beyond the allocation that the pointer points into. It is up to the caller to ensure that + /// the returned offset is correct in all terms other than alignment. + /// + /// # Unsafety + /// + /// `align` must be a power-of-two. /// /// # Examples /// @@ -1485,7 +1496,7 @@ extern "rust-intrinsic" { /// # unsafe { /// let x = [5u8, 6u8, 7u8, 8u8, 9u8]; /// let ptr = &x[n] as *const u8; - /// let offset = align_offset(ptr as *const (), align_of::<u16>()); + /// let offset = align_offset(ptr, align_of::<u16>()); /// if offset < x.len() - n - 1 { /// let u16_ptr = ptr.offset(offset as isize) as *const u16; /// assert_ne!(*u16_ptr, 500); @@ -1495,7 +1506,8 @@ extern "rust-intrinsic" { /// } /// # } } /// ``` - pub fn align_offset(ptr: *const (), align: usize) -> usize; + #[cfg(not(stage0))] + pub fn align_offset<T>(ptr: *const T, align: usize) -> usize; /// Emits a `!nontemporal` store according to LLVM (see their docs). /// Probably will never become stable. diff --git a/src/libcore/ptr.rs b/src/libcore/ptr.rs index 63bcc024020..a762a8a6f92 100644 --- a/src/libcore/ptr.rs +++ b/src/libcore/ptr.rs @@ -1433,15 +1433,22 @@ impl<T: ?Sized> *const T { copy_nonoverlapping(self, dest, count) } - /// Computes the byte offset that needs to be applied in order to - /// make the pointer aligned to `align`. + /// Computes the offset that needs to be applied to the pointer in order to make it aligned to + /// `align`. + /// /// If it is not possible to align the pointer, the implementation returns /// `usize::max_value()`. /// - /// There are no guarantees whatsover that offsetting the pointer will not - /// overflow or go beyond the allocation that the pointer points into. - /// It is up to the caller to ensure that the returned offset is correct - /// in all terms other than alignment. + /// The offset is expressed in number of `T` elements, and not bytes. The value returned can be + /// used with the `offset` or `offset_to` methods. + /// + /// There are no guarantees whatsover that offsetting the pointer will not overflow or go + /// beyond the allocation that the pointer points into. It is up to the caller to ensure that + /// the returned offset is correct in all terms other than alignment. + /// + /// # Panics + /// + /// The function panics if `align` is not a power-of-two. /// /// # Examples /// @@ -1465,13 +1472,30 @@ impl<T: ?Sized> *const T { /// # } } /// ``` #[unstable(feature = "align_offset", issue = "44488")] - pub fn align_offset(self, align: usize) -> usize { + #[cfg(not(stage0))] + pub fn align_offset(self, align: usize) -> usize where T: Sized { + if !align.is_power_of_two() { + panic!("align_offset: align is not a power-of-two"); + } unsafe { - intrinsics::align_offset(self as *const _, align) + intrinsics::align_offset(self, align) + } + } + + /// definitely docs. + #[unstable(feature = "align_offset", issue = "44488")] + #[cfg(stage0)] + pub fn align_offset(self, align: usize) -> usize where T: Sized { + if !align.is_power_of_two() { + panic!("align_offset: align is not a power-of-two"); + } + unsafe { + intrinsics::align_offset(self as *const (), align) } } } + #[lang = "mut_ptr"] impl<T: ?Sized> *mut T { /// Returns `true` if the pointer is null. @@ -1804,44 +1828,6 @@ impl<T: ?Sized> *mut T { (self as *const T).wrapping_offset_from(origin) } - /// Computes the byte offset that needs to be applied in order to - /// make the pointer aligned to `align`. - /// If it is not possible to align the pointer, the implementation returns - /// `usize::max_value()`. - /// - /// There are no guarantees whatsover that offsetting the pointer will not - /// overflow or go beyond the allocation that the pointer points into. - /// It is up to the caller to ensure that the returned offset is correct - /// in all terms other than alignment. - /// - /// # Examples - /// - /// Accessing adjacent `u8` as `u16` - /// - /// ``` - /// # #![feature(align_offset)] - /// # fn foo(n: usize) { - /// # use std::mem::align_of; - /// # unsafe { - /// let x = [5u8, 6u8, 7u8, 8u8, 9u8]; - /// let ptr = &x[n] as *const u8; - /// let offset = ptr.align_offset(align_of::<u16>()); - /// if offset < x.len() - n - 1 { - /// let u16_ptr = ptr.offset(offset as isize) as *const u16; - /// assert_ne!(*u16_ptr, 500); - /// } else { - /// // while the pointer can be aligned via `offset`, it would point - /// // outside the allocation - /// } - /// # } } - /// ``` - #[unstable(feature = "align_offset", issue = "44488")] - pub fn align_offset(self, align: usize) -> usize { - unsafe { - intrinsics::align_offset(self as *const _, align) - } - } - /// Calculates the offset from a pointer (convenience for `.offset(count as isize)`). /// /// `count` is in units of T; e.g. a `count` of 3 represents a pointer @@ -2511,8 +2497,189 @@ impl<T: ?Sized> *mut T { { swap(self, with) } + + /// Computes the offset that needs to be applied to the pointer in order to make it aligned to + /// `align`. + /// + /// If it is not possible to align the pointer, the implementation returns + /// `usize::max_value()`. + /// + /// The offset is expressed in number of `T` elements, and not bytes. The value returned can be + /// used with the `offset` or `offset_to` methods. + /// + /// There are no guarantees whatsover that offsetting the pointer will not overflow or go + /// beyond the allocation that the pointer points into. It is up to the caller to ensure that + /// the returned offset is correct in all terms other than alignment. + /// + /// # Panics + /// + /// The function panics if `align` is not a power-of-two. + /// + /// # Examples + /// + /// Accessing adjacent `u8` as `u16` + /// + /// ``` + /// # #![feature(align_offset)] + /// # fn foo(n: usize) { + /// # use std::mem::align_of; + /// # unsafe { + /// let x = [5u8, 6u8, 7u8, 8u8, 9u8]; + /// let ptr = &x[n] as *const u8; + /// let offset = ptr.align_offset(align_of::<u16>()); + /// if offset < x.len() - n - 1 { + /// let u16_ptr = ptr.offset(offset as isize) as *const u16; + /// assert_ne!(*u16_ptr, 500); + /// } else { + /// // while the pointer can be aligned via `offset`, it would point + /// // outside the allocation + /// } + /// # } } + /// ``` + #[unstable(feature = "align_offset", issue = "44488")] + #[cfg(not(stage0))] + pub fn align_offset(self, align: usize) -> usize where T: Sized { + if !align.is_power_of_two() { + panic!("align_offset: align is not a power-of-two"); + } + unsafe { + intrinsics::align_offset(self, align) + } + } + + /// definitely docs. + #[unstable(feature = "align_offset", issue = "44488")] + #[cfg(stage0)] + pub fn align_offset(self, align: usize) -> usize where T: Sized { + if !align.is_power_of_two() { + panic!("align_offset: align is not a power-of-two"); + } + unsafe { + intrinsics::align_offset(self as *const (), align) + } + } } +/// Align pointer `p`. +/// +/// Calculate offset (in terms of elements of `stride` stride) that has to be applied +/// to pointer `p` so that pointer `p` would get aligned to `a`. +/// +/// This is an implementation of the `align_offset` intrinsic for the case where `stride > 1`. +/// +/// Note: This implementation has been carefully tailored to not panic. It is UB for this to panic. +/// The only real change that can be made here is change of `INV_TABLE_MOD_16` and associated +/// constants. +/// +/// If we ever decide to make it possible to call the intrinsic with `a` that is not a +/// power-of-two, it will probably be more prudent to just change to a naive implementation rather +/// than trying to adapt this to accomodate that change. +/// +/// Any questions go to @nagisa. +#[lang="align_offset"] +#[cfg(not(stage0))] +unsafe fn align_offset(p: *const (), a: usize, stride: usize) -> usize { + /// Calculate multiplicative modular inverse of `x` modulo `m`. + /// + /// This implementation is tailored for align_offset and has following preconditions: + /// + /// * `m` is a power-of-two; + /// * `x < m`; (if `x ≥ m`, pass in `x % m` instead) + /// + /// Implementation of this function shall not panic. Ever. + fn mod_inv(x: usize, m: usize) -> usize { + /// Multiplicative modular inverse table modulo 2⁴ = 16. + /// + /// Note, that this table does not contain values where inverse does not exist (i.e. for + /// `0⁻¹ mod 16`, `2⁻¹ mod 16`, etc.) + static INV_TABLE_MOD_16: [usize; 8] = [1, 11, 13, 7, 9, 3, 5, 15]; + /// Modulo for which the `INV_TABLE_MOD_16` is intended. + const INV_TABLE_MOD: usize = 16; + /// INV_TABLE_MOD² + const INV_TABLE_MOD_SQUARED: usize = INV_TABLE_MOD * INV_TABLE_MOD; + + let table_inverse = INV_TABLE_MOD_16[(x & (INV_TABLE_MOD - 1)) >> 1]; + if m <= INV_TABLE_MOD { + return table_inverse & (m - 1); + } else { + // We iterate "up" using the following formula: + // + // $$ xy ≡ 1 (mod 2ⁿ) → xy (2 - xy) ≡ 1 (mod 2²ⁿ) $$ + // + // until 2²ⁿ ≥ m. Then we can reduce to our desired `m` by taking the result `mod m`. + let mut inverse = table_inverse; + let mut going_mod = INV_TABLE_MOD_SQUARED; + loop { + // y = y * (2 - xy) mod n + // + // Note, that we use wrapping operations here intentionally – the original formula + // uses e.g. subtraction `mod n`. It is entirely fine to do them `mod + // usize::max_value()` instead, because we take the result `mod n` at the end + // anyway. + inverse = inverse.wrapping_mul( + 2usize.wrapping_sub(x.wrapping_mul(inverse)) + ) & (going_mod - 1); + if going_mod > m { + return inverse & (m - 1); + } + going_mod = going_mod.wrapping_mul(going_mod); + } + } + } + + let a_minus_one = a.wrapping_sub(1); + let pmoda = p as usize & a_minus_one; + let smoda = stride & a_minus_one; + // a is power-of-two so cannot be 0. stride = 0 is handled by the intrinsic. + let gcdpow = intrinsics::cttz_nonzero(stride).min(intrinsics::cttz_nonzero(a)); + let gcd = 1usize << gcdpow; + + if pmoda == 0 { + // Already aligned. Yay! + return 0; + } + + if gcd == 1 { + // This branch solves for the variable $o$ in following linear congruence equation: + // + // ⎰ p + o ≡ 0 (mod a) # $p + o$ must be aligned to specified alignment $a$ + // ⎱ o ≡ 0 (mod s) # offset $o$ must be a multiple of stride $s$ + // + // where + // + // * a, s are co-prime + // + // This gives us the formula below: + // + // o = (a - (p mod a)) * (s⁻¹ mod a) * s + // + // The first term is “the relative alignment of p to a”, the second term is “how does + // incrementing p by one s change the relative alignment of p”, the third term is + // translating change in units of s to a byte count. + // + // Furthermore, the result produced by this solution is not “minimal”, so it is necessary + // to take the result $o mod lcm(s, a)$. Since $s$ and $a$ are co-prime (i.e. $gcd(s, a) = + // 1$) and $lcm(s, a) = s * a / gcd(s, a)$, we can replace $lcm(s, a)$ with just a $s * a$. + // + // (Author note: we decided later on to express the offset in "elements" rather than bytes, + // which drops the multiplication by `s` on both sides of the modulo.) + return intrinsics::unchecked_rem(a.wrapping_sub(pmoda).wrapping_mul(mod_inv(smoda, a)), a); + } + + if p as usize & (gcd - 1) == 0 { + // This can be aligned, but `a` and `stride` are not co-prime, so a somewhat adapted + // formula is used. + let j = a.wrapping_sub(pmoda) >> gcdpow; + let k = smoda >> gcdpow; + return intrinsics::unchecked_rem(j.wrapping_mul(mod_inv(k, a)), a >> gcdpow); + } + + // Cannot be aligned at all. + return usize::max_value(); +} + + + // Equality for pointers #[stable(feature = "rust1", since = "1.0.0")] impl<T: ?Sized> PartialEq for *const T { diff --git a/src/libcore/slice/mod.rs b/src/libcore/slice/mod.rs index 82891b691dc..6b3c1fcddc2 100644 --- a/src/libcore/slice/mod.rs +++ b/src/libcore/slice/mod.rs @@ -1696,6 +1696,28 @@ impl<T> [T] { self.as_mut_ptr(), other.as_mut_ptr(), self.len()); } } + + // #[unstable(feature = "slice_align_to", issue = "44488")] + // pub fn align_to<U>(&self) -> (&[T], &[U], &[T]) { + // // First, find at what point do we split between the first and 2nd slice. + // let x = self.as_ptr(); + // let offset = x.align_offset(::mem::align_of::<U>()); + // if offset > x * ::mem::size_of::<T>() { + // return (self, [], []); + // } + + // } + + // #[unstable(feature = "slice_align_to", issue = "44488")] + // pub fn align_to_mut<U>(&mut self) -> (&mut [T], &mut [U], &mut [T]) { + // } +}} + +#[lang = "slice"] +#[cfg(not(test))] +#[cfg(not(stage0))] +impl<T> [T] { + slice_core_methods!(); } #[lang = "slice_u8"] diff --git a/src/librustc/middle/lang_items.rs b/src/librustc/middle/lang_items.rs index 95e92e21b09..d70f994e87b 100644 --- a/src/librustc/middle/lang_items.rs +++ b/src/librustc/middle/lang_items.rs @@ -348,6 +348,9 @@ language_item_table! { I128ShroFnLangItem, "i128_shro", i128_shro_fn; U128ShroFnLangItem, "u128_shro", u128_shro_fn; + // Align offset for stride != 1, must not panic. + AlignOffsetLangItem, "align_offset", align_offset_fn; + TerminationTraitLangItem, "termination", termination; } diff --git a/src/librustc_codegen_llvm/intrinsic.rs b/src/librustc_codegen_llvm/intrinsic.rs index ba04cc7fad5..93351651db9 100644 --- a/src/librustc_codegen_llvm/intrinsic.rs +++ b/src/librustc_codegen_llvm/intrinsic.rs @@ -25,6 +25,7 @@ use type_of::LayoutLlvmExt; use rustc::ty::{self, Ty}; use rustc::ty::layout::{HasDataLayout, LayoutOf}; use rustc::hir; +use rustc::middle::lang_items::AlignOffsetLangItem; use syntax::ast; use syntax::symbol::Symbol; use builder::Builder; @@ -390,16 +391,29 @@ pub fn codegen_intrinsic_call<'a, 'tcx>(bx: &Builder<'a, 'tcx>, } "align_offset" => { - // `ptr as usize` - let ptr_val = bx.ptrtoint(args[0].immediate(), bx.cx.isize_ty); - // `ptr_val % align` - let align = args[1].immediate(); - let offset = bx.urem(ptr_val, align); + let (ptr, align) = (args[0].immediate(), args[1].immediate()); + let stride_of_t = bx.cx.layout_of(substs.type_at(0)).size_and_align().0.bytes(); + let stride = C_usize(bx.cx, stride_of_t); let zero = C_null(bx.cx.isize_ty); - // `offset == 0` - let is_zero = bx.icmp(llvm::IntPredicate::IntEQ, offset, zero); - // `if offset == 0 { 0 } else { align - offset }` - bx.select(is_zero, zero, bx.sub(align, offset)) + let max = C_int(cx.isize_ty, -1); // -1isize (wherein I cheat horribly to make !0usize) + + if stride_of_t <= 1 { + // offset = ptr as usize % align => offset = ptr as usize & (align - 1) + let modmask = bx.sub(align, C_usize(bx.cx, 1)); + let offset = bx.and(bx.ptrtoint(ptr, bx.cx.isize_ty), modmask); + let is_zero = bx.icmp(llvm::IntPredicate::IntEQ, offset, zero); + // if offset == 0 { 0 } else { if stride_of_t == 1 { align - offset } else { !0 } } + bx.select(is_zero, zero, if stride_of_t == 1 { + bx.sub(align, offset) + } else { + max + }) + } else { + let did = ::common::langcall(bx.tcx(), Some(span), "", AlignOffsetLangItem); + let instance = ty::Instance::mono(bx.tcx(), did); + let llfn = ::callee::get_fn(bx.cx, instance); + bx.call(llfn, &[ptr, align, stride], None) + } } name if name.starts_with("simd_") => { match generic_simd_intrinsic(bx, name, diff --git a/src/librustc_typeck/check/intrinsic.rs b/src/librustc_typeck/check/intrinsic.rs index feb26e76162..db6062db1fb 100644 --- a/src/librustc_typeck/check/intrinsic.rs +++ b/src/librustc_typeck/check/intrinsic.rs @@ -315,8 +315,7 @@ pub fn check_intrinsic_type<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, } "align_offset" => { - let ptr_ty = tcx.mk_imm_ptr(tcx.mk_nil()); - (0, vec![ptr_ty, tcx.types.usize], tcx.types.usize) + (1, vec![tcx.mk_imm_ptr(param(0)), tcx.types.usize], tcx.types.usize) }, "nontemporal_store" => { diff --git a/src/test/run-pass/align-offset-sign.rs b/src/test/run-pass/align-offset-sign.rs index aaa0419d061..f7d427cb7b2 100644 --- a/src/test/run-pass/align-offset-sign.rs +++ b/src/test/run-pass/align-offset-sign.rs @@ -10,7 +10,85 @@ #![feature(align_offset)] +#[derive(Clone, Copy)] +#[repr(packed)] +struct A3(u16, u8); +struct A4(u32); +#[repr(packed)] +struct A5(u32, u8); +#[repr(packed)] +struct A6(u32, u16); +#[repr(packed)] +struct A7(u32, u16, u8); +#[repr(packed)] +struct A8(u32, u32); +#[repr(packed)] +struct A9(u32, u32, u8); +#[repr(packed)] +struct A10(u32, u32, u16); + +unsafe fn test_weird_stride<T>(ptr: *const T, align: usize) -> bool { + let numptr = ptr as usize; + let mut expected = usize::max_value(); + // Naive but definitely correct way to find the *first* aligned element of stride::<T>. + for el in (0..align) { + if (numptr + el * ::std::mem::size_of::<T>()) % align == 0 { + expected = el; + break; + } + } + let got = ptr.align_offset(align); + if got != expected { + eprintln!("aligning {:p} (with stride of {}) to {}, expected {}, got {}", ptr, ::std::mem::size_of::<T>(), align, expected, got); + return true; + } + return false; +} + fn main() { - let x = 1 as *const u8; - assert_eq!(x.align_offset(8), 7); + unsafe { + // For pointers of stride = 0, the pointer is already aligned or it cannot be aligned at + // all, because no amount of elements will align the pointer. + let mut p = 1; + while p < 1024 { + assert_eq!((p as *const ()).align_offset(p), 0); + if (p != 1) { + assert_eq!(((p + 1) as *const ()).align_offset(p), !0); + } + p = (p + 1).next_power_of_two(); + } + + // For pointers of stride = 1, the pointer can always be aligned. The offset is equal to + // number of bytes. + let mut align = 1; + while align < 1024 { + for ptr in 1..2*align { + let expected = ptr % align; + let offset = if expected == 0 { 0 } else { align - expected }; + assert_eq!((ptr as *const u8).align_offset(align), offset, + "ptr = {}, align = {}, size = 1", ptr, align); + align = (align + 1).next_power_of_two(); + } + } + + + // For pointers of stride != 1, we verify the algorithm against the naivest possible + // implementation + let mut align = 1; + let mut x = false; + while align < 1024 { + for ptr in 1usize..4*align { + x |= test_weird_stride::<A3>(ptr as *const A3, align); + x |= test_weird_stride::<A4>(ptr as *const A4, align); + x |= test_weird_stride::<A5>(ptr as *const A5, align); + x |= test_weird_stride::<A6>(ptr as *const A6, align); + x |= test_weird_stride::<A7>(ptr as *const A7, align); + x |= test_weird_stride::<A8>(ptr as *const A8, align); + x |= test_weird_stride::<A9>(ptr as *const A9, align); + x |= test_weird_stride::<A10>(ptr as *const A10, align); + } + align = (align + 1).next_power_of_two(); + } + assert!(!x); + } } |
