| field | value | timestamp |
|---|---|---|
| author | Julian Orth <ju.orth@gmail.com> | 2020-12-19 19:08:03 +0100 |
| committer | Julian Orth <ju.orth@gmail.com> | 2020-12-19 20:43:15 +0100 |
| commit | be1511408edbae0deb9b6aceed7cf22aa5ca8e76 | |
| tree | 9d7cacbcc0f85ec877e3a115db46766bc49d27ba | |
| parent | 8bb302d34dde36cc78a288cb5beb4af7fe30ba41 | |
Optimize DST field access
For

    struct X<T: ?Sized>(T);
    struct Y<T: ?Sized>(u8, T);

the offset of the unsized field is

    0
    mem::align_of_val(&self.1)

respectively. This patch changes the expression used to compute these
offsets so that the optimizer can reduce it to exactly these values.
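These offsets can be checked directly. Below is a standalone sketch, not part of the patch: the helper `offset_of_unsized` is hypothetical, and a slice DST stands in for a trait object so the assertions run as-is.

```rust
use std::mem;

struct X<T: ?Sized>(T);
struct Y<T: ?Sized>(u8, T);

/// Distance in bytes from the start of the struct to the unsized field.
fn offset_of_unsized<T: ?Sized>(base: *const u8, field: &T) -> usize {
    // Discard the fat-pointer metadata and measure the address difference.
    (field as *const T).cast::<u8>() as usize - base as usize
}

fn main() {
    let x: &X<[u32]> = &X([1, 2, 3]);
    let y: &Y<[u32]> = &Y(0, [1, 2, 3]);

    // X: the unsized field lives at offset 0.
    assert_eq!(offset_of_unsized((x as *const X<[u32]>).cast(), &x.0), 0);

    // Y: the u8 occupies byte 0, and the unsized field is bumped up to its
    // own alignment, i.e. mem::align_of_val(&y.1) (4 for a [u32] slice).
    assert_eq!(
        offset_of_unsized((y as *const Y<[u32]>).cast(), &y.1),
        mem::align_of_val(&y.1)
    );
}
```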
Consider
```rust
use std::any::Any;

fn test(x: &X<dyn Any>) -> &dyn Any {
    &x.0
}
```
Before:
```asm
test:
movq %rsi, %rdx
movq 16(%rsi), %rax
leaq -1(%rax), %rcx
negq %rax
andq %rcx, %rax
addq %rdi, %rax
retq
```
After:
```asm
test:
movq %rsi, %rdx
movq %rdi, %rax
retq
```
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | compiler/rustc_codegen_ssa/src/mir/place.rs | 54 |

1 file changed, 44 insertions, 10 deletions
```diff
diff --git a/compiler/rustc_codegen_ssa/src/mir/place.rs b/compiler/rustc_codegen_ssa/src/mir/place.rs
index e4f4c884470..3c0cddf10bf 100644
--- a/compiler/rustc_codegen_ssa/src/mir/place.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/place.rs
@@ -178,16 +178,8 @@ impl<'a, 'tcx, V: CodegenObject> PlaceRef<'tcx, V> {
         // Get the alignment of the field
         let (_, unsized_align) = glue::size_and_align_of_dst(bx, field.ty, meta);
 
-        // Bump the unaligned offset up to the appropriate alignment using the
-        // following expression:
-        //
-        //     (unaligned offset + (align - 1)) & -align
-
-        // Calculate offset.
-        let align_sub_1 = bx.sub(unsized_align, bx.cx().const_usize(1u64));
-        let and_lhs = bx.add(unaligned_offset, align_sub_1);
-        let and_rhs = bx.neg(unsized_align);
-        let offset = bx.and(and_lhs, and_rhs);
+        // Bump the unaligned offset up to the appropriate alignment
+        let offset = round_up_const_value_to_alignment(bx, unaligned_offset, unsized_align);
 
         debug!("struct_field_ptr: DST field offset: {:?}", offset);
 
@@ -518,3 +510,45 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
         self.monomorphize(place_ty.ty)
     }
 }
+
+fn round_up_const_value_to_alignment<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
+    bx: &mut Bx,
+    value: Bx::Value,
+    align: Bx::Value,
+) -> Bx::Value {
+    // In pseudo code:
+    //
+    //     if value & (align - 1) == 0 {
+    //         value
+    //     } else {
+    //         (value & !(align - 1)) + align
+    //     }
+    //
+    // Usually this is written without branches as
+    //
+    //     (value + align - 1) & !(align - 1)
+    //
+    // But this formula cannot take advantage of constant `value`. E.g. if `value` is known
+    // at compile time to be `1`, this expression should be optimized to `align`. However,
+    // this optimization only holds if `align` is a power of two. Since the optimizer
+    // doesn't know that `align` is a power of two, it cannot perform this optimization.
+    //
+    // Instead we use
+    //
+    //     value + (-value & (align - 1))
+    //
+    // Since `align` is used only once, the expression can be optimized. For `value = 0`
+    // it's optimized to `0` even in debug mode.
+    //
+    // NB: The previous version of this code used
+    //
+    //     (value + align - 1) & -align
+    //
+    // Even though `-align == !(align - 1)`, LLVM failed to optimize this even for
+    // `value = 0`. Bug report: https://bugs.llvm.org/show_bug.cgi?id=48559
+    let one = bx.const_usize(1);
+    let align_minus_1 = bx.sub(align, one);
+    let neg_value = bx.neg(value);
+    let offset = bx.and(neg_value, align_minus_1);
+    bx.add(value, offset)
+}
```
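The formula from the comment is easy to check in plain Rust. The following is a standalone sketch (the helper `round_up` is hypothetical, not part of the patch) and assumes `align` is a power of two, which is always true of type alignments:

```rust
/// Rounds `value` up to the next multiple of `align`.
/// Assumes `align` is a power of two, as type alignments always are.
fn round_up(value: usize, align: usize) -> usize {
    // value + (-value & (align - 1)): `align` appears only once, so the
    // optimizer can fold the whole expression when `value` is a constant.
    value.wrapping_add(value.wrapping_neg() & (align - 1))
}

fn main() {
    assert_eq!(round_up(0, 8), 0);  // a constant 0 folds to 0
    assert_eq!(round_up(1, 8), 8);  // a constant 1 folds to `align`
    assert_eq!(round_up(9, 8), 16);
}
```

Because `align` is used only once here, a constant `value` lets LLVM fold the entire expression, which is exactly the improvement the before/after assembly above demonstrates.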
