| field | value | timestamp |
|---|---|---|
| author | Julian Orth <ju.orth@gmail.com> | 2020-12-19 19:08:03 +0100 |
| committer | Julian Orth <ju.orth@gmail.com> | 2020-12-19 20:43:15 +0100 |
| commit | be1511408edbae0deb9b6aceed7cf22aa5ca8e76 | |
| tree | 9d7cacbcc0f85ec877e3a115db46766bc49d27ba | |
| parent | 8bb302d34dde36cc78a288cb5beb4af7fe30ba41 | |
Optimize DST field access
For

    struct X<T: ?Sized>(T);
    struct Y<T: ?Sized>(u8, T);

the offset of the unsized field is

    0
    mem::align_of_val(&self.1)

respectively. This patch changes the expression used to compute these
offsets so that the optimizer can reduce it to exactly these values.
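These offsets can be checked directly. Below is a standalone sketch, not part of the patch: the helper `offset_of_unsized` is hypothetical, and a slice DST stands in for a trait object so the assertions run as-is.

```rust
use std::mem;

struct X<T: ?Sized>(T);
struct Y<T: ?Sized>(u8, T);

/// Distance in bytes from the start of the struct to the unsized field.
fn offset_of_unsized<T: ?Sized>(base: *const u8, field: &T) -> usize {
    // Discard the fat-pointer metadata and measure the address difference.
    (field as *const T).cast::<u8>() as usize - base as usize
}

fn main() {
    let x: &X<[u32]> = &X([1, 2, 3]);
    let y: &Y<[u32]> = &Y(0, [1, 2, 3]);

    // X: the unsized field lives at offset 0.
    assert_eq!(offset_of_unsized((x as *const X<[u32]>).cast(), &x.0), 0);

    // Y: the u8 occupies byte 0, and the unsized field is bumped up to its
    // own alignment, i.e. mem::align_of_val(&y.1) (4 for a [u32] slice).
    assert_eq!(
        offset_of_unsized((y as *const Y<[u32]>).cast(), &y.1),
        mem::align_of_val(&y.1)
    );
}
```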
Consider
```rust
use std::any::Any;

fn test(x: &X<dyn Any>) -> &dyn Any {
    &x.0
}
```
Before:
```asm
test:
movq %rsi, %rdx
movq 16(%rsi), %rax
leaq -1(%rax), %rcx
negq %rax
andq %rcx, %rax
addq %rdi, %rax
retq
```
After:
```asm
test:
movq %rsi, %rdx
movq %rdi, %rax
retq
```
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | compiler/rustc_codegen_ssa/src/mir/place.rs | 54 |

1 file changed, 44 insertions, 10 deletions
```diff
diff --git a/compiler/rustc_codegen_ssa/src/mir/place.rs b/compiler/rustc_codegen_ssa/src/mir/place.rs
index e4f4c884470..3c0cddf10bf 100644
--- a/compiler/rustc_codegen_ssa/src/mir/place.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/place.rs
@@ -178,16 +178,8 @@ impl<'a, 'tcx, V: CodegenObject> PlaceRef<'tcx, V> {
         // Get the alignment of the field
         let (_, unsized_align) = glue::size_and_align_of_dst(bx, field.ty, meta);
 
-        // Bump the unaligned offset up to the appropriate alignment using the
-        // following expression:
-        //
-        //     (unaligned offset + (align - 1)) & -align
-
-        // Calculate offset.
-        let align_sub_1 = bx.sub(unsized_align, bx.cx().const_usize(1u64));
-        let and_lhs = bx.add(unaligned_offset, align_sub_1);
-        let and_rhs = bx.neg(unsized_align);
-        let offset = bx.and(and_lhs, and_rhs);
+        // Bump the unaligned offset up to the appropriate alignment
+        let offset = round_up_const_value_to_alignment(bx, unaligned_offset, unsized_align);
 
         debug!("struct_field_ptr: DST field offset: {:?}", offset);
 
@@ -518,3 +510,45 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
         self.monomorphize(place_ty.ty)
     }
 }
+
+fn round_up_const_value_to_alignment<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
+    bx: &mut Bx,
+    value: Bx::Value,
+    align: Bx::Value,
+) -> Bx::Value {
+    // In pseudo code:
+    //
+    //     if value & (align - 1) == 0 {
+    //         value
+    //     } else {
+    //         (value & !(align - 1)) + align
+    //     }
+    //
+    // Usually this is written without branches as
+    //
+    //     (value + align - 1) & !(align - 1)
+    //
+    // But this formula cannot take advantage of constant `value`. E.g. if `value` is known
+    // at compile time to be `1`, this expression should be optimized to `align`. However,
+    // this optimization only holds if `align` is a power of two. Since the optimizer
+    // doesn't know that `align` is a power of two, it cannot perform this optimization.
+    //
+    // Instead we use
+    //
+    //     value + (-value & (align - 1))
+    //
+    // Since `align` is used only once, the expression can be optimized. For `value = 0`
+    // it's optimized to `0` even in debug mode.
+    //
+    // NB: The previous version of this code used
+    //
+    //     (value + align - 1) & -align
+    //
+    // Even though `-align == !(align - 1)`, LLVM failed to optimize this even for
+    // `value = 0`. Bug report: https://bugs.llvm.org/show_bug.cgi?id=48559
+    let one = bx.const_usize(1);
+    let align_minus_1 = bx.sub(align, one);
+    let neg_value = bx.neg(value);
+    let offset = bx.and(neg_value, align_minus_1);
+    bx.add(value, offset)
+}
```
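The formula from the comment is easy to check in plain Rust. The following is a standalone sketch (the helper `round_up` is hypothetical, not part of the patch) and assumes `align` is a power of two, which is always true of type alignments:

```rust
/// Rounds `value` up to the next multiple of `align`.
/// Assumes `align` is a power of two, as type alignments always are.
fn round_up(value: usize, align: usize) -> usize {
    // value + (-value & (align - 1)): `align` appears only once, so the
    // optimizer can fold the whole expression when `value` is a constant.
    value.wrapping_add(value.wrapping_neg() & (align - 1))
}

fn main() {
    assert_eq!(round_up(0, 8), 0);  // a constant 0 folds to 0
    assert_eq!(round_up(1, 8), 8);  // a constant 1 folds to `align`
    assert_eq!(round_up(9, 8), 16);
}
```

Because `align` is used only once here, a constant `value` lets LLVM fold the entire expression, which is exactly the improvement the before/after assembly above demonstrates.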
