about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/common.rs 22
-rw-r--r-- src/intrinsic/mod.rs 356
2 files changed, 216 insertions, 162 deletions
diff --git a/src/common.rs b/src/common.rs
index 709fa2a297b..bda08b653f0 100644
--- a/src/common.rs
+++ b/src/common.rs
@@ -302,6 +302,7 @@ pub trait SignType<'gcc, 'tcx> {
     fn is_signed(&self, cx: &CodegenCx<'gcc, 'tcx>) -> bool;
     fn is_unsigned(&self, cx: &CodegenCx<'gcc, 'tcx>) -> bool;
     fn to_signed(&self, cx: &CodegenCx<'gcc, 'tcx>) -> Type<'gcc>;
+    fn to_unsigned(&self, cx: &CodegenCx<'gcc, 'tcx>) -> Type<'gcc>;
 }
 
 impl<'gcc, 'tcx> SignType<'gcc, 'tcx> for Type<'gcc> {
@@ -333,6 +334,27 @@ impl<'gcc, 'tcx> SignType<'gcc, 'tcx> for Type<'gcc> {
             self.clone()
         }
     }
+
+    fn to_unsigned(&self, cx: &CodegenCx<'gcc, 'tcx>) -> Type<'gcc> {
+        if self.is_i8(cx) {
+            cx.u8_type
+        }
+        else if self.is_i16(cx) {
+            cx.u16_type
+        }
+        else if self.is_i32(cx) {
+            cx.u32_type
+        }
+        else if self.is_i64(cx) {
+            cx.u64_type
+        }
+        else if self.is_i128(cx) {
+            cx.u128_type
+        }
+        else {
+            self.clone()
+        }
+    }
 }
 
 pub trait TypeReflection<'gcc, 'tcx>  {
diff --git a/src/intrinsic/mod.rs b/src/intrinsic/mod.rs
index 3dc4f61a7ac..45978e14090 100644
--- a/src/intrinsic/mod.rs
+++ b/src/intrinsic/mod.rs
@@ -18,7 +18,7 @@ use rustc_target::spec::PanicStrategy;
 
 use crate::abi::GccType;
 use crate::builder::Builder;
-use crate::common::TypeReflection;
+use crate::common::{SignType, TypeReflection};
 use crate::context::CodegenCx;
 use crate::type_of::LayoutGccExt;
 use crate::intrinsic::simd::generic_simd_intrinsic;
@@ -520,163 +520,176 @@ fn int_type_width_signed<'gcc, 'tcx>(ty: Ty<'tcx>, cx: &CodegenCx<'gcc, 'tcx>) -
 
 impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
     fn bit_reverse(&mut self, width: u64, value: RValue<'gcc>) -> RValue<'gcc> {
-        let typ = value.get_type();
+        let result_type = value.get_type();
+        let typ = result_type.to_unsigned(self.cx);
+
+        let value =
+            if result_type.is_signed(self.cx) {
+                self.context.new_bitcast(None, value, typ)
+            }
+            else {
+                value
+            };
+
         let context = &self.cx.context;
-        match width {
-            8 => {
-                // First step.
-                let left = self.and(value, context.new_rvalue_from_int(typ, 0xF0));
-                let left = self.lshr(left, context.new_rvalue_from_int(typ, 4));
-                let right = self.and(value, context.new_rvalue_from_int(typ, 0x0F));
-                let right = self.shl(right, context.new_rvalue_from_int(typ, 4));
-                let step1 = self.or(left, right);
-
-                // Second step.
-                let left = self.and(step1, context.new_rvalue_from_int(typ, 0xCC));
-                let left = self.lshr(left, context.new_rvalue_from_int(typ, 2));
-                let right = self.and(step1, context.new_rvalue_from_int(typ, 0x33));
-                let right = self.shl(right, context.new_rvalue_from_int(typ, 2));
-                let step2 = self.or(left, right);
-
-                // Third step.
-                let left = self.and(step2, context.new_rvalue_from_int(typ, 0xAA));
-                let left = self.lshr(left, context.new_rvalue_from_int(typ, 1));
-                let right = self.and(step2, context.new_rvalue_from_int(typ, 0x55));
-                let right = self.shl(right, context.new_rvalue_from_int(typ, 1));
-                let step3 = self.or(left, right);
-
-                step3
-            },
-            16 => {
-                // First step.
-                let left = self.and(value, context.new_rvalue_from_int(typ, 0x5555));
-                let left = self.shl(left, context.new_rvalue_from_int(typ, 1));
-                let right = self.and(value, context.new_rvalue_from_int(typ, 0xAAAA));
-                let right = self.lshr(right, context.new_rvalue_from_int(typ, 1));
-                let step1 = self.or(left, right);
-
-                // Second step.
-                let left = self.and(step1, context.new_rvalue_from_int(typ, 0x3333));
-                let left = self.shl(left, context.new_rvalue_from_int(typ, 2));
-                let right = self.and(step1, context.new_rvalue_from_int(typ, 0xCCCC));
-                let right = self.lshr(right, context.new_rvalue_from_int(typ, 2));
-                let step2 = self.or(left, right);
-
-                // Third step.
-                let left = self.and(step2, context.new_rvalue_from_int(typ, 0x0F0F));
-                let left = self.shl(left, context.new_rvalue_from_int(typ, 4));
-                let right = self.and(step2, context.new_rvalue_from_int(typ, 0xF0F0));
-                let right = self.lshr(right, context.new_rvalue_from_int(typ, 4));
-                let step3 = self.or(left, right);
-
-                // Fourth step.
-                let left = self.and(step3, context.new_rvalue_from_int(typ, 0x00FF));
-                let left = self.shl(left, context.new_rvalue_from_int(typ, 8));
-                let right = self.and(step3, context.new_rvalue_from_int(typ, 0xFF00));
-                let right = self.lshr(right, context.new_rvalue_from_int(typ, 8));
-                let step4 = self.or(left, right);
-
-                step4
-            },
-            32 => {
-                // TODO(antoyo): Refactor with other implementations.
-                // First step.
-                let left = self.and(value, context.new_rvalue_from_long(typ, 0x55555555));
-                let left = self.shl(left, context.new_rvalue_from_long(typ, 1));
-                let right = self.and(value, context.new_rvalue_from_long(typ, 0xAAAAAAAA));
-                let right = self.lshr(right, context.new_rvalue_from_long(typ, 1));
-                let step1 = self.or(left, right);
-
-                // Second step.
-                let left = self.and(step1, context.new_rvalue_from_long(typ, 0x33333333));
-                let left = self.shl(left, context.new_rvalue_from_long(typ, 2));
-                let right = self.and(step1, context.new_rvalue_from_long(typ, 0xCCCCCCCC));
-                let right = self.lshr(right, context.new_rvalue_from_long(typ, 2));
-                let step2 = self.or(left, right);
-
-                // Third step.
-                let left = self.and(step2, context.new_rvalue_from_long(typ, 0x0F0F0F0F));
-                let left = self.shl(left, context.new_rvalue_from_long(typ, 4));
-                let right = self.and(step2, context.new_rvalue_from_long(typ, 0xF0F0F0F0));
-                let right = self.lshr(right, context.new_rvalue_from_long(typ, 4));
-                let step3 = self.or(left, right);
-
-                // Fourth step.
-                let left = self.and(step3, context.new_rvalue_from_long(typ, 0x00FF00FF));
-                let left = self.shl(left, context.new_rvalue_from_long(typ, 8));
-                let right = self.and(step3, context.new_rvalue_from_long(typ, 0xFF00FF00));
-                let right = self.lshr(right, context.new_rvalue_from_long(typ, 8));
-                let step4 = self.or(left, right);
-
-                // Fifth step.
-                let left = self.and(step4, context.new_rvalue_from_long(typ, 0x0000FFFF));
-                let left = self.shl(left, context.new_rvalue_from_long(typ, 16));
-                let right = self.and(step4, context.new_rvalue_from_long(typ, 0xFFFF0000));
-                let right = self.lshr(right, context.new_rvalue_from_long(typ, 16));
-                let step5 = self.or(left, right);
-
-                step5
-            },
-            64 => {
-                // First step.
-                let left = self.shl(value, context.new_rvalue_from_long(typ, 32));
-                let right = self.lshr(value, context.new_rvalue_from_long(typ, 32));
-                let step1 = self.or(left, right);
-
-                // Second step.
-                let left = self.and(step1, context.new_rvalue_from_long(typ, 0x0001FFFF0001FFFF));
-                let left = self.shl(left, context.new_rvalue_from_long(typ, 15));
-                let right = self.and(step1, context.new_rvalue_from_long(typ, 0xFFFE0000FFFE0000u64 as i64)); // TODO(antoyo): transmute the number instead?
-                let right = self.lshr(right, context.new_rvalue_from_long(typ, 17));
-                let step2 = self.or(left, right);
-
-                // Third step.
-                let left = self.lshr(step2, context.new_rvalue_from_long(typ, 10));
-                let left = self.xor(step2, left);
-                let temp = self.and(left, context.new_rvalue_from_long(typ, 0x003F801F003F801F));
-
-                let left = self.shl(temp, context.new_rvalue_from_long(typ, 10));
-                let left = self.or(temp, left);
-                let step3 = self.xor(left, step2);
-
-                // Fourth step.
-                let left = self.lshr(step3, context.new_rvalue_from_long(typ, 4));
-                let left = self.xor(step3, left);
-                let temp = self.and(left, context.new_rvalue_from_long(typ, 0x0E0384210E038421));
-
-                let left = self.shl(temp, context.new_rvalue_from_long(typ, 4));
-                let left = self.or(temp, left);
-                let step4 = self.xor(left, step3);
-
-                // Fifth step.
-                let left = self.lshr(step4, context.new_rvalue_from_long(typ, 2));
-                let left = self.xor(step4, left);
-                let temp = self.and(left, context.new_rvalue_from_long(typ, 0x2248884222488842));
-
-                let left = self.shl(temp, context.new_rvalue_from_long(typ, 2));
-                let left = self.or(temp, left);
-                let step5 = self.xor(left, step4);
-
-                step5
-            },
-            128 => {
-                // TODO(antoyo): find a more efficient implementation?
-                let sixty_four = self.context.new_rvalue_from_long(typ, 64);
-                let high = self.context.new_cast(None, value >> sixty_four, self.u64_type);
-                let low = self.context.new_cast(None, value, self.u64_type);
+        let result =
+            match width {
+                8 => {
+                    // First step.
+                    let left = self.and(value, context.new_rvalue_from_int(typ, 0xF0));
+                    let left = self.lshr(left, context.new_rvalue_from_int(typ, 4));
+                    let right = self.and(value, context.new_rvalue_from_int(typ, 0x0F));
+                    let right = self.shl(right, context.new_rvalue_from_int(typ, 4));
+                    let step1 = self.or(left, right);
+
+                    // Second step.
+                    let left = self.and(step1, context.new_rvalue_from_int(typ, 0xCC));
+                    let left = self.lshr(left, context.new_rvalue_from_int(typ, 2));
+                    let right = self.and(step1, context.new_rvalue_from_int(typ, 0x33));
+                    let right = self.shl(right, context.new_rvalue_from_int(typ, 2));
+                    let step2 = self.or(left, right);
+
+                    // Third step.
+                    let left = self.and(step2, context.new_rvalue_from_int(typ, 0xAA));
+                    let left = self.lshr(left, context.new_rvalue_from_int(typ, 1));
+                    let right = self.and(step2, context.new_rvalue_from_int(typ, 0x55));
+                    let right = self.shl(right, context.new_rvalue_from_int(typ, 1));
+                    let step3 = self.or(left, right);
+
+                    step3
+                },
+                16 => {
+                    // First step.
+                    let left = self.and(value, context.new_rvalue_from_int(typ, 0x5555));
+                    let left = self.shl(left, context.new_rvalue_from_int(typ, 1));
+                    let right = self.and(value, context.new_rvalue_from_int(typ, 0xAAAA));
+                    let right = self.lshr(right, context.new_rvalue_from_int(typ, 1));
+                    let step1 = self.or(left, right);
+
+                    // Second step.
+                    let left = self.and(step1, context.new_rvalue_from_int(typ, 0x3333));
+                    let left = self.shl(left, context.new_rvalue_from_int(typ, 2));
+                    let right = self.and(step1, context.new_rvalue_from_int(typ, 0xCCCC));
+                    let right = self.lshr(right, context.new_rvalue_from_int(typ, 2));
+                    let step2 = self.or(left, right);
+
+                    // Third step.
+                    let left = self.and(step2, context.new_rvalue_from_int(typ, 0x0F0F));
+                    let left = self.shl(left, context.new_rvalue_from_int(typ, 4));
+                    let right = self.and(step2, context.new_rvalue_from_int(typ, 0xF0F0));
+                    let right = self.lshr(right, context.new_rvalue_from_int(typ, 4));
+                    let step3 = self.or(left, right);
+
+                    // Fourth step.
+                    let left = self.and(step3, context.new_rvalue_from_int(typ, 0x00FF));
+                    let left = self.shl(left, context.new_rvalue_from_int(typ, 8));
+                    let right = self.and(step3, context.new_rvalue_from_int(typ, 0xFF00));
+                    let right = self.lshr(right, context.new_rvalue_from_int(typ, 8));
+                    let step4 = self.or(left, right);
+
+                    step4
+                },
+                32 => {
+                    // TODO(antoyo): Refactor with other implementations.
+                    // First step.
+                    let left = self.and(value, context.new_rvalue_from_long(typ, 0x55555555));
+                    let left = self.shl(left, context.new_rvalue_from_long(typ, 1));
+                    let right = self.and(value, context.new_rvalue_from_long(typ, 0xAAAAAAAA));
+                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 1));
+                    let step1 = self.or(left, right);
+
+                    // Second step.
+                    let left = self.and(step1, context.new_rvalue_from_long(typ, 0x33333333));
+                    let left = self.shl(left, context.new_rvalue_from_long(typ, 2));
+                    let right = self.and(step1, context.new_rvalue_from_long(typ, 0xCCCCCCCC));
+                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 2));
+                    let step2 = self.or(left, right);
+
+                    // Third step.
+                    let left = self.and(step2, context.new_rvalue_from_long(typ, 0x0F0F0F0F));
+                    let left = self.shl(left, context.new_rvalue_from_long(typ, 4));
+                    let right = self.and(step2, context.new_rvalue_from_long(typ, 0xF0F0F0F0));
+                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 4));
+                    let step3 = self.or(left, right);
+
+                    // Fourth step.
+                    let left = self.and(step3, context.new_rvalue_from_long(typ, 0x00FF00FF));
+                    let left = self.shl(left, context.new_rvalue_from_long(typ, 8));
+                    let right = self.and(step3, context.new_rvalue_from_long(typ, 0xFF00FF00));
+                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 8));
+                    let step4 = self.or(left, right);
+
+                    // Fifth step.
+                    let left = self.and(step4, context.new_rvalue_from_long(typ, 0x0000FFFF));
+                    let left = self.shl(left, context.new_rvalue_from_long(typ, 16));
+                    let right = self.and(step4, context.new_rvalue_from_long(typ, 0xFFFF0000));
+                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 16));
+                    let step5 = self.or(left, right);
+
+                    step5
+                },
+                64 => {
+                    // First step.
+                    let left = self.shl(value, context.new_rvalue_from_long(typ, 32));
+                    let right = self.lshr(value, context.new_rvalue_from_long(typ, 32));
+                    let step1 = self.or(left, right);
+
+                    // Second step.
+                    let left = self.and(step1, context.new_rvalue_from_long(typ, 0x0001FFFF0001FFFF));
+                    let left = self.shl(left, context.new_rvalue_from_long(typ, 15));
+                    let right = self.and(step1, context.new_rvalue_from_long(typ, 0xFFFE0000FFFE0000u64 as i64)); // TODO(antoyo): transmute the number instead?
+                    let right = self.lshr(right, context.new_rvalue_from_long(typ, 17));
+                    let step2 = self.or(left, right);
+
+                    // Third step.
+                    let left = self.lshr(step2, context.new_rvalue_from_long(typ, 10));
+                    let left = self.xor(step2, left);
+                    let temp = self.and(left, context.new_rvalue_from_long(typ, 0x003F801F003F801F));
+
+                    let left = self.shl(temp, context.new_rvalue_from_long(typ, 10));
+                    let left = self.or(temp, left);
+                    let step3 = self.xor(left, step2);
+
+                    // Fourth step.
+                    let left = self.lshr(step3, context.new_rvalue_from_long(typ, 4));
+                    let left = self.xor(step3, left);
+                    let temp = self.and(left, context.new_rvalue_from_long(typ, 0x0E0384210E038421));
+
+                    let left = self.shl(temp, context.new_rvalue_from_long(typ, 4));
+                    let left = self.or(temp, left);
+                    let step4 = self.xor(left, step3);
+
+                    // Fifth step.
+                    let left = self.lshr(step4, context.new_rvalue_from_long(typ, 2));
+                    let left = self.xor(step4, left);
+                    let temp = self.and(left, context.new_rvalue_from_long(typ, 0x2248884222488842));
+
+                    let left = self.shl(temp, context.new_rvalue_from_long(typ, 2));
+                    let left = self.or(temp, left);
+                    let step5 = self.xor(left, step4);
+
+                    step5
+                },
+                128 => {
+                    // TODO(antoyo): find a more efficient implementation?
+                    let sixty_four = self.context.new_rvalue_from_long(typ, 64);
+                    let high = self.context.new_cast(None, value >> sixty_four, self.u64_type);
+                    let low = self.context.new_cast(None, value, self.u64_type);
 
-                let reversed_high = self.bit_reverse(64, high);
-                let reversed_low = self.bit_reverse(64, low);
+                    let reversed_high = self.bit_reverse(64, high);
+                    let reversed_low = self.bit_reverse(64, low);
 
-                let new_low = self.context.new_cast(None, reversed_high, typ);
-                let new_high = self.context.new_cast(None, reversed_low, typ) << sixty_four;
+                    let new_low = self.context.new_cast(None, reversed_high, typ);
+                    let new_high = self.context.new_cast(None, reversed_low, typ) << sixty_four;
 
-                new_low | new_high
-            },
-            _ => {
-                panic!("cannot bit reverse with width = {}", width);
-            },
-        }
+                    new_low | new_high
+                },
+                _ => {
+                    panic!("cannot bit reverse with width = {}", width);
+                },
+            };
+
+        self.context.new_bitcast(None, result, result_type)
     }
 
     fn count_leading_zeroes(&self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
@@ -746,6 +759,15 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
     }
 
     fn count_trailing_zeroes(&self, _width: u64, arg: RValue<'gcc>) -> RValue<'gcc> {
+        let result_type = arg.get_type();
+        let arg =
+            if result_type.is_signed(self.cx) {
+                let new_type = result_type.to_unsigned(self.cx);
+                self.context.new_bitcast(None, arg, new_type)
+            }
+            else {
+                arg
+            };
         let arg_type = arg.get_type();
         let (count_trailing_zeroes, expected_type) =
             if arg_type.is_uchar(&self.cx) || arg_type.is_ushort(&self.cx) || arg_type.is_uint(&self.cx) {
@@ -796,7 +818,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
 
                 let res = self.context.new_array_access(None, result, index);
 
-                return self.context.new_cast(None, res, arg_type);
+                return self.context.new_bitcast(None, res, result_type);
             }
             else {
                 unimplemented!("count_trailing_zeroes for {:?}", arg_type);
@@ -810,7 +832,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
                 arg
             };
         let res = self.context.new_call(None, count_trailing_zeroes, &[arg]);
-        self.context.new_cast(None, res, arg_type)
+        self.context.new_bitcast(None, res, result_type)
     }
 
     fn int_width(&self, typ: Type<'gcc>) -> i64 {
@@ -819,7 +841,16 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
 
     fn pop_count(&self, value: RValue<'gcc>) -> RValue<'gcc> {
         // TODO(antoyo): use the optimized version with fewer operations.
-        let value_type = value.get_type();
+        let result_type = value.get_type();
+        let value_type = result_type.to_unsigned(self.cx);
+
+        let value =
+            if result_type.is_signed(self.cx) {
+                self.context.new_bitcast(None, value, value_type)
+            }
+            else {
+                value
+            };
 
         if value_type.is_u128(&self.cx) {
             // TODO(antoyo): implement in the normal algorithm below to have a more efficient
@@ -830,7 +861,8 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
             let high = self.context.new_call(None, popcount, &[high]);
             let low = self.context.new_cast(None, value, self.cx.ulonglong_type);
             let low = self.context.new_call(None, popcount, &[low]);
-            return high + low;
+            let res = high + low;
+            return self.context.new_bitcast(None, res, result_type);
         }
 
         // First step.
@@ -855,7 +887,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         let value = left + right;
 
         if value_type.is_u8(&self.cx) {
-            return value;
+            return self.context.new_bitcast(None, value, result_type);
         }
 
         // Fourth step.
@@ -866,7 +898,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         let value = left + right;
 
         if value_type.is_u16(&self.cx) {
-            return value;
+            return self.context.new_bitcast(None, value, result_type);
         }
 
         // Fifth step.
@@ -877,7 +909,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         let value = left + right;
 
         if value_type.is_u32(&self.cx) {
-            return value;
+            return self.context.new_bitcast(None, value, result_type);
         }
 
         // Sixth step.
@@ -887,7 +919,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         let right = shifted & mask;
         let value = left + right;
 
-        value
+        self.context.new_bitcast(None, value, result_type)
     }
 
     // Algorithm from: https://blog.regehr.org/archives/1063