about summary refs log tree commit diff
diff options
context:
space:
mode:
author h1467792822 <1467792822@qq.com> 2023-12-05 12:42:57 +0800
committer h1467792822 <1467792822@qq.com> 2024-01-26 12:39:03 +0800
commit 6e53e66bd3b700b873ba93b30fac7c065954d46f (patch)
tree f74e2620e2de535b5b14cdea37fe4cb975f98e54
parent 7ffc697ce10f19447c0ce338428ae4b9bc0c041c (diff)
download rust-6e53e66bd3b700b873ba93b30fac7c065954d46f.tar.gz
rust-6e53e66bd3b700b873ba93b30fac7c065954d46f.zip
MCP #705: Provide the option `-Csymbol-mangling-version=hashed -Z unstable-options` to shorten symbol names by replacing them with a digest.
Enrich test cases
-rw-r--r-- compiler/rustc_session/src/config.rs 15
-rw-r--r-- compiler/rustc_session/src/options.rs 6
-rw-r--r-- compiler/rustc_symbol_mangling/src/hashed.rs 43
-rw-r--r-- compiler/rustc_symbol_mangling/src/lib.rs 4
-rw-r--r-- compiler/rustc_symbol_mangling/src/v0.rs 104
-rw-r--r-- tests/run-make/symbol-mangling-hashed/Makefile 48
-rw-r--r-- tests/run-make/symbol-mangling-hashed/a_dylib.rs 4
-rw-r--r-- tests/run-make/symbol-mangling-hashed/a_rlib.rs 5
-rw-r--r-- tests/run-make/symbol-mangling-hashed/b_bin.rs 9
-rw-r--r-- tests/run-make/symbol-mangling-hashed/b_dylib.rs 9
-rw-r--r-- tests/ui/symbol-mangling-version/bad-value.bad.stderr 2
-rw-r--r-- tests/ui/symbol-mangling-version/bad-value.blank.stderr 2
-rw-r--r-- tests/ui/symbol-mangling-version/bad-value.no-value.stderr 2
-rw-r--r-- tests/ui/symbol-mangling-version/unstable.hashed.stderr 2
-rw-r--r-- tests/ui/symbol-mangling-version/unstable.rs 5
15 files changed, 211 insertions, 49 deletions
diff --git a/compiler/rustc_session/src/config.rs b/compiler/rustc_session/src/config.rs
index e751ff13a34..2ec1a726cef 100644
--- a/compiler/rustc_session/src/config.rs
+++ b/compiler/rustc_session/src/config.rs
@@ -347,6 +347,7 @@ impl SwitchWithOptPath {
 pub enum SymbolManglingVersion {
     Legacy,
     V0,
+    Hashed,
 }
 
 #[derive(Clone, Copy, Debug, PartialEq, Hash)]
@@ -2692,6 +2693,7 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M
     match cg.symbol_mangling_version {
         // Stable values:
         None | Some(SymbolManglingVersion::V0) => {}
+
         // Unstable values:
         Some(SymbolManglingVersion::Legacy) => {
             if !unstable_opts.unstable_options {
@@ -2700,6 +2702,13 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M
                 );
             }
         }
+        Some(SymbolManglingVersion::Hashed) => {
+            if !unstable_opts.unstable_options {
+                early_dcx.early_fatal(
+                    "`-C symbol-mangling-version=hashed` requires `-Z unstable-options`",
+                );
+            }
+        }
     }
 
     // Check for unstable values of `-C instrument-coverage`.
@@ -2741,6 +2750,12 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M
                 );
             }
             Some(SymbolManglingVersion::V0) => {}
+            Some(SymbolManglingVersion::Hashed) => {
+                early_dcx.early_warn(
+                    "-C instrument-coverage requires symbol mangling version `v0`, \
+                    but `-C symbol-mangling-version=hashed` was specified",
+                );
+            }
         }
     }
 
diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs
index 486b6d4bf2e..d8d201d5f24 100644
--- a/compiler/rustc_session/src/options.rs
+++ b/compiler/rustc_session/src/options.rs
@@ -407,7 +407,8 @@ mod desc {
     pub const parse_switch_with_opt_path: &str =
         "an optional path to the profiling data output directory";
     pub const parse_merge_functions: &str = "one of: `disabled`, `trampolines`, or `aliases`";
-    pub const parse_symbol_mangling_version: &str = "either `legacy` or `v0` (RFC 2603)";
+    pub const parse_symbol_mangling_version: &str =
+        "one of: `legacy`, `v0` (RFC 2603), or `hashed`";
     pub const parse_src_file_hash: &str = "either `md5` or `sha1`";
     pub const parse_relocation_model: &str =
         "one of supported relocation models (`rustc --print relocation-models`)";
@@ -1180,6 +1181,7 @@ mod parse {
         *slot = match v {
             Some("legacy") => Some(SymbolManglingVersion::Legacy),
             Some("v0") => Some(SymbolManglingVersion::V0),
+            Some("hashed") => Some(SymbolManglingVersion::Hashed),
             _ => return false,
         };
         true
@@ -1504,7 +1506,7 @@ options! {
         "tell the linker which information to strip (`none` (default), `debuginfo` or `symbols`)"),
     symbol_mangling_version: Option<SymbolManglingVersion> = (None,
         parse_symbol_mangling_version, [TRACKED],
-        "which mangling version to use for symbol names ('legacy' (default) or 'v0')"),
+        "which mangling version to use for symbol names ('legacy' (default), 'v0', or 'hashed')"),
     target_cpu: Option<String> = (None, parse_opt_string, [TRACKED],
         "select target processor (`rustc --print target-cpus` for details)"),
     target_feature: String = (String::new(), parse_target_feature, [TRACKED],
diff --git a/compiler/rustc_symbol_mangling/src/hashed.rs b/compiler/rustc_symbol_mangling/src/hashed.rs
new file mode 100644
index 00000000000..d4cd6161ac0
--- /dev/null
+++ b/compiler/rustc_symbol_mangling/src/hashed.rs
@@ -0,0 +1,43 @@
+use crate::v0;
+use rustc_data_structures::stable_hasher::{Hash64, HashStable, StableHasher};
+use rustc_hir::def_id::CrateNum;
+use rustc_middle::ty::{Instance, TyCtxt};
+
+use std::fmt::Write;
+
+pub(super) fn mangle<'tcx>(
+    tcx: TyCtxt<'tcx>,
+    instance: Instance<'tcx>,
+    instantiating_crate: Option<CrateNum>,
+    full_mangling_name: impl FnOnce() -> String,
+) -> String {
+    // Instances of a generic function may be emitted into several downstream dylibs.
+    // If such a symbol still carried the name of the defining crate, a hash collision between
+    // the generic function and another symbol of that crate could not be detected at build
+    // time; the conflict would only surface at run time.
+    // Using the name of the instantiating crate instead of the defining crate's name
+    // eliminates the risk of that undetected hash collision.
+    let crate_num =
+        if let Some(krate) = instantiating_crate { krate } else { instance.def_id().krate };
+
+    let mut symbol = "_RNxC".to_string();
+    v0::push_ident(tcx.crate_name(crate_num).as_str(), &mut symbol);
+
+    let hash = tcx.with_stable_hashing_context(|mut hcx| {
+        let mut hasher = StableHasher::new();
+        full_mangling_name().hash_stable(&mut hcx, &mut hasher);
+        hasher.finish::<Hash64>().as_u64()
+    });
+
+    push_hash64(hash, &mut symbol);
+
+    symbol
+}
+
+// The hash is encoded in base 62. The trailing `_` terminator is dropped because it does
+// not help prevent hash collisions.
+fn push_hash64(hash: u64, output: &mut String) {
+    let hash = v0::encode_integer_62(hash);
+    let hash_len = hash.len();
+    let _ = write!(output, "{hash_len}H{}", &hash[..hash_len - 1]);
+}
diff --git a/compiler/rustc_symbol_mangling/src/lib.rs b/compiler/rustc_symbol_mangling/src/lib.rs
index 8c035ba948b..bf4ea0003ab 100644
--- a/compiler/rustc_symbol_mangling/src/lib.rs
+++ b/compiler/rustc_symbol_mangling/src/lib.rs
@@ -111,6 +111,7 @@ use rustc_middle::query::Providers;
 use rustc_middle::ty::{self, Instance, TyCtxt};
 use rustc_session::config::SymbolManglingVersion;
 
+mod hashed;
 mod legacy;
 mod v0;
 
@@ -265,6 +266,9 @@ fn compute_symbol_name<'tcx>(
     let symbol = match mangling_version {
         SymbolManglingVersion::Legacy => legacy::mangle(tcx, instance, instantiating_crate),
         SymbolManglingVersion::V0 => v0::mangle(tcx, instance, instantiating_crate),
+        SymbolManglingVersion::Hashed => hashed::mangle(tcx, instance, instantiating_crate, || {
+            v0::mangle(tcx, instance, instantiating_crate)
+        }),
     };
 
     debug_assert!(
diff --git a/compiler/rustc_symbol_mangling/src/v0.rs b/compiler/rustc_symbol_mangling/src/v0.rs
index e89a640767f..16ebda55a7a 100644
--- a/compiler/rustc_symbol_mangling/src/v0.rs
+++ b/compiler/rustc_symbol_mangling/src/v0.rs
@@ -116,10 +116,7 @@ impl<'tcx> SymbolMangler<'tcx> {
     /// * `x > 0` is encoded as `x - 1` in base 62, followed by `"_"`,
     ///   e.g. `1` becomes `"0_"`, `62` becomes `"Z_"`, etc.
     fn push_integer_62(&mut self, x: u64) {
-        if let Some(x) = x.checked_sub(1) {
-            base_n::push_str(x as u128, 62, &mut self.out);
-        }
-        self.push("_");
+        push_integer_62(x, &mut self.out)
     }
 
     /// Push a `tag`-prefixed base 62 integer, when larger than `0`, that is:
@@ -138,45 +135,7 @@ impl<'tcx> SymbolMangler<'tcx> {
     }
 
     fn push_ident(&mut self, ident: &str) {
-        let mut use_punycode = false;
-        for b in ident.bytes() {
-            match b {
-                b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' => {}
-                0x80..=0xff => use_punycode = true,
-                _ => bug!("symbol_names: bad byte {} in ident {:?}", b, ident),
-            }
-        }
-
-        let punycode_string;
-        let ident = if use_punycode {
-            self.push("u");
-
-            // FIXME(eddyb) we should probably roll our own punycode implementation.
-            let mut punycode_bytes = match punycode::encode(ident) {
-                Ok(s) => s.into_bytes(),
-                Err(()) => bug!("symbol_names: punycode encoding failed for ident {:?}", ident),
-            };
-
-            // Replace `-` with `_`.
-            if let Some(c) = punycode_bytes.iter_mut().rfind(|&&mut c| c == b'-') {
-                *c = b'_';
-            }
-
-            // FIXME(eddyb) avoid rechecking UTF-8 validity.
-            punycode_string = String::from_utf8(punycode_bytes).unwrap();
-            &punycode_string
-        } else {
-            ident
-        };
-
-        let _ = write!(self.out, "{}", ident.len());
-
-        // Write a separating `_` if necessary (leading digit or `_`).
-        if let Some('_' | '0'..='9') = ident.chars().next() {
-            self.push("_");
-        }
-
-        self.push(ident);
+        push_ident(ident, &mut self.out)
     }
 
     fn path_append_ns(
@@ -836,3 +795,62 @@ impl<'tcx> Printer<'tcx> for SymbolMangler<'tcx> {
         Ok(())
     }
 }
+/// Push a `_`-terminated base 62 integer, using the format
+/// specified in the RFC as `<base-62-number>`, that is:
+/// * `x = 0` is encoded as just the `"_"` terminator
+/// * `x > 0` is encoded as `x - 1` in base 62, followed by `"_"`,
+///   e.g. `1` becomes `"0_"`, `62` becomes `"Z_"`, etc.
+pub(crate) fn push_integer_62(x: u64, output: &mut String) {
+    if let Some(x) = x.checked_sub(1) {
+        base_n::push_str(x as u128, 62, output);
+    }
+    output.push('_');
+}
+
+pub(crate) fn encode_integer_62(x: u64) -> String {
+    let mut output = String::new();
+    push_integer_62(x, &mut output);
+    output
+}
+
+pub(crate) fn push_ident(ident: &str, output: &mut String) {
+    let mut use_punycode = false;
+    for b in ident.bytes() {
+        match b {
+            b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' => {}
+            0x80..=0xff => use_punycode = true,
+            _ => bug!("symbol_names: bad byte {} in ident {:?}", b, ident),
+        }
+    }
+
+    let punycode_string;
+    let ident = if use_punycode {
+        output.push('u');
+
+        // FIXME(eddyb) we should probably roll our own punycode implementation.
+        let mut punycode_bytes = match punycode::encode(ident) {
+            Ok(s) => s.into_bytes(),
+            Err(()) => bug!("symbol_names: punycode encoding failed for ident {:?}", ident),
+        };
+
+        // Replace `-` with `_`.
+        if let Some(c) = punycode_bytes.iter_mut().rfind(|&&mut c| c == b'-') {
+            *c = b'_';
+        }
+
+        // FIXME(eddyb) avoid rechecking UTF-8 validity.
+        punycode_string = String::from_utf8(punycode_bytes).unwrap();
+        &punycode_string
+    } else {
+        ident
+    };
+
+    let _ = write!(output, "{}", ident.len());
+
+    // Write a separating `_` if necessary (leading digit or `_`).
+    if let Some('_' | '0'..='9') = ident.chars().next() {
+        output.push('_');
+    }
+
+    output.push_str(ident);
+}
diff --git a/tests/run-make/symbol-mangling-hashed/Makefile b/tests/run-make/symbol-mangling-hashed/Makefile
new file mode 100644
index 00000000000..68894b2192a
--- /dev/null
+++ b/tests/run-make/symbol-mangling-hashed/Makefile
@@ -0,0 +1,48 @@
+include ../tools.mk
+
+# ignore-cross-compile
+# only-linux
+# only-x86_64
+
+NM=nm -D
+RLIB_NAME=liba_rlib.rlib
+DYLIB_NAME=liba_dylib.so
+SO_NAME=libb_dylib.so
+BIN_NAME=b_bin
+
+ifeq ($(UNAME),Darwin)
+NM=nm -gU
+RLIB_NAME=liba_rlib.rlib
+DYLIB_NAME=liba_dylib.dylib
+SO_NAME=libb_dylib.dylib
+BIN_NAME=b_bin
+endif
+
+ifdef IS_WINDOWS
+NM=nm -g
+RLIB_NAME=liba_rlib.dll.a
+DYLIB_NAME=liba_dylib.dll
+SO_NAME=libb_dylib.dll
+BIN_NAME=b_bin.exe
+endif
+
+all:
+	$(RUSTC) -C prefer-dynamic -Z unstable-options -C symbol-mangling-version=hashed -C metadata=foo a_dylib.rs
+	$(RUSTC) -C prefer-dynamic -Z unstable-options -C symbol-mangling-version=hashed -C metadata=bar a_rlib.rs
+	$(RUSTC) -C prefer-dynamic -L $(TMPDIR) b_dylib.rs
+	$(RUSTC) -C prefer-dynamic -L $(TMPDIR) b_bin.rs
+
+    # Check hashed symbol name
+
+	[ "$$($(NM) $(TMPDIR)/$(DYLIB_NAME) | grep -c hello)" -eq "0" ]
+	[ "$$($(NM) $(TMPDIR)/$(DYLIB_NAME) | grep _RNxC7a_dylib | grep -c ' T ')" -eq "1" ]
+
+	[ "$$($(NM) $(TMPDIR)/$(SO_NAME) | grep b_dylib | grep -c hello)" -eq "1" ]
+	[ "$$($(NM) $(TMPDIR)/$(SO_NAME) | grep _RNxC6a_rlib | grep -c ' T ')" -eq "1" ]
+	[ "$$($(NM) $(TMPDIR)/$(SO_NAME) | grep _RNxC7a_dylib | grep -c ' U ')" -eq "1" ]
+
+	[ "$$($(NM) $(TMPDIR)/$(BIN_NAME) | grep _RNxC6a_rlib | grep -c ' U ')" -eq "1" ]
+	[ "$$($(NM) $(TMPDIR)/$(BIN_NAME) | grep _RNxC7a_dylib | grep -c ' U ')" -eq "1" ]
+	[ "$$($(NM) $(TMPDIR)/$(BIN_NAME) | grep b_dylib | grep hello | grep -c ' U ')" -eq "1" ]
+
+	$(call RUN,$(BIN_NAME))
diff --git a/tests/run-make/symbol-mangling-hashed/a_dylib.rs b/tests/run-make/symbol-mangling-hashed/a_dylib.rs
new file mode 100644
index 00000000000..8aec8fd82a5
--- /dev/null
+++ b/tests/run-make/symbol-mangling-hashed/a_dylib.rs
@@ -0,0 +1,4 @@
+#![crate_type="dylib"]
+pub fn hello() {
+    println!("hello dylib");
+}
diff --git a/tests/run-make/symbol-mangling-hashed/a_rlib.rs b/tests/run-make/symbol-mangling-hashed/a_rlib.rs
new file mode 100644
index 00000000000..873c86c5d0b
--- /dev/null
+++ b/tests/run-make/symbol-mangling-hashed/a_rlib.rs
@@ -0,0 +1,5 @@
+#![crate_type="rlib"]
+
+pub fn hello() {
+    println!("hello rlib");
+}
diff --git a/tests/run-make/symbol-mangling-hashed/b_bin.rs b/tests/run-make/symbol-mangling-hashed/b_bin.rs
new file mode 100644
index 00000000000..bcc53c37e12
--- /dev/null
+++ b/tests/run-make/symbol-mangling-hashed/b_bin.rs
@@ -0,0 +1,9 @@
+extern crate a_rlib;
+extern crate a_dylib;
+extern crate b_dylib;
+
+fn main() {
+    a_rlib::hello();
+    a_dylib::hello();
+    b_dylib::hello();
+}
diff --git a/tests/run-make/symbol-mangling-hashed/b_dylib.rs b/tests/run-make/symbol-mangling-hashed/b_dylib.rs
new file mode 100644
index 00000000000..c26a04b39ec
--- /dev/null
+++ b/tests/run-make/symbol-mangling-hashed/b_dylib.rs
@@ -0,0 +1,9 @@
+#![crate_type="dylib"]
+
+extern crate a_rlib;
+extern crate a_dylib;
+
+pub fn hello() {
+    a_rlib::hello();
+    a_dylib::hello();
+}
diff --git a/tests/ui/symbol-mangling-version/bad-value.bad.stderr b/tests/ui/symbol-mangling-version/bad-value.bad.stderr
index c36c73c6069..a12e5e241c0 100644
--- a/tests/ui/symbol-mangling-version/bad-value.bad.stderr
+++ b/tests/ui/symbol-mangling-version/bad-value.bad.stderr
@@ -1,2 +1,2 @@
-error: incorrect value `bad-value` for codegen option `symbol-mangling-version` - either `legacy` or `v0` (RFC 2603) was expected
+error: incorrect value `bad-value` for codegen option `symbol-mangling-version` - one of: `legacy`, `v0` (RFC 2603), or `hashed` was expected
 
diff --git a/tests/ui/symbol-mangling-version/bad-value.blank.stderr b/tests/ui/symbol-mangling-version/bad-value.blank.stderr
index 0e70af5b8ff..95456587781 100644
--- a/tests/ui/symbol-mangling-version/bad-value.blank.stderr
+++ b/tests/ui/symbol-mangling-version/bad-value.blank.stderr
@@ -1,2 +1,2 @@
-error: incorrect value `` for codegen option `symbol-mangling-version` - either `legacy` or `v0` (RFC 2603) was expected
+error: incorrect value `` for codegen option `symbol-mangling-version` - one of: `legacy`, `v0` (RFC 2603), or `hashed` was expected
 
diff --git a/tests/ui/symbol-mangling-version/bad-value.no-value.stderr b/tests/ui/symbol-mangling-version/bad-value.no-value.stderr
index 77013b72b6c..325e47a281f 100644
--- a/tests/ui/symbol-mangling-version/bad-value.no-value.stderr
+++ b/tests/ui/symbol-mangling-version/bad-value.no-value.stderr
@@ -1,2 +1,2 @@
-error: codegen option `symbol-mangling-version` requires either `legacy` or `v0` (RFC 2603) (C symbol-mangling-version=<value>)
+error: codegen option `symbol-mangling-version` requires one of: `legacy`, `v0` (RFC 2603), or `hashed` (C symbol-mangling-version=<value>)
 
diff --git a/tests/ui/symbol-mangling-version/unstable.hashed.stderr b/tests/ui/symbol-mangling-version/unstable.hashed.stderr
new file mode 100644
index 00000000000..f2ae18290f2
--- /dev/null
+++ b/tests/ui/symbol-mangling-version/unstable.hashed.stderr
@@ -0,0 +1,2 @@
+error: `-C symbol-mangling-version=hashed` requires `-Z unstable-options`
+
diff --git a/tests/ui/symbol-mangling-version/unstable.rs b/tests/ui/symbol-mangling-version/unstable.rs
index df87a39cdfb..42750a64574 100644
--- a/tests/ui/symbol-mangling-version/unstable.rs
+++ b/tests/ui/symbol-mangling-version/unstable.rs
@@ -1,6 +1,9 @@
-// revisions: legacy legacy-ok
+// revisions: legacy legacy-ok hashed hashed-ok
 // [legacy] compile-flags: -Csymbol-mangling-version=legacy
 // [legacy-ok] check-pass
 // [legacy-ok] compile-flags: -Zunstable-options -Csymbol-mangling-version=legacy
+// [hashed] compile-flags: -Csymbol-mangling-version=hashed
+// [hashed-ok] check-pass
+// [hashed-ok] compile-flags: -Zunstable-options -Csymbol-mangling-version=hashed
 
 fn main() {}