about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMichael Woerister <michaelwoerister@posteo>2022-05-05 17:26:22 +0200
committerMichael Woerister <michaelwoerister@posteo>2022-05-18 12:19:01 +0200
commit6411fef3aba5ba54a02b54b171b4e9bc83687ce9 (patch)
tree9bd85eea74036c0f5b6dc32f074f4c72d105014e
parent9e7b0ff2e11fba83c5d87cf871e6531d94edb2e5 (diff)
downloadrust-6411fef3aba5ba54a02b54b171b4e9bc83687ce9.tar.gz
rust-6411fef3aba5ba54a02b54b171b4e9bc83687ce9.zip
Properly apply path prefix remapping paths emitted into debuginfo.
-rw-r--r--compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs162
-rw-r--r--compiler/rustc_codegen_llvm/src/debuginfo/mod.rs4
-rw-r--r--compiler/rustc_metadata/src/rmeta/encoder.rs7
-rw-r--r--compiler/rustc_span/src/lib.rs6
-rw-r--r--compiler/rustc_span/src/source_map.rs2
-rw-r--r--src/bootstrap/dist.rs1
-rw-r--r--src/test/codegen/remap_path_prefix/main.rs2
-rw-r--r--src/test/run-make/remap-path-prefix-dwarf/Makefile77
-rw-r--r--src/test/run-make/remap-path-prefix-dwarf/src/quux.rs5
9 files changed, 193 insertions, 73 deletions
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
index f2cf3b1ef5c..97d3acb34ce 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs
@@ -36,20 +36,21 @@ use rustc_middle::ty::subst::GenericArgKind;
 use rustc_middle::ty::{self, AdtKind, Instance, ParamEnv, Ty, TyCtxt, COMMON_VTABLE_ENTRIES};
 use rustc_session::config::{self, DebugInfo};
 use rustc_span::symbol::Symbol;
+use rustc_span::FileName;
 use rustc_span::FileNameDisplayPreference;
-use rustc_span::{self, SourceFile, SourceFileHash};
+use rustc_span::{self, SourceFile};
 use rustc_target::abi::{Align, Size};
 use smallvec::smallvec;
 use tracing::debug;
 
 use libc::{c_longlong, c_uint};
 use std::borrow::Cow;
-use std::collections::hash_map::Entry;
 use std::fmt::{self, Write};
 use std::hash::{Hash, Hasher};
 use std::iter;
 use std::path::{Path, PathBuf};
 use std::ptr;
+use tracing::instrument;
 
 impl PartialEq for llvm::Metadata {
     fn eq(&self, other: &Self) -> bool {
@@ -527,78 +528,105 @@ fn hex_encode(data: &[u8]) -> String {
 }
 
 pub fn file_metadata<'ll>(cx: &CodegenCx<'ll, '_>, source_file: &SourceFile) -> &'ll DIFile {
-    debug!("file_metadata: file_name: {:?}", source_file.name);
-
-    let hash = Some(&source_file.src_hash);
-    let file_name = Some(source_file.name.prefer_remapped().to_string());
-    let directory = if source_file.is_real_file() && !source_file.is_imported() {
-        Some(
-            cx.sess()
-                .opts
-                .working_dir
-                .to_string_lossy(FileNameDisplayPreference::Remapped)
-                .to_string(),
-        )
-    } else {
-        // If the path comes from an upstream crate we assume it has been made
-        // independent of the compiler's working directory one way or another.
-        None
-    };
-    file_metadata_raw(cx, file_name, directory, hash)
-}
-
-pub fn unknown_file_metadata<'ll>(cx: &CodegenCx<'ll, '_>) -> &'ll DIFile {
-    file_metadata_raw(cx, None, None, None)
-}
-
-fn file_metadata_raw<'ll>(
-    cx: &CodegenCx<'ll, '_>,
-    file_name: Option<String>,
-    directory: Option<String>,
-    hash: Option<&SourceFileHash>,
-) -> &'ll DIFile {
-    let key = (file_name, directory);
-
-    match debug_context(cx).created_files.borrow_mut().entry(key) {
-        Entry::Occupied(o) => o.get(),
-        Entry::Vacant(v) => {
-            let (file_name, directory) = v.key();
-            debug!("file_metadata: file_name: {:?}, directory: {:?}", file_name, directory);
-
-            let file_name = file_name.as_deref().unwrap_or("<unknown>");
-            let directory = directory.as_deref().unwrap_or("");
-
-            let (hash_kind, hash_value) = match hash {
-                Some(hash) => {
-                    let kind = match hash.kind {
-                        rustc_span::SourceFileHashAlgorithm::Md5 => llvm::ChecksumKind::MD5,
-                        rustc_span::SourceFileHashAlgorithm::Sha1 => llvm::ChecksumKind::SHA1,
-                        rustc_span::SourceFileHashAlgorithm::Sha256 => llvm::ChecksumKind::SHA256,
-                    };
-                    (kind, hex_encode(hash.hash_bytes()))
+    let cache_key = Some((source_file.name_hash, source_file.src_hash));
+    return debug_context(cx)
+        .created_files
+        .borrow_mut()
+        .entry(cache_key)
+        .or_insert_with(|| alloc_new_file_metadata(cx, source_file));
+
+    #[instrument(skip(cx, source_file), level = "debug")]
+    fn alloc_new_file_metadata<'ll>(
+        cx: &CodegenCx<'ll, '_>,
+        source_file: &SourceFile,
+    ) -> &'ll DIFile {
+        debug!(?source_file.name);
+
+        let (directory, file_name) = match &source_file.name {
+            FileName::Real(filename) => {
+                let working_directory = &cx.sess().opts.working_dir;
+                debug!(?working_directory);
+
+                let filename = cx
+                    .sess()
+                    .source_map()
+                    .path_mapping()
+                    .to_embeddable_absolute_path(filename.clone(), working_directory);
+
+                // Construct the absolute path of the file
+                let abs_path = filename.remapped_path_if_available();
+                debug!(?abs_path);
+
+                if let Ok(rel_path) =
+                    abs_path.strip_prefix(working_directory.remapped_path_if_available())
+                {
+                    // If the compiler's working directory (which also is the DW_AT_comp_dir of
+                    // the compilation unit) is a prefix of the path we are about to emit, then
+                    // only emit the part relative to the working directory.
+                    // Because of path remapping we sometimes see strange things here: `abs_path`
+                    // might actually look like a relative path
+                    // (e.g. `<crate-name-and-version>/src/lib.rs`), so if we emit it without
+                    // taking the working directory into account, downstream tooling will
+                    // interpret it as `<working-directory>/<crate-name-and-version>/src/lib.rs`,
+                    // which makes no sense. Usually in such cases the working directory will also
+                    // be remapped to `<crate-name-and-version>` or some other prefix of the path
+                    // we are remapping, so we end up with
+                    // `<crate-name-and-version>/<crate-name-and-version>/src/lib.rs`.
+                    // By moving the working directory portion into the `directory` part of the
+                    // DIFile, we allow LLVM to emit just the relative path for DWARF, while
+                    // still emitting the correct absolute path for CodeView.
+                    (
+                        working_directory.to_string_lossy(FileNameDisplayPreference::Remapped),
+                        rel_path.to_string_lossy().into_owned(),
+                    )
+                } else {
+                    ("".into(), abs_path.to_string_lossy().into_owned())
                 }
-                None => (llvm::ChecksumKind::None, String::new()),
-            };
+            }
+            other => ("".into(), other.prefer_remapped().to_string_lossy().into_owned()),
+        };
 
-            let file_metadata = unsafe {
-                llvm::LLVMRustDIBuilderCreateFile(
-                    DIB(cx),
-                    file_name.as_ptr().cast(),
-                    file_name.len(),
-                    directory.as_ptr().cast(),
-                    directory.len(),
-                    hash_kind,
-                    hash_value.as_ptr().cast(),
-                    hash_value.len(),
-                )
-            };
+        let hash_kind = match source_file.src_hash.kind {
+            rustc_span::SourceFileHashAlgorithm::Md5 => llvm::ChecksumKind::MD5,
+            rustc_span::SourceFileHashAlgorithm::Sha1 => llvm::ChecksumKind::SHA1,
+            rustc_span::SourceFileHashAlgorithm::Sha256 => llvm::ChecksumKind::SHA256,
+        };
+        let hash_value = hex_encode(source_file.src_hash.hash_bytes());
 
-            v.insert(file_metadata);
-            file_metadata
+        unsafe {
+            llvm::LLVMRustDIBuilderCreateFile(
+                DIB(cx),
+                file_name.as_ptr().cast(),
+                file_name.len(),
+                directory.as_ptr().cast(),
+                directory.len(),
+                hash_kind,
+                hash_value.as_ptr().cast(),
+                hash_value.len(),
+            )
         }
     }
 }
 
+pub fn unknown_file_metadata<'ll>(cx: &CodegenCx<'ll, '_>) -> &'ll DIFile {
+    debug_context(cx).created_files.borrow_mut().entry(None).or_insert_with(|| unsafe {
+        let file_name = "<unknown>";
+        let directory = "";
+        let hash_value = "";
+
+        llvm::LLVMRustDIBuilderCreateFile(
+            DIB(cx),
+            file_name.as_ptr().cast(),
+            file_name.len(),
+            directory.as_ptr().cast(),
+            directory.len(),
+            llvm::ChecksumKind::None,
+            hash_value.as_ptr().cast(),
+            hash_value.len(),
+        )
+    })
+}
+
 trait MsvcBasicName {
     fn msvc_basic_name(self) -> &'static str;
 }
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
index 6a164557a47..0910e7c94ea 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
@@ -31,7 +31,7 @@ use rustc_middle::ty::{self, Instance, ParamEnv, Ty, TypeFoldable};
 use rustc_session::config::{self, DebugInfo};
 use rustc_session::Session;
 use rustc_span::symbol::Symbol;
-use rustc_span::{self, BytePos, Pos, SourceFile, SourceFileAndLine, Span};
+use rustc_span::{self, BytePos, Pos, SourceFile, SourceFileAndLine, SourceFileHash, Span};
 use rustc_target::abi::Size;
 
 use libc::c_uint;
@@ -61,7 +61,7 @@ pub struct CodegenUnitDebugContext<'ll, 'tcx> {
     llcontext: &'ll llvm::Context,
     llmod: &'ll llvm::Module,
     builder: &'ll mut DIBuilder<'ll>,
-    created_files: RefCell<FxHashMap<(Option<String>, Option<String>), &'ll DIFile>>,
+    created_files: RefCell<FxHashMap<Option<(u128, SourceFileHash)>, &'ll DIFile>>,
 
     type_map: metadata::TypeMap<'ll, 'tcx>,
     namespace_map: RefCell<DefIdMap<&'ll DIScope>>,
diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs
index a382209e206..086f1bd94b6 100644
--- a/compiler/rustc_metadata/src/rmeta/encoder.rs
+++ b/compiler/rustc_metadata/src/rmeta/encoder.rs
@@ -500,6 +500,13 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
                 (!source_file.is_imported() || self.is_proc_macro)
             })
             .map(|(_, source_file)| {
+                // At export time we expand all source file paths to absolute paths because
+                // downstream compilation sessions can have a different compiler working
+                // directory, so relative paths from this or any other upstream crate
+                // won't be valid anymore.
+                //
+                // At this point we also erase the actual on-disk path and only keep
+                // the remapped version -- as is necessary for reproducible builds.
                 match source_file.name {
                     FileName::Real(ref original_file_name) => {
                         let adapted_file_name =
diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs
index 7357cebf62e..1f4578c08a3 100644
--- a/compiler/rustc_span/src/lib.rs
+++ b/compiler/rustc_span/src/lib.rs
@@ -335,8 +335,8 @@ impl fmt::Display for FileNameDisplay<'_> {
     }
 }
 
-impl FileNameDisplay<'_> {
-    pub fn to_string_lossy(&self) -> Cow<'_, str> {
+impl<'a> FileNameDisplay<'a> {
+    pub fn to_string_lossy(&self) -> Cow<'a, str> {
         match self.inner {
             FileName::Real(ref inner) => inner.to_string_lossy(self.display_pref),
             _ => Cow::from(format!("{}", self)),
@@ -1153,7 +1153,7 @@ impl FromStr for SourceFileHashAlgorithm {
 }
 
 /// The hash of the on-disk source file used for debug info.
-#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
 #[derive(HashStable_Generic, Encodable, Decodable)]
 pub struct SourceFileHash {
     pub kind: SourceFileHashAlgorithm,
diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs
index 44aa9f72809..020ae3ad0c7 100644
--- a/compiler/rustc_span/src/source_map.rs
+++ b/compiler/rustc_span/src/source_map.rs
@@ -1099,6 +1099,8 @@ impl FilePathMapping {
     /// the path was affected by the mapping.
     pub fn map_prefix(&self, path: PathBuf) -> (PathBuf, bool) {
         if path.as_os_str().is_empty() {
+            // Exit early if the path is empty and therefore there's nothing to remap.
+            // This is mostly to reduce spam for `RUSTC_LOG=[remap_path_prefix]`.
             return (path, false);
         }
 
diff --git a/src/bootstrap/dist.rs b/src/bootstrap/dist.rs
index 6181a611ec3..16727f4398d 100644
--- a/src/bootstrap/dist.rs
+++ b/src/bootstrap/dist.rs
@@ -2047,6 +2047,7 @@ impl Step for RustDev {
             "llvm-cov",
             "llvm-dwp",
             "llvm-nm",
+            "llvm-dwarfdump",
         ] {
             tarball.add_file(src_bindir.join(exe(bin, target)), "bin", 0o755);
         }
diff --git a/src/test/codegen/remap_path_prefix/main.rs b/src/test/codegen/remap_path_prefix/main.rs
index b13d576295c..381f11ff1ef 100644
--- a/src/test/codegen/remap_path_prefix/main.rs
+++ b/src/test/codegen/remap_path_prefix/main.rs
@@ -22,7 +22,7 @@ fn main() {
 }
 
 // Here we check that local debuginfo is mapped correctly.
-// CHECK: !DIFile(filename: "/the/src/remap_path_prefix/main.rs", directory: "/the/cwd"
+// CHECK: !DIFile(filename: "/the/src/remap_path_prefix/main.rs", directory: ""
 
 // And here that debuginfo from other crates are expanded to absolute paths.
 // CHECK: !DIFile(filename: "/the/aux-src/remap_path_prefix_aux.rs", directory: ""
diff --git a/src/test/run-make/remap-path-prefix-dwarf/Makefile b/src/test/run-make/remap-path-prefix-dwarf/Makefile
new file mode 100644
index 00000000000..561a343d60b
--- /dev/null
+++ b/src/test/run-make/remap-path-prefix-dwarf/Makefile
@@ -0,0 +1,77 @@
+# This test makes sure that --remap-path-prefix has the expected effects on paths in debuginfo.
+# It tests several cases, each of them has a detailed description attached to it.
+
+# ignore-windows
+
+SRC_DIR := $(abspath .)
+SRC_DIR_PARENT := $(abspath ..)
+
+-include ../../run-make-fulldeps/tools.mk
+
+all: \
+  abs_input_outside_working_dir \
+  rel_input_remap_working_dir \
+  rel_input_remap_working_dir_parent \
+  rel_input_remap_working_dir_child \
+  abs_input_inside_working_dir \
+  abs_input_outside_working_dir
+
+# The compiler is called with an *ABSOLUTE PATH* as input, and that absolute path *is* within
+# the working directory of the compiler. We are remapping the path that contains `src`.
+abs_input_inside_working_dir:
+	# We explicitly switch to a directory that *is* a prefix of the directory our
+	# source code is contained in.
+	cd $(SRC_DIR) && $(RUSTC) $(SRC_DIR)/src/quux.rs -o "$(TMPDIR)/abs_input_inside_working_dir.rlib" -Cdebuginfo=2 --remap-path-prefix $(SRC_DIR)=REMAPPED
+	# We expect the path to the main source file to be remapped.
+	"$(LLVM_BIN_DIR)"/llvm-dwarfdump $(TMPDIR)/abs_input_inside_working_dir.rlib | $(CGREP) "REMAPPED/src/quux.rs"
+	# No weird duplication of remapped components (see #78479)
+	"$(LLVM_BIN_DIR)"/llvm-dwarfdump $(TMPDIR)/abs_input_inside_working_dir.rlib | $(CGREP) -v "REMAPPED/REMAPPED"
+
+# The compiler is called with an *ABSOLUTE PATH* as input, and that absolute path is *not* within
+# the working directory of the compiler. We are remapping both the path that contains `src` and
+# the working directory to the same thing. This setup corresponds to a workaround that is needed
+# when trying to remap everything to something that looks like a local path.
+# Relative paths are interpreted as relative to the compiler's working directory (e.g. in
+# debuginfo). If we also remap the working directory, the compiler strip it from other paths so
+# that the final outcome is the desired one again.
+abs_input_outside_working_dir:
+	# We explicitly switch to a directory that is *not* a prefix of the directory our
+	# source code is contained in.
+	cd $(TMPDIR) && $(RUSTC) $(SRC_DIR)/src/quux.rs -o "$(TMPDIR)/abs_input_outside_working_dir.rlib" -Cdebuginfo=2 --remap-path-prefix $(SRC_DIR)=REMAPPED --remap-path-prefix $(TMPDIR)=REMAPPED
+	"$(LLVM_BIN_DIR)"/llvm-dwarfdump $(TMPDIR)/abs_input_outside_working_dir.rlib | $(CGREP) "REMAPPED/src/quux.rs"
+	# No weird duplication of remapped components (see #78479)
+	"$(LLVM_BIN_DIR)"/llvm-dwarfdump $(TMPDIR)/abs_input_outside_working_dir.rlib | $(CGREP) -v "REMAPPED/REMAPPED"
+
+# The compiler is called with a *RELATIVE PATH* as input. We are remapping the working directory of
+# the compiler, which naturally is an implicit prefix of our relative input path. Debuginfo will
+# expand the relative path to an absolute path and we expect the working directory to be remapped
+# in that expansion.
+rel_input_remap_working_dir:
+	cd $(SRC_DIR) && $(RUSTC) src/quux.rs -o "$(TMPDIR)/rel_input_remap_working_dir.rlib" -Cdebuginfo=2 --remap-path-prefix "$(SRC_DIR)=REMAPPED"
+	"$(LLVM_BIN_DIR)"/llvm-dwarfdump "$(TMPDIR)/rel_input_remap_working_dir.rlib" | $(CGREP) "REMAPPED/src/quux.rs"
+	# No weird duplication of remapped components (see #78479)
+	"$(LLVM_BIN_DIR)"/llvm-dwarfdump "$(TMPDIR)/rel_input_remap_working_dir.rlib" | $(CGREP) -v "REMAPPED/REMAPPED"
+
+# The compiler is called with a *RELATIVE PATH* as input. We are remapping a *SUB-DIRECTORY* of the
+# compiler's working directory. This test makes sure that that directory is remapped even though it
+# won't actually show up in this form in the compiler's SourceMap and instead is only constructed
+# on demand during debuginfo generation.
+rel_input_remap_working_dir_child:
+	cd $(SRC_DIR) && $(RUSTC) src/quux.rs -o "$(TMPDIR)/rel_input_remap_working_dir_child.rlib" -Cdebuginfo=2 --remap-path-prefix "$(SRC_DIR)/src=REMAPPED"
+	# We expect `src/quux.rs` to have been remapped to `REMAPPED/quux.rs`.
+	"$(LLVM_BIN_DIR)"/llvm-dwarfdump "$(TMPDIR)/rel_input_remap_working_dir_child.rlib" | $(CGREP) "REMAPPED/quux.rs"
+	# We don't want to find the path that we just remapped anywhere in the DWARF
+	"$(LLVM_BIN_DIR)"/llvm-dwarfdump "$(TMPDIR)/rel_input_remap_working_dir_child.rlib" | $(CGREP) -v "$(SRC_DIR)/src"
+	# No weird duplication of remapped components (see #78479)
+	"$(LLVM_BIN_DIR)"/llvm-dwarfdump "$(TMPDIR)/rel_input_remap_working_dir_child.rlib" | $(CGREP) -v "REMAPPED/REMAPPED"
+
+# The compiler is called with a *RELATIVE PATH* as input. We are remapping a *PARENT DIRECTORY* of
+# the compiler's working directory.
+rel_input_remap_working_dir_parent:
+	cd $(SRC_DIR) && $(RUSTC) src/quux.rs -o "$(TMPDIR)/rel_input_remap_working_dir_parent.rlib" -Cdebuginfo=2 --remap-path-prefix "$(SRC_DIR_PARENT)=REMAPPED"
+	# We expect `src/quux.rs` to have been remapped to `REMAPPED/remap-path-prefix-dwarf/src/quux.rs`.
+	"$(LLVM_BIN_DIR)"/llvm-dwarfdump "$(TMPDIR)/rel_input_remap_working_dir_parent.rlib" | $(CGREP) "REMAPPED/remap-path-prefix-dwarf/src/quux.rs"
+	# We don't want to find the path that we just remapped anywhere in the DWARF
+	"$(LLVM_BIN_DIR)"/llvm-dwarfdump "$(TMPDIR)/rel_input_remap_working_dir_parent.rlib" | $(CGREP) -v "$(SRC_DIR_PARENT)"
+	# No weird duplication of remapped components (see #78479)
+	"$(LLVM_BIN_DIR)"/llvm-dwarfdump "$(TMPDIR)/rel_input_remap_working_dir_parent.rlib" | $(CGREP) -v "REMAPPED/REMAPPED"
diff --git a/src/test/run-make/remap-path-prefix-dwarf/src/quux.rs b/src/test/run-make/remap-path-prefix-dwarf/src/quux.rs
new file mode 100644
index 00000000000..38d5ef6194c
--- /dev/null
+++ b/src/test/run-make/remap-path-prefix-dwarf/src/quux.rs
@@ -0,0 +1,5 @@
+#![crate_type = "rlib"]
+
+pub fn foo() {
+    println!("foo");
+}