diff options
| author | Michael Woerister <michaelwoerister@posteo> | 2022-05-05 17:26:22 +0200 |
|---|---|---|
| committer | Michael Woerister <michaelwoerister@posteo> | 2022-05-18 12:19:01 +0200 |
| commit | 6411fef3aba5ba54a02b54b171b4e9bc83687ce9 (patch) | |
| tree | 9bd85eea74036c0f5b6dc32f074f4c72d105014e | |
| parent | 9e7b0ff2e11fba83c5d87cf871e6531d94edb2e5 (diff) | |
| download | rust-6411fef3aba5ba54a02b54b171b4e9bc83687ce9.tar.gz rust-6411fef3aba5ba54a02b54b171b4e9bc83687ce9.zip | |
Properly apply path prefix remapping paths emitted into debuginfo.
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs | 162 | ||||
| -rw-r--r-- | compiler/rustc_codegen_llvm/src/debuginfo/mod.rs | 4 | ||||
| -rw-r--r-- | compiler/rustc_metadata/src/rmeta/encoder.rs | 7 | ||||
| -rw-r--r-- | compiler/rustc_span/src/lib.rs | 6 | ||||
| -rw-r--r-- | compiler/rustc_span/src/source_map.rs | 2 | ||||
| -rw-r--r-- | src/bootstrap/dist.rs | 1 | ||||
| -rw-r--r-- | src/test/codegen/remap_path_prefix/main.rs | 2 | ||||
| -rw-r--r-- | src/test/run-make/remap-path-prefix-dwarf/Makefile | 77 | ||||
| -rw-r--r-- | src/test/run-make/remap-path-prefix-dwarf/src/quux.rs | 5 |
9 files changed, 193 insertions, 73 deletions
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs index f2cf3b1ef5c..97d3acb34ce 100644 --- a/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs +++ b/compiler/rustc_codegen_llvm/src/debuginfo/metadata.rs @@ -36,20 +36,21 @@ use rustc_middle::ty::subst::GenericArgKind; use rustc_middle::ty::{self, AdtKind, Instance, ParamEnv, Ty, TyCtxt, COMMON_VTABLE_ENTRIES}; use rustc_session::config::{self, DebugInfo}; use rustc_span::symbol::Symbol; +use rustc_span::FileName; use rustc_span::FileNameDisplayPreference; -use rustc_span::{self, SourceFile, SourceFileHash}; +use rustc_span::{self, SourceFile}; use rustc_target::abi::{Align, Size}; use smallvec::smallvec; use tracing::debug; use libc::{c_longlong, c_uint}; use std::borrow::Cow; -use std::collections::hash_map::Entry; use std::fmt::{self, Write}; use std::hash::{Hash, Hasher}; use std::iter; use std::path::{Path, PathBuf}; use std::ptr; +use tracing::instrument; impl PartialEq for llvm::Metadata { fn eq(&self, other: &Self) -> bool { @@ -527,78 +528,105 @@ fn hex_encode(data: &[u8]) -> String { } pub fn file_metadata<'ll>(cx: &CodegenCx<'ll, '_>, source_file: &SourceFile) -> &'ll DIFile { - debug!("file_metadata: file_name: {:?}", source_file.name); - - let hash = Some(&source_file.src_hash); - let file_name = Some(source_file.name.prefer_remapped().to_string()); - let directory = if source_file.is_real_file() && !source_file.is_imported() { - Some( - cx.sess() - .opts - .working_dir - .to_string_lossy(FileNameDisplayPreference::Remapped) - .to_string(), - ) - } else { - // If the path comes from an upstream crate we assume it has been made - // independent of the compiler's working directory one way or another. - None - }; - file_metadata_raw(cx, file_name, directory, hash) -} - -pub fn unknown_file_metadata<'ll>(cx: &CodegenCx<'ll, '_>) -> &'ll DIFile { - file_metadata_raw(cx, None, None, None) -} - -fn file_metadata_raw<'ll>( - cx: &CodegenCx<'ll, '_>, - file_name: Option<String>, - directory: Option<String>, - hash: Option<&SourceFileHash>, -) -> &'ll DIFile { - let key = (file_name, directory); - - match debug_context(cx).created_files.borrow_mut().entry(key) { - Entry::Occupied(o) => o.get(), - Entry::Vacant(v) => { - let (file_name, directory) = v.key(); - debug!("file_metadata: file_name: {:?}, directory: {:?}", file_name, directory); - - let file_name = file_name.as_deref().unwrap_or("<unknown>"); - let directory = directory.as_deref().unwrap_or(""); - - let (hash_kind, hash_value) = match hash { - Some(hash) => { - let kind = match hash.kind { - rustc_span::SourceFileHashAlgorithm::Md5 => llvm::ChecksumKind::MD5, - rustc_span::SourceFileHashAlgorithm::Sha1 => llvm::ChecksumKind::SHA1, - rustc_span::SourceFileHashAlgorithm::Sha256 => llvm::ChecksumKind::SHA256, - }; - (kind, hex_encode(hash.hash_bytes())) + let cache_key = Some((source_file.name_hash, source_file.src_hash)); + return debug_context(cx) + .created_files + .borrow_mut() + .entry(cache_key) + .or_insert_with(|| alloc_new_file_metadata(cx, source_file)); + + #[instrument(skip(cx, source_file), level = "debug")] + fn alloc_new_file_metadata<'ll>( + cx: &CodegenCx<'ll, '_>, + source_file: &SourceFile, + ) -> &'ll DIFile { + debug!(?source_file.name); + + let (directory, file_name) = match &source_file.name { + FileName::Real(filename) => { + let working_directory = &cx.sess().opts.working_dir; + debug!(?working_directory); + + let filename = cx + .sess() + .source_map() + .path_mapping() + .to_embeddable_absolute_path(filename.clone(), working_directory); + + // Construct the absolute path of the file + let abs_path = filename.remapped_path_if_available(); + debug!(?abs_path); + + if let Ok(rel_path) = + abs_path.strip_prefix(working_directory.remapped_path_if_available()) + { + // If the compiler's working directory (which also is the DW_AT_comp_dir of + // the compilation unit) is a prefix of the path we are about to emit, then + // only emit the part relative to the working directory. + // Because of path remapping we sometimes see strange things here: `abs_path` + // might actually look like a relative path + // (e.g. `<crate-name-and-version>/src/lib.rs`), so if we emit it without + // taking the working directory into account, downstream tooling will + // interpret it as `<working-directory>/<crate-name-and-version>/src/lib.rs`, + // which makes no sense. Usually in such cases the working directory will also + // be remapped to `<crate-name-and-version>` or some other prefix of the path + // we are remapping, so we end up with + // `<crate-name-and-version>/<crate-name-and-version>/src/lib.rs`. + // By moving the working directory portion into the `directory` part of the + // DIFile, we allow LLVM to emit just the relative path for DWARF, while + // still emitting the correct absolute path for CodeView. + ( + working_directory.to_string_lossy(FileNameDisplayPreference::Remapped), + rel_path.to_string_lossy().into_owned(), + ) + } else { + ("".into(), abs_path.to_string_lossy().into_owned()) } - None => (llvm::ChecksumKind::None, String::new()), - }; + } + other => ("".into(), other.prefer_remapped().to_string_lossy().into_owned()), + }; - let file_metadata = unsafe { - llvm::LLVMRustDIBuilderCreateFile( - DIB(cx), - file_name.as_ptr().cast(), - file_name.len(), - directory.as_ptr().cast(), - directory.len(), - hash_kind, - hash_value.as_ptr().cast(), - hash_value.len(), - ) - }; + let hash_kind = match source_file.src_hash.kind { + rustc_span::SourceFileHashAlgorithm::Md5 => llvm::ChecksumKind::MD5, + rustc_span::SourceFileHashAlgorithm::Sha1 => llvm::ChecksumKind::SHA1, + rustc_span::SourceFileHashAlgorithm::Sha256 => llvm::ChecksumKind::SHA256, + }; + let hash_value = hex_encode(source_file.src_hash.hash_bytes()); - v.insert(file_metadata); - file_metadata + unsafe { + llvm::LLVMRustDIBuilderCreateFile( + DIB(cx), + file_name.as_ptr().cast(), + file_name.len(), + directory.as_ptr().cast(), + directory.len(), + hash_kind, + hash_value.as_ptr().cast(), + hash_value.len(), + ) } } } +pub fn unknown_file_metadata<'ll>(cx: &CodegenCx<'ll, '_>) -> &'ll DIFile { + debug_context(cx).created_files.borrow_mut().entry(None).or_insert_with(|| unsafe { + let file_name = "<unknown>"; + let directory = ""; + let hash_value = ""; + + llvm::LLVMRustDIBuilderCreateFile( + DIB(cx), + file_name.as_ptr().cast(), + file_name.len(), + directory.as_ptr().cast(), + directory.len(), + llvm::ChecksumKind::None, + hash_value.as_ptr().cast(), + hash_value.len(), + ) + }) +} + trait MsvcBasicName { fn msvc_basic_name(self) -> &'static str; } diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs index 6a164557a47..0910e7c94ea 100644 --- a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs +++ b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs @@ -31,7 +31,7 @@ use rustc_middle::ty::{self, Instance, ParamEnv, Ty, TypeFoldable}; use rustc_session::config::{self, DebugInfo}; use rustc_session::Session; use rustc_span::symbol::Symbol; -use rustc_span::{self, BytePos, Pos, SourceFile, SourceFileAndLine, Span}; +use rustc_span::{self, BytePos, Pos, SourceFile, SourceFileAndLine, SourceFileHash, Span}; use rustc_target::abi::Size; use libc::c_uint; @@ -61,7 +61,7 @@ pub struct CodegenUnitDebugContext<'ll, 'tcx> { llcontext: &'ll llvm::Context, llmod: &'ll llvm::Module, builder: &'ll mut DIBuilder<'ll>, - created_files: RefCell<FxHashMap<(Option<String>, Option<String>), &'ll DIFile>>, + created_files: RefCell<FxHashMap<Option<(u128, SourceFileHash)>, &'ll DIFile>>, type_map: metadata::TypeMap<'ll, 'tcx>, namespace_map: RefCell<DefIdMap<&'ll DIScope>>, diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index a382209e206..086f1bd94b6 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -500,6 +500,13 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { (!source_file.is_imported() || self.is_proc_macro) }) .map(|(_, source_file)| { + // At export time we expand all source file paths to absolute paths because + // downstream compilation sessions can have a different compiler working + // directory, so relative paths from this or any other upstream crate + // won't be valid anymore. + // + // At this point we also erase the actual on-disk path and only keep + // the remapped version -- as is necessary for reproducible builds. match source_file.name { FileName::Real(ref original_file_name) => { let adapted_file_name = diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index 7357cebf62e..1f4578c08a3 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -335,8 +335,8 @@ impl fmt::Display for FileNameDisplay<'_> { } } -impl FileNameDisplay<'_> { - pub fn to_string_lossy(&self) -> Cow<'_, str> { +impl<'a> FileNameDisplay<'a> { + pub fn to_string_lossy(&self) -> Cow<'a, str> { match self.inner { FileName::Real(ref inner) => inner.to_string_lossy(self.display_pref), _ => Cow::from(format!("{}", self)), @@ -1153,7 +1153,7 @@ impl FromStr for SourceFileHashAlgorithm { } /// The hash of the on-disk source file used for debug info. -#[derive(Copy, Clone, PartialEq, Eq, Debug)] +#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)] #[derive(HashStable_Generic, Encodable, Decodable)] pub struct SourceFileHash { pub kind: SourceFileHashAlgorithm, diff --git a/compiler/rustc_span/src/source_map.rs b/compiler/rustc_span/src/source_map.rs index 44aa9f72809..020ae3ad0c7 100644 --- a/compiler/rustc_span/src/source_map.rs +++ b/compiler/rustc_span/src/source_map.rs @@ -1099,6 +1099,8 @@ impl FilePathMapping { /// the path was affected by the mapping. pub fn map_prefix(&self, path: PathBuf) -> (PathBuf, bool) { if path.as_os_str().is_empty() { + // Exit early if the path is empty and therefore there's nothing to remap. + // This is mostly to reduce spam for `RUSTC_LOG=[remap_path_prefix]`. return (path, false); } diff --git a/src/bootstrap/dist.rs b/src/bootstrap/dist.rs index 6181a611ec3..16727f4398d 100644 --- a/src/bootstrap/dist.rs +++ b/src/bootstrap/dist.rs @@ -2047,6 +2047,7 @@ impl Step for RustDev { "llvm-cov", "llvm-dwp", "llvm-nm", + "llvm-dwarfdump", ] { tarball.add_file(src_bindir.join(exe(bin, target)), "bin", 0o755); } diff --git a/src/test/codegen/remap_path_prefix/main.rs b/src/test/codegen/remap_path_prefix/main.rs index b13d576295c..381f11ff1ef 100644 --- a/src/test/codegen/remap_path_prefix/main.rs +++ b/src/test/codegen/remap_path_prefix/main.rs @@ -22,7 +22,7 @@ fn main() { } // Here we check that local debuginfo is mapped correctly. -// CHECK: !DIFile(filename: "/the/src/remap_path_prefix/main.rs", directory: "/the/cwd" +// CHECK: !DIFile(filename: "/the/src/remap_path_prefix/main.rs", directory: "" // And here that debuginfo from other crates are expanded to absolute paths. // CHECK: !DIFile(filename: "/the/aux-src/remap_path_prefix_aux.rs", directory: "" diff --git a/src/test/run-make/remap-path-prefix-dwarf/Makefile b/src/test/run-make/remap-path-prefix-dwarf/Makefile new file mode 100644 index 00000000000..561a343d60b --- /dev/null +++ b/src/test/run-make/remap-path-prefix-dwarf/Makefile @@ -0,0 +1,77 @@ +# This test makes sure that --remap-path-prefix has the expected effects on paths in debuginfo. +# It tests several cases, each of them has a detailed description attached to it. + +# ignore-windows + +SRC_DIR := $(abspath .) +SRC_DIR_PARENT := $(abspath ..) + +-include ../../run-make-fulldeps/tools.mk + +all: \ + abs_input_outside_working_dir \ + rel_input_remap_working_dir \ + rel_input_remap_working_dir_parent \ + rel_input_remap_working_dir_child \ + abs_input_inside_working_dir \ + abs_input_outside_working_dir + +# The compiler is called with an *ABSOLUTE PATH* as input, and that absolute path *is* within +# the working directory of the compiler. We are remapping the path that contains `src`. +abs_input_inside_working_dir: + # We explicitly switch to a directory that *is* a prefix of the directory our + # source code is contained in. + cd $(SRC_DIR) && $(RUSTC) $(SRC_DIR)/src/quux.rs -o "$(TMPDIR)/abs_input_inside_working_dir.rlib" -Cdebuginfo=2 --remap-path-prefix $(SRC_DIR)=REMAPPED + # We expect the path to the main source file to be remapped. + "$(LLVM_BIN_DIR)"/llvm-dwarfdump $(TMPDIR)/abs_input_inside_working_dir.rlib | $(CGREP) "REMAPPED/src/quux.rs" + # No weird duplication of remapped components (see #78479) + "$(LLVM_BIN_DIR)"/llvm-dwarfdump $(TMPDIR)/abs_input_inside_working_dir.rlib | $(CGREP) -v "REMAPPED/REMAPPED" + +# The compiler is called with an *ABSOLUTE PATH* as input, and that absolute path is *not* within +# the working directory of the compiler. We are remapping both the path that contains `src` and +# the working directory to the same thing. This setup corresponds to a workaround that is needed +# when trying to remap everything to something that looks like a local path. +# Relative paths are interpreted as relative to the compiler's working directory (e.g. in +# debuginfo). If we also remap the working directory, the compiler strip it from other paths so +# that the final outcome is the desired one again. +abs_input_outside_working_dir: + # We explicitly switch to a directory that is *not* a prefix of the directory our + # source code is contained in. + cd $(TMPDIR) && $(RUSTC) $(SRC_DIR)/src/quux.rs -o "$(TMPDIR)/abs_input_outside_working_dir.rlib" -Cdebuginfo=2 --remap-path-prefix $(SRC_DIR)=REMAPPED --remap-path-prefix $(TMPDIR)=REMAPPED + "$(LLVM_BIN_DIR)"/llvm-dwarfdump $(TMPDIR)/abs_input_outside_working_dir.rlib | $(CGREP) "REMAPPED/src/quux.rs" + # No weird duplication of remapped components (see #78479) + "$(LLVM_BIN_DIR)"/llvm-dwarfdump $(TMPDIR)/abs_input_outside_working_dir.rlib | $(CGREP) -v "REMAPPED/REMAPPED" + +# The compiler is called with a *RELATIVE PATH* as input. We are remapping the working directory of +# the compiler, which naturally is an implicit prefix of our relative input path. Debuginfo will +# expand the relative path to an absolute path and we expect the working directory to be remapped +# in that expansion. +rel_input_remap_working_dir: + cd $(SRC_DIR) && $(RUSTC) src/quux.rs -o "$(TMPDIR)/rel_input_remap_working_dir.rlib" -Cdebuginfo=2 --remap-path-prefix "$(SRC_DIR)=REMAPPED" + "$(LLVM_BIN_DIR)"/llvm-dwarfdump "$(TMPDIR)/rel_input_remap_working_dir.rlib" | $(CGREP) "REMAPPED/src/quux.rs" + # No weird duplication of remapped components (see #78479) + "$(LLVM_BIN_DIR)"/llvm-dwarfdump "$(TMPDIR)/rel_input_remap_working_dir.rlib" | $(CGREP) -v "REMAPPED/REMAPPED" + +# The compiler is called with a *RELATIVE PATH* as input. We are remapping a *SUB-DIRECTORY* of the +# compiler's working directory. This test makes sure that that directory is remapped even though it +# won't actually show up in this form in the compiler's SourceMap and instead is only constructed +# on demand during debuginfo generation. +rel_input_remap_working_dir_child: + cd $(SRC_DIR) && $(RUSTC) src/quux.rs -o "$(TMPDIR)/rel_input_remap_working_dir_child.rlib" -Cdebuginfo=2 --remap-path-prefix "$(SRC_DIR)/src=REMAPPED" + # We expect `src/quux.rs` to have been remapped to `REMAPPED/quux.rs`. + "$(LLVM_BIN_DIR)"/llvm-dwarfdump "$(TMPDIR)/rel_input_remap_working_dir_child.rlib" | $(CGREP) "REMAPPED/quux.rs" + # We don't want to find the path that we just remapped anywhere in the DWARF + "$(LLVM_BIN_DIR)"/llvm-dwarfdump "$(TMPDIR)/rel_input_remap_working_dir_child.rlib" | $(CGREP) -v "$(SRC_DIR)/src" + # No weird duplication of remapped components (see #78479) + "$(LLVM_BIN_DIR)"/llvm-dwarfdump "$(TMPDIR)/rel_input_remap_working_dir_child.rlib" | $(CGREP) -v "REMAPPED/REMAPPED" + +# The compiler is called with a *RELATIVE PATH* as input. We are remapping a *PARENT DIRECTORY* of +# the compiler's working directory. +rel_input_remap_working_dir_parent: + cd $(SRC_DIR) && $(RUSTC) src/quux.rs -o "$(TMPDIR)/rel_input_remap_working_dir_parent.rlib" -Cdebuginfo=2 --remap-path-prefix "$(SRC_DIR_PARENT)=REMAPPED" + # We expect `src/quux.rs` to have been remapped to `REMAPPED/remap-path-prefix-dwarf/src/quux.rs`. + "$(LLVM_BIN_DIR)"/llvm-dwarfdump "$(TMPDIR)/rel_input_remap_working_dir_parent.rlib" | $(CGREP) "REMAPPED/remap-path-prefix-dwarf/src/quux.rs" + # We don't want to find the path that we just remapped anywhere in the DWARF + "$(LLVM_BIN_DIR)"/llvm-dwarfdump "$(TMPDIR)/rel_input_remap_working_dir_parent.rlib" | $(CGREP) -v "$(SRC_DIR_PARENT)" + # No weird duplication of remapped components (see #78479) + "$(LLVM_BIN_DIR)"/llvm-dwarfdump "$(TMPDIR)/rel_input_remap_working_dir_parent.rlib" | $(CGREP) -v "REMAPPED/REMAPPED" diff --git a/src/test/run-make/remap-path-prefix-dwarf/src/quux.rs b/src/test/run-make/remap-path-prefix-dwarf/src/quux.rs new file mode 100644 index 00000000000..38d5ef6194c --- /dev/null +++ b/src/test/run-make/remap-path-prefix-dwarf/src/quux.rs @@ -0,0 +1,5 @@ +#![crate_type = "rlib"] + +pub fn foo() { + println!("foo"); +} |
