Diffstat (limited to 'compiler/rustc_codegen_llvm')
6 files changed, 191 insertions, 138 deletions
diff --git a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
index 71705ecb4d0..7cd4ee539d8 100644
--- a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
+++ b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
@@ -28,6 +28,113 @@ fn get_params(fnc: &Value) -> Vec<&Value> {
     }
 }
 
+fn match_args_from_caller_to_enzyme<'ll>(
+    cx: &SimpleCx<'ll>,
+    args: &mut Vec<&'ll llvm::Value>,
+    inputs: &[DiffActivity],
+    outer_args: &[&'ll llvm::Value],
+) {
+    debug!("matching autodiff arguments");
+    // We now handle the issue that Rust level arguments not always match the llvm-ir level
+    // arguments. A slice, `&[f32]`, for example, is represented as a pointer and a length on
+    // llvm-ir level. The number of activities matches the number of Rust level arguments, so we
+    // need to match those.
+    // FIXME(ZuseZ4): This logic is a bit more complicated than it should be, can we simplify it
+    // using iterators and peek()?
+    let mut outer_pos: usize = 0;
+    let mut activity_pos = 0;
+
+    let enzyme_const = cx.create_metadata("enzyme_const".to_string()).unwrap();
+    let enzyme_out = cx.create_metadata("enzyme_out".to_string()).unwrap();
+    let enzyme_dup = cx.create_metadata("enzyme_dup".to_string()).unwrap();
+    let enzyme_dupnoneed = cx.create_metadata("enzyme_dupnoneed".to_string()).unwrap();
+
+    while activity_pos < inputs.len() {
+        let diff_activity = inputs[activity_pos as usize];
+        // Duplicated arguments received a shadow argument, into which enzyme will write the
+        // gradient.
+        let (activity, duplicated): (&Metadata, bool) = match diff_activity {
+            DiffActivity::None => panic!("not a valid input activity"),
+            DiffActivity::Const => (enzyme_const, false),
+            DiffActivity::Active => (enzyme_out, false),
+            DiffActivity::ActiveOnly => (enzyme_out, false),
+            DiffActivity::Dual => (enzyme_dup, true),
+            DiffActivity::DualOnly => (enzyme_dupnoneed, true),
+            DiffActivity::Duplicated => (enzyme_dup, true),
+            DiffActivity::DuplicatedOnly => (enzyme_dupnoneed, true),
+            DiffActivity::FakeActivitySize => (enzyme_const, false),
+        };
+        let outer_arg = outer_args[outer_pos];
+        args.push(cx.get_metadata_value(activity));
+        args.push(outer_arg);
+        if duplicated {
+            // We know that duplicated args by construction have a following argument,
+            // so this can not be out of bounds.
+            let next_outer_arg = outer_args[outer_pos + 1];
+            let next_outer_ty = cx.val_ty(next_outer_arg);
+            // FIXME(ZuseZ4): We should add support for Vec here too, but it's less urgent since
+            // vectors behind references (&Vec<T>) are already supported. Users can not pass a
+            // Vec by value for reverse mode, so this would only help forward mode autodiff.
+            let slice = {
+                if activity_pos + 1 >= inputs.len() {
+                    // If there is no arg following our ptr, it also can't be a slice,
+                    // since that would lead to a ptr, int pair.
+                    false
+                } else {
+                    let next_activity = inputs[activity_pos + 1];
+                    // We analyze the MIR types and add this dummy activity if we visit a slice.
+                    next_activity == DiffActivity::FakeActivitySize
+                }
+            };
+            if slice {
+                // A duplicated slice will have the following two outer_fn arguments:
+                // (..., ptr1, int1, ptr2, int2, ...). We add the following llvm-ir to our __enzyme call:
+                // (..., metadata! enzyme_dup, ptr, ptr, int1, ...).
+                // FIXME(ZuseZ4): We will upstream a safety check later which asserts that
+                // int2 >= int1, which means the shadow vector is large enough to store the gradient.
+                assert!(unsafe {
+                    llvm::LLVMRustGetTypeKind(next_outer_ty) == llvm::TypeKind::Integer
+                });
+                let next_outer_arg2 = outer_args[outer_pos + 2];
+                let next_outer_ty2 = cx.val_ty(next_outer_arg2);
+                assert!(unsafe {
+                    llvm::LLVMRustGetTypeKind(next_outer_ty2) == llvm::TypeKind::Pointer
+                });
+                let next_outer_arg3 = outer_args[outer_pos + 3];
+                let next_outer_ty3 = cx.val_ty(next_outer_arg3);
+                assert!(unsafe {
+                    llvm::LLVMRustGetTypeKind(next_outer_ty3) == llvm::TypeKind::Integer
+                });
+                args.push(next_outer_arg2);
+                args.push(cx.get_metadata_value(enzyme_const));
+                args.push(next_outer_arg);
+                outer_pos += 4;
+                activity_pos += 2;
+            } else {
+                // A duplicated pointer will have the following two outer_fn arguments:
+                // (..., ptr, ptr, ...). We add the following llvm-ir to our __enzyme call:
+                // (..., metadata! enzyme_dup, ptr, ptr, ...).
+                if matches!(diff_activity, DiffActivity::Duplicated | DiffActivity::DuplicatedOnly)
+                {
+                    assert!(
+                        unsafe { llvm::LLVMRustGetTypeKind(next_outer_ty) }
+                            == llvm::TypeKind::Pointer
+                    );
+                }
+                // In the case of Dual we don't have assumptions, e.g. f32 would be valid.
+                args.push(next_outer_arg);
+                outer_pos += 2;
+                activity_pos += 1;
+            }
+        } else {
+            // We do not differentiate with resprect to this argument.
+            // We already added the metadata and argument above, so just increase the counters.
+            outer_pos += 1;
+            activity_pos += 1;
+        }
+    }
+}
+
 /// When differentiating `fn_to_diff`, take a `outer_fn` and generate another
 /// function with expected naming and calling conventions[^1] which will be
 /// discovered by the enzyme LLVM pass and its body populated with the differentiated
@@ -43,9 +150,6 @@ fn generate_enzyme_call<'ll>(
     outer_fn: &'ll Value,
     attrs: AutoDiffAttrs,
 ) {
-    let inputs = attrs.input_activity;
-    let output = attrs.ret_activity;
-
     // We have to pick the name depending on whether we want forward or reverse mode autodiff.
     let mut ad_name: String = match attrs.mode {
         DiffMode::Forward => "__enzyme_fwddiff",
@@ -132,111 +236,13 @@ fn generate_enzyme_call<'ll>(
         let mut args = Vec::with_capacity(num_args as usize + 1);
         args.push(fn_to_diff);
 
-        let enzyme_const = cx.create_metadata("enzyme_const".to_string()).unwrap();
-        let enzyme_out = cx.create_metadata("enzyme_out".to_string()).unwrap();
-        let enzyme_dup = cx.create_metadata("enzyme_dup".to_string()).unwrap();
-        let enzyme_dupnoneed = cx.create_metadata("enzyme_dupnoneed".to_string()).unwrap();
         let enzyme_primal_ret = cx.create_metadata("enzyme_primal_return".to_string()).unwrap();
-
-        match output {
-            DiffActivity::Dual => {
-                args.push(cx.get_metadata_value(enzyme_primal_ret));
-            }
-            DiffActivity::Active => {
-                args.push(cx.get_metadata_value(enzyme_primal_ret));
-            }
-            _ => {}
+        if matches!(attrs.ret_activity, DiffActivity::Dual | DiffActivity::Active) {
+            args.push(cx.get_metadata_value(enzyme_primal_ret));
         }
 
-        debug!("matching autodiff arguments");
-        // We now handle the issue that Rust level arguments not always match the llvm-ir level
-        // arguments. A slice, `&[f32]`, for example, is represented as a pointer and a length on
-        // llvm-ir level. The number of activities matches the number of Rust level arguments, so we
-        // need to match those.
-        // FIXME(ZuseZ4): This logic is a bit more complicated than it should be, can we simplify it
-        // using iterators and peek()?
-        let mut outer_pos: usize = 0;
-        let mut activity_pos = 0;
         let outer_args: Vec<&llvm::Value> = get_params(outer_fn);
-        while activity_pos < inputs.len() {
-            let diff_activity = inputs[activity_pos as usize];
-            // Duplicated arguments received a shadow argument, into which enzyme will write the
-            // gradient.
-            let (activity, duplicated): (&Metadata, bool) = match diff_activity {
-                DiffActivity::None => panic!("not a valid input activity"),
-                DiffActivity::Const => (enzyme_const, false),
-                DiffActivity::Active => (enzyme_out, false),
-                DiffActivity::ActiveOnly => (enzyme_out, false),
-                DiffActivity::Dual => (enzyme_dup, true),
-                DiffActivity::DualOnly => (enzyme_dupnoneed, true),
-                DiffActivity::Duplicated => (enzyme_dup, true),
-                DiffActivity::DuplicatedOnly => (enzyme_dupnoneed, true),
-                DiffActivity::FakeActivitySize => (enzyme_const, false),
-            };
-            let outer_arg = outer_args[outer_pos];
-            args.push(cx.get_metadata_value(activity));
-            args.push(outer_arg);
-            if duplicated {
-                // We know that duplicated args by construction have a following argument,
-                // so this can not be out of bounds.
-                let next_outer_arg = outer_args[outer_pos + 1];
-                let next_outer_ty = cx.val_ty(next_outer_arg);
-                // FIXME(ZuseZ4): We should add support for Vec here too, but it's less urgent since
-                // vectors behind references (&Vec<T>) are already supported. Users can not pass a
-                // Vec by value for reverse mode, so this would only help forward mode autodiff.
-                let slice = {
-                    if activity_pos + 1 >= inputs.len() {
-                        // If there is no arg following our ptr, it also can't be a slice,
-                        // since that would lead to a ptr, int pair.
-                        false
-                    } else {
-                        let next_activity = inputs[activity_pos + 1];
-                        // We analyze the MIR types and add this dummy activity if we visit a slice.
-                        next_activity == DiffActivity::FakeActivitySize
-                    }
-                };
-                if slice {
-                    // A duplicated slice will have the following two outer_fn arguments:
-                    // (..., ptr1, int1, ptr2, int2, ...). We add the following llvm-ir to our __enzyme call:
-                    // (..., metadata! enzyme_dup, ptr, ptr, int1, ...).
-                    // FIXME(ZuseZ4): We will upstream a safety check later which asserts that
-                    // int2 >= int1, which means the shadow vector is large enough to store the gradient.
-                    assert!(llvm::LLVMRustGetTypeKind(next_outer_ty) == llvm::TypeKind::Integer);
-                    let next_outer_arg2 = outer_args[outer_pos + 2];
-                    let next_outer_ty2 = cx.val_ty(next_outer_arg2);
-                    assert!(llvm::LLVMRustGetTypeKind(next_outer_ty2) == llvm::TypeKind::Pointer);
-                    let next_outer_arg3 = outer_args[outer_pos + 3];
-                    let next_outer_ty3 = cx.val_ty(next_outer_arg3);
-                    assert!(llvm::LLVMRustGetTypeKind(next_outer_ty3) == llvm::TypeKind::Integer);
-                    args.push(next_outer_arg2);
-                    args.push(cx.get_metadata_value(enzyme_const));
-                    args.push(next_outer_arg);
-                    outer_pos += 4;
-                    activity_pos += 2;
-                } else {
-                    // A duplicated pointer will have the following two outer_fn arguments:
-                    // (..., ptr, ptr, ...). We add the following llvm-ir to our __enzyme call:
-                    // (..., metadata! enzyme_dup, ptr, ptr, ...).
-                    if matches!(
-                        diff_activity,
-                        DiffActivity::Duplicated | DiffActivity::DuplicatedOnly
-                    ) {
-                        assert!(
-                            llvm::LLVMRustGetTypeKind(next_outer_ty) == llvm::TypeKind::Pointer
-                        );
-                    }
-                    // In the case of Dual we don't have assumptions, e.g. f32 would be valid.
-                    args.push(next_outer_arg);
-                    outer_pos += 2;
-                    activity_pos += 1;
-                }
-            } else {
-                // We do not differentiate with resprect to this argument.
-                // We already added the metadata and argument above, so just increase the counters.
-                outer_pos += 1;
-                activity_pos += 1;
-            }
-        }
+        match_args_from_caller_to_enzyme(&cx, &mut args, &attrs.input_activity, &outer_args);
 
         let call = builder.call(enzyme_ty, ad_fn, &args, None);
 
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs
index b617f4d37f5..f6000e72840 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs
@@ -146,6 +146,7 @@ pub(crate) struct CoverageSpan {
 #[derive(Clone, Debug, Default)]
 pub(crate) struct Regions {
     pub(crate) code_regions: Vec<CodeRegion>,
+    pub(crate) expansion_regions: Vec<ExpansionRegion>,
     pub(crate) branch_regions: Vec<BranchRegion>,
     pub(crate) mcdc_branch_regions: Vec<MCDCBranchRegion>,
     pub(crate) mcdc_decision_regions: Vec<MCDCDecisionRegion>,
@@ -154,10 +155,16 @@ impl Regions {
     /// Returns true if none of this structure's tables contain any regions.
     pub(crate) fn has_no_regions(&self) -> bool {
-        let Self { code_regions, branch_regions, mcdc_branch_regions, mcdc_decision_regions } =
-            self;
+        let Self {
+            code_regions,
+            expansion_regions,
+            branch_regions,
+            mcdc_branch_regions,
+            mcdc_decision_regions,
+        } = self;
 
         code_regions.is_empty()
+            && expansion_regions.is_empty()
             && branch_regions.is_empty()
             && mcdc_branch_regions.is_empty()
             && mcdc_decision_regions.is_empty()
@@ -172,6 +179,14 @@ pub(crate) struct CodeRegion {
     pub(crate) counter: Counter,
 }
 
+/// Must match the layout of `LLVMRustCoverageExpansionRegion`.
+#[derive(Clone, Debug)]
+#[repr(C)]
+pub(crate) struct ExpansionRegion {
+    pub(crate) cov_span: CoverageSpan,
+    pub(crate) expanded_file_id: u32,
+}
+
 /// Must match the layout of `LLVMRustCoverageBranchRegion`.
 #[derive(Clone, Debug)]
 #[repr(C)]
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs
index 2cd7fa3225a..907d6d41a1f 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/llvm_cov.rs
@@ -63,8 +63,18 @@ pub(crate) fn write_function_mappings_to_buffer(
     expressions: &[ffi::CounterExpression],
     regions: &ffi::Regions,
 ) -> Vec<u8> {
-    let ffi::Regions { code_regions, branch_regions, mcdc_branch_regions, mcdc_decision_regions } =
-        regions;
+    let ffi::Regions {
+        code_regions,
+        expansion_regions,
+        branch_regions,
+        mcdc_branch_regions,
+        mcdc_decision_regions,
+    } = regions;
+
+    // SAFETY:
+    // - All types are FFI-compatible and have matching representations in Rust/C++.
+    // - For pointer/length pairs, the pointer and length come from the same vector or slice.
+    // - C++ code does not retain any pointers after the call returns.
     llvm::build_byte_buffer(|buffer| unsafe {
         llvm::LLVMRustCoverageWriteFunctionMappingsToBuffer(
             virtual_file_mapping.as_ptr(),
@@ -73,6 +83,8 @@ pub(crate) fn write_function_mappings_to_buffer(
             expressions.len(),
             code_regions.as_ptr(),
             code_regions.len(),
+            expansion_regions.as_ptr(),
+            expansion_regions.len(),
             branch_regions.as_ptr(),
             branch_regions.len(),
             mcdc_branch_regions.as_ptr(),
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
index 5b487bc1a8b..048e1988c32 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
@@ -120,12 +120,22 @@ fn fill_region_tables<'tcx>(
     // Associate that global file ID with a local file ID for this function.
     let local_file_id = covfun.virtual_file_mapping.local_id_for_global(global_file_id);
 
-    let ffi::Regions { code_regions, branch_regions, mcdc_branch_regions, mcdc_decision_regions } =
-        &mut covfun.regions;
-
-    let make_cov_span =
-        |span: Span| spans::make_coverage_span(local_file_id, source_map, &source_file, span);
+    // In rare cases, _all_ of a function's spans are discarded, and coverage
+    // codegen needs to handle that gracefully to avoid #133606.
+    // It's hard for tests to trigger this organically, so instead we set
+    // `-Zcoverage-options=discard-all-spans-in-codegen` to force it to occur.
     let discard_all = tcx.sess.coverage_discard_all_spans_in_codegen();
+    let make_coords = |span: Span| {
+        if discard_all { None } else { spans::make_coords(source_map, &source_file, span) }
+    };
+
+    let ffi::Regions {
+        code_regions,
+        expansion_regions: _, // FIXME(Zalathar): Fill out support for expansion regions
+        branch_regions,
+        mcdc_branch_regions,
+        mcdc_decision_regions,
+    } = &mut covfun.regions;
 
     // For each counter/region pair in this function+file, convert it to a
     // form suitable for FFI.
@@ -140,17 +150,8 @@ fn fill_region_tables<'tcx>(
             ffi::Counter::from_term(term)
         };
 
-        // Convert the `Span` into coordinates that we can pass to LLVM, or
-        // discard the span if conversion fails. In rare, cases _all_ of a
-        // function's spans are discarded, and the rest of coverage codegen
-        // needs to handle that gracefully to avoid a repeat of #133606.
-        // We don't have a good test case for triggering that organically, so
-        // instead we set `-Zcoverage-options=discard-all-spans-in-codegen`
-        // to force it to occur.
-        let Some(cov_span) = make_cov_span(span) else { continue };
-        if discard_all {
-            continue;
-        }
+        let Some(coords) = make_coords(span) else { continue };
+        let cov_span = coords.make_coverage_span(local_file_id);
 
         match *kind {
             MappingKind::Code { bcb } => {
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs
index 3193be31ada..39a59560c9d 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/spans.rs
@@ -5,22 +5,40 @@ use tracing::debug;
 use crate::coverageinfo::ffi;
 use crate::coverageinfo::mapgen::LocalFileId;
 
+/// Line and byte-column coordinates of a source code span within some file.
+/// The file itself must be tracked separately.
+#[derive(Clone, Copy, Debug)]
+pub(crate) struct Coords {
+    /// 1-based starting line of the source code span.
+    pub(crate) start_line: u32,
+    /// 1-based starting column (in bytes) of the source code span.
+    pub(crate) start_col: u32,
+    /// 1-based ending line of the source code span.
+    pub(crate) end_line: u32,
+    /// 1-based ending column (in bytes) of the source code span. High bit must be unset.
+    pub(crate) end_col: u32,
+}
+
+impl Coords {
+    /// Attaches a local file ID to these coordinates to produce an `ffi::CoverageSpan`.
+    pub(crate) fn make_coverage_span(&self, local_file_id: LocalFileId) -> ffi::CoverageSpan {
+        let &Self { start_line, start_col, end_line, end_col } = self;
+        let file_id = local_file_id.as_u32();
+        ffi::CoverageSpan { file_id, start_line, start_col, end_line, end_col }
+    }
+}
+
 /// Converts the span into its start line and column, and end line and column.
 ///
 /// Line numbers and column numbers are 1-based. Unlike most column numbers emitted by
 /// the compiler, these column numbers are denoted in **bytes**, because that's what
 /// LLVM's `llvm-cov` tool expects to see in coverage maps.
 ///
-/// Returns `None` if the conversion failed for some reason. This shouldn't happen,
+/// Returns `None` if the conversion failed for some reason. This should be uncommon,
 /// but it's hard to rule out entirely (especially in the presence of complex macros
 /// or other expansions), and if it does happen then skipping a span or function is
 /// better than an ICE or `llvm-cov` failure that the user might have no way to avoid.
-pub(crate) fn make_coverage_span(
-    file_id: LocalFileId,
-    source_map: &SourceMap,
-    file: &SourceFile,
-    span: Span,
-) -> Option<ffi::CoverageSpan> {
+pub(crate) fn make_coords(source_map: &SourceMap, file: &SourceFile, span: Span) -> Option<Coords> {
     let span = ensure_non_empty_span(source_map, span)?;
 
     let lo = span.lo();
@@ -44,8 +62,7 @@ pub(crate) fn make_coverage_span(
     start_line = source_map.doctest_offset_line(&file.name, start_line);
     end_line = source_map.doctest_offset_line(&file.name, end_line);
 
-    check_coverage_span(ffi::CoverageSpan {
-        file_id: file_id.as_u32(),
+    check_coords(Coords {
         start_line: start_line as u32,
         start_col: start_col as u32,
         end_line: end_line as u32,
@@ -80,8 +97,8 @@ fn ensure_non_empty_span(source_map: &SourceMap, span: Span) -> Option<Span> {
 /// it will immediately exit with a fatal error. To prevent that from happening,
 /// discard regions that are improperly ordered, or might be interpreted in a
 /// way that makes them improperly ordered.
-fn check_coverage_span(cov_span: ffi::CoverageSpan) -> Option<ffi::CoverageSpan> {
-    let ffi::CoverageSpan { file_id: _, start_line, start_col, end_line, end_col } = cov_span;
+fn check_coords(coords: Coords) -> Option<Coords> {
+    let Coords { start_line, start_col, end_line, end_col } = coords;
 
     // Line/column coordinates are supposed to be 1-based. If we ever emit
     // coordinates of 0, `llvm-cov` might misinterpret them.
@@ -94,17 +111,17 @@ fn check_coverage_span(cov_span: ffi::CoverageSpan) -> Option<ffi::CoverageSpan>
     let is_ordered = (start_line, start_col) <= (end_line, end_col);
 
     if all_nonzero && end_col_has_high_bit_unset && is_ordered {
-        Some(cov_span)
+        Some(coords)
     } else {
         debug!(
-            ?cov_span,
+            ?coords,
             ?all_nonzero,
             ?end_col_has_high_bit_unset,
             ?is_ordered,
             "Skipping source region that would be misinterpreted or rejected by LLVM"
         );
         // If this happens in a debug build, ICE to make it easier to notice.
-        debug_assert!(false, "Improper source region: {cov_span:?}");
+        debug_assert!(false, "Improper source region: {coords:?}");
         None
     }
 }
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
index 39087a4d6f4..83efb3ea660 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -2019,6 +2019,8 @@ unsafe extern "C" {
             NumExpressions: size_t,
             CodeRegions: *const crate::coverageinfo::ffi::CodeRegion,
             NumCodeRegions: size_t,
+            ExpansionRegions: *const crate::coverageinfo::ffi::ExpansionRegion,
+            NumExpansionRegions: size_t,
             BranchRegions: *const crate::coverageinfo::ffi::BranchRegion,
             NumBranchRegions: size_t,
             MCDCBranchRegions: *const crate::coverageinfo::ffi::MCDCBranchRegion,
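
The autodiff hunk above moves caller-to-Enzyme argument matching into `match_args_from_caller_to_enzyme`: each Rust-level activity is paired with one or more LLVM-level arguments, and a duplicated slice, which is lowered to (ptr1, len1, ptr2, len2), is emitted as (enzyme_dup, ptr1, ptr2, enzyme_const, len1). The standalone sketch below models only that interleaving; the `Activity` enum, `build_enzyme_args` helper, and string "arguments" are simplified stand-ins for the real `DiffActivity` values and `llvm::Value`s, not the compiler's implementation.

#[derive(Clone, Copy, PartialEq)]
enum Activity {
    Const,
    Duplicated,
    /// Dummy activity standing in for the length half of a slice argument.
    FakeActivitySize,
}

/// Builds the flat `__enzyme_*` argument list from one activity per Rust-level
/// argument and one string per already-lowered LLVM-level argument.
fn build_enzyme_args(activities: &[Activity], outer: &[&str]) -> Vec<String> {
    let mut args = Vec::new();
    let (mut o, mut a) = (0, 0);
    while a < activities.len() {
        match activities[a] {
            Activity::Const => {
                args.extend(["enzyme_const".to_string(), outer[o].to_string()]);
                o += 1;
                a += 1;
            }
            Activity::Duplicated => {
                // A trailing FakeActivitySize marks a slice: (ptr1, len1, ptr2, len2).
                let is_slice = activities.get(a + 1) == Some(&Activity::FakeActivitySize);
                args.extend(["enzyme_dup".to_string(), outer[o].to_string()]);
                if is_slice {
                    // Shadow pointer first, then the primal length marked constant.
                    args.push(outer[o + 2].to_string());
                    args.extend(["enzyme_const".to_string(), outer[o + 1].to_string()]);
                    o += 4;
                    a += 2;
                } else {
                    // Plain duplicated pointer: primal pointer followed by its shadow.
                    args.push(outer[o + 1].to_string());
                    o += 2;
                    a += 1;
                }
            }
            // In this simplified model a FakeActivitySize is always consumed by
            // the preceding Duplicated slice case.
            Activity::FakeActivitySize => unreachable!(),
        }
    }
    args
}

fn main() {
    // One duplicated slice argument: ptr+len plus a shadow ptr+len at the LLVM level.
    let args = build_enzyme_args(
        &[Activity::Duplicated, Activity::FakeActivitySize],
        &["ptr1", "len1", "ptr2", "len2"],
    );
    assert_eq!(args, ["enzyme_dup", "ptr1", "ptr2", "enzyme_const", "len1"]);
    println!("{args:?}");
}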

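In the spans.rs change above, span conversion now produces a file-independent `Coords` value, and `Coords::make_coverage_span` attaches the local file ID later at the FFI boundary; the validity rules themselves are unchanged. Below is a minimal standalone sketch of those three rules, assuming a simplified `Coords` outside the compiler (the real `check_coords` also logs and `debug_assert!`s when it rejects a span):

#[derive(Clone, Copy, Debug)]
struct Coords {
    start_line: u32,
    start_col: u32,
    end_line: u32,
    end_col: u32,
}

/// Discards coordinates that llvm-cov could misinterpret or reject: all four
/// values must be 1-based (nonzero), the high bit of `end_col` must be unset
/// (it has special meaning in LLVM's coverage format), and the start position
/// must not come after the end position.
fn check_coords(coords: Coords) -> Option<Coords> {
    let Coords { start_line, start_col, end_line, end_col } = coords;
    let all_nonzero = [start_line, start_col, end_line, end_col].iter().all(|&x| x != 0);
    let end_col_has_high_bit_unset = (end_col & (1 << 31)) == 0;
    let is_ordered = (start_line, start_col) <= (end_line, end_col);
    (all_nonzero && end_col_has_high_bit_unset && is_ordered).then_some(coords)
}

fn main() {
    // A span from 3:5 to 3:9 is accepted; a zero column or a reversed span is not.
    assert!(check_coords(Coords { start_line: 3, start_col: 5, end_line: 3, end_col: 9 }).is_some());
    assert!(check_coords(Coords { start_line: 3, start_col: 0, end_line: 3, end_col: 9 }).is_none());
    assert!(check_coords(Coords { start_line: 4, start_col: 1, end_line: 3, end_col: 9 }).is_none());
    println!("all checks passed");
}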