about summary refs log tree commit diff
path: root/src/librustdoc/doctest/make.rs
diff options
context:
space:
mode:
author Guillaume Gomez <guillaume1.gomez@gmail.com> 2025-03-05 23:33:54 +0100
committer Guillaume Gomez <guillaume1.gomez@gmail.com> 2025-03-27 11:18:43 +0100
commit 1f1c630a58297d49e92b79cf072559924b4f1123 (patch)
tree 103820c4f74ded58d17745b67c7fb6fcfa2f4af2 /src/librustdoc/doctest/make.rs
parent 7d49ae9731555937177d01e9fa39dbf22eb60399 (diff)
download rust-1f1c630a58297d49e92b79cf072559924b4f1123.tar.gz
rust-1f1c630a58297d49e92b79cf072559924b4f1123.zip
Greatly simplify doctest parsing and information extraction
Diffstat (limited to 'src/librustdoc/doctest/make.rs')
-rw-r--r-- src/librustdoc/doctest/make.rs 459
1 files changed, 136 insertions, 323 deletions
diff --git a/src/librustdoc/doctest/make.rs b/src/librustdoc/doctest/make.rs
index 9935074877b..cb14608b35a 100644
--- a/src/librustdoc/doctest/make.rs
+++ b/src/librustdoc/doctest/make.rs
@@ -5,11 +5,10 @@ use std::fmt::{self, Write as _};
 use std::io;
 use std::sync::Arc;
 
-use rustc_ast as ast;
+use rustc_ast::{self as ast, HasAttrs};
+use rustc_errors::ColorConfig;
 use rustc_errors::emitter::stderr_destination;
-use rustc_errors::{ColorConfig, FatalError};
 use rustc_parse::new_parser_from_source_str;
-use rustc_parse::parser::attr::InnerAttrPolicy;
 use rustc_session::parse::ParseSess;
 use rustc_span::FileName;
 use rustc_span::edition::Edition;
@@ -21,6 +20,19 @@ use super::GlobalTestOptions;
 use crate::display::Joined as _;
 use crate::html::markdown::LangString;
 
+#[derive(Default)]
+struct ParseSourceInfo {
+    has_main_fn: bool,
+    found_extern_crate: bool,
+    supports_color: bool,
+    has_global_allocator: bool,
+    has_macro_def: bool,
+    everything_else: String,
+    crates: String,
+    crate_attrs: String,
+    maybe_crate_attrs: String,
+}
+
 /// This struct contains information about the doctest itself which is then used to generate
 /// doctest source code appropriately.
 pub(crate) struct DocTestBuilder {
@@ -53,8 +65,23 @@ impl DocTestBuilder {
                 !lang_str.compile_fail && !lang_str.test_harness && !lang_str.standalone_crate
             });
 
-        let Some(SourceInfo { crate_attrs, maybe_crate_attrs, crates, everything_else }) =
-            partition_source(source, edition)
+        let result = rustc_driver::catch_fatal_errors(|| {
+            rustc_span::create_session_if_not_set_then(edition, |_| {
+                parse_source(source, &crate_name)
+            })
+        });
+
+        let Ok(Ok(ParseSourceInfo {
+            has_main_fn,
+            found_extern_crate,
+            supports_color,
+            has_global_allocator,
+            has_macro_def,
+            everything_else,
+            crates,
+            crate_attrs,
+            maybe_crate_attrs,
+        })) = result
         else {
             return Self::invalid(
                 String::new(),
@@ -65,35 +92,13 @@ impl DocTestBuilder {
             );
         };
 
-        // Uses librustc_ast to parse the doctest and find if there's a main fn and the extern
-        // crate already is included.
-        let Ok((
-            ParseSourceInfo {
-                has_main_fn,
-                found_extern_crate,
-                supports_color,
-                has_global_allocator,
-                has_macro_def,
-                ..
-            },
-            failed_ast,
-        )) = check_for_main_and_extern_crate(
-            crate_name,
-            source,
-            &everything_else,
-            &crates,
-            edition,
-            can_merge_doctests,
-        )
-        else {
-            // If the parser panicked due to a fatal error, pass the test code through unchanged.
-            // The error will be reported during compilation.
-            return Self::invalid(crate_attrs, maybe_crate_attrs, crates, everything_else, test_id);
-        };
+        debug!("crate_attrs:\n{crate_attrs}{maybe_crate_attrs}");
+        debug!("crates:\n{crates}");
+        debug!("after:\n{everything_else}");
+
         // If the AST returned an error, we don't want this doctest to be merged with the
         // others. Same if it contains `#[feature]` or `#[no_std]`.
         let can_be_merged = can_merge_doctests
-            && !failed_ast
             && !has_global_allocator
             && crate_attrs.is_empty()
             // If this is a merged doctest and a defined macro uses `$crate`, then the path will
@@ -146,6 +151,7 @@ impl DocTestBuilder {
         if self.failed_ast {
             // If the AST failed to compile, no need to go generate a complete doctest, the error
             // will be better this way.
+            debug!("failed AST:\n{test_code}");
             return (test_code.to_string(), 0);
         }
         let mut line_offset = 0;
@@ -255,13 +261,6 @@ impl DocTestBuilder {
     }
 }
 
-#[derive(PartialEq, Eq, Debug)]
-enum ParsingResult {
-    Failed,
-    AstError,
-    Ok,
-}
-
 fn cancel_error_count(psess: &ParseSess) {
     // Reset errors so that they won't be reported as compiler bugs when dropping the
     // dcx. Any errors in the tests will be reported when the test file is compiled,
@@ -270,17 +269,20 @@ fn cancel_error_count(psess: &ParseSess) {
     psess.dcx().reset_err_count();
 }
 
-fn parse_source(
-    source: String,
-    info: &mut ParseSourceInfo,
-    crate_name: &Option<&str>,
-) -> ParsingResult {
+const DOCTEST_CODE_WRAPPER: &str = "fn f(){";
+
+fn parse_source(source: &str, crate_name: &Option<&str>) -> Result<ParseSourceInfo, ()> {
     use rustc_errors::DiagCtxt;
     use rustc_errors::emitter::{Emitter, HumanEmitter};
-    use rustc_parse::parser::ForceCollect;
+    // use rustc_parse::parser::ForceCollect;
     use rustc_span::source_map::FilePathMapping;
 
-    let filename = FileName::anon_source_code(&source);
+    let mut info =
+        ParseSourceInfo { found_extern_crate: crate_name.is_none(), ..Default::default() };
+
+    let wrapped_source = format!("{DOCTEST_CODE_WRAPPER}{source}\n}}");
+
+    let filename = FileName::anon_source_code(&wrapped_source);
 
     // Any errors in parsing should also appear when the doctest is compiled for real, so just
     // send all the errors that librustc_ast emits directly into a `Sink` instead of stderr.
@@ -299,15 +301,32 @@ fn parse_source(
     let dcx = DiagCtxt::new(Box::new(emitter)).disable_warnings();
     let psess = ParseSess::with_dcx(dcx, sm);
 
-    let mut parser = match new_parser_from_source_str(&psess, filename, source) {
+    let mut parser = match new_parser_from_source_str(&psess, filename, wrapped_source) {
         Ok(p) => p,
         Err(errs) => {
             errs.into_iter().for_each(|err| err.cancel());
             cancel_error_count(&psess);
-            return ParsingResult::Failed;
+            return Err(());
         }
     };
-    let mut parsing_result = ParsingResult::Ok;
+
+    fn push_to_s(
+        s: &mut String,
+        source: &str,
+        span: rustc_span::Span,
+        prev_span_hi: &mut Option<usize>,
+    ) {
+        let extra_len = DOCTEST_CODE_WRAPPER.len();
+        // We need to shift by the length of `DOCTEST_CODE_WRAPPER` because we prepended it to
+        // the source we provided to the parser.
+        let lo = prev_span_hi.unwrap_or(span.lo().0 as usize - extra_len);
+        let mut hi = span.hi().0 as usize - extra_len;
+        if hi > source.len() {
+            hi = source.len();
+        }
+        s.push_str(&source[lo..hi]);
+        *prev_span_hi = Some(hi);
+    }
 
     // Recurse through functions body. It is necessary because the doctest source code is
     // wrapped in a function to limit the number of AST errors. If we don't recurse into
@@ -325,6 +344,8 @@ fn parse_source(
         }
         match item.kind {
             ast::ItemKind::Fn(ref fn_item) if !info.has_main_fn => {
+                // We only push if it's the top item because otherwise, we would duplicate
+                // its content since the top-level item was already added.
                 if item.ident.name == sym::main && is_top_level {
                     info.has_main_fn = true;
                 }
@@ -334,7 +355,6 @@ fn parse_source(
                             ast::StmtKind::Item(ref item) => {
                                 check_item(item, info, crate_name, false)
                             }
-                            ast::StmtKind::MacCall(..) => info.found_macro = true,
                             _ => {}
                         }
                     }
@@ -350,295 +370,88 @@ fn parse_source(
                     };
                 }
             }
-            ast::ItemKind::MacCall(..) => info.found_macro = true,
-            ast::ItemKind::MacroDef(..) => info.has_macro_def = true,
-            _ => {}
-        }
-    }
-
-    loop {
-        match parser.parse_item(ForceCollect::No) {
-            Ok(Some(item)) => {
-                check_item(&item, info, crate_name, true);
-
-                if info.has_main_fn && info.found_extern_crate {
-                    break;
-                }
-            }
-            Ok(None) => break,
-            Err(e) => {
-                parsing_result = ParsingResult::AstError;
-                e.cancel();
-                break;
+            ast::ItemKind::MacroDef(..) => {
+                info.has_macro_def = true;
             }
+            _ => {}
         }
-
-        // The supplied item is only used for diagnostics,
-        // which are swallowed here anyway.
-        parser.maybe_consume_incorrect_semicolon(None);
-    }
-
-    cancel_error_count(&psess);
-    parsing_result
-}
-
-#[derive(Default)]
-struct ParseSourceInfo {
-    has_main_fn: bool,
-    found_extern_crate: bool,
-    found_macro: bool,
-    supports_color: bool,
-    has_global_allocator: bool,
-    has_macro_def: bool,
-}
-
-fn check_for_main_and_extern_crate(
-    crate_name: Option<&str>,
-    original_source_code: &str,
-    everything_else: &str,
-    crates: &str,
-    edition: Edition,
-    can_merge_doctests: bool,
-) -> Result<(ParseSourceInfo, bool), FatalError> {
-    let result = rustc_driver::catch_fatal_errors(|| {
-        rustc_span::create_session_if_not_set_then(edition, |_| {
-            let mut info =
-                ParseSourceInfo { found_extern_crate: crate_name.is_none(), ..Default::default() };
-
-            let mut parsing_result =
-                parse_source(format!("{crates}{everything_else}"), &mut info, &crate_name);
-            // No need to double-check this if the "merged doctests" feature isn't enabled (so
-            // before the 2024 edition).
-            if can_merge_doctests && parsing_result != ParsingResult::Ok {
-                // If we found an AST error, we want to ensure it's because of an expression being
-                // used outside of a function.
-                //
-                // To do so, we wrap in a function in order to make sure that the doctest AST is
-                // correct. For example, if your doctest is `foo::bar()`, if we don't wrap it in a
-                // block, it would emit an AST error, which would be problematic for us since we
-                // want to filter out such errors which aren't "real" errors.
-                //
-                // The end goal is to be able to merge as many doctests as possible as one for much
-                // faster doctests run time.
-                parsing_result = parse_source(
-                    format!("{crates}\nfn __doctest_wrap(){{{everything_else}\n}}"),
-                    &mut info,
-                    &crate_name,
-                );
-            }
-
-            (info, parsing_result)
-        })
-    });
-    let (mut info, parsing_result) = match result {
-        Err(..) | Ok((_, ParsingResult::Failed)) => return Err(FatalError),
-        Ok((info, parsing_result)) => (info, parsing_result),
-    };
-
-    // If a doctest's `fn main` is being masked by a wrapper macro, the parsing loop above won't
-    // see it. In that case, run the old text-based scan to see if they at least have a main
-    // function written inside a macro invocation. See
-    // https://github.com/rust-lang/rust/issues/56898
-    if info.found_macro
-        && !info.has_main_fn
-        && original_source_code
-            .lines()
-            .map(|line| {
-                let comment = line.find("//");
-                if let Some(comment_begins) = comment { &line[0..comment_begins] } else { line }
-            })
-            .any(|code| code.contains("fn main"))
-    {
-        info.has_main_fn = true;
     }
 
-    Ok((info, parsing_result != ParsingResult::Ok))
-}
-
-enum AttrKind {
-    CrateAttr,
-    Attr,
-}
-
-/// Returns `Some` if the attribute is complete and `Some(true)` if it is an attribute that can be
-/// placed at the crate root.
-fn check_if_attr_is_complete(source: &str, edition: Edition) -> Option<AttrKind> {
-    if source.is_empty() {
-        // Empty content so nothing to check in here...
-        return None;
-    }
+    let mut prev_span_hi = None;
     let not_crate_attrs = [sym::forbid, sym::allow, sym::warn, sym::deny];
+    let parsed = parser.parse_item(rustc_parse::parser::ForceCollect::No);
 
-    rustc_driver::catch_fatal_errors(|| {
-        rustc_span::create_session_if_not_set_then(edition, |_| {
-            use rustc_errors::DiagCtxt;
-            use rustc_errors::emitter::HumanEmitter;
-            use rustc_span::source_map::FilePathMapping;
-
-            let filename = FileName::anon_source_code(source);
-            // Any errors in parsing should also appear when the doctest is compiled for real, so just
-            // send all the errors that librustc_ast emits directly into a `Sink` instead of stderr.
-            let sm = Arc::new(SourceMap::new(FilePathMapping::empty()));
-            let fallback_bundle = rustc_errors::fallback_fluent_bundle(
-                rustc_driver::DEFAULT_LOCALE_RESOURCES.to_vec(),
-                false,
-            );
+    debug!("+++++> {parsed:#?}");
 
-            let emitter = HumanEmitter::new(Box::new(io::sink()), fallback_bundle);
-
-            let dcx = DiagCtxt::new(Box::new(emitter)).disable_warnings();
-            let psess = ParseSess::with_dcx(dcx, sm);
-            let mut parser = match new_parser_from_source_str(&psess, filename, source.to_owned()) {
-                Ok(p) => p,
-                Err(errs) => {
-                    errs.into_iter().for_each(|err| err.cancel());
-                    // If there is an unclosed delimiter, an error will be returned by the
-                    // tokentrees.
-                    return None;
-                }
-            };
-            // If a parsing error happened, it's very likely that the attribute is incomplete.
-            let ret = match parser.parse_attribute(InnerAttrPolicy::Permitted) {
-                Ok(attr) => {
-                    let attr_name = attr.name_or_empty();
-
-                    if not_crate_attrs.contains(&attr_name) {
-                        // There is one exception to these attributes:
-                        // `#![allow(internal_features)]`. If this attribute is used, we need to
-                        // consider it only as a crate-level attribute.
-                        if attr_name == sym::allow
-                            && let Some(list) = attr.meta_item_list()
-                            && list.iter().any(|sub_attr| {
-                                sub_attr.name_or_empty().as_str() == "internal_features"
-                            })
-                        {
-                            Some(AttrKind::CrateAttr)
-                        } else {
-                            Some(AttrKind::Attr)
-                        }
+    let result = match parsed {
+        Ok(Some(ref item))
+            if let ast::ItemKind::Fn(ref fn_item) = item.kind
+                && let Some(ref body) = fn_item.body =>
+        {
+            for attr in &item.attrs {
+                let attr_name = attr.name_or_empty();
+
+                if attr.style == ast::AttrStyle::Outer || not_crate_attrs.contains(&attr_name) {
+                    // There is one exception to these attributes:
+                    // `#![allow(internal_features)]`. If this attribute is used, we need to
+                    // consider it only as a crate-level attribute.
+                    if attr_name == sym::allow
+                        && let Some(list) = attr.meta_item_list()
+                        && list.iter().any(|sub_attr| {
+                            sub_attr.name_or_empty().as_str() == "internal_features"
+                        })
+                    {
+                        push_to_s(&mut info.crate_attrs, source, attr.span, &mut prev_span_hi);
                     } else {
-                        Some(AttrKind::CrateAttr)
+                        push_to_s(
+                            &mut info.maybe_crate_attrs,
+                            source,
+                            attr.span,
+                            &mut prev_span_hi,
+                        );
                     }
+                } else {
+                    push_to_s(&mut info.crate_attrs, source, attr.span, &mut prev_span_hi);
                 }
-                Err(e) => {
-                    e.cancel();
-                    None
+            }
+            for stmt in &body.stmts {
+                match stmt.kind {
+                    ast::StmtKind::Item(ref item) => check_item(&item, &mut info, crate_name, true),
+                    ast::StmtKind::Expr(ref expr) if matches!(expr.kind, ast::ExprKind::Err(_)) => {
+                        cancel_error_count(&psess);
+                        return Err(());
+                    }
+                    _ => {}
                 }
-            };
-            ret
-        })
-    })
-    .unwrap_or(None)
-}
 
-fn handle_attr(mod_attr_pending: &mut String, source_info: &mut SourceInfo, edition: Edition) {
-    if let Some(attr_kind) = check_if_attr_is_complete(mod_attr_pending, edition) {
-        let push_to = match attr_kind {
-            AttrKind::CrateAttr => &mut source_info.crate_attrs,
-            AttrKind::Attr => &mut source_info.maybe_crate_attrs,
-        };
-        push_to.push_str(mod_attr_pending);
-        push_to.push('\n');
-        // If it's complete, then we can clear the pending content.
-        mod_attr_pending.clear();
-    } else {
-        mod_attr_pending.push('\n');
-    }
-}
-
-#[derive(Default)]
-struct SourceInfo {
-    crate_attrs: String,
-    maybe_crate_attrs: String,
-    crates: String,
-    everything_else: String,
-}
-
-fn partition_source(s: &str, edition: Edition) -> Option<SourceInfo> {
-    #[derive(Copy, Clone, PartialEq)]
-    enum PartitionState {
-        Attrs,
-        Crates,
-        Other,
-    }
-    let mut source_info = SourceInfo::default();
-    let mut state = PartitionState::Attrs;
-    let mut mod_attr_pending = String::new();
-
-    for line in s.lines() {
-        let trimline = line.trim();
-
-        // FIXME(misdreavus): if a doc comment is placed on an extern crate statement, it will be
-        // shunted into "everything else"
-        match state {
-            PartitionState::Attrs => {
-                state = if trimline.starts_with("#![") {
-                    mod_attr_pending = line.to_owned();
-                    handle_attr(&mut mod_attr_pending, &mut source_info, edition);
-                    continue;
-                } else if trimline.chars().all(|c| c.is_whitespace())
-                    || (trimline.starts_with("//") && !trimline.starts_with("///"))
-                {
-                    PartitionState::Attrs
-                } else if trimline.starts_with("extern crate")
-                    || trimline.starts_with("#[macro_use] extern crate")
-                {
-                    PartitionState::Crates
-                } else {
-                    // First we check if the previous attribute was "complete"...
-                    if !mod_attr_pending.is_empty() {
-                        // If not, then we append the new line into the pending attribute to check
-                        // if this time it's complete...
-                        mod_attr_pending.push_str(line);
-                        if !trimline.is_empty() {
-                            handle_attr(&mut mod_attr_pending, &mut source_info, edition);
-                        }
-                        continue;
-                    } else {
-                        PartitionState::Other
-                    }
-                };
-            }
-            PartitionState::Crates => {
-                state = if trimline.starts_with("extern crate")
-                    || trimline.starts_with("#[macro_use] extern crate")
-                    || trimline.chars().all(|c| c.is_whitespace())
-                    || (trimline.starts_with("//") && !trimline.starts_with("///"))
+                // Weirdly enough, the `Stmt` span doesn't include its attributes, so we need to
+                // tweak the span to include the attributes as well.
+                let mut span = stmt.span;
+                if let Some(attr) = stmt.kind.attrs().first() {
+                    span = span.with_lo(attr.span.lo());
+                }
+                if info.everything_else.is_empty()
+                    && (!info.maybe_crate_attrs.is_empty() || !info.crate_attrs.is_empty())
                 {
-                    PartitionState::Crates
-                } else {
-                    PartitionState::Other
-                };
+                    // We add potential line breaks into the attributes if there are some.
+                    push_to_s(
+                        &mut info.maybe_crate_attrs,
+                        source,
+                        span.shrink_to_lo(),
+                        &mut prev_span_hi,
+                    );
+                }
+                push_to_s(&mut info.everything_else, source, span, &mut prev_span_hi);
             }
-            PartitionState::Other => {}
+            Ok(info)
         }
-
-        match state {
-            PartitionState::Attrs => {
-                source_info.crate_attrs.push_str(line);
-                source_info.crate_attrs.push('\n');
-            }
-            PartitionState::Crates => {
-                source_info.crates.push_str(line);
-                source_info.crates.push('\n');
-            }
-            PartitionState::Other => {
-                source_info.everything_else.push_str(line);
-                source_info.everything_else.push('\n');
-            }
+        Err(e) => {
+            e.cancel();
+            Err(())
         }
-    }
-
-    if !mod_attr_pending.is_empty() {
-        debug!("invalid doctest code: {s:?}");
-        return None;
-    }
-
-    source_info.everything_else = source_info.everything_else.trim().to_string();
-
-    debug!("crate_attrs:\n{}{}", source_info.crate_attrs, source_info.maybe_crate_attrs);
-    debug!("crates:\n{}", source_info.crates);
-    debug!("after:\n{}", source_info.everything_else);
+        _ => Err(()),
+    };
 
-    Some(source_info)
+    cancel_error_count(&psess);
+    result
 }