about summary refs log tree commit diff
path: root/src/librustdoc/passes/lint/bare_urls.rs
blob: ffa160beae30bacedde7238b603bff9d6920e798 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
//! Detects links that are not linkified, e.g., in Markdown such as `Go to https://example.com/.`
//! Suggests wrapping the link with angle brackets: `Go to <https://example.com/>.` to linkify it.

use core::ops::Range;
use std::mem;
use std::sync::LazyLock;

use pulldown_cmark::{Event, Parser, Tag};
use regex::Regex;
use rustc_errors::Applicability;
use rustc_hir::HirId;
use rustc_resolve::rustdoc::source_span_for_markdown_range;
use tracing::trace;

use crate::clean::*;
use crate::core::DocContext;
use crate::html::markdown::main_body_opts;

/// Runs the bare-URL lint over the documentation text `dox` attached to `item`.
///
/// The Markdown events of `dox` are walked in order: every text event is handed to
/// [`find_raw_urls`], while everything between the start and end of a code block or a link is
/// skipped. Each reported URL is emitted as a `BARE_URLS` lint on `hir_id`; a
/// machine-applicable fix is attached only when the URL could be mapped back to a source span.
pub(super) fn visit_item(cx: &DocContext<'_>, item: &Item, hir_id: HirId, dox: &str) {
    // Emits one `BARE_URLS` diagnostic for the byte range `md_range` of `dox`.
    let emit_lint = |cx: &DocContext<'_>,
                     msg: &'static str,
                     md_range: Range<usize>,
                     without_brackets: Option<&str>| {
        let precise_sp =
            source_span_for_markdown_range(cx.tcx, dox, &md_range, &item.attrs.doc_strings)
                .map(|(sp, _)| sp);
        // When the range cannot be mapped back to source, point at the attribute instead.
        let lint_sp = match precise_sp {
            Some(sp) => sp,
            None => item.attr_span(cx.tcx),
        };
        cx.tcx.node_span_lint(crate::lint::BARE_URLS, hir_id, lint_sp, |lint| {
            lint.primary_message(msg)
                .note("bare URLs are not automatically turned into clickable links");
            // The attribute-span fallback is fine for highlighting where the problem is, but
            // not for placing the `<` and `>`, so a fix is only offered for a precise span.
            let Some(sp) = precise_sp else { return };
            let parts = match without_brackets {
                // The URL sits inside `[...]`: replace the whole bracketed span.
                Some(without_brackets) => vec![(sp, format!("<{without_brackets}>"))],
                // Plain bare URL: insert the angle brackets around it.
                None => vec![
                    (sp.shrink_to_lo(), "<".to_string()),
                    (sp.shrink_to_hi(), ">".to_string()),
                ],
            };
            lint.multipart_suggestion(
                "use an automatic link instead",
                parts,
                Applicability::MachineApplicable,
            );
        });
    };

    let mut events = Parser::new_ext(dox, main_body_opts()).into_offset_iter();

    // A plain `for` loop is not possible here: the iterator is advanced from inside the body
    // when a code block or link has to be skipped.
    loop {
        let Some((event, range)) = events.next() else { break };
        match event {
            // We don't want to check the text inside code blocks or links, so consume
            // everything up to (and including) the matching end tag.
            Event::Start(tag @ (Tag::CodeBlock(_) | Tag::Link { .. })) => {
                let closing = mem::discriminant(&tag.to_end());
                for (inner, _) in events.by_ref() {
                    if matches!(&inner, Event::End(end) if mem::discriminant(end) == closing) {
                        break;
                    }
                }
            }
            Event::Text(s) => find_raw_urls(cx, dox, &s, range, &emit_lint),
            _ => {}
        }
    }
}

/// Regex used by `find_raw_urls` to spot bare `http://` / `https://` URLs in doc text.
///
/// The pattern is assembled from four pieces (see the per-line comments): the scheme, one or
/// more dot-terminated labels of 2 to 256 characters each, an alphabetic root domain of 2 to 63
/// letters, and an optional trailing run of path/query/fragment characters.
///
/// Wrapped in a `LazyLock`, so the regex is built on first access; the `expect` panics at that
/// point if the pattern fails to compile.
static URL_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(concat!(
        r"https?://",                          // url scheme
        r"([-a-zA-Z0-9@:%._\+~#=]{2,256}\.)+", // one or more subdomains
        r"[a-zA-Z]{2,63}",                     // root domain
        r"\b([-a-zA-Z0-9@:%_\+.~#?&/=]*)",     // optional query or url fragments
    ))
    .expect("failed to build regex")
});

/// Scans `text` for bare URLs and reports each match through `f`.
///
/// * `cx` is passed through to `f` unchanged.
/// * `whole_text` is the complete doc string; it is only inspected to see whether a match is
///   directly wrapped in `[` and `]`.
/// * `text` is the piece of text to search (the contents of one Markdown text event at the
///   call site in this file).
/// * `range` is the byte range that accompanies `text`; `range.start` is added to every match
///   offset to obtain offsets into `whole_text`.
/// * `f` is called once per match with the lint message, the range to highlight and, when the
///   URL is wrapped in `[...]`, the URL itself so the brackets can be replaced rather than
///   having `<`/`>` inserted around them. In that case the reported range also covers both
///   brackets.
fn find_raw_urls(
    cx: &DocContext<'_>,
    whole_text: &str,
    text: &str,
    range: Range<usize>,
    f: &impl Fn(&DocContext<'_>, &'static str, Range<usize>, Option<&str>),
) {
    trace!("looking for raw urls in {text}");
    // For now, we only check "full" URLs (meaning, starting with "http://" or "https://").
    for match_ in URL_REGEX.find_iter(text) {
        let url_range = match_.range();
        let start = range.start + url_range.start;
        let end = range.start + url_range.end;

        // If the whole URL is contained inside `[]`, then we need to replace the brackets and
        // not just add `<>`.
        //
        // `str::get` is used instead of direct slicing so that an offset which is out of bounds
        // or not on a char boundary of `whole_text` is treated as "no surrounding brackets"
        // instead of panicking in the middle of a lint pass.
        let opened = whole_text.get(..start).is_some_and(|before| before.ends_with('['));
        let closed = whole_text.get(end..).is_some_and(|after| after.starts_with(']'));

        let (lint_range, without_brackets) = if opened && closed {
            // `start >= 1` here because the text before the URL ends with `[`.
            (start - 1..end + 1, Some(match_.as_str()))
        } else {
            (start..end, None)
        };
        f(cx, "this URL is not a hyperlink", lint_range, without_brackets);
    }
}