diff options
| author | Casey Rodarmor <casey@rodarmor.com> | 2020-10-26 19:08:42 -0700 |
|---|---|---|
| committer | Casey Rodarmor <casey@rodarmor.com> | 2021-02-12 00:02:52 -0800 |
| commit | 66f4883308d999c8b405fdfd442562b8600a462d (patch) | |
| tree | 86a5d6b52b22914633ca307b7739549f72b4f5e6 /src | |
| parent | e9920ef7749d11fc71cc32ca4ba055bcfeaab945 (diff) | |
| download | rust-66f4883308d999c8b405fdfd442562b8600a462d.tar.gz rust-66f4883308d999c8b405fdfd442562b8600a462d.zip | |
[librustdoc] Reform lang string token splitting
Only split doctest lang strings on `,`, ` `, and `\t`. Additionally, to
preserve backwards compatibility with pandoc-style lang strings, strip a
surrounding pair of `{}`, and remove a leading `.` from each token.
Prior to this change, doctest lang strings were split on all
non-alphanumeric characters except `-` or `_`, which limited future
extensions to doctest lang string tokens, for example using `=` for
key-value tokens.
This is a breaking change, although it is not expected to be disruptive,
because lang strings using separators other than `,` and ` ` are not
very common.
Diffstat (limited to 'src')
| -rw-r--r-- | src/librustdoc/html/markdown.rs | 32 | ||||
| -rw-r--r-- | src/librustdoc/html/markdown/tests.rs | 46 |
2 files changed, 71 insertions, 7 deletions
diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index a81fd55f6f1..edc1229d0e4 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -779,6 +779,31 @@ impl LangString { Self::parse(string, allow_error_code_check, enable_per_target_ignores, None) } + fn tokens(string: &str) -> impl Iterator<Item = &str> { + // Pandoc, which Rust once used for generating documentation, + // expects lang strings to be surrounded by `{}` and for each token + // to be proceeded by a `.`. Since some of these lang strings are still + // loose in the wild, we strip a pair of surrounding `{}` from the lang + // string and a leading `.` from each token. + + let string = string.trim(); + + let first = string.chars().next(); + let last = string.chars().last(); + + let string = if first == Some('{') && last == Some('}') { + &string[1..string.len() - 1] + } else { + string + }; + + string + .split(|c| c == ',' || c == ' ' || c == '\t') + .map(str::trim) + .map(|token| if token.chars().next() == Some('.') { &token[1..] 
} else { token }) + .filter(|token| !token.is_empty()) + } + fn parse( string: &str, allow_error_code_check: ErrorCodes, @@ -792,11 +817,11 @@ impl LangString { let mut ignores = vec![]; data.original = string.to_owned(); - let tokens = string.split(|c: char| !(c == '_' || c == '-' || c.is_alphanumeric())); + + let tokens = Self::tokens(string).collect::<Vec<&str>>(); for token in tokens { - match token.trim() { - "" => {} + match token { "should_panic" => { data.should_panic = true; seen_rust_tags = !seen_other_tags; @@ -893,6 +918,7 @@ impl LangString { _ => seen_other_tags = true, } } + // ignore-foo overrides ignore if !ignores.is_empty() { data.ignore = Ignore::Some(ignores); diff --git a/src/librustdoc/html/markdown/tests.rs b/src/librustdoc/html/markdown/tests.rs index 9da3072ec28..53f85d02575 100644 --- a/src/librustdoc/html/markdown/tests.rs +++ b/src/librustdoc/html/markdown/tests.rs @@ -58,6 +58,9 @@ fn test_lang_string_parse() { t(Default::default()); t(LangString { original: "rust".into(), ..Default::default() }); + t(LangString { original: ".rust".into(), ..Default::default() }); + t(LangString { original: "{rust}".into(), ..Default::default() }); + t(LangString { original: "{.rust}".into(), ..Default::default() }); t(LangString { original: "sh".into(), rust: false, ..Default::default() }); t(LangString { original: "ignore".into(), ignore: Ignore::All, ..Default::default() }); t(LangString { @@ -75,16 +78,16 @@ fn test_lang_string_parse() { ..Default::default() }); t(LangString { original: "allow_fail".into(), allow_fail: true, ..Default::default() }); - t(LangString { original: "{.no_run .example}".into(), no_run: true, ..Default::default() }); + t(LangString { original: "no_run,example".into(), no_run: true, ..Default::default() }); t(LangString { - original: "{.sh .should_panic}".into(), + original: "sh,should_panic".into(), should_panic: true, rust: false, ..Default::default() }); - t(LangString { original: "{.example .rust}".into(), 
..Default::default() }); + t(LangString { original: "example,rust".into(), ..Default::default() }); t(LangString { - original: "{.test_harness .rust}".into(), + original: "test_harness,.rust".into(), test_harness: true, ..Default::default() }); @@ -101,6 +104,18 @@ fn test_lang_string_parse() { ..Default::default() }); t(LangString { + original: "text,no_run, ".into(), + no_run: true, + rust: false, + ..Default::default() + }); + t(LangString { + original: "text,no_run,".into(), + no_run: true, + rust: false, + ..Default::default() + }); + t(LangString { original: "edition2015".into(), edition: Some(Edition::Edition2015), ..Default::default() @@ -113,6 +128,29 @@ fn test_lang_string_parse() { } #[test] +fn test_lang_string_tokenizer() { + fn case(lang_string: &str, want: &[&str]) { + let have = LangString::tokens(lang_string).collect::<Vec<&str>>(); + assert_eq!(have, want, "Unexpected lang string split for `{}`", lang_string); + } + + case("", &[]); + case("foo", &["foo"]); + case("foo,bar", &["foo", "bar"]); + case(".foo,.bar", &["foo", "bar"]); + case("{.foo,.bar}", &["foo", "bar"]); + case(" {.foo,.bar} ", &["foo", "bar"]); + case("foo bar", &["foo", "bar"]); + case("foo\tbar", &["foo", "bar"]); + case("foo\t, bar", &["foo", "bar"]); + case(" foo , bar ", &["foo", "bar"]); + case(",,foo,,bar,,", &["foo", "bar"]); + case("foo=bar", &["foo=bar"]); + case("a-b-c", &["a-b-c"]); + case("a_b_c", &["a_b_c"]); +} + +#[test] fn test_header() { fn t(input: &str, expect: &str) { let mut map = IdMap::new(); |
