about summary refs log tree commit diff
path: root/src/tools/generate-copyright
diff options
context:
space:
mode:
author Jonathan Pallant <jonathan.pallant@ferrous-systems.com> 2024-07-29 11:41:02 +0100
committer Jonathan Pallant <jonathan.pallant@ferrous-systems.com> 2024-08-06 11:04:55 +0100
commit 204e3eadf1323ebd886ee159b193e231ec4906c9 (patch)
tree 6350501252ebae7ba07e6de40b33bfebd4623b2c /src/tools/generate-copyright
parent ba0d6c973994915faef00e0e15e2957ff2b286b7 (diff)
download rust-204e3eadf1323ebd886ee159b193e231ec4906c9.tar.gz
rust-204e3eadf1323ebd886ee159b193e231ec4906c9.zip
generate-copyright: Produce HTML, not Markdown
This format works better with large amounts of structured data.

We also mark which deps are in the stdlib
Diffstat (limited to 'src/tools/generate-copyright')
-rw-r--r-- src/tools/generate-copyright/src/cargo_metadata.rs 81
-rw-r--r-- src/tools/generate-copyright/src/main.rs 172
2 files changed, 151 insertions, 102 deletions
diff --git a/src/tools/generate-copyright/src/cargo_metadata.rs b/src/tools/generate-copyright/src/cargo_metadata.rs
index 721a6b1c6e6..eda53c73c0a 100644
--- a/src/tools/generate-copyright/src/cargo_metadata.rs
+++ b/src/tools/generate-copyright/src/cargo_metadata.rs
@@ -1,6 +1,6 @@
 //! Gets metadata about a workspace from Cargo
 
-use std::collections::{BTreeMap, BTreeSet};
+use std::collections::BTreeMap;
 use std::ffi::{OsStr, OsString};
 use std::path::Path;
 
@@ -23,13 +23,18 @@ pub enum Error {
     RunningVendor,
 }
 
-/// Describes one of our dependencies
+/// Uniquely describes a package on crates.io
 #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
-pub struct Dependency {
+pub struct Package {
     /// The name of the package
     pub name: String,
     /// The version number
     pub version: String,
+}
+
+/// Extra data about a package
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
+pub struct PackageMetadata {
     /// The license it is under
     pub license: String,
     /// The list of authors from the package metadata
@@ -40,20 +45,44 @@ pub struct Dependency {
     pub notices: BTreeMap<OsString, String>,
 }
 
-/// Use `cargo` to get a list of dependencies and their license data.
+/// Use `cargo metadata` and `cargo vendor` to get a list of dependencies and their license data.
 ///
 /// This will involve running `cargo vendor` into `${BUILD}/vendor` so we can
 /// grab the license files.
 ///
 /// Any dependency with a path beginning with `root_path` is ignored, as we
 /// assume `reuse` has covered it already.
-pub fn get(
+pub fn get_metadata_and_notices(
     cargo: &Path,
     dest: &Path,
     root_path: &Path,
     manifest_paths: &[&Path],
-) -> Result<BTreeSet<Dependency>, Error> {
-    let mut temp_set = BTreeSet::new();
+) -> Result<BTreeMap<Package, PackageMetadata>, Error> {
+    let mut output = get_metadata(cargo, root_path, manifest_paths)?;
+
+    // Now do a cargo-vendor and grab everything
+    let vendor_path = dest.join("vendor");
+    println!("Vendoring deps into {}...", vendor_path.display());
+    run_cargo_vendor(cargo, &vendor_path, manifest_paths)?;
+
+    // Now for each dependency we found, go and grab any important looking files
+    for (package, metadata) in output.iter_mut() {
+        load_important_files(package, metadata, &vendor_path)?;
+    }
+
+    Ok(output)
+}
+
+/// Use `cargo metadata` to get a list of dependencies and their license data.
+///
+/// Any dependency with a path beginning with `root_path` is ignored, as we
+/// assume `reuse` has covered it already.
+pub fn get_metadata(
+    cargo: &Path,
+    root_path: &Path,
+    manifest_paths: &[&Path],
+) -> Result<BTreeMap<Package, PackageMetadata>, Error> {
+    let mut output = BTreeMap::new();
     // Look at the metadata for each manifest
     for manifest_path in manifest_paths {
         if manifest_path.file_name() != Some(OsStr::new("Cargo.toml")) {
@@ -71,7 +100,7 @@ pub fn get(
                 .and_then(|v| v.as_str())
                 .map(Path::new)
                 .ok_or_else(|| Error::MissingJsonElement("package.manifest_path"))?;
-            if manifest_path.starts_with(&root_path) {
+            if manifest_path.starts_with(root_path) {
                 // it's an in-tree dependency and reuse covers it
                 continue;
             }
@@ -93,28 +122,14 @@ pub fn get(
                 .ok_or_else(|| Error::MissingJsonElement("package.authors"))?;
             let authors: Vec<String> =
                 authors_list.iter().filter_map(|v| v.as_str()).map(|s| s.to_owned()).collect();
-            temp_set.insert(Dependency {
-                name: name.to_owned(),
-                version: version.to_owned(),
-                license: license.to_owned(),
-                authors,
-                notices: BTreeMap::new(),
-            });
+            let package = Package { name: name.to_owned(), version: version.to_owned() };
+            output.insert(
+                package.clone(),
+                PackageMetadata { license: license.to_owned(), authors, notices: BTreeMap::new() },
+            );
         }
     }
 
-    // Now do a cargo-vendor and grab everything
-    let vendor_path = dest.join("vendor");
-    println!("Vendoring deps into {}...", vendor_path.display());
-    run_cargo_vendor(cargo, &vendor_path, manifest_paths)?;
-
-    // Now for each dependency we found, go and grab any important looking files
-    let mut output = BTreeSet::new();
-    for mut dep in temp_set {
-        load_important_files(&mut dep, &vendor_path)?;
-        output.insert(dep);
-    }
-
     Ok(output)
 }
 
@@ -128,7 +143,7 @@ fn get_metadata_json(cargo: &Path, manifest_path: &Path) -> Result<serde_json::V
         .arg(manifest_path)
         .env("RUSTC_BOOTSTRAP", "1")
         .output()
-        .map_err(|e| Error::LaunchingMetadata(e))?;
+        .map_err(Error::LaunchingMetadata)?;
     if !metadata_output.status.success() {
         return Err(Error::GettingMetadata(
             String::from_utf8(metadata_output.stderr).expect("UTF-8 output from cargo"),
@@ -151,7 +166,7 @@ fn run_cargo_vendor(cargo: &Path, dest: &Path, manifest_paths: &[&Path]) -> Resu
     }
     vendor_command.arg(dest);
 
-    let vendor_status = vendor_command.status().map_err(|e| Error::LaunchingVendor(e))?;
+    let vendor_status = vendor_command.status().map_err(Error::LaunchingVendor)?;
 
     if !vendor_status.success() {
         return Err(Error::RunningVendor);
@@ -164,8 +179,12 @@ fn run_cargo_vendor(cargo: &Path, dest: &Path, manifest_paths: &[&Path]) -> Resu
 ///
 /// Maybe one-day Cargo.toml will contain enough information that we don't need
 /// to do this manual scraping.
-fn load_important_files(dep: &mut Dependency, vendor_root: &Path) -> Result<(), Error> {
-    let name_version = format!("{}-{}", dep.name, dep.version);
+fn load_important_files(
+    package: &Package,
+    dep: &mut PackageMetadata,
+    vendor_root: &Path,
+) -> Result<(), Error> {
+    let name_version = format!("{}-{}", package.name, package.version);
     println!("Scraping notices for {}...", name_version);
     let dep_vendor_path = vendor_root.join(name_version);
     for entry in std::fs::read_dir(dep_vendor_path)? {
diff --git a/src/tools/generate-copyright/src/main.rs b/src/tools/generate-copyright/src/main.rs
index 6191cd158bc..efccba0651e 100644
--- a/src/tools/generate-copyright/src/main.rs
+++ b/src/tools/generate-copyright/src/main.rs
@@ -1,3 +1,4 @@
+use std::collections::BTreeMap;
 use std::io::Write;
 use std::path::{Path, PathBuf};
 
@@ -5,6 +6,33 @@ use anyhow::Error;
 
 mod cargo_metadata;
 
+static TOP_BOILERPLATE: &str = r##"
+<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="UTF-8">
+    <title>Copyright notices for The Rust Toolchain</title>
+</head>
+<body>
+
+<h1>Copyright notices for The Rust Toolchain</h1>
+
+<p>This file describes the copyright and licensing information for the source
+code within The Rust Project git tree, and the third-party dependencies used
+when building the Rust toolchain (including the Rust Standard Library).</p>
+
+<h2>Table of Contents</h2>
+<ul>
+    <li><a href="#in-tree-files">In-tree files</a></li>
+    <li><a href="#out-of-tree-dependencies">Out-of-tree dependencies</a></li>
+</ul>
+"##;
+
+static BOTTOM_BOILERPLATE: &str = r#"
+</body>
+</html>
+"#;
+
 /// The entry point to the binary.
 ///
 /// You should probably let `bootstrap` execute this program instead of running it directly.
@@ -26,43 +54,28 @@ fn main() -> Result<(), Error> {
         Path::new("./library/std/Cargo.toml"),
     ];
     let collected_cargo_metadata =
-        cargo_metadata::get(&cargo, &out_dir, &root_path, &workspace_paths)?;
+        cargo_metadata::get_metadata_and_notices(&cargo, &out_dir, &root_path, &workspace_paths)?;
+
+    let stdlib_set =
+        cargo_metadata::get_metadata(&cargo, &root_path, &[Path::new("./library/std/Cargo.toml")])?;
 
     let mut buffer = Vec::new();
 
-    writeln!(buffer, "# COPYRIGHT for Rust")?;
-    writeln!(buffer)?;
-    writeln!(
-        buffer,
-        "This file describes the copyright and licensing information for the source code within The Rust Project git tree, and the third-party dependencies used when building the Rust toolchain (including the Rust Standard Library)"
-    )?;
-    writeln!(buffer)?;
-    writeln!(buffer, "## Table of Contents")?;
-    writeln!(buffer)?;
-    writeln!(buffer, "* [In-tree files](#in-tree-files)")?;
-    writeln!(buffer, "* [Out-of-tree files](#out-of-tree-files)")?;
-    // writeln!(buffer, "* [License Texts](#license-texts)")?;
-    writeln!(buffer)?;
-
-    writeln!(buffer, "## In-tree files")?;
-    writeln!(buffer)?;
+    writeln!(buffer, "{}", TOP_BOILERPLATE)?;
+
     writeln!(
         buffer,
-        "The following licenses cover the in-tree source files that were used in this release:"
+        r#"<h2 id="in-tree-files">In-tree files</h2><p>The following licenses cover the in-tree source files that were used in this release:</p>"#
     )?;
-    writeln!(buffer)?;
-    render_tree_recursive(&collected_tree_metadata.files, &mut buffer, 0)?;
-
-    writeln!(buffer)?;
+    render_tree_recursive(&collected_tree_metadata.files, &mut buffer)?;
 
-    writeln!(buffer, "## Out-of-tree files")?;
-    writeln!(buffer)?;
     writeln!(
         buffer,
-        "The following licenses cover the out-of-tree crates that were used in this release:"
+        r#"<h2 id="out-of-tree-dependencies">Out-of-tree dependencies</h2><p>The following licenses cover the out-of-tree crates that were used in this release:</p>"#
     )?;
-    writeln!(buffer)?;
-    render_deps(collected_cargo_metadata.iter(), &mut buffer)?;
+    render_deps(&collected_cargo_metadata, &stdlib_set, &mut buffer)?;
+
+    writeln!(buffer, "{}", BOTTOM_BOILERPLATE)?;
 
     std::fs::write(&dest_file, &buffer)?;
 
@@ -71,56 +84,51 @@ fn main() -> Result<(), Error> {
 
 /// Recursively draw the tree of files/folders we found on disk and their licenses, as
 /// markdown, into the given Vec.
-fn render_tree_recursive(node: &Node, buffer: &mut Vec<u8>, depth: usize) -> Result<(), Error> {
-    let prefix = std::iter::repeat("> ").take(depth + 1).collect::<String>();
-
+fn render_tree_recursive(node: &Node, buffer: &mut Vec<u8>) -> Result<(), Error> {
+    writeln!(buffer, r#"<div style="border:1px solid black; padding: 5px;">"#)?;
     match node {
         Node::Root { children } => {
             for child in children {
-                render_tree_recursive(child, buffer, depth)?;
+                render_tree_recursive(child, buffer)?;
             }
         }
         Node::Directory { name, children, license } => {
-            render_tree_license(&prefix, std::iter::once(name), license.as_ref(), buffer)?;
+            render_tree_license(std::iter::once(name), license.as_ref(), buffer)?;
             if !children.is_empty() {
-                writeln!(buffer, "{prefix}")?;
-                writeln!(buffer, "{prefix}*Exceptions:*")?;
+                writeln!(buffer, "<p><b>Exceptions:</b></p>")?;
                 for child in children {
-                    writeln!(buffer, "{prefix}")?;
-                    render_tree_recursive(child, buffer, depth + 1)?;
+                    render_tree_recursive(child, buffer)?;
                 }
             }
         }
         Node::Group { files, directories, license } => {
-            render_tree_license(
-                &prefix,
-                directories.iter().chain(files.iter()),
-                Some(license),
-                buffer,
-            )?;
+            render_tree_license(directories.iter().chain(files.iter()), Some(license), buffer)?;
         }
         Node::File { name, license } => {
-            render_tree_license(&prefix, std::iter::once(name), Some(license), buffer)?;
+            render_tree_license(std::iter::once(name), Some(license), buffer)?;
         }
     }
+    writeln!(buffer, "</div>")?;
 
     Ok(())
 }
 
 /// Draw a series of sibling files/folders, as markdown, into the given Vec.
 fn render_tree_license<'a>(
-    prefix: &str,
     names: impl Iterator<Item = &'a String>,
     license: Option<&License>,
     buffer: &mut Vec<u8>,
 ) -> Result<(), Error> {
+    writeln!(buffer, "<p><b>File/Directory:</b> ")?;
     for name in names {
-        writeln!(buffer, "{prefix}**`{name}`**  ")?;
+        writeln!(buffer, "<code>{name}</code>")?;
     }
+    writeln!(buffer, "</p>")?;
+
     if let Some(license) = license {
-        writeln!(buffer, "{prefix}License: `{}`", license.spdx)?;
+        writeln!(buffer, "<p><b>License:</b> {}</p>", license.spdx)?;
         for copyright in license.copyright.iter() {
-            writeln!(buffer, "{prefix}Copyright: {copyright}")?;
+            writeln!(buffer, "<p><b>Copyright:</b> {copyright}</p>")?;
         }
     }
 
@@ -128,36 +136,48 @@ fn render_tree_license<'a>(
 }
 
 /// Render a list of out-of-tree dependencies as markdown into the given Vec.
-fn render_deps<'a, 'b>(
-    deps: impl Iterator<Item = &'a cargo_metadata::Dependency>,
-    buffer: &'b mut Vec<u8>,
+fn render_deps(
+    all_deps: &BTreeMap<cargo_metadata::Package, cargo_metadata::PackageMetadata>,
+    stdlib_set: &BTreeMap<cargo_metadata::Package, cargo_metadata::PackageMetadata>,
+    buffer: &mut Vec<u8>,
 ) -> Result<(), Error> {
-    for dep in deps {
-        let authors_list = dep.authors.join(", ").replace("<", "\\<").replace(">", "\\>");
-        let url = format!("https://crates.io/crates/{}/{}", dep.name, dep.version);
+    for (package, metadata) in all_deps {
+        let authors_list = if metadata.authors.is_empty() {
+            "None Specified".to_owned()
+        } else {
+            metadata.authors.join(", ")
+        };
+        let url = format!("https://crates.io/crates/{}/{}", package.name, package.version);
         writeln!(buffer)?;
         writeln!(
             buffer,
-            "### [{name} {version}]({url})",
-            name = dep.name,
-            version = dep.version,
-            url = url,
+            r#"<h3>📦 {name}-{version}</h3>"#,
+            name = package.name,
+            version = package.version,
         )?;
-        writeln!(buffer)?;
-        writeln!(buffer, "* Authors: {}", authors_list)?;
-        writeln!(buffer, "* License: {}", dep.license)?;
-        for (name, contents) in &dep.notices {
-            writeln!(buffer)?;
-            writeln!(buffer, "#### {}", name.to_string_lossy())?;
-            writeln!(buffer)?;
-            writeln!(buffer, "<details><summary>Click to expand</summary>")?;
-            writeln!(buffer)?;
-            writeln!(buffer, "```")?;
-            writeln!(buffer, "{}", contents)?;
-            writeln!(buffer, "```")?;
-            writeln!(buffer)?;
-            writeln!(buffer, "</details>")?;
+        writeln!(buffer, r#"<p><b>URL:</b> <a href="{url}">{url}</a></p>"#,)?;
+        writeln!(
+            buffer,
+            "<p><b>In libstd:</b> {}</p>",
+            if stdlib_set.contains_key(package) { "Yes" } else { "No" }
+        )?;
+        writeln!(buffer, "<p><b>Authors:</b> {}</p>", escape_html(&authors_list))?;
+        writeln!(buffer, "<p><b>License:</b> {}</p>", escape_html(&metadata.license))?;
+        writeln!(buffer, "<p><b>Notices:</b> ")?;
+        if metadata.notices.is_empty() {
+            writeln!(buffer, "None")?;
+        } else {
+            for (name, contents) in &metadata.notices {
+                writeln!(
+                    buffer,
+                    "<details><summary><code>{}</code></summary>",
+                    name.to_string_lossy()
+                )?;
+                writeln!(buffer, "<pre>\n{}\n</pre>", contents)?;
+                writeln!(buffer, "</details>")?;
+            }
         }
+        writeln!(buffer, "</p>")?;
     }
     Ok(())
 }
@@ -192,3 +212,13 @@ fn env_path(var: &str) -> Result<PathBuf, Error> {
         anyhow::bail!("missing environment variable {var}")
     }
 }
+
+/// Escapes any invalid HTML characters
+fn escape_html(input: &str) -> String {
+    static MAPPING: [(char, &str); 3] = [('&', "&amp;"), ('<', "&lt;"), ('>', "&gt;")];
+    let mut output = input.to_owned();
+    for (ch, s) in &MAPPING {
+        output = output.replace(*ch, s);
+    }
+    output
+}