diff options
| author | Matthias Krüger <matthias.krueger@famsik.de> | 2022-12-06 13:27:40 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-12-06 13:27:40 +0100 |
| commit | e5a01b97ee9a3dc45d7f5055c8b0f4482b2ebad7 (patch) | |
| tree | f885419104becd80c9114c525cf823d993758d43 | |
| parent | 9db224fc908059986c179fc6ec433944e9cfce50 (diff) | |
| parent | f8a7123b276ed18e6cdd488818a8f4e5e3e1cf94 (diff) | |
| download | rust-e5a01b97ee9a3dc45d7f5055c8b0f4482b2ebad7.tar.gz rust-e5a01b97ee9a3dc45d7f5055c8b0f4482b2ebad7.zip | |
Rollup merge of #104439 - ferrocene:pa-generate-copyright, r=pnkfelix
Add prototype to generate `COPYRIGHT` from REUSE metadata This PR adds a prototype to generate the `COPYRIGHT` file from the metadata gathered with REUSE. There are two new tools: * `src/tools/collect-license-metadata` invokes REUSE, parses its output and stores a concise JSON representation of the metadata in `src/etc/license-metadata.json`. * `src/tools/generate-copyright` parses the metadata generated above, (in the future will) gather crate dependencies metadata, and renders the `COPYRIGHT.md` file. Note that since the contents of those files are currently incorrect, rather than outputting in the paths above, the files will be stored in `build/` and not committed. This will be changed once we're confident about the metadata. Eventually, `src/etc/license-metadata.json` will be committed into the repository and verified to be up to date by CI (similar to our GitHub Actions configuration), to avoid having people install REUSE on their local machine in most cases. You can see the (incorrect) generated files in https://gist.github.com/pietroalbini/3f3f22b6f9cc8533abf7494b6a50cf97. r? `@pnkfelix`
| -rw-r--r-- | Cargo.lock | 77 | ||||
| -rw-r--r-- | Cargo.toml | 2 | ||||
| -rw-r--r-- | config.toml.example | 10 | ||||
| -rw-r--r-- | src/bootstrap/builder.rs | 2 | ||||
| -rw-r--r-- | src/bootstrap/config.rs | 3 | ||||
| -rw-r--r-- | src/bootstrap/run.rs | 63 | ||||
| -rw-r--r-- | src/bootstrap/sanity.rs | 7 | ||||
| -rw-r--r-- | src/bootstrap/tool.rs | 2 | ||||
| -rw-r--r-- | src/tools/collect-license-metadata/Cargo.toml | 10 | ||||
| -rw-r--r-- | src/tools/collect-license-metadata/src/licenses.rs | 65 | ||||
| -rw-r--r-- | src/tools/collect-license-metadata/src/main.rs | 30 | ||||
| -rw-r--r-- | src/tools/collect-license-metadata/src/path_tree.rs | 294 | ||||
| -rw-r--r-- | src/tools/collect-license-metadata/src/reuse.rs | 49 | ||||
| -rw-r--r-- | src/tools/generate-copyright/Cargo.toml | 11 | ||||
| -rw-r--r-- | src/tools/generate-copyright/src/main.rs | 94 |
15 files changed, 719 insertions, 0 deletions
diff --git a/Cargo.lock b/Cargo.lock index 970b1719aa1..150a70341f5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -581,6 +581,7 @@ dependencies = [ "libc", "num-integer", "num-traits", + "serde", "time", "winapi", ] @@ -731,6 +732,16 @@ dependencies = [ ] [[package]] +name = "collect-license-metadata" +version = "0.1.0" +dependencies = [ + "anyhow", + "serde", + "serde_json", + "spdx-rs", +] + +[[package]] name = "color-eyre" version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1553,6 +1564,15 @@ dependencies = [ ] [[package]] +name = "generate-copyright" +version = "0.1.0" +dependencies = [ + "anyhow", + "serde", + "serde_json", +] + +[[package]] name = "generic-array" version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -4865,6 +4885,35 @@ dependencies = [ ] [[package]] +name = "spdx-expression" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d7ac03c67c572d85049d6db815e20a4a19b41b3d5cca732ac582342021ad77" +dependencies = [ + "nom", + "serde", + "thiserror", + "tracing", +] + +[[package]] +name = "spdx-rs" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3c02f6eb7e7b4100c272f685a9ccaccaab302324e8c7ec3e2ee72340fb29ff3" +dependencies = [ + "chrono", + "log", + "nom", + "serde", + "spdx-expression", + "strum", + "strum_macros", + "thiserror", + "uuid", +] + +[[package]] name = "stable_deref_trait" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -4968,6 +5017,25 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] +name = "strum" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" + +[[package]] +name = "strum_macros" +version = "0.24.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + +[[package]] name = "syn" version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -5597,6 +5665,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8772a4ccbb4e89959023bc5b7cb8623a795caa7092d99f3aa9501b9484d4557d" [[package]] +name = "uuid" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" +dependencies = [ + "getrandom 0.2.0", +] + +[[package]] name = "valuable" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" diff --git a/Cargo.toml b/Cargo.toml index 13a98eedde8..000c10a1f90 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,6 +39,8 @@ members = [ "src/tools/bump-stage0", "src/tools/replace-version-placeholder", "src/tools/lld-wrapper", + "src/tools/collect-license-metadata", + "src/tools/generate-copyright", ] exclude = [ diff --git a/config.toml.example b/config.toml.example index c94a27b12a3..ca54cbd2d68 100644 --- a/config.toml.example +++ b/config.toml.example @@ -255,6 +255,16 @@ changelog-seen = 2 # Defaults to the Python interpreter used to execute x.py #python = "python" +# The path to the REUSE executable to use. Note that REUSE is not required in +# most cases, as our tooling relies on a cached (and shrinked) copy of the +# REUSE output present in the git repository and in our source tarballs. +# +# REUSE is only needed if your changes caused the overral licensing of the +# repository to change, and the cached copy has to be regenerated. +# +# Defaults to the "reuse" command in the system path. +#reuse = "reuse" + # Force Cargo to check that Cargo.lock describes the precise dependency # set that all the Cargo.toml files create, instead of updating it. #locked-deps = false diff --git a/src/bootstrap/builder.rs b/src/bootstrap/builder.rs index cff5fd8c5b0..8ee6d49da8f 100644 --- a/src/bootstrap/builder.rs +++ b/src/bootstrap/builder.rs @@ -754,6 +754,8 @@ impl<'a> Builder<'a> { run::BumpStage0, run::ReplaceVersionPlaceholder, run::Miri, + run::CollectLicenseMetadata, + run::GenerateCopyright, ), // These commands either don't use paths, or they're special-cased in Build::build() Kind::Clean | Kind::Format | Kind::Setup => vec![], diff --git a/src/bootstrap/config.rs b/src/bootstrap/config.rs index babf09d2b93..d8c15c76e2d 100644 --- a/src/bootstrap/config.rs +++ b/src/bootstrap/config.rs @@ -213,6 +213,7 @@ pub struct Config { pub npm: Option<PathBuf>, pub gdb: Option<PathBuf>, pub python: Option<PathBuf>, + pub reuse: Option<PathBuf>, pub cargo_native_static: bool, pub configure_args: Vec<String>, @@ -611,6 +612,7 @@ define_config! { nodejs: Option<String> = "nodejs", npm: Option<String> = "npm", python: Option<String> = "python", + reuse: Option<String> = "reuse", locked_deps: Option<bool> = "locked-deps", vendor: Option<bool> = "vendor", full_bootstrap: Option<bool> = "full-bootstrap", @@ -1004,6 +1006,7 @@ impl Config { config.npm = build.npm.map(PathBuf::from); config.gdb = build.gdb.map(PathBuf::from); config.python = build.python.map(PathBuf::from); + config.reuse = build.reuse.map(PathBuf::from); config.submodules = build.submodules; set(&mut config.low_priority, build.low_priority); set(&mut config.compiler_docs, build.compiler_docs); diff --git a/src/bootstrap/run.rs b/src/bootstrap/run.rs index d49b41c5132..05de51f8cc5 100644 --- a/src/bootstrap/run.rs +++ b/src/bootstrap/run.rs @@ -1,3 +1,4 @@ +use std::path::PathBuf; use std::process::Command; use crate::builder::{Builder, RunConfig, ShouldRun, Step}; @@ -189,3 +190,65 @@ impl Step for Miri { builder.run(&mut miri); } } + +#[derive(Debug, PartialOrd, Ord, Copy, Clone, Hash, PartialEq, Eq)] +pub struct CollectLicenseMetadata; + +impl Step for CollectLicenseMetadata { + type Output = PathBuf; + const ONLY_HOSTS: bool = true; + + fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> { + run.path("src/tools/collect-license-metadata") + } + + fn make_run(run: RunConfig<'_>) { + run.builder.ensure(CollectLicenseMetadata); + } + + fn run(self, builder: &Builder<'_>) -> Self::Output { + let Some(reuse) = &builder.config.reuse else { + panic!("REUSE is required to collect the license metadata"); + }; + + // Temporary location, it will be moved to src/etc once it's accurate. + let dest = builder.out.join("license-metadata.json"); + + let mut cmd = builder.tool_cmd(Tool::CollectLicenseMetadata); + cmd.env("REUSE_EXE", reuse); + cmd.env("DEST", &dest); + builder.run(&mut cmd); + + dest + } +} + +#[derive(Debug, PartialOrd, Ord, Copy, Clone, Hash, PartialEq, Eq)] +pub struct GenerateCopyright; + +impl Step for GenerateCopyright { + type Output = PathBuf; + const ONLY_HOSTS: bool = true; + + fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> { + run.path("src/tools/generate-copyright") + } + + fn make_run(run: RunConfig<'_>) { + run.builder.ensure(GenerateCopyright); + } + + fn run(self, builder: &Builder<'_>) -> Self::Output { + let license_metadata = builder.ensure(CollectLicenseMetadata); + + // Temporary location, it will be moved to the proper one once it's accurate. + let dest = builder.out.join("COPYRIGHT.md"); + + let mut cmd = builder.tool_cmd(Tool::GenerateCopyright); + cmd.env("LICENSE_METADATA", &license_metadata); + cmd.env("DEST", &dest); + builder.run(&mut cmd); + + dest + } +} diff --git a/src/bootstrap/sanity.rs b/src/bootstrap/sanity.rs index 631d42acb93..8a40b0f64f4 100644 --- a/src/bootstrap/sanity.rs +++ b/src/bootstrap/sanity.rs @@ -140,6 +140,13 @@ than building it. .map(|p| cmd_finder.must_have(p)) .or_else(|| cmd_finder.maybe_have("gdb")); + build.config.reuse = build + .config + .reuse + .take() + .map(|p| cmd_finder.must_have(p)) + .or_else(|| cmd_finder.maybe_have("reuse")); + // We're gonna build some custom C code here and there, host triples // also build some C++ shims for LLVM so we need a C++ compiler. for target in &build.targets { diff --git a/src/bootstrap/tool.rs b/src/bootstrap/tool.rs index ba329ea6c75..e0be4c432f1 100644 --- a/src/bootstrap/tool.rs +++ b/src/bootstrap/tool.rs @@ -380,6 +380,8 @@ bootstrap_tool!( HtmlChecker, "src/tools/html-checker", "html-checker"; BumpStage0, "src/tools/bump-stage0", "bump-stage0"; ReplaceVersionPlaceholder, "src/tools/replace-version-placeholder", "replace-version-placeholder"; + CollectLicenseMetadata, "src/tools/collect-license-metadata", "collect-license-metadata"; + GenerateCopyright, "src/tools/generate-copyright", "generate-copyright"; ); #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)] diff --git a/src/tools/collect-license-metadata/Cargo.toml b/src/tools/collect-license-metadata/Cargo.toml new file mode 100644 index 00000000000..d0820cfc2a0 --- /dev/null +++ b/src/tools/collect-license-metadata/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "collect-license-metadata" +version = "0.1.0" +edition = "2021" + +[dependencies] +anyhow = "1.0.65" +serde = { version = "1.0.147", features = ["derive"] } +serde_json = "1.0.85" +spdx-rs = "0.5.1" diff --git a/src/tools/collect-license-metadata/src/licenses.rs b/src/tools/collect-license-metadata/src/licenses.rs new file mode 100644 index 00000000000..1c95b1bc8e9 --- /dev/null +++ b/src/tools/collect-license-metadata/src/licenses.rs @@ -0,0 +1,65 @@ +use std::collections::HashMap; + +const COPYRIGHT_PREFIXES: &[&str] = &["SPDX-FileCopyrightText:", "Copyright", "(c)", "(C)", "©"]; + +pub(crate) struct LicensesInterner { + by_id: Vec<License>, + by_struct: HashMap<License, usize>, +} + +impl LicensesInterner { + pub(crate) fn new() -> Self { + LicensesInterner { by_id: Vec::new(), by_struct: HashMap::new() } + } + + pub(crate) fn intern(&mut self, mut license: License) -> LicenseId { + license.simplify(); + if let Some(id) = self.by_struct.get(&license) { + LicenseId(*id) + } else { + let id = self.by_id.len(); + self.by_id.push(license.clone()); + self.by_struct.insert(license, id); + LicenseId(id) + } + } + + pub(crate) fn resolve(&self, id: LicenseId) -> &License { + &self.by_id[id.0] + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, serde::Serialize)] +#[serde(transparent)] +pub(crate) struct LicenseId(usize); + +#[derive(Clone, Hash, PartialEq, Eq, serde::Serialize)] +pub(crate) struct License { + pub(crate) spdx: String, + pub(crate) copyright: Vec<String>, +} + +impl License { + fn simplify(&mut self) { + self.remove_copyright_prefixes(); + self.copyright.sort(); + self.copyright.dedup(); + } + + fn remove_copyright_prefixes(&mut self) { + for copyright in &mut self.copyright { + let mut stripped = copyright.trim(); + let mut previous_stripped; + loop { + previous_stripped = stripped; + for pattern in COPYRIGHT_PREFIXES { + stripped = stripped.trim_start_matches(pattern).trim_start(); + } + if stripped == previous_stripped { + break; + } + } + *copyright = stripped.into(); + } + } +} diff --git a/src/tools/collect-license-metadata/src/main.rs b/src/tools/collect-license-metadata/src/main.rs new file mode 100644 index 00000000000..ca2a6f4b8c8 --- /dev/null +++ b/src/tools/collect-license-metadata/src/main.rs @@ -0,0 +1,30 @@ +mod licenses; +mod path_tree; +mod reuse; + +use crate::licenses::LicensesInterner; +use anyhow::Error; +use std::path::PathBuf; + +fn main() -> Result<(), Error> { + let reuse_exe: PathBuf = std::env::var_os("REUSE_EXE").expect("Missing REUSE_EXE").into(); + let dest: PathBuf = std::env::var_os("DEST").expect("Missing DEST").into(); + + let mut interner = LicensesInterner::new(); + let paths = crate::reuse::collect(&reuse_exe, &mut interner)?; + + let mut tree = crate::path_tree::build(paths); + tree.simplify(); + + if let Some(parent) = dest.parent() { + std::fs::create_dir_all(parent)?; + } + std::fs::write( + &dest, + &serde_json::to_vec_pretty(&serde_json::json!({ + "files": crate::path_tree::expand_interned_licenses(tree, &interner), + }))?, + )?; + + Ok(()) +} diff --git a/src/tools/collect-license-metadata/src/path_tree.rs b/src/tools/collect-license-metadata/src/path_tree.rs new file mode 100644 index 00000000000..133ff683737 --- /dev/null +++ b/src/tools/collect-license-metadata/src/path_tree.rs @@ -0,0 +1,294 @@ +//! Tools like REUSE output per-file licensing information, but we need to condense it in the +//! minimum amount of data that still represents the same licensing metadata. This module is +//! responsible for that, by turning the list of paths into a tree and executing simplification +//! passes over the tree to remove redundant information. + +use crate::licenses::{License, LicenseId, LicensesInterner}; +use std::collections::BTreeMap; +use std::path::{Path, PathBuf}; + +#[derive(serde::Serialize)] +#[serde(rename_all = "kebab-case", tag = "type")] +pub(crate) enum Node<L> { + Root { childs: Vec<Node<L>> }, + Directory { name: PathBuf, childs: Vec<Node<L>>, license: Option<L> }, + File { name: PathBuf, license: L }, + FileGroup { names: Vec<PathBuf>, license: L }, + Empty, +} + +impl Node<LicenseId> { + pub(crate) fn simplify(&mut self) { + self.merge_directories(); + self.collapse_in_licensed_directories(); + self.merge_directory_licenses(); + self.merge_file_groups(); + self.remove_empty(); + } + + /// Initially, the build() function constructs a list of separate paths from the file + /// system root down to each file, like so: + /// + /// ```text + /// ┌─► ./ ──► compiler/ ──► rustc/ ──► src/ ──► main.rs + /// │ + /// <root> ─┼─► ./ ──► compiler/ ──► rustc/ ──► Cargo.toml + /// │ + /// └─► ./ ──► library/ ───► std/ ──► Cargo.toml + /// ``` + /// + /// This pass is responsible for turning that into a proper directory tree: + /// + /// ```text + /// ┌─► compiler/ ──► rustc/ ──┬─► src/ ──► main.rs + /// │ │ + /// <root> ──► ./ ──┤ └─► Cargo.toml + /// │ + /// └─► library/ ───► std/ ──► Cargo.toml + /// ``` + fn merge_directories(&mut self) { + match self { + Node::Root { childs } | Node::Directory { childs, license: None, .. } => { + let mut directories = BTreeMap::new(); + let mut files = Vec::new(); + + for child in childs.drain(..) { + match child { + Node::Directory { name, mut childs, license: None } => { + directories.entry(name).or_insert_with(Vec::new).append(&mut childs); + } + file @ Node::File { .. } => { + files.push(file); + } + Node::Empty => {} + Node::Root { .. } => { + panic!("can't have a root inside another element"); + } + Node::FileGroup { .. } => { + panic!("FileGroup should not be present at this stage"); + } + Node::Directory { license: Some(_), .. } => { + panic!("license should not be set at this stage"); + } + } + } + + childs.extend(directories.into_iter().map(|(name, childs)| Node::Directory { + name, + childs, + license: None, + })); + childs.append(&mut files); + + for child in &mut *childs { + child.merge_directories(); + } + } + Node::Empty => {} + Node::File { .. } => {} + Node::FileGroup { .. } => { + panic!("FileGroup should not be present at this stage"); + } + Node::Directory { license: Some(_), .. } => { + panic!("license should not be set at this stage"); + } + } + } + + /// In our codebase, most files in a directory have the same license as the other files in that + /// same directory, so it's redundant to store licensing metadata for all the files. Instead, + /// we can add a license for a whole directory, and only record the exceptions to a directory + /// licensing metadata. + /// + /// We cannot instead record only the difference to Rust's standard licensing, as the majority + /// of the files in our repository are *not* licensed under Rust's standard licensing due to + /// our inclusion of LLVM. + fn collapse_in_licensed_directories(&mut self) { + match self { + Node::Directory { childs, license, .. } => { + for child in &mut *childs { + child.collapse_in_licensed_directories(); + } + + let mut licenses_count = BTreeMap::new(); + for child in &*childs { + let Some(license) = child.license() else { continue }; + *licenses_count.entry(license).or_insert(0) += 1; + } + + let most_popular_license = licenses_count + .into_iter() + .max_by_key(|(_, count)| *count) + .map(|(license, _)| license); + + if let Some(most_popular_license) = most_popular_license { + childs.retain(|child| child.license() != Some(most_popular_license)); + *license = Some(most_popular_license); + } + } + Node::Root { childs } => { + for child in &mut *childs { + child.collapse_in_licensed_directories(); + } + } + Node::File { .. } => {} + Node::FileGroup { .. } => {} + Node::Empty => {} + } + } + + /// Reduce the depth of the tree by merging subdirectories with the same license as their + /// parent directory into their parent, and adjusting the paths of the childs accordingly. + fn merge_directory_licenses(&mut self) { + match self { + Node::Root { childs } => { + for child in &mut *childs { + child.merge_directory_licenses(); + } + } + Node::Directory { childs, license, .. } => { + let mut to_add = Vec::new(); + for child in &mut *childs { + child.merge_directory_licenses(); + + let Node::Directory { + name: child_name, + childs: child_childs, + license: child_license, + } = child else { continue }; + + if child_license != license { + continue; + } + for mut child_child in child_childs.drain(..) { + match &mut child_child { + Node::Root { .. } => { + panic!("can't have a root inside another element"); + } + Node::FileGroup { .. } => { + panic!("FileGroup should not be present at this stage"); + } + Node::Directory { name: child_child_name, .. } => { + *child_child_name = child_name.join(&child_child_name); + } + Node::File { name: child_child_name, .. } => { + *child_child_name = child_name.join(&child_child_name); + } + Node::Empty => {} + } + to_add.push(child_child); + } + + *child = Node::Empty; + } + childs.append(&mut to_add); + } + Node::Empty => {} + Node::File { .. } => {} + Node::FileGroup { .. } => {} + } + } + + /// This pass groups multiple files in a directory with the same license into a single + /// "FileGroup", so that the license of all those files can be reported as a group. + /// + /// Crucially this pass runs after collapse_in_licensed_directories, so the most common license + /// will already be marked as the directory's license and won't be turned into a group. + fn merge_file_groups(&mut self) { + match self { + Node::Root { childs } | Node::Directory { childs, .. } => { + let mut grouped = BTreeMap::new(); + + for child in &mut *childs { + child.merge_file_groups(); + if let Node::File { name, license } = child { + grouped.entry(*license).or_insert_with(Vec::new).push(name.clone()); + *child = Node::Empty; + } + } + + for (license, mut names) in grouped.into_iter() { + if names.len() == 1 { + childs.push(Node::File { license, name: names.pop().unwrap() }); + } else { + childs.push(Node::FileGroup { license, names }); + } + } + } + Node::File { .. } => {} + Node::FileGroup { .. } => panic!("FileGroup should not be present at this stage"), + Node::Empty => {} + } + } + + /// Some nodes were replaced with Node::Empty to mark them for deletion. As the last step, make + /// sure to remove them from the tree. + fn remove_empty(&mut self) { + match self { + Node::Root { childs } | Node::Directory { childs, .. } => { + for child in &mut *childs { + child.remove_empty(); + } + childs.retain(|child| !matches!(child, Node::Empty)); + } + Node::FileGroup { .. } => {} + Node::File { .. } => {} + Node::Empty => {} + } + } + + fn license(&self) -> Option<LicenseId> { + match self { + Node::Directory { childs, license: Some(license), .. } if childs.is_empty() => { + Some(*license) + } + Node::File { license, .. } => Some(*license), + _ => None, + } + } +} + +pub(crate) fn build(mut input: Vec<(PathBuf, LicenseId)>) -> Node<LicenseId> { + let mut childs = Vec::new(); + + // Ensure reproducibility of all future steps. + input.sort(); + + for (path, license) in input { + let mut node = Node::File { name: path.file_name().unwrap().into(), license }; + for component in path.parent().unwrap_or_else(|| Path::new(".")).components().rev() { + node = Node::Directory { + name: component.as_os_str().into(), + childs: vec![node], + license: None, + }; + } + + childs.push(node); + } + + Node::Root { childs } +} + +/// Convert a `Node<LicenseId>` into a `Node<&License>`, expanding all interned license IDs with a +/// reference to the actual license metadata. +pub(crate) fn expand_interned_licenses( + node: Node<LicenseId>, + interner: &LicensesInterner, +) -> Node<&License> { + match node { + Node::Root { childs } => Node::Root { + childs: childs.into_iter().map(|child| strip_interning(child, interner)).collect(), + }, + Node::Directory { name, childs, license } => Node::Directory { + childs: childs.into_iter().map(|child| strip_interning(child, interner)).collect(), + license: license.map(|license| interner.resolve(license)), + name, + }, + Node::File { name, license } => Node::File { name, license: interner.resolve(license) }, + Node::FileGroup { names, license } => { + Node::FileGroup { names, license: interner.resolve(license) } + } + Node::Empty => Node::Empty, + } +} diff --git a/src/tools/collect-license-metadata/src/reuse.rs b/src/tools/collect-license-metadata/src/reuse.rs new file mode 100644 index 00000000000..d6b3772ba51 --- /dev/null +++ b/src/tools/collect-license-metadata/src/reuse.rs @@ -0,0 +1,49 @@ +use crate::licenses::{License, LicenseId, LicensesInterner}; +use anyhow::Error; +use std::path::{Path, PathBuf}; +use std::process::{Command, Stdio}; +use std::time::Instant; + +pub(crate) fn collect( + reuse_exe: &Path, + interner: &mut LicensesInterner, +) -> Result<Vec<(PathBuf, LicenseId)>, Error> { + eprintln!("gathering license information from REUSE"); + let start = Instant::now(); + let raw = &obtain_spdx_document(reuse_exe)?; + eprintln!("finished gathering the license information from REUSE in {:.2?}", start.elapsed()); + + let document = spdx_rs::parsers::spdx_from_tag_value(&raw)?; + + let mut result = Vec::new(); + for file in document.file_information { + let license = interner.intern(License { + spdx: file.concluded_license.to_string(), + copyright: file.copyright_text.split('\n').map(|s| s.into()).collect(), + }); + + result.push((file.file_name.into(), license)); + } + + Ok(result) +} + +fn obtain_spdx_document(reuse_exe: &Path) -> Result<String, Error> { + let output = Command::new(reuse_exe) + .args(&["spdx", "--add-license-concluded", "--creator-person=bors"]) + .stdout(Stdio::piped()) + .spawn()? + .wait_with_output()?; + + if !output.status.success() { + eprintln!(); + eprintln!("Note that Rust requires some REUSE features that might not be present in the"); + eprintln!("release you're using. Make sure your REUSE release includes these PRs:"); + eprintln!(); + eprintln!(" - https://github.com/fsfe/reuse-tool/pull/623"); + eprintln!(); + anyhow::bail!("collecting licensing information with REUSE failed"); + } + + Ok(String::from_utf8(output.stdout)?) +} diff --git a/src/tools/generate-copyright/Cargo.toml b/src/tools/generate-copyright/Cargo.toml new file mode 100644 index 00000000000..899ef0f8a6c --- /dev/null +++ b/src/tools/generate-copyright/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "generate-copyright" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +anyhow = "1.0.65" +serde = { version = "1.0.147", features = ["derive"] } +serde_json = "1.0.85" diff --git a/src/tools/generate-copyright/src/main.rs b/src/tools/generate-copyright/src/main.rs new file mode 100644 index 00000000000..d172c9e157b --- /dev/null +++ b/src/tools/generate-copyright/src/main.rs @@ -0,0 +1,94 @@ +use anyhow::Error; +use std::io::Write; +use std::path::PathBuf; + +fn main() -> Result<(), Error> { + let dest = env_path("DEST")?; + let license_metadata = env_path("LICENSE_METADATA")?; + + let metadata: Metadata = serde_json::from_slice(&std::fs::read(&license_metadata)?)?; + + let mut buffer = Vec::new(); + render_recursive(&metadata.files, &mut buffer, 0)?; + + std::fs::write(&dest, &buffer)?; + + Ok(()) +} + +fn render_recursive(node: &Node, buffer: &mut Vec<u8>, depth: usize) -> Result<(), Error> { + let prefix = std::iter::repeat("> ").take(depth + 1).collect::<String>(); + + match node { + Node::Root { childs } => { + for child in childs { + render_recursive(child, buffer, depth)?; + } + } + Node::Directory { name, childs, license } => { + render_license(&prefix, std::iter::once(name), license, buffer)?; + if !childs.is_empty() { + writeln!(buffer, "{prefix}")?; + writeln!(buffer, "{prefix}*Exceptions:*")?; + for child in childs { + writeln!(buffer, "{prefix}")?; + render_recursive(child, buffer, depth + 1)?; + } + } + } + Node::FileGroup { names, license } => { + render_license(&prefix, names.iter(), license, buffer)?; + } + Node::File { name, license } => { + render_license(&prefix, std::iter::once(name), license, buffer)?; + } + } + + Ok(()) +} + +fn render_license<'a>( + prefix: &str, + names: impl Iterator<Item = &'a String>, + license: &License, + buffer: &mut Vec<u8>, +) -> Result<(), Error> { + for name in names { + writeln!(buffer, "{prefix}**`{name}`** ")?; + } + writeln!(buffer, "{prefix}License: `{}` ", license.spdx)?; + for (i, copyright) in license.copyright.iter().enumerate() { + let suffix = if i == license.copyright.len() - 1 { "" } else { " " }; + writeln!(buffer, "{prefix}Copyright: {copyright}{suffix}")?; + } + + Ok(()) +} + +#[derive(serde::Deserialize)] +struct Metadata { + files: Node, +} + +#[derive(serde::Deserialize)] +#[serde(rename_all = "kebab-case", tag = "type")] +pub(crate) enum Node { + Root { childs: Vec<Node> }, + Directory { name: String, childs: Vec<Node>, license: License }, + File { name: String, license: License }, + FileGroup { names: Vec<String>, license: License }, +} + +#[derive(serde::Deserialize)] +struct License { + spdx: String, + copyright: Vec<String>, +} + +fn env_path(var: &str) -> Result<PathBuf, Error> { + if let Some(var) = std::env::var_os(var) { + Ok(var.into()) + } else { + anyhow::bail!("missing environment variable {var}") + } +} |
