diff options
| -rw-r--r-- | src/tools/collect-license-metadata/src/licenses.rs | 30 |
1 file changed, 29 insertions, 1 deletion
# Patch summary (text before the first `diff --git` header is ignored by patch tools).
#
# File: src/tools/collect-license-metadata/src/licenses.rs
#
# * Adds the `COPYRIGHT_PREFIXES` constant: the literal prefixes
#   "SPDX-FileCopyrightText:", "Copyright", "(c)", "(C)" and "©".
# * `LicensesInterner::intern` now takes its `License` argument as `mut` and calls
#   `license.simplify()` before the `by_struct` lookup, so the lookup key is the
#   simplified form (presumably so that equivalent licenses share one id; the
#   insertion branch is outside this hunk, so confirm against the full file).
# * Adds `impl License` with two private methods:
#     - `simplify`: strips copyright prefixes, then sorts and de-duplicates the
#       `copyright` vector.
#     - `remove_copyright_prefixes`: for each entry, trims surrounding whitespace,
#       then repeatedly strips every prefix in `COPYRIGHT_PREFIXES` (plus any
#       whitespace that follows) until a full pass leaves the text unchanged, and
#       stores the result back into the entry.
#
# NOTE(review): `trim_start_matches` matches the literal text with no word-boundary
# check, so an entry starting with a longer word such as "Copyrighted" would lose
# its first nine characters; confirm that this is acceptable for the inputs.
# NOTE(review): an entry consisting only of prefixes becomes an empty string and is
# kept in `copyright` (after `dedup`, at most one empty entry remains).
diff --git a/src/tools/collect-license-metadata/src/licenses.rs b/src/tools/collect-license-metadata/src/licenses.rs
index 34aabc87301..1c95b1bc8e9 100644
--- a/src/tools/collect-license-metadata/src/licenses.rs
+++ b/src/tools/collect-license-metadata/src/licenses.rs
@@ -1,5 +1,7 @@
 use std::collections::HashMap;
 
+const COPYRIGHT_PREFIXES: &[&str] = &["SPDX-FileCopyrightText:", "Copyright", "(c)", "(C)", "©"];
+
 pub(crate) struct LicensesInterner {
     by_id: Vec<License>,
     by_struct: HashMap<License, usize>,
@@ -10,7 +12,8 @@ impl LicensesInterner {
         LicensesInterner { by_id: Vec::new(), by_struct: HashMap::new() }
     }
 
-    pub(crate) fn intern(&mut self, license: License) -> LicenseId {
+    pub(crate) fn intern(&mut self, mut license: License) -> LicenseId {
+        license.simplify();
         if let Some(id) = self.by_struct.get(&license) {
             LicenseId(*id)
         } else {
@@ -35,3 +38,28 @@ pub(crate) struct License {
     pub(crate) spdx: String,
     pub(crate) copyright: Vec<String>,
 }
+
+impl License {
+    fn simplify(&mut self) {
+        self.remove_copyright_prefixes();
+        self.copyright.sort();
+        self.copyright.dedup();
+    }
+
+    fn remove_copyright_prefixes(&mut self) {
+        for copyright in &mut self.copyright {
+            let mut stripped = copyright.trim();
+            let mut previous_stripped;
+            loop {
+                previous_stripped = stripped;
+                for pattern in COPYRIGHT_PREFIXES {
+                    stripped = stripped.trim_start_matches(pattern).trim_start();
+                }
+                if stripped == previous_stripped {
+                    break;
+                }
+            }
+            *copyright = stripped.into();
+        }
+    }
+}
