about summary refs log tree commit diff
diff options
context:
space:
mode:
author Pietro Albini <pietro.albini@ferrous-systems.com> 2022-11-17 11:05:27 +0100
committer Pietro Albini <pietro.albini@ferrous-systems.com> 2022-11-17 11:25:00 +0100
commit 51d5ae63d0845322cd274a67a504513df929b310 (patch)
tree 337e9a59ecedba60a9fa7f8c7ccd2aab128c7243
parent 4af7de13d26aabe9bc459771bd5641c140407954 (diff)
download rust-51d5ae63d0845322cd274a67a504513df929b310.tar.gz
rust-51d5ae63d0845322cd274a67a504513df929b310.zip
merge together similar copyright statements
-rw-r--r-- src/tools/collect-license-metadata/src/licenses.rs 30
1 files changed, 29 insertions, 1 deletions
diff --git a/src/tools/collect-license-metadata/src/licenses.rs b/src/tools/collect-license-metadata/src/licenses.rs
index 34aabc87301..1c95b1bc8e9 100644
--- a/src/tools/collect-license-metadata/src/licenses.rs
+++ b/src/tools/collect-license-metadata/src/licenses.rs
@@ -1,5 +1,7 @@
 use std::collections::HashMap;
 
+const COPYRIGHT_PREFIXES: &[&str] = &["SPDX-FileCopyrightText:", "Copyright", "(c)", "(C)", "©"];
+
 pub(crate) struct LicensesInterner {
     by_id: Vec<License>,
     by_struct: HashMap<License, usize>,
@@ -10,7 +12,8 @@ impl LicensesInterner {
         LicensesInterner { by_id: Vec::new(), by_struct: HashMap::new() }
     }
 
-    pub(crate) fn intern(&mut self, license: License) -> LicenseId {
+    pub(crate) fn intern(&mut self, mut license: License) -> LicenseId {
+        license.simplify();
         if let Some(id) = self.by_struct.get(&license) {
             LicenseId(*id)
         } else {
@@ -35,3 +38,28 @@ pub(crate) struct License {
     pub(crate) spdx: String,
     pub(crate) copyright: Vec<String>,
 }
+
+impl License {
+    fn simplify(&mut self) {
+        self.remove_copyright_prefixes();
+        self.copyright.sort();
+        self.copyright.dedup();
+    }
+
+    fn remove_copyright_prefixes(&mut self) {
+        for copyright in &mut self.copyright {
+            let mut stripped = copyright.trim();
+            let mut previous_stripped;
+            loop {
+                previous_stripped = stripped;
+                for pattern in COPYRIGHT_PREFIXES {
+                    stripped = stripped.trim_start_matches(pattern).trim_start();
+                }
+                if stripped == previous_stripped {
+                    break;
+                }
+            }
+            *copyright = stripped.into();
+        }
+    }
+}