From 16a78526cedfdb9af5d6befce3c77436aabc9a2f Mon Sep 17 00:00:00 2001
From: Jonathan Pallant <jonathan.pallant@ferrous-systems.com>
Date: Mon, 25 Nov 2024 11:26:47 +0000
Subject: [PATCH 1/4] generate-copyright: Ensure output has UNIX line-endings
 for consistency.

---
 src/tools/generate-copyright/src/main.rs | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/tools/generate-copyright/src/main.rs b/src/tools/generate-copyright/src/main.rs
index f9d96b594626..f83d16d0cabf 100644
--- a/src/tools/generate-copyright/src/main.rs
+++ b/src/tools/generate-copyright/src/main.rs
@@ -57,6 +57,10 @@ fn main() -> Result<(), Error> {
         dependencies: collected_cargo_metadata,
     };
     let output = template.render()?;
+    // Git stores text files with \n, but the rendered output may contain \r\n
+    // from files copied out of dependencies. Normalise the line endings before
+    // we write the output, for consistency.
+    let output = output.replace("\r\n", "\n");
     std::fs::write(&dest_file, output)?;
 
     // Output libstd subset file
@@ -65,6 +69,10 @@ fn main() -> Result<(), Error> {
         dependencies: library_collected_cargo_metadata,
     };
     let output = template.render()?;
+    // Git stores text files with \n, but the rendered output may contain \r\n
+    // from files copied out of dependencies. Normalise the line endings before
+    // we write the output, for consistency.
+    let output = output.replace("\r\n", "\n");
     std::fs::write(&libstd_dest_file, output)?;
 
     Ok(())

From 03cdaeed972c730912713ed3151f353367a87561 Mon Sep 17 00:00:00 2001
From: Jonathan Pallant <jonathan.pallant@ferrous-systems.com>
Date: Mon, 25 Nov 2024 14:14:57 +0000
Subject: [PATCH 2/4] collect-license-metadata: move JSON to root, and add a
 'check' mode

---
 REUSE.toml                                    |   1 +
 license-metadata.json                         | 269 ++++++++++++++++++
 src/bootstrap/src/core/build_steps/run.rs     |   3 +-
 src/bootstrap/src/core/build_steps/test.rs    |  32 +++
 src/bootstrap/src/core/builder/mod.rs         |   1 +
 .../collect-license-metadata/src/main.rs      |  39 ++-
 .../collect-license-metadata/src/reuse.rs     |   4 +-
 7 files changed, 335 insertions(+), 14 deletions(-)
 create mode 100644 license-metadata.json

diff --git a/REUSE.toml b/REUSE.toml
index 3a11c37854e6..6b16d97ed806 100644
--- a/REUSE.toml
+++ b/REUSE.toml
@@ -28,6 +28,7 @@ path = [
     "COPYRIGHT",
     "INSTALL.md",
     "LICENSE-APACHE",
+    "license-metadata.json",
     "LICENSE-MIT",
     "README.md",
     "RELEASES.md",
diff --git a/license-metadata.json b/license-metadata.json
new file mode 100644
index 000000000000..09cc36935658
--- /dev/null
+++ b/license-metadata.json
@@ -0,0 +1,269 @@
+{
+  "files": {
+    "children": [
+      {
+        "children": [
+          {
+            "children": [
+              {
+                "children": [
+                  {
+                    "children": [
+                      {
+                        "directories": [],
+                        "files": [
+                          "analyzer-decls.h",
+                          "malloc-macro.h"
+                        ],
+                        "license": {
+                          "copyright": [
+                            "2000-2024 Free Software Foundation, Inc"
+                          ],
+                          "spdx": "GPL-2.0-only"
+                        },
+                        "type": "group"
+                      }
+                    ],
+                    "license": {
+                      "copyright": [
+                        "2007-2011 Atheros Communications Inc",
+                        "2011-2012,2017 Qualcomm Atheros, Inc",
+                        "2016-2017 Erik Stromdahl <erik.stromdahl@gmail.com>"
+                      ],
+                      "spdx": "ISC"
+                    },
+                    "name": "c-c++-common/analyzer",
+                    "type": "directory"
+                  }
+                ],
+                "license": {
+                  "copyright": [
+                    "2000-2024 Free Software Foundation, Inc"
+                  ],
+                  "spdx": "GPL-2.0-only"
+                },
+                "name": "gcc/testsuite",
+                "type": "directory"
+              },
+              {
+                "license": {
+                  "copyright": [
+                    "2000-2024 Free Software Foundation, Inc"
+                  ],
+                  "spdx": "GCC-exception-3.1"
+                },
+                "name": "libstdc++-v3/config/os/aix/os_defines.h",
+                "type": "file"
+              }
+            ],
+            "license": {
+              "copyright": [
+                "1997-2024 Free Software Foundation, Inc"
+              ],
+              "spdx": "GPL-3.0-or-later"
+            },
+            "name": "src/gcc",
+            "type": "directory"
+          },
+          {
+            "children": [
+              {
+                "license": {
+                  "copyright": [
+                    "The Rust Project Developers (see https://thanks.rust-lang.org)"
+                  ],
+                  "spdx": "Apache-2.0 OR MIT"
+                },
+                "name": "noscript.css",
+                "type": "file"
+              },
+              {
+                "license": {
+                  "copyright": [
+                    "Nicolas Gallagher and Jonathan Neal"
+                  ],
+                  "spdx": "MIT"
+                },
+                "name": "normalize.css",
+                "type": "file"
+              }
+            ],
+            "license": {
+              "copyright": [
+                "2016 Ike Ku, Jessica Stokes and Leon Guan",
+                "The Rust Project Developers (see https://thanks.rust-lang.org)"
+              ],
+              "spdx": "Apache-2.0 OR MIT"
+            },
+            "name": "src/librustdoc/html/static/css",
+            "type": "directory"
+          },
+          {
+            "children": [
+              {
+                "license": {
+                  "copyright": [
+                    "The Rust Project Developers (see https://thanks.rust-lang.org)"
+                  ],
+                  "spdx": "Apache-2.0 OR MIT"
+                },
+                "name": "README.txt",
+                "type": "file"
+              },
+              {
+                "directories": [],
+                "files": [
+                  "FiraSans-LICENSE.txt",
+                  "FiraSans-Medium.woff2",
+                  "FiraSans-Regular.woff2"
+                ],
+                "license": {
+                  "copyright": [
+                    "2014, Mozilla Foundation",
+                    "2014, Telefonica S.A"
+                  ],
+                  "spdx": "OFL-1.1"
+                },
+                "type": "group"
+              },
+              {
+                "directories": [],
+                "files": [
+                  "NanumBarunGothic-LICENSE.txt",
+                  "NanumBarunGothic.ttf.woff2"
+                ],
+                "license": {
+                  "copyright": [
+                    "2010 NAVER Corporation"
+                  ],
+                  "spdx": "OFL-1.1"
+                },
+                "type": "group"
+              }
+            ],
+            "license": {
+              "copyright": [
+                "2010, 2012, 2014-2023, Adobe Systems Incorporated"
+              ],
+              "spdx": "OFL-1.1"
+            },
+            "name": "src/librustdoc/html/static/fonts",
+            "type": "directory"
+          },
+          {
+            "license": {
+              "copyright": [
+                "2003-2019 University of Illinois at Urbana-Champaign",
+                "The Rust Project Developers (see https://thanks.rust-lang.org)"
+              ],
+              "spdx": "Apache-2.0 WITH LLVM-exception AND (Apache-2.0 OR MIT)"
+            },
+            "name": "compiler/rustc_llvm/llvm-wrapper/SymbolWrapper.cpp",
+            "type": "file"
+          },
+          {
+            "children": [],
+            "license": {
+              "copyright": [
+                "2014 Alex Crichton",
+                "The Rust Project Developers (see https://thanks.rust-lang.org)"
+              ],
+              "spdx": "Apache-2.0 OR MIT"
+            },
+            "name": "library/backtrace",
+            "type": "directory"
+          },
+          {
+            "license": {
+              "copyright": [
+                "1991-2024 Unicode, Inc"
+              ],
+              "spdx": "Unicode-3.0"
+            },
+            "name": "library/core/src/unicode/unicode_data.rs",
+            "type": "file"
+          },
+          {
+            "children": [],
+            "license": {
+              "copyright": [
+                "2019 The Crossbeam Project Developers",
+                "The Rust Project Developers (see https://thanks.rust-lang.org)"
+              ],
+              "spdx": "Apache-2.0 OR MIT"
+            },
+            "name": "library/std/src/sync/mpmc",
+            "type": "directory"
+          },
+          {
+            "license": {
+              "copyright": [
+                "2016 The Fuchsia Authors",
+                "The Rust Project Developers (see https://thanks.rust-lang.org)"
+              ],
+              "spdx": "BSD-2-Clause AND (Apache-2.0 OR MIT)"
+            },
+            "name": "library/std/src/sys/sync/mutex/fuchsia.rs",
+            "type": "file"
+          },
+          {
+            "children": [],
+            "license": {
+              "copyright": [
+                "Rust on Embedded Devices Working Group",
+                "The Rust Project Developers (see https://thanks.rust-lang.org)"
+              ],
+              "spdx": "Apache-2.0 OR CC-BY-SA-4.0 OR MIT"
+            },
+            "name": "src/doc/embedded-book",
+            "type": "directory"
+          },
+          {
+            "children": [],
+            "license": {
+              "copyright": [
+                "2014 Jorge Aparicio",
+                "The Rust Project Developers (see https://thanks.rust-lang.org)"
+              ],
+              "spdx": "Apache-2.0 OR MIT"
+            },
+            "name": "src/doc/rust-by-example",
+            "type": "directory"
+          },
+          {
+            "license": {
+              "copyright": [
+                "2014-2021 Knut Sveidqvist"
+              ],
+              "spdx": "MIT"
+            },
+            "name": "src/doc/rustc-dev-guide/mermaid.min.js",
+            "type": "file"
+          },
+          {
+            "children": [],
+            "license": {
+              "copyright": [
+                "2003-2019 University of Illinois at Urbana-Champaign",
+                "2003-2019 by the contributors listed in CREDITS.TXT (https://github.com/rust-lang/llvm-project/blob/7738295178045041669876bf32b0543ec8319a5c/llvm/CREDITS.TXT)",
+                "2010 Apple Inc"
+              ],
+              "spdx": "Apache-2.0 WITH LLVM-exception AND NCSA"
+            },
+            "name": "src/llvm-project",
+            "type": "directory"
+          }
+        ],
+        "license": {
+          "copyright": [
+            "The Rust Project Developers (see https://thanks.rust-lang.org)"
+          ],
+          "spdx": "Apache-2.0 OR MIT"
+        },
+        "name": ".",
+        "type": "directory"
+      }
+    ],
+    "type": "root"
+  }
+}
\ No newline at end of file
diff --git a/src/bootstrap/src/core/build_steps/run.rs b/src/bootstrap/src/core/build_steps/run.rs
index 1a0a90564e6a..4052de9a240c 100644
--- a/src/bootstrap/src/core/build_steps/run.rs
+++ b/src/bootstrap/src/core/build_steps/run.rs
@@ -181,8 +181,7 @@ impl Step for CollectLicenseMetadata {
             panic!("REUSE is required to collect the license metadata");
         };
 
-        // Temporary location, it will be moved to src/etc once it's accurate.
-        let dest = builder.out.join("license-metadata.json");
+        let dest = builder.src.join("license-metadata.json");
 
         let mut cmd = builder.tool_cmd(Tool::CollectLicenseMetadata);
         cmd.env("REUSE_EXE", reuse);
diff --git a/src/bootstrap/src/core/build_steps/test.rs b/src/bootstrap/src/core/build_steps/test.rs
index 1cabd1c39f19..58d55d65aea2 100644
--- a/src/bootstrap/src/core/build_steps/test.rs
+++ b/src/bootstrap/src/core/build_steps/test.rs
@@ -3675,3 +3675,35 @@ impl Step for TestFloatParse {
         cargo_run.into_cmd().run(builder);
     }
 }
+
+#[derive(Debug, PartialOrd, Ord, Clone, Hash, PartialEq, Eq)]
+pub struct CollectLicenseMetadata;
+
+impl Step for CollectLicenseMetadata {
+    type Output = PathBuf;
+    const ONLY_HOSTS: bool = true;
+
+    fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
+        run.path("src/tools/collect-license-metadata")
+    }
+
+    fn make_run(run: RunConfig<'_>) {
+        run.builder.ensure(CollectLicenseMetadata);
+    }
+
+    fn run(self, builder: &Builder<'_>) -> Self::Output {
+        let Some(reuse) = &builder.config.reuse else {
+            panic!("REUSE is required to collect the license metadata");
+        };
+
+        let dest = builder.src.join("license-metadata.json");
+
+        let mut cmd = builder.tool_cmd(Tool::CollectLicenseMetadata);
+        cmd.env("REUSE_EXE", reuse);
+        cmd.env("DEST", &dest);
+        cmd.env("ONLY_CHECK", "1");
+        cmd.run(builder);
+
+        dest
+    }
+}
diff --git a/src/bootstrap/src/core/builder/mod.rs b/src/bootstrap/src/core/builder/mod.rs
index d59e0fa72880..e6902bb8cee5 100644
--- a/src/bootstrap/src/core/builder/mod.rs
+++ b/src/bootstrap/src/core/builder/mod.rs
@@ -915,6 +915,7 @@ impl<'a> Builder<'a> {
                 test::HtmlCheck,
                 test::RustInstaller,
                 test::TestFloatParse,
+                test::CollectLicenseMetadata,
                 // Run bootstrap close to the end as it's unlikely to fail
                 test::Bootstrap,
                 // Run run-make last, since these won't pass without make on Windows
diff --git a/src/tools/collect-license-metadata/src/main.rs b/src/tools/collect-license-metadata/src/main.rs
index dce36bb17b60..08a30d0b8994 100644
--- a/src/tools/collect-license-metadata/src/main.rs
+++ b/src/tools/collect-license-metadata/src/main.rs
@@ -4,7 +4,7 @@ mod reuse;
 
 use std::path::PathBuf;
 
-use anyhow::Error;
+use anyhow::{Context, Error};
 
 use crate::licenses::LicensesInterner;
 
@@ -12,10 +12,12 @@ use crate::licenses::LicensesInterner;
 ///
 /// You should probably let `bootstrap` execute this program instead of running it directly.
 ///
-/// Run `x.py run collect-license-metadata`
+/// * Run `x.py run collect-license-metadata` to regenerate the file.
+/// * Run `x.py test collect-license-metadata` to check if the file you have is correct.
 fn main() -> Result<(), Error> {
     let reuse_exe: PathBuf = std::env::var_os("REUSE_EXE").expect("Missing REUSE_EXE").into();
     let dest: PathBuf = std::env::var_os("DEST").expect("Missing DEST").into();
+    let only_check = std::env::var_os("ONLY_CHECK").is_some();
 
     let mut interner = LicensesInterner::new();
     let paths = crate::reuse::collect(&reuse_exe, &mut interner)?;
@@ -23,15 +25,32 @@ fn main() -> Result<(), Error> {
     let mut tree = crate::path_tree::build(paths);
     tree.simplify();
 
-    if let Some(parent) = dest.parent() {
-        std::fs::create_dir_all(parent)?;
+    let output = serde_json::json!({
+        "files": crate::path_tree::expand_interned_licenses(tree, &interner)
+    });
+
+    if only_check {
+        println!("loading existing license information");
+        let existing = std::fs::read_to_string(&dest).with_context(|| {
+            format!("Failed to read existing license JSON at {}", dest.display())
+        })?;
+        let existing_json: serde_json::Value =
+            serde_json::from_str(&existing).with_context(|| {
+                format!("Failed to parse existing license JSON at {}", dest.display())
+            })?;
+        if existing_json != output {
+            eprintln!("The existing {} file is out of date.", dest.display());
+            eprintln!("Run ./x run collect-license-metadata to update it.");
+            anyhow::bail!("The existing {} file doesn't match what REUSE reports.", dest.display());
+        }
+        println!("license information matches");
+    } else {
+        if let Some(parent) = dest.parent() {
+            std::fs::create_dir_all(parent)?;
+        }
+        std::fs::write(&dest, &serde_json::to_vec_pretty(&output)?)?;
+        println!("license information written to {}", dest.display());
     }
-    std::fs::write(
-        &dest,
-        &serde_json::to_vec_pretty(&serde_json::json!({
-            "files": crate::path_tree::expand_interned_licenses(tree, &interner),
-        }))?,
-    )?;
 
     Ok(())
 }
diff --git a/src/tools/collect-license-metadata/src/reuse.rs b/src/tools/collect-license-metadata/src/reuse.rs
index e5ee8f0da5ef..dbe46781b7c5 100644
--- a/src/tools/collect-license-metadata/src/reuse.rs
+++ b/src/tools/collect-license-metadata/src/reuse.rs
@@ -10,10 +10,10 @@ pub(crate) fn collect(
     reuse_exe: &Path,
     interner: &mut LicensesInterner,
 ) -> Result<Vec<(PathBuf, LicenseId)>, Error> {
-    eprintln!("gathering license information from REUSE");
+    println!("gathering license information from REUSE (this might take a minute...)");
     let start = Instant::now();
     let raw = &obtain_spdx_document(reuse_exe)?;
-    eprintln!("finished gathering the license information from REUSE in {:.2?}", start.elapsed());
+    println!("finished gathering the license information from REUSE in {:.2?}", start.elapsed());
 
     let document = spdx_rs::parsers::spdx_from_tag_value(&raw)?;
 

From 587369b95e2e5d99b5d808180b90542a6b38096a Mon Sep 17 00:00:00 2001
From: Jonathan Pallant <jonathan.pallant@ferrous-systems.com>
Date: Mon, 25 Nov 2024 14:15:08 +0000
Subject: [PATCH 3/4] Run the license-metadata check in CI.

This will tell you if license-metadata.json is out of date.
---
 src/ci/docker/host-x86_64/mingw-check/Dockerfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/ci/docker/host-x86_64/mingw-check/Dockerfile b/src/ci/docker/host-x86_64/mingw-check/Dockerfile
index f0afb570cc4a..d408cd518a00 100644
--- a/src/ci/docker/host-x86_64/mingw-check/Dockerfile
+++ b/src/ci/docker/host-x86_64/mingw-check/Dockerfile
@@ -63,6 +63,7 @@ ENV SCRIPT \
            /scripts/validate-toolstate.sh && \
            /scripts/validate-error-codes.sh && \
            reuse --include-submodules lint && \
+           python3 ../x.py test collect-license-metadata && \
            # Runs checks to ensure that there are no issues in our JS code.
            es-check es2019 ../src/librustdoc/html/static/js/*.js && \
            eslint -c ../src/librustdoc/html/static/.eslintrc.js ../src/librustdoc/html/static/js/*.js && \

From db71194416700d2c1365ae1ee7f882aa3a055a67 Mon Sep 17 00:00:00 2001
From: Jonathan Pallant <jonathan.pallant@ferrous-systems.com>
Date: Mon, 25 Nov 2024 14:18:08 +0000
Subject: [PATCH 4/4] generate-copyright: Use license-metadata.json from git.

---
 src/bootstrap/src/core/build_steps/run.rs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/bootstrap/src/core/build_steps/run.rs b/src/bootstrap/src/core/build_steps/run.rs
index 4052de9a240c..c76504761beb 100644
--- a/src/bootstrap/src/core/build_steps/run.rs
+++ b/src/bootstrap/src/core/build_steps/run.rs
@@ -208,8 +208,7 @@ impl Step for GenerateCopyright {
     }
 
     fn run(self, builder: &Builder<'_>) -> Self::Output {
-        let license_metadata = builder.ensure(CollectLicenseMetadata);
-
+        let license_metadata = builder.src.join("license-metadata.json");
         let dest = builder.out.join("COPYRIGHT.html");
         let dest_libstd = builder.out.join("COPYRIGHT-library.html");