From 1a4330d2a23f8b9912cb4ca54d259333b0133b76 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 27 Aug 2019 12:25:35 -0700 Subject: [PATCH] rustc: Handle modules in "fat" LTO more robustly When performing a "fat" LTO the compiler has a whole mess of codegen units that it links together. To do this it needs to select one module as a "base" module and then link everything else into this module. Previously LTO passes assume that there's at least one module in-memory to link into, but nowadays that's not always true! With incremental compilation modules may actually largely be cached and it may be possible that there's no in-memory modules to work with. This commit updates the logic of the LTO backend to handle modules a bit more uniformly during a fat LTO. This commit immediately splits them into two lists, one serialized and one in-memory. The in-memory list is then searched for the largest module and failing that we simply deserialize the first serialized module and link into that. This refactoring avoids juggling three lists, two of which are serialized modules and one of which is half serialized and half in-memory. Closes #63349 --- src/librustc_codegen_llvm/back/lto.rs | 86 +++++++++---------- src/librustc_codegen_llvm/lib.rs | 7 +- src/test/run-make-fulldeps/lto-empty/Makefile | 12 +++ src/test/run-make-fulldeps/lto-empty/lib.rs | 1 + src/test/ui/lto-duplicate-symbols.stderr | 2 +- 5 files changed, 59 insertions(+), 49 deletions(-) create mode 100644 src/test/run-make-fulldeps/lto-empty/Makefile create mode 100644 src/test/run-make-fulldeps/lto-empty/lib.rs diff --git a/src/librustc_codegen_llvm/back/lto.rs b/src/librustc_codegen_llvm/back/lto.rs index 5ed08943fe6fd..a43fbb68dbaed 100644 --- a/src/librustc_codegen_llvm/back/lto.rs +++ b/src/librustc_codegen_llvm/back/lto.rs @@ -183,7 +183,7 @@ pub(crate) fn prepare_thin( fn fat_lto(cgcx: &CodegenContext, diag_handler: &Handler, - mut modules: Vec>, + modules: Vec>, cached_modules: Vec<(SerializedModule, WorkProduct)>, mut serialized_modules: Vec<(SerializedModule, CString)>, symbol_white_list: &[*const libc::c_char]) @@ -191,6 +191,32 @@ fn fat_lto(cgcx: &CodegenContext, { info!("going for a fat lto"); + // Sort out all our lists of incoming modules into two lists. + // + // * `serialized_modules` (also and argument to this function) contains all + // modules that are serialized in-memory. + // * `in_memory` contains modules which are already parsed and in-memory, + // such as from multi-CGU builds. + // + // All of `cached_modules` (cached from previous incremental builds) can + // immediately go onto the `serialized_modules` modules list and then we can + // split the `modules` array into these two lists. + let mut in_memory = Vec::new(); + serialized_modules.extend(cached_modules.into_iter().map(|(buffer, wp)| { + info!("pushing cached module {:?}", wp.cgu_name); + (buffer, CString::new(wp.cgu_name).unwrap()) + })); + for module in modules { + match module { + FatLTOInput::InMemory(m) => in_memory.push(m), + FatLTOInput::Serialized { name, buffer } => { + info!("pushing serialized module {:?}", name); + let buffer = SerializedModule::Local(buffer); + serialized_modules.push((buffer, CString::new(name).unwrap())); + } + } + } + // Find the "costliest" module and merge everything into that codegen unit. // All the other modules will be serialized and reparsed into the new // context, so this hopefully avoids serializing and parsing the largest @@ -200,14 +226,8 @@ fn fat_lto(cgcx: &CodegenContext, // file copy operations in the backend work correctly. The only other kind // of module here should be an allocator one, and if your crate is smaller // than the allocator module then the size doesn't really matter anyway. - let costliest_module = modules.iter() + let costliest_module = in_memory.iter() .enumerate() - .filter_map(|(i, module)| { - match module { - FatLTOInput::InMemory(m) => Some((i, m)), - FatLTOInput::Serialized { .. } => None, - } - }) .filter(|&(_, module)| module.kind == ModuleKind::Regular) .map(|(i, module)| { let cost = unsafe { @@ -223,26 +243,14 @@ fn fat_lto(cgcx: &CodegenContext, // re-executing the LTO passes. If that's the case deserialize the first // module and create a linker with it. let module: ModuleCodegen = match costliest_module { - Some((_cost, i)) => { - match modules.remove(i) { - FatLTOInput::InMemory(m) => m, - FatLTOInput::Serialized { .. } => unreachable!(), - } - } + Some((_cost, i)) => in_memory.remove(i), None => { - let pos = modules.iter().position(|m| { - match m { - FatLTOInput::InMemory(_) => false, - FatLTOInput::Serialized { .. } => true, - } - }).expect("must have at least one serialized module"); - let (name, buffer) = match modules.remove(pos) { - FatLTOInput::Serialized { name, buffer } => (name, buffer), - FatLTOInput::InMemory(_) => unreachable!(), - }; + assert!(serialized_modules.len() > 0, "must have at least one serialized module"); + let (buffer, name) = serialized_modules.remove(0); + info!("no in-memory regular modules to choose from, parsing {:?}", name); ModuleCodegen { - module_llvm: ModuleLlvm::parse(cgcx, &name, &buffer, diag_handler)?, - name, + module_llvm: ModuleLlvm::parse(cgcx, &name, buffer.data(), diag_handler)?, + name: name.into_string().unwrap(), kind: ModuleKind::Regular, } } @@ -265,25 +273,13 @@ fn fat_lto(cgcx: &CodegenContext, // and we want to move everything to the same LLVM context. Currently the // way we know of to do that is to serialize them to a string and them parse // them later. Not great but hey, that's why it's "fat" LTO, right? - let mut new_modules = modules.into_iter().map(|module| { - match module { - FatLTOInput::InMemory(module) => { - let buffer = ModuleBuffer::new(module.module_llvm.llmod()); - let llmod_id = CString::new(&module.name[..]).unwrap(); - (SerializedModule::Local(buffer), llmod_id) - } - FatLTOInput::Serialized { name, buffer } => { - let llmod_id = CString::new(name).unwrap(); - (SerializedModule::Local(buffer), llmod_id) - } - } - }).collect::>(); + for module in in_memory { + let buffer = ModuleBuffer::new(module.module_llvm.llmod()); + let llmod_id = CString::new(&module.name[..]).unwrap(); + serialized_modules.push((SerializedModule::Local(buffer), llmod_id)); + } // Sort the modules to ensure we produce deterministic results. - new_modules.sort_by(|module1, module2| module1.1.partial_cmp(&module2.1).unwrap()); - serialized_modules.extend(new_modules); - serialized_modules.extend(cached_modules.into_iter().map(|(buffer, wp)| { - (buffer, CString::new(wp.cgu_name).unwrap()) - })); + serialized_modules.sort_by(|module1, module2| module1.1.cmp(&module2.1)); // For all serialized bitcode files we parse them and link them in as we did // above, this is all mostly handled in C++. Like above, though, we don't @@ -850,7 +846,7 @@ fn module_name_to_str(c_str: &CStr) -> &str { bug!("Encountered non-utf8 LLVM module name `{}`: {}", c_str.to_string_lossy(), e)) } -fn parse_module<'a>( +pub fn parse_module<'a>( cx: &'a llvm::Context, name: &CStr, data: &[u8], diff --git a/src/librustc_codegen_llvm/lib.rs b/src/librustc_codegen_llvm/lib.rs index 653dd8868f479..2fd78885bd01e 100644 --- a/src/librustc_codegen_llvm/lib.rs +++ b/src/librustc_codegen_llvm/lib.rs @@ -54,6 +54,7 @@ use syntax_pos::symbol::InternedString; pub use llvm_util::target_features; use std::any::Any; use std::sync::{mpsc, Arc}; +use std::ffi::CStr; use rustc::dep_graph::DepGraph; use rustc::middle::cstore::{EncodedMetadata, MetadataLoader}; @@ -386,13 +387,13 @@ impl ModuleLlvm { fn parse( cgcx: &CodegenContext, - name: &str, - buffer: &back::lto::ModuleBuffer, + name: &CStr, + buffer: &[u8], handler: &Handler, ) -> Result { unsafe { let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names); - let llmod_raw = buffer.parse(name, llcx, handler)?; + let llmod_raw = back::lto::parse_module(llcx, name, buffer, handler)?; let tm = match (cgcx.tm_factory.0)() { Ok(m) => m, Err(e) => { diff --git a/src/test/run-make-fulldeps/lto-empty/Makefile b/src/test/run-make-fulldeps/lto-empty/Makefile new file mode 100644 index 0000000000000..345d10bc4b9ea --- /dev/null +++ b/src/test/run-make-fulldeps/lto-empty/Makefile @@ -0,0 +1,12 @@ +-include ../tools.mk + +all: cdylib-fat cdylib-thin + +cdylib-fat: + $(RUSTC) lib.rs -C lto=fat -C opt-level=3 -C incremental=$(TMPDIR)/inc-fat + $(RUSTC) lib.rs -C lto=fat -C opt-level=3 -C incremental=$(TMPDIR)/inc-fat + +cdylib-thin: + $(RUSTC) lib.rs -C lto=thin -C opt-level=3 -C incremental=$(TMPDIR)/inc-thin + $(RUSTC) lib.rs -C lto=thin -C opt-level=3 -C incremental=$(TMPDIR)/inc-thin + diff --git a/src/test/run-make-fulldeps/lto-empty/lib.rs b/src/test/run-make-fulldeps/lto-empty/lib.rs new file mode 100644 index 0000000000000..e3663c79078f4 --- /dev/null +++ b/src/test/run-make-fulldeps/lto-empty/lib.rs @@ -0,0 +1 @@ +#![crate_type = "cdylib"] diff --git a/src/test/ui/lto-duplicate-symbols.stderr b/src/test/ui/lto-duplicate-symbols.stderr index 5760cb9a8fdb6..b7a930b61cc96 100644 --- a/src/test/ui/lto-duplicate-symbols.stderr +++ b/src/test/ui/lto-duplicate-symbols.stderr @@ -1,6 +1,6 @@ warning: Linking globals named 'foo': symbol multiply defined! -error: failed to load bc of "lto_duplicate_symbols1.3a1fbbbh-cgu.0": +error: failed to load bc of "lto_duplicate_symbols2.3a1fbbbh-cgu.0": error: aborting due to previous error