Skip to content

Commit 8c65a66

Browse files
committed
Introduce -Zsplit-metadata option
This will split the crate metadata out of library files. Instead, only the svh is preserved to allow for loading the right rmeta file. This significantly reduces library size. In addition, it allows for cheaper checks of whether different library files are the same crate.
1 parent 6d5a457 commit 8c65a66

File tree

9 files changed

+119
-24
lines changed

9 files changed

+119
-24
lines changed

compiler/rustc_codegen_ssa/src/back/link.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,7 @@ fn link_rlib<'a, B: ArchiveBuilder<'a>>(
359359
// metadata in rlib files is wrapped in a "dummy" object file for
360360
// the target platform so the rlib can be processed entirely by
361361
// normal linkers for the platform.
362-
let metadata = create_rmeta_file(sess, codegen_results.metadata.raw_data());
362+
let metadata = create_rmeta_file(sess, codegen_results.metadata.maybe_reference());
363363
ab.add_file(&emit_metadata(sess, &metadata, tmpdir));
364364
}
365365

compiler/rustc_codegen_ssa/src/back/metadata.rs

+4-3
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@ pub fn create_rmeta_file(sess: &Session, metadata: &[u8]) -> Vec<u8> {
252252
// As a result, we choose a slightly shorter name! As to why
253253
// `.note.rustc` works on MinGW, see
254254
// https://github.com/llvm/llvm-project/blob/llvmorg-12.0.0/lld/COFF/Writer.cpp#L1190-L1197
255+
// TODO rename function
255256
pub fn create_compressed_metadata_file(
256257
sess: &Session,
257258
metadata: &EncodedMetadata,
@@ -260,7 +261,7 @@ pub fn create_compressed_metadata_file(
260261
let mut file = if let Some(file) = create_object_file(sess) {
261262
file
262263
} else {
263-
return metadata.raw_data().to_vec();
264+
return metadata.maybe_reference().to_vec();
264265
};
265266
let section = file.add_section(
266267
file.segment_name(StandardSegment::Data).to_vec(),
@@ -274,14 +275,14 @@ pub fn create_compressed_metadata_file(
274275
}
275276
_ => {}
276277
};
277-
let offset = file.append_section_data(section, metadata.raw_data(), 1);
278+
let offset = file.append_section_data(section, metadata.maybe_reference(), 1);
278279

279280
// For MachO and probably PE this is necessary to prevent the linker from throwing away the
280281
// .rustc section. For ELF this isn't necessary, but it also doesn't harm.
281282
file.add_symbol(Symbol {
282283
name: symbol_name.as_bytes().to_vec(),
283284
value: offset,
284-
size: metadata.raw_data().len() as u64,
285+
size: metadata.maybe_reference().len() as u64,
285286
kind: SymbolKind::Data,
286287
scope: SymbolScope::Dynamic,
287288
weak: false,

compiler/rustc_interface/src/passes.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1036,7 +1036,7 @@ fn encode_and_write_metadata(
10361036
enum MetadataKind {
10371037
None,
10381038
Uncompressed,
1039-
Compressed,
1039+
Compressed, // TODO remove this variant
10401040
}
10411041

10421042
let metadata_kind = tcx
@@ -1074,7 +1074,7 @@ fn encode_and_write_metadata(
10741074
.tempdir_in(out_filename.parent().unwrap())
10751075
.unwrap_or_else(|err| tcx.sess.fatal(&format!("couldn't create a temp dir: {}", err)));
10761076
let metadata_tmpdir = MaybeTempDir::new(metadata_tmpdir, tcx.sess.opts.cg.save_temps);
1077-
let metadata_filename = emit_metadata(tcx.sess, metadata.raw_data(), &metadata_tmpdir);
1077+
let metadata_filename = emit_metadata(tcx.sess, metadata.full(), &metadata_tmpdir);
10781078
if let Err(e) = util::non_durable_rename(&metadata_filename, &out_filename) {
10791079
tcx.sess.fatal(&format!("failed to write {}: {}", out_filename.display(), e));
10801080
}

compiler/rustc_interface/src/tests.rs

+1
Original file line numberDiff line numberDiff line change
@@ -689,6 +689,7 @@ fn test_debugging_options_tracking_hash() {
689689
untracked!(self_profile_events, Some(vec![String::new()]));
690690
untracked!(span_debug, true);
691691
untracked!(span_free_formats, true);
692+
untracked!(split_metadata, true);
692693
untracked!(temps_dir, Some(String::from("abc")));
693694
untracked!(terminal_width, Some(80));
694695
untracked!(threads, 99);

compiler/rustc_metadata/src/locator.rs

+18
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,11 @@ impl<'a> CrateLocator<'a> {
539539
match get_metadata_section(self.target, flavor, &lib, self.metadata_loader) {
540540
Ok(blob) => {
541541
if let Some(h) = self.crate_matches(&blob, &lib) {
542+
if blob.is_reference_only() {
543+
if slot.is_none() {
544+
todo!("return error");
545+
}
546+
}
542547
(h, blob)
543548
} else {
544549
info!("metadata mismatch");
@@ -603,6 +608,19 @@ impl<'a> CrateLocator<'a> {
603608
}
604609

605610
fn crate_matches(&mut self, metadata: &MetadataBlob, libpath: &Path) -> Option<Svh> {
611+
if metadata.is_reference_only() {
612+
let hash = metadata.get_hash();
613+
if let Some(expected_hash) = self.hash {
614+
if hash != expected_hash {
615+
info!("Rejecting via hash: expected {} got {}", expected_hash, hash);
616+
self.crate_rejections
617+
.via_hash
618+
.push(CrateMismatch { path: libpath.to_path_buf(), got: hash.to_string() });
619+
return None;
620+
}
621+
}
622+
}
623+
606624
let root = metadata.get_root();
607625
if root.is_proc_macro_crate() != self.is_proc_macro {
608626
info!(

compiler/rustc_metadata/src/rmeta/decoder.rs

+31-5
Original file line numberDiff line numberDiff line change
@@ -628,30 +628,56 @@ impl<'tcx> MetadataBlob {
628628

629629
crate fn check_compatibility(&self) -> Result<(), String> {
630630
if !self.blob().starts_with(METADATA_HEADER) {
631-
if self.blob().starts_with(b"rust") {
631+
if self.blob().starts_with(PREV_METADATA_HEADER) {
632+
let found_version = Lazy::<String>::from_position(
633+
NonZeroUsize::new(PREV_METADATA_HEADER.len() + 4).unwrap(),
634+
)
635+
.decode(self);
636+
return Err(found_version);
637+
} else if self.blob().starts_with(b"rust") {
632638
return Err("<unknown rustc version>".to_string());
633639
}
634640
return Err("<invalid metadata header>".to_string());
635641
}
636642

637-
let found_version =
638-
Lazy::<String>::from_position(NonZeroUsize::new(METADATA_HEADER.len() + 4).unwrap())
639-
.decode(self);
643+
let found_version = Lazy::<String>::from_position(
644+
NonZeroUsize::new(METADATA_HEADER.len() + 8 + 4 + 4).unwrap(),
645+
)
646+
.decode(self);
640647
if rustc_version() != found_version {
641648
return Err(found_version);
642649
}
643650

644651
Ok(())
645652
}
646653

654+
crate fn is_reference_only(&self) -> bool {
655+
let slice = &self.blob()[..];
656+
let offset = METADATA_HEADER.len() + 8;
657+
let pos = u32::from_le_bytes(slice[offset..offset + 4].try_into().unwrap());
658+
pos == 0
659+
}
660+
661+
crate fn get_hash(&self) -> Svh {
662+
let slice = &self.blob()[..];
663+
let offset = METADATA_HEADER.len() + 4;
664+
Svh::new(u64::from_le_bytes(slice[offset..offset + 8].try_into().unwrap()))
665+
}
666+
647667
crate fn get_root(&self) -> CrateRoot<'tcx> {
648668
let slice = &self.blob()[..];
649-
let offset = METADATA_HEADER.len();
669+
let offset = METADATA_HEADER.len() + 8;
650670
let pos = (u32::from_le_bytes(slice[offset..offset + 4].try_into().unwrap())) as usize;
671+
assert_ne!(pos, 0, "Tried to get crate root for reference-only metadata");
651672
Lazy::<CrateRoot<'tcx>>::from_position(NonZeroUsize::new(pos).unwrap()).decode(self)
652673
}
653674

654675
crate fn list_crate_metadata(&self, out: &mut dyn io::Write) -> io::Result<()> {
676+
if self.is_reference_only() {
677+
writeln!(out, "Split metadata crate hash {}", self.get_hash())?;
678+
return Ok(());
679+
}
680+
655681
let root = self.get_root();
656682
writeln!(out, "Crate info:")?;
657683
writeln!(out, "name {}{}", root.name, root.extra_filename)?;

compiler/rustc_metadata/src/rmeta/encoder.rs

+38-10
Original file line numberDiff line numberDiff line change
@@ -2136,18 +2136,24 @@ fn prefetch_mir(tcx: TyCtxt<'_>) {
21362136

21372137
#[derive(Encodable, Decodable)]
21382138
pub struct EncodedMetadata {
2139-
raw_data: Vec<u8>,
2139+
full: Vec<u8>,
2140+
reference: Option<Vec<u8>>,
21402141
}
21412142

21422143
impl EncodedMetadata {
21432144
#[inline]
21442145
pub fn new() -> EncodedMetadata {
2145-
EncodedMetadata { raw_data: Vec::new() }
2146+
EncodedMetadata { full: Vec::new(), reference: None }
21462147
}
21472148

21482149
#[inline]
2149-
pub fn raw_data(&self) -> &[u8] {
2150-
&self.raw_data
2150+
pub fn full(&self) -> &[u8] {
2151+
&self.full
2152+
}
2153+
2154+
#[inline]
2155+
pub fn maybe_reference(&self) -> &[u8] {
2156+
self.reference.as_ref().unwrap_or(&self.full)
21512157
}
21522158
}
21532159

@@ -2173,20 +2179,26 @@ pub fn encode_metadata(tcx: TyCtxt<'_>) -> EncodedMetadata {
21732179
.0
21742180
}
21752181

2176-
fn encode_metadata_impl(tcx: TyCtxt<'_>) -> EncodedMetadata {
2182+
fn encode_metadata_header<'a, 'tcx>(
2183+
tcx: TyCtxt<'tcx>,
2184+
hygiene_ctxt: &'a HygieneEncodeContext,
2185+
) -> EncodeContext<'a, 'tcx> {
21772186
let mut encoder = opaque::Encoder::new(vec![]);
21782187
encoder.emit_raw_bytes(METADATA_HEADER).unwrap();
21792188

2189+
encoder.emit_raw_bytes(&tcx.crate_hash(LOCAL_CRATE).as_u64().to_le_bytes()).unwrap();
2190+
21802191
// Will be filled with the root position after encoding everything.
21812192
encoder.emit_raw_bytes(&[0, 0, 0, 0]).unwrap();
21822193

2194+
// Reserved for future extension
2195+
encoder.emit_raw_bytes(&[0, 0, 0, 0]).unwrap();
2196+
21832197
let source_map_files = tcx.sess.source_map().files();
21842198
let source_file_cache = (source_map_files[0].clone(), 0);
21852199
let required_source_files = Some(GrowableBitSet::with_capacity(source_map_files.len()));
21862200
drop(source_map_files);
21872201

2188-
let hygiene_ctxt = HygieneEncodeContext::default();
2189-
21902202
let mut ecx = EncodeContext {
21912203
opaque: encoder,
21922204
tcx,
@@ -2199,27 +2211,43 @@ fn encode_metadata_impl(tcx: TyCtxt<'_>) -> EncodedMetadata {
21992211
interpret_allocs: Default::default(),
22002212
required_source_files,
22012213
is_proc_macro: tcx.sess.crate_types().contains(&CrateType::ProcMacro),
2202-
hygiene_ctxt: &hygiene_ctxt,
2214+
hygiene_ctxt,
22032215
};
22042216

22052217
// Encode the rustc version string in a predictable location.
22062218
rustc_version().encode(&mut ecx).unwrap();
22072219

2220+
ecx
2221+
}
2222+
2223+
fn encode_metadata_impl(tcx: TyCtxt<'_>) -> EncodedMetadata {
2224+
let hygiene_ctxt = HygieneEncodeContext::default();
2225+
let mut ecx = encode_metadata_header(tcx, &hygiene_ctxt);
2226+
22082227
// Encode all the entries and extra information in the crate,
22092228
// culminating in the `CrateRoot` which points to all of it.
22102229
let root = ecx.encode_crate_root();
22112230

22122231
let mut result = ecx.opaque.into_inner();
22132232

22142233
// Encode the root position.
2215-
let header = METADATA_HEADER.len();
2234+
let header = METADATA_HEADER.len() + 8;
22162235
let pos = root.position.get();
22172236
result[header..header + 4].copy_from_slice(&pos.to_le_bytes());
22182237

22192238
// Record metadata size for self-profiling
22202239
tcx.prof.artifact_size("crate_metadata", "crate_metadata", result.len() as u64);
22212240

2222-
EncodedMetadata { raw_data: result }
2241+
let reference_result = if tcx.sess.opts.debugging_opts.split_metadata {
2242+
let hygiene_ctxt = HygieneEncodeContext::default();
2243+
let ecx = encode_metadata_header(tcx, &hygiene_ctxt);
2244+
// Don't fill in the root position for reference metadata
2245+
Some(ecx.opaque.into_inner())
2246+
} else {
2247+
None
2248+
};
2249+
2250+
EncodedMetadata { full: result, reference: reference_result }
22232251
}
22242252

22252253
pub fn provide(providers: &mut Providers) {

compiler/rustc_metadata/src/rmeta/mod.rs

+22-3
Original file line numberDiff line numberDiff line change
@@ -54,11 +54,30 @@ const METADATA_VERSION: u8 = 6;
5454

5555
/// Metadata header which includes `METADATA_VERSION`.
5656
///
57-
/// This header is followed by the position of the `CrateRoot`,
58-
/// which is encoded as a 32-bit big-endian unsigned integer,
59-
/// and further followed by the rustc version string.
57+
/// # Format
58+
///
59+
/// |field |size |
60+
/// |--------|--------|
61+
/// |magic |8 |
62+
/// |svh |8 |
63+
/// |root |4 |
64+
/// |reserved|4 |
65+
/// |version |variable|
6066
pub const METADATA_HEADER: &[u8] = &[b'r', b'u', b's', b't', 0, 0, 0, METADATA_VERSION];
6167

68+
/// The previous metadata header.
69+
///
70+
/// This is only used for reporting the rustc version of the incompatible crate.
71+
///
72+
/// # Format
73+
///
74+
/// |field |size |
75+
/// |-------|--------|
76+
/// |magic |8 |
77+
/// |root |4 |
78+
/// |version|variable|
79+
pub const PREV_METADATA_HEADER: &[u8] = &[b'r', b'u', b's', b't', 0, 0, 0, 5];
80+
6281
/// Additional metadata for a `Lazy<T>` where `T` may not be `Sized`,
6382
/// e.g. for `Lazy<[T]>`, this is the length (count of `T` values).
6483
trait LazyMeta {

compiler/rustc_session/src/options.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1427,6 +1427,8 @@ options! {
14271427
split_dwarf_inlining: bool = (true, parse_bool, [UNTRACKED],
14281428
"provide minimal debug info in the object/executable to facilitate online \
14291429
symbolication/stack traces in the absence of .dwo/.dwp files when using Split DWARF"),
1430+
split_metadata: bool = (false, parse_bool, [UNTRACKED],
1431+
"split metadata out of libraries into .rmeta files"),
14301432
symbol_mangling_version: Option<SymbolManglingVersion> = (None,
14311433
parse_symbol_mangling_version, [TRACKED],
14321434
"which mangling version to use for symbol names ('legacy' (default) or 'v0')"),

0 commit comments

Comments
 (0)