Skip to content

Commit 6a752a8

Browse files
committed
Auto merge of #136410 - saethlin:clean-up-cgu-internal-copy, r=<try>
Remove InstanceKind::generates_cgu_internal_copy This PR should not contain any behavior changes. Before this PR, the logic for selecting instantiation mode is spread across all of * `instantiation_mode` * `cross_crate_inlinable` * `generates_cgu_internal_copy` * `requires_inline` The last two of those functions are not well-designed. The function that actually decides if we generate a CGU-internal copy is `instantiation_mode`, _not_ `generates_cgu_internal_copy`. The function `requires_inline` documents that it is about the LLVM `inline` attribute and that it is a hint. The LLVM attribute is called `inlinehint`, this function is also used by other codegen backends, and since it is part of instantiation mode selection it is *not* a hint. The goal of this PR is to start cleaning up the logic into a sequence of checks that have a more logical flow and are easier to customize in the future (to do things like improve incrementality or improve optimizations without causing obscure linker errors because you forgot to update another part of the compiler).
2 parents 73bf794 + 2c32926 commit 6a752a8

File tree

3 files changed

+62
-64
lines changed

3 files changed

+62
-64
lines changed

compiler/rustc_codegen_ssa/src/back/symbol_export.rs

+4-9
Original file line numberDiff line numberDiff line change
@@ -93,16 +93,11 @@ fn reachable_non_generics_provider(tcx: TyCtxt<'_>, _: LocalCrate) -> DefIdMap<S
9393
return None;
9494
}
9595

96-
// Functions marked with #[inline] are codegened with "internal"
97-
// linkage and are not exported unless marked with an extern
98-
// indicator
99-
if !Instance::mono(tcx, def_id.to_def_id()).def.generates_cgu_internal_copy(tcx)
100-
|| tcx.codegen_fn_attrs(def_id.to_def_id()).contains_extern_indicator()
101-
{
102-
Some(def_id)
103-
} else {
104-
None
96+
if Instance::mono(tcx, def_id.into()).def.requires_inline(tcx) {
97+
return None;
10598
}
99+
100+
if tcx.cross_crate_inlinable(def_id) { None } else { Some(def_id) }
106101
})
107102
.map(|def_id| {
108103
// We won't link right if this symbol is stripped during LTO.

compiler/rustc_middle/src/mir/mono.rs

+58-11
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use tracing::debug;
2020

2121
use crate::dep_graph::{DepNode, WorkProduct, WorkProductId};
2222
use crate::middle::codegen_fn_attrs::CodegenFnAttrFlags;
23-
use crate::ty::{GenericArgs, Instance, InstanceKind, SymbolName, TyCtxt};
23+
use crate::ty::{GenericArgs, Instance, InstanceKind, SymbolName, Ty, TyCtxt};
2424

2525
/// Describes how a monomorphization will be instantiated in object files.
2626
#[derive(PartialEq)]
@@ -54,6 +54,33 @@ pub enum MonoItem<'tcx> {
5454
GlobalAsm(ItemId),
5555
}
5656

57+
fn opt_incr_drop_glue_mode<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> InstantiationMode {
58+
// Non-ADTs can't have a Drop impl. This case is mostly hit by closures whose captures require
59+
// dropping.
60+
let Some(adt_def) = ty.ty_adt_def() else {
61+
return InstantiationMode::LocalCopy;
62+
};
63+
64+
// Types that don't have a direct Drop impl, but have fields that require dropping.
65+
let Some(dtor) = adt_def.destructor(tcx) else {
66+
if adt_def.is_enum() {
67+
return InstantiationMode::LocalCopy;
68+
} else {
69+
return InstantiationMode::GloballyShared { may_conflict: true };
70+
}
71+
};
72+
73+
// We've gotten to a drop_in_place for a type that directly implements Drop.
74+
// The drop glue is a wrapper for the Drop::drop impl, and we are an optimized build, so in an
75+
// effort to coordinate with the mode that the actual impl will get, we make the glue also
76+
// LocalCopy.
77+
if tcx.cross_crate_inlinable(dtor.did) {
78+
InstantiationMode::LocalCopy
79+
} else {
80+
InstantiationMode::GloballyShared { may_conflict: true }
81+
}
82+
}
83+
5784
impl<'tcx> MonoItem<'tcx> {
5885
/// Returns `true` if the mono item is user-defined (i.e. not compiler-generated, like shims).
5986
pub fn is_user_defined(&self) -> bool {
@@ -123,16 +150,36 @@ impl<'tcx> MonoItem<'tcx> {
123150
return InstantiationMode::GloballyShared { may_conflict: false };
124151
}
125152

126-
// FIXME: The logic for which functions are permitted to get LocalCopy is actually spread
127-
// across 4 functions:
128-
// * cross_crate_inlinable(def_id)
129-
// * InstanceKind::requires_inline
130-
// * InstanceKind::generate_cgu_internal_copy
131-
// * MonoItem::instantiation_mode
132-
// Since reachable_non_generics calls InstanceKind::generates_cgu_internal_copy to decide
133-
// which symbols this crate exports, we are obligated to only generate LocalCopy when
134-
// generates_cgu_internal_copy returns true.
135-
if !instance.def.generates_cgu_internal_copy(tcx) {
153+
// This is technically a heuristic even though it's in the "not a heuristic" part of
154+
// instantiation mode selection.
155+
// It is surely possible to untangle this; the root problem is that the way we instantiate
156+
// InstanceKind other than Item is very complicated.
157+
//
158+
// The fallback case is to give everything else GloballyShared at OptLevel::No and
159+
// LocalCopy at all other opt levels. This is a good default, except for one specific build
160+
// configuration: Optimized incremental builds.
161+
// In the current compiler architecture there is a fundamental tension between
162+
// optimizations (which want big CGUs with as many things LocalCopy as possible) and
163+
// incrementality (which wants small CGUs with as many things GloballyShared as possible).
164+
// The heuristics implemented here do better than a completely naive approach in the
165+
// compiler benchmark suite, but there is no reason to believe they are optimal.
166+
if let InstanceKind::DropGlue(_, Some(ty)) = instance.def {
167+
if tcx.sess.opts.optimize == OptLevel::No {
168+
return InstantiationMode::GloballyShared { may_conflict: false };
169+
}
170+
if tcx.sess.opts.incremental.is_none() {
171+
return InstantiationMode::LocalCopy;
172+
}
173+
return opt_incr_drop_glue_mode(tcx, ty);
174+
}
175+
176+
// We need to ensure that we do not decide the InstantiationMode of an exported symbol is
177+
// LocalCopy. Since exported symbols are computed based on the output of
178+
// cross_crate_inlinable, we are beholden to our previous decisions.
179+
//
180+
// Note that just like above, this check for requires_inline is technically a heuristic
181+
// even though it's in the "not a heuristic" part of instantiation mode selection.
182+
if !tcx.cross_crate_inlinable(instance.def_id()) && !instance.def.requires_inline(tcx) {
136183
return InstantiationMode::GloballyShared { may_conflict: false };
137184
}
138185

compiler/rustc_middle/src/ty/instance.rs

-44
Original file line numberDiff line numberDiff line change
@@ -301,50 +301,6 @@ impl<'tcx> InstanceKind<'tcx> {
301301
)
302302
}
303303

304-
/// Returns `true` if the machine code for this instance is instantiated in
305-
/// each codegen unit that references it.
306-
/// Note that this is only a hint! The compiler can globally decide to *not*
307-
/// do this in order to speed up compilation. CGU-internal copies are
308-
/// only exist to enable inlining. If inlining is not performed (e.g. at
309-
/// `-Copt-level=0`) then the time for generating them is wasted and it's
310-
/// better to create a single copy with external linkage.
311-
pub fn generates_cgu_internal_copy(&self, tcx: TyCtxt<'tcx>) -> bool {
312-
if self.requires_inline(tcx) {
313-
return true;
314-
}
315-
if let ty::InstanceKind::DropGlue(.., Some(ty))
316-
| ty::InstanceKind::AsyncDropGlueCtorShim(.., Some(ty)) = *self
317-
{
318-
// Drop glue generally wants to be instantiated at every codegen
319-
// unit, but without an #[inline] hint. We should make this
320-
// available to normal end-users.
321-
if tcx.sess.opts.incremental.is_none() {
322-
return true;
323-
}
324-
// When compiling with incremental, we can generate a *lot* of
325-
// codegen units. Including drop glue into all of them has a
326-
// considerable compile time cost.
327-
//
328-
// We include enums without destructors to allow, say, optimizing
329-
// drops of `Option::None` before LTO. We also respect the intent of
330-
// `#[inline]` on `Drop::drop` implementations.
331-
return ty.ty_adt_def().is_none_or(|adt_def| {
332-
match *self {
333-
ty::InstanceKind::DropGlue(..) => adt_def.destructor(tcx).map(|dtor| dtor.did),
334-
ty::InstanceKind::AsyncDropGlueCtorShim(..) => {
335-
adt_def.async_destructor(tcx).map(|dtor| dtor.ctor)
336-
}
337-
_ => unreachable!(),
338-
}
339-
.map_or_else(|| adt_def.is_enum(), |did| tcx.cross_crate_inlinable(did))
340-
});
341-
}
342-
if let ty::InstanceKind::ThreadLocalShim(..) = *self {
343-
return false;
344-
}
345-
tcx.cross_crate_inlinable(self.def_id())
346-
}
347-
348304
pub fn requires_caller_location(&self, tcx: TyCtxt<'_>) -> bool {
349305
match *self {
350306
InstanceKind::Item(def_id) | InstanceKind::Virtual(def_id, _) => {

0 commit comments

Comments
 (0)