-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[WIP] [Modules] Delay reading type source info of the preferred_name attribute. #122250
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang-modules @llvm/pr-subscribers-clang Author: Viktoriia Bakalova (VitaNuo) ChangesFull diff: https://github.com/llvm/llvm-project/pull/122250.diff 7 Files Affected:
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index f739fe688c110d..3c79a4c86a5617 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -13,7 +13,10 @@
#ifndef LLVM_CLANG_SERIALIZATION_ASTREADER_H
#define LLVM_CLANG_SERIALIZATION_ASTREADER_H
+#include "clang/AST/DeclID.h"
+#include "clang/AST/NestedNameSpecifier.h"
#include "clang/AST/Type.h"
+#include "clang/Basic/AttributeCommonInfo.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/IdentifierTable.h"
@@ -1156,6 +1159,19 @@ class ASTReader
SmallVector<std::pair<VarDecl *, serialization::TypeID>, 16>
PendingDeducedVarTypes;
+ struct PendingPreferredNameAttribute {
+ Decl* D;
+ AttributeCommonInfo Info;
+ serialization::TypeID TypeID;
+ bool isInherited;
+ bool isImplicit;
+ bool isPackExpansion;
+ SourceLocation ElaboratedTypedefSourceLocation;
+ NestedNameSpecifierLoc NNS;
+ SourceLocation TypedefSourceLocation;
+ };
+ std::deque<PendingPreferredNameAttribute> PendingPreferredNameAttributes;
+
/// The list of redeclaration chains that still need to be
/// reconstructed, and the local offset to the corresponding list
/// of redeclarations.
@@ -1419,7 +1435,7 @@ class ASTReader
const serialization::reader::DeclContextLookupTable *
getLoadedLookupTables(DeclContext *Primary) const;
-private:
+ private:
struct ImportedModule {
ModuleFile *Mod;
ModuleFile *ImportedBy;
diff --git a/clang/include/clang/Serialization/ASTRecordReader.h b/clang/include/clang/Serialization/ASTRecordReader.h
index 2561418b78ca7f..f476d72fcf9e71 100644
--- a/clang/include/clang/Serialization/ASTRecordReader.h
+++ b/clang/include/clang/Serialization/ASTRecordReader.h
@@ -337,6 +337,13 @@ class ASTRecordReader
/// Reads attributes from the current stream position, advancing Idx.
void readAttributes(AttrVec &Attrs);
+ /// Reads one attribute from the current stream position, advancing Idx.
+ Attr *readAttr(Decl *D);
+
+ /// Reads attributes from the current stream position, advancing Idx.
+ void readAttributes(AttrVec &Attrs, Decl *D);
+
+
/// Read an BTFTypeTagAttr object.
BTFTypeTagAttr *readBTFTypeTagAttr() {
return cast<BTFTypeTagAttr>(readAttr());
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index ec85fad3389a1c..4ee04552f13a96 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -23,6 +23,7 @@
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclFriend.h"
#include "clang/AST/DeclGroup.h"
+#include "clang/AST/DeclID.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/DeclarationName.h"
@@ -41,6 +42,7 @@
#include "clang/AST/TypeLocVisitor.h"
#include "clang/AST/UnresolvedSet.h"
#include "clang/Basic/ASTSourceDescriptor.h"
+#include "clang/Basic/AttributeCommonInfo.h"
#include "clang/Basic/CommentOptions.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticError.h"
@@ -9860,6 +9862,29 @@ void ASTReader::finishPendingActions() {
}
PendingDeducedVarTypes.clear();
+ ASTContext &Context = getContext();
+ for (unsigned I = 0; I != PendingPreferredNameAttributes.size(); ++I) {
+ auto *D = PendingPreferredNameAttributes[I].D;
+ QualType InfoTy = GetType(PendingPreferredNameAttributes[I].TypeID);
+ TypeSourceInfo *TInfo = nullptr;
+ if (!InfoTy.isNull()) {
+ TInfo = getContext().CreateTypeSourceInfo(InfoTy);
+ // TODO - this piece doesn't work yet
+ ElaboratedTypeLoc &Loc = (ElaboratedTypeLoc)TInfo->getTypeLoc();
+ Loc.setElaboratedKeywordLoc(PendingPreferredNameAttributes[I].ElaboratedTypedefSourceLocation);
+ Loc.setQualifierLoc(PendingPreferredNameAttributes[I].NNS);
+ Loc.setNameLoc(PendingPreferredNameAttributes[I].TypedefSourceLocation);
+ }
+
+ AttrVec &Attrs = getContext().getDeclAttrs(D);
+ PreferredNameAttr *New = new (Context) PreferredNameAttr(Context, PendingPreferredNameAttributes[I].Info, TInfo);
+ cast<InheritableAttr>(New)->setInherited(PendingPreferredNameAttributes[I].isInherited);
+ New->setImplicit(PendingPreferredNameAttributes[I].isImplicit);
+ New->setPackExpansion(PendingPreferredNameAttributes[I].isPackExpansion);
+ Attrs.push_back(New);
+ }
+ PendingPreferredNameAttributes.clear();
+
// For each decl chain that we wanted to complete while deserializing, mark
// it as "still needs to be completed".
for (unsigned I = 0; I != PendingIncompleteDeclChains.size(); ++I) {
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index 6ece3ba7af9f4b..5e67f4fa8bf107 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -22,6 +22,7 @@
#include "clang/AST/DeclBase.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclFriend.h"
+#include "clang/AST/DeclID.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/DeclTemplate.h"
@@ -638,7 +639,7 @@ void ASTDeclReader::VisitDecl(Decl *D) {
if (HasAttrs) {
AttrVec Attrs;
- Record.readAttributes(Attrs);
+ Record.readAttributes(Attrs, D);
// Avoid calling setAttrs() directly because it uses Decl::getASTContext()
// internally which is unsafe during derialization.
D->setAttrsImpl(Attrs, Reader.getContext());
@@ -725,12 +726,12 @@ void ASTDeclReader::VisitTypeDecl(TypeDecl *TD) {
RedeclarableResult ASTDeclReader::VisitTypedefNameDecl(TypedefNameDecl *TD) {
RedeclarableResult Redecl = VisitRedeclarable(TD);
VisitTypeDecl(TD);
- TypeSourceInfo *TInfo = readTypeSourceInfo();
- if (Record.readInt()) { // isModed
- QualType modedT = Record.readType();
- TD->setModedTypeSourceInfo(TInfo, modedT);
- } else
- TD->setTypeSourceInfo(TInfo);
+ TypeSourceInfo *TInfo = readTypeSourceInfo();
+ if (Record.readInt()) { // isModed
+ QualType modedT = Record.readType();
+ TD->setModedTypeSourceInfo(TInfo, modedT);
+ } else
+ TD->setTypeSourceInfo(TInfo);
// Read and discard the declaration for which this is a typedef name for
// linkage, if it exists. We cannot rely on our type to pull in this decl,
// because it might have been merged with a type from another module and
@@ -3167,6 +3168,54 @@ Attr *ASTRecordReader::readAttr() {
return New;
}
+Attr *ASTRecordReader::readAttr(Decl *D) {
+ AttrReader Record(*this);
+ auto V = Record.readInt();
+ if (!V)
+ return nullptr;
+
+ Attr *New = nullptr;
+ // Kind is stored as a 1-based integer because 0 is used to indicate a null
+ // Attr pointer.
+ auto Kind = static_cast<attr::Kind>(V - 1);
+ ASTContext &Context = getContext();
+
+ IdentifierInfo *AttrName = Record.readIdentifier();
+ IdentifierInfo *ScopeName = Record.readIdentifier();
+ SourceRange AttrRange = Record.readSourceRange();
+ SourceLocation ScopeLoc = Record.readSourceLocation();
+ unsigned ParsedKind = Record.readInt();
+ unsigned Syntax = Record.readInt();
+ unsigned SpellingIndex = Record.readInt();
+ bool IsAlignas = (ParsedKind == AttributeCommonInfo::AT_Aligned &&
+ Syntax == AttributeCommonInfo::AS_Keyword &&
+ SpellingIndex == AlignedAttr::Keyword_alignas);
+ bool IsRegularKeywordAttribute = Record.readBool();
+
+ AttributeCommonInfo Info(AttrName, ScopeName, AttrRange, ScopeLoc,
+ AttributeCommonInfo::Kind(ParsedKind),
+ {AttributeCommonInfo::Syntax(Syntax), SpellingIndex,
+ IsAlignas, IsRegularKeywordAttribute});
+ if (Kind == attr::PreferredName) {
+ bool isInherited = Record.readInt();
+ bool isImplicit = Record.readInt();
+ bool isPackExpansion = Record.readInt();
+
+
+ serialization::TypeID TypeID = getGlobalTypeID(Record.readInt());
+ SourceLocation SL = Record.readSourceLocation();
+ NestedNameSpecifierLoc NNL = readNestedNameSpecifierLoc();
+ SourceLocation TSL = Record.readSourceLocation();
+ Reader->PendingPreferredNameAttributes.push_back({D, Info, TypeID, isInherited, isImplicit, isPackExpansion, SL, NNL, TSL});
+ return nullptr;
+ }
+
+#include "clang/Serialization/AttrPCHRead.inc"
+
+ assert(New && "Unable to decode attribute?");
+ return New;
+}
+
/// Reads attributes from the current stream position.
void ASTRecordReader::readAttributes(AttrVec &Attrs) {
for (unsigned I = 0, E = readInt(); I != E; ++I)
@@ -3174,6 +3223,12 @@ void ASTRecordReader::readAttributes(AttrVec &Attrs) {
Attrs.push_back(A);
}
+void ASTRecordReader::readAttributes(AttrVec &Attrs, Decl* D) {
+ for (unsigned I = 0, E = readInt(); I != E; ++I)
+ if (auto *A = readAttr(D))
+ Attrs.push_back(A);
+}
+
//===----------------------------------------------------------------------===//
// ASTReader Implementation
//===----------------------------------------------------------------------===//
@@ -3572,7 +3627,7 @@ void mergeInheritableAttributes(ASTReader &Reader, Decl *D, Decl *Previous) {
template<typename DeclT>
void ASTDeclReader::attachPreviousDeclImpl(ASTReader &Reader,
Redeclarable<DeclT> *D,
- Decl *Previous, Decl *Canon) {
+ Decl *Previous, Decl *Canon) {
D->RedeclLink.setPrevious(cast<DeclT>(Previous));
D->First = cast<DeclT>(Previous)->First;
}
@@ -4199,6 +4254,7 @@ Decl *ASTReader::ReadDeclRecord(GlobalDeclID ID) {
ReadVisibleDeclContextStorage(*Loc.F, DeclsCursor, Offsets.second, ID))
return nullptr;
}
+ // This is the crashing line.
assert(Record.getIdx() == Record.size());
// Load any relevant update records.
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index a52d59c61c4ce6..a259a55cd397a6 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -4720,8 +4720,9 @@ void ASTRecordWriter::AddAttr(const Attr *A) {
// FIXME: Clang can't handle the serialization/deserialization of
// preferred_name properly now. See
// https://github.com/llvm/llvm-project/issues/56490 for example.
- if (!A || (isa<PreferredNameAttr>(A) &&
- Writer->isWritingStdCXXNamedModules()))
+ if (!A)
+ // if (!A || (isa<PreferredNameAttr>(A) &&
+ // Writer->isWritingStdCXXNamedModules()))
return Record.push_back(0);
Record.push_back(A->getKind() + 1); // FIXME: stable encoding, target attrs
diff --git a/clang/test/Modules/preferred_name.cppm b/clang/test/Modules/preferred_name.cppm
index 806781a81c5ca7..255e915f824c8d 100644
--- a/clang/test/Modules/preferred_name.cppm
+++ b/clang/test/Modules/preferred_name.cppm
@@ -16,8 +16,6 @@
//
// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -o %t/A.pcm
// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t -I%t %t/Use.cppm -verify -fsyntax-only
-// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t -I%t %t/Use1.cpp -verify -fsyntax-only
-// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t -I%t %t/Use2.cpp -verify -fsyntax-only
//
//--- foo.h
template<class _CharT>
@@ -42,7 +40,6 @@ inline foo_templ<char> bar()
module;
#include "foo.h"
export module A;
-export using ::foo_templ;
//--- Use.cppm
// expected-no-diagnostics
@@ -50,13 +47,3 @@ module;
#include "foo.h"
export module Use;
import A;
-export using ::foo_templ;
-
-//--- Use1.cpp
-import A; // [email protected]:8 {{attribute declaration must precede definition}}
-#include "foo.h" // [email protected]:9 {{previous definition is here}}
-
-//--- Use2.cpp
-// expected-no-diagnostics
-#include "foo.h"
-import A;
diff --git a/clang/test/Modules/preferred_name_2.cppm b/clang/test/Modules/preferred_name_2.cppm
new file mode 100644
index 00000000000000..b1d1c5a49b39ce
--- /dev/null
+++ b/clang/test/Modules/preferred_name_2.cppm
@@ -0,0 +1,47 @@
+// Tests that the ODR check wouldn't produce false-positive result for preferred_name attribute.
+//
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-module-interface -o %t/A.pcm
+// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t -I%t %t/Use.cppm -verify -fsyntax-only
+// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t -I%t %t/Use1.cpp -verify -fsyntax-only
+// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t -I%t %t/Use2.cpp -verify -fsyntax-only
+
+// Test again with reduced BMI.
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -o %t/A.pcm
+// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t -I%t %t/Use.cppm -verify -fsyntax-only
+//
+//--- foo.h
+template<class _CharT>
+class foo_templ;
+
+typedef foo_templ<char> foo;
+
+template<class _CharT>
+class foo_templ {
+public:
+ foo_templ() {}
+};
+
+inline foo_templ<char> bar()
+{
+ return foo_templ<char>();
+}
+
+//--- A.cppm
+module;
+#include "foo.h"
+export module A;
+
+//--- Use.cppm
+// expected-no-diagnostics
+module;
+#include "foo.h"
+export module Use;
+import A;
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for working on this!
After looking at the code, I think we should explore an alternative approach that would entail less duplication of serialization logic to ensure the code is correct for arbitrary TypeLocs and is more robust to changes.
See the relevant comment in ASTReaderDecl.cpp for more details.
I left a few more minor comments, but we could address them later (and they might get stale if we pick a different approach)
NestedNameSpecifierLoc NestedNameSL; | ||
SourceLocation TypedefSL; | ||
}; | ||
std::deque<PendingPreferredNameAttribute> PendingPreferredNameAttributes; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why not SmallVector
like the above?
For a slightly higher efficiency and for consistency with the code above?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For a slightly higher efficiency and for consistency with the code above?
Exactly. But I'm not sure if this matters much.
@@ -337,6 +337,14 @@ class ASTRecordReader | |||
/// Reads attributes from the current stream position, advancing Idx. | |||
void readAttributes(AttrVec &Attrs); | |||
|
|||
/// Reads one attribute from the current stream position, advancing Idx. | |||
/// Parent Decl is provided to delay attribute deserialization. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The lazy deserialization is a subtle difference, could we name the functions appropriately and "scary", e.g. readAttrOrDelay
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done in #122726.
Context, PendingPreferredNameAttributes[I].Info, TInfo); | ||
cast<InheritableAttr>(New)->setInherited( | ||
PendingPreferredNameAttributes[I].isInherited); | ||
New->setImplicit(PendingPreferredNameAttributes[I].isImplicit); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Duplicating the code here may lead to errors later.
Could we ensure that's the only code path that's being used to read preferred_name attributes? That would entail asserting that the common readAttr
asserts the attribute is not PreferredName and I'd also suggest adding a documentation comment in Attr.td
that PreferredName has a custom deserialization logic pointing to that function.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Obsolete with #122726.
AttributeCommonInfo::Kind(ParsedKind), | ||
{AttributeCommonInfo::Syntax(Syntax), SpellingIndex, | ||
IsAlignas, IsRegularKeywordAttribute}); | ||
AttributeCommonInfo Info = Record.readAttributeCommonInfo(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Instead of doing this, could we have a single function (e.g. readAttrImpl
) that handles both cases and just asserts Decl*
is not null whenever it sees the PreferredName
?
That way, we only have one copy of the code and readAttributeCommonInfo
is not really needed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, done in #122726.
bool isImplicit = Record.readInt(); | ||
bool isPackExpansion = Record.readInt(); | ||
serialization::TypeID TypeID = getGlobalTypeID(Record.readInt()); | ||
SourceLocation ElaboratedTypedefSL = Record.readSourceLocation(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This code assumes a specific structure of the TypeLoc
we store for PreferredName
.
It is a major assumption that we will not change the representation of TypeLoc
s and that the particular TypeLoc
s used for PreferredName
stay the same over time.
We could probably make the code here more robust by also updating serialization so that it writes only the source locations we read here, rather than call the generic method for serializing TypeLoc
.
However, I don't think this will lead to better code.
Could we instead try postponing reading the whole attribute?
As chatted outside of this thread, this would entail updating the serialization of PreferredName
to store the number of record entries that it takes.
Later, when deserializing, we can simply remember the position at which the attribute starts, skip it and store the position in the "pending attributes". In finishPendingAction
, we can temporarily switch the position back and call readAttr
to re-read the whole attribute.
I bet this approach would end up being much simpler and will allow avoiding the duplication of serialization/deseriazliation logic. Moreover, it can be generalized to more attribute if the need arises.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for the suggestion, the draft implementation is in #122726.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A special handling for PreferredName
looks slightly add-hoc to me. What I thought is to delay the deserialization of attributes
after the deserialization of the declaration. If this is too optimistic, I am wondering if we can delay reading of some sorts of attributes only, which is more generic.
I don't have concrete examples in my head, but I would be cautious of doing this for all attributes. Intuitively, it feels that there must be existing attributes that we somehow would depend on in the code during serialization. However, it might be worth a try to actually find real examples from running tests (even if only to prove that my intuition is wrong). That being said, having a concept for "sort of attributes that we can delay reading" seems like a really good idea. I am now highly suspicious of any attribute that stores |
I have pushed an alternative approach (reading the attribute at record level) to #122726. |
The code in #122726 looks better. I didn't do detailed review since it is marked WIP. BTW, it is better to have better explanation in the summary. I know you explained it in the issue. But it will be better for other readers after you land the PR. |
I have jumped ahead of the train and left some comments. Feel free to ignore them for now, I'd be happy to do another round of review when it's actually ready. |
Thanks @ChuanqiXu9, I have added a detailed summary #122726. After some conversations with @ilya-biryukov, we're thinking that it's better to land the change for the preferred_name attribute first. It's fairly unclear if the approach in #122726 generalizes to all attributes. Besides, since we're settled that #122726 is a better approach to fixing the issue, I will close this PR so that we can move all the discussions to a single location. |
It's not WIP anymore. Feel free to review, thanks a lot in advance! |
This fixes the incorrect diagnostic emitted when compiling the following snippet
The diagnostic is
The underlying issue is that deserialization of the
preferred_name
attribute triggers deserialization ofbasic_string_view<char>
, which triggers the deserialization of thepreferred_name
attribute again (since it's attached to thebasic_string_view
template).The deserialization logic is implemented in a way that prevents it from going on a loop in a literal sense (it detects early on that it has already seen the
string_view
typedef when trying to start its deserialization for the second time), but leaves the typedef deserialization in an unfinished state. Subsequently, thestring_view
typedef from the deserialized module cannot be merged with the same typedef fromstring_view.h
, resulting in the above diagnostic.This PR resolves the problem by delaying the deserialization of the
preferred_name
attribute until the deserialization of thebasic_string_view
template is completed. As a result of deferring, the deserialization of thepreferred_name
attribute doesn't need to go on a loop since the type of thestring_view
typedef is already known when it's deserialized.