Skip to content

Commit b99836b

Browse files
ktfhahnjo
andcommitted
[llvm-project] Introduce paged vector
The goal of the class is to be an (almost) drop in replacement for SmallVector and std::vector when those are presized and filled later, as it happens in SourceManager and ASTReader. By doing so, sparsely accessed PagedVector can profit from reduced memory footprint. Co-authored-by: Jonas Hahnfeld <[email protected]>
1 parent 47ca67d commit b99836b

File tree

6 files changed

+278
-11
lines changed

6 files changed

+278
-11
lines changed
Lines changed: 264 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,264 @@
1+
//===- llvm/ADT/PagedVector.h - 'Lazily allocated' vectors --*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file defines the PagedVector class.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
#ifndef LLVM_ADT_PAGEDVECTOR_H
13+
#define LLVM_ADT_PAGEDVECTOR_H
14+
15+
#include "llvm/ADT/PointerIntPair.h"
16+
#include "llvm/ADT/SmallVector.h"
17+
#include "llvm/ADT/iterator_range.h"
18+
#include "llvm/Support/Allocator.h"
19+
#include <cassert>
20+
#include <vector>
21+
22+
namespace llvm {
23+
/// A vector that allocates memory in pages.
24+
///
25+
/// Order is kept, but memory is allocated only when one element of the page is
26+
/// accessed. This introduces a level of indirection, but it is useful when you
27+
/// have a sparsely initialised vector where the full size is allocated upfront.
28+
///
29+
/// As a side effect the elements are initialised later than in a normal vector.
30+
/// On the first access to one of the elements of a given page, all the elements
31+
/// of the page are initialised. This also means that the elements of the page
32+
/// are initialised beyond the size of the vector.
33+
///
34+
/// Similarly on destruction the elements are destroyed only when the page is
35+
/// not needed anymore, delaying invoking the destructor of the elements.
36+
///
37+
/// Notice that this has iterators only on materialized elements. This
38+
/// is deliberately done under the assumption you would dereference the elements
39+
/// while iterating, therefore materialising them and losing the gains in terms
40+
/// of memory usage this container provides. If you have such a use case, you
41+
/// probably want to use a normal std::vector or a llvm::SmallVector.
42+
template <typename T, size_t PageSize = 1024 / sizeof(T)> class PagedVector {
43+
static_assert(PageSize > 1, "PageSize must be greater than 0. Most likely "
44+
"you want it to be greater than 16.");
45+
/// The actual number of elements in the vector which can be accessed.
46+
size_t Size = 0;
47+
48+
/// The position of the initial element of the page in the Data vector.
49+
/// Pages are allocated contiguously in the Data vector.
50+
mutable SmallVector<T *, 0> PageToDataPtrs;
51+
/// Actual page data. All the page elements are allocated on the
52+
/// first access of any of the elements of the page. Elements are default
53+
/// constructed and elements of the page are stored contiguously.
54+
PointerIntPair<BumpPtrAllocator *, 1, bool> Allocator;
55+
56+
public:
57+
using value_type = T;
58+
59+
/// Default constructor. We build our own allocator and mark it as such with
60+
/// `true` in the second pair element.
61+
PagedVector() : Allocator(new BumpPtrAllocator, true) {}
62+
explicit PagedVector(BumpPtrAllocator *A) : Allocator(A, false) {
63+
assert(A && "Allocator cannot be nullptr");
64+
}
65+
66+
~PagedVector() {
67+
clear();
68+
// If we own the allocator, delete it.
69+
if (Allocator.getInt())
70+
delete Allocator.getPointer();
71+
}
72+
73+
// Forbid copy and move as we do not need them for the current use case.
74+
PagedVector(const PagedVector &) = delete;
75+
PagedVector(PagedVector &&) = delete;
76+
PagedVector &operator=(const PagedVector &) = delete;
77+
PagedVector &operator=(PagedVector &&) = delete;
78+
79+
/// Look up an element at position `Index`.
80+
/// If the associated page is not filled, it will be filled with default
81+
/// constructed elements.
82+
T &operator[](size_t Index) const {
83+
assert(Index < Size);
84+
assert(Index / PageSize < PageToDataPtrs.size());
85+
T *&PagePtr = PageToDataPtrs[Index / PageSize];
86+
// If the page was not yet allocated, allocate it.
87+
if (!PagePtr) {
88+
PagePtr = Allocator.getPointer()->template Allocate<T>(PageSize);
89+
// We need to invoke the default constructor on all the elements of the
90+
// page.
91+
std::uninitialized_value_construct_n(PagePtr, PageSize);
92+
}
93+
// Dereference the element in the page.
94+
return PagePtr[Index % PageSize];
95+
}
96+
97+
/// Return the capacity of the vector. I.e. the maximum size it can be
98+
/// expanded to with the resize method without allocating more pages.
99+
[[nodiscard]] size_t capacity() const {
100+
return PageToDataPtrs.size() * PageSize;
101+
}
102+
103+
/// Return the size of the vector.
104+
[[nodiscard]] size_t size() const { return Size; }
105+
106+
/// Resize the vector. Notice that the constructor of the elements will not
107+
/// be invoked until an element of a given page is accessed, at which point
108+
/// all the elements of the page will be constructed.
109+
///
110+
/// If the new size is smaller than the current size, the elements of the
111+
/// pages that are not needed anymore will be destroyed, however, elements of
112+
/// the last page will not be destroyed.
113+
///
114+
/// For these reason the usage of this vector is discouraged if you rely
115+
/// on the construction / destructor of the elements to be invoked.
116+
void resize(size_t NewSize) {
117+
if (NewSize == 0) {
118+
clear();
119+
return;
120+
}
121+
// Handle shrink case: destroy the elements in the pages that are not
122+
// needed any more and deallocate the pages.
123+
//
124+
// On the other hand, we do not destroy the extra elements in the last page,
125+
// because we might need them later and the logic is simpler if we do not
126+
// destroy them. This means that elements are only destroyed when the
127+
// page they belong to is destroyed. This is similar to what happens on
128+
// access of the elements of a page, where all the elements of the page are
129+
// constructed not only the one effectively needed.
130+
size_t NewLastPage = (NewSize - 1) / PageSize;
131+
if (NewSize < Size) {
132+
for (size_t I = NewLastPage + 1, N = PageToDataPtrs.size(); I < N; ++I) {
133+
T *Page = PageToDataPtrs[I];
134+
if (!Page)
135+
continue;
136+
// We need to invoke the destructor on all the elements of the page.
137+
std::destroy_n(Page, PageSize);
138+
Allocator.getPointer()->Deallocate(Page);
139+
}
140+
}
141+
142+
Size = NewSize;
143+
PageToDataPtrs.resize(NewLastPage + 1);
144+
}
145+
146+
[[nodiscard]] bool empty() const { return Size == 0; }
147+
148+
/// Clear the vector, i.e. clear the allocated pages, the whole page
149+
/// lookup index and reset the size.
150+
void clear() {
151+
Size = 0;
152+
for (T *Page : PageToDataPtrs) {
153+
if (Page == nullptr)
154+
continue;
155+
std::destroy_n(Page, PageSize);
156+
// If we do not own the allocator, deallocate the pages one by one.
157+
if (!Allocator.getInt())
158+
Allocator.getPointer()->Deallocate(Page);
159+
}
160+
// If we own the allocator, simply reset it.
161+
if (Allocator.getInt())
162+
Allocator.getPointer()->Reset();
163+
PageToDataPtrs.clear();
164+
}
165+
166+
/// Iterator on all the elements of the vector
167+
/// which have actually being constructed.
168+
class MaterializedIterator {
169+
const PagedVector *PV;
170+
size_t ElementIdx;
171+
172+
public:
173+
using iterator_category = std::forward_iterator_tag;
174+
using value_type = T;
175+
using difference_type = std::ptrdiff_t;
176+
using pointer = T *;
177+
using reference = T &;
178+
179+
MaterializedIterator(PagedVector const *PV, size_t ElementIdx)
180+
: PV(PV), ElementIdx(ElementIdx) {}
181+
182+
/// Pre-increment operator.
183+
///
184+
/// When incrementing the iterator, we skip the elements which have not
185+
/// been materialized yet.
186+
MaterializedIterator &operator++() {
187+
++ElementIdx;
188+
if (ElementIdx % PageSize == 0) {
189+
while (ElementIdx < PV->Size &&
190+
!PV->PageToDataPtrs[ElementIdx / PageSize])
191+
ElementIdx += PageSize;
192+
if (ElementIdx > PV->Size)
193+
ElementIdx = PV->Size;
194+
}
195+
196+
return *this;
197+
}
198+
199+
MaterializedIterator operator++(int) {
200+
MaterializedIterator Copy = *this;
201+
++*this;
202+
return Copy;
203+
}
204+
205+
T const &operator*() const {
206+
assert(ElementIdx < PV->Size);
207+
assert(PV->PageToDataPtrs[ElementIdx / PageSize]);
208+
T *PagePtr = PV->PageToDataPtrs[ElementIdx / PageSize];
209+
return PagePtr[ElementIdx % PageSize];
210+
}
211+
212+
/// Equality operator.
213+
friend bool operator==(const MaterializedIterator &LHS,
214+
const MaterializedIterator &RHS) {
215+
return LHS.equals(RHS);
216+
}
217+
218+
[[nodiscard]] size_t getIndex() const { return ElementIdx; }
219+
220+
friend bool operator!=(const MaterializedIterator &LHS,
221+
const MaterializedIterator &RHS) {
222+
return !(LHS == RHS);
223+
}
224+
225+
private:
226+
void verify() const {
227+
assert(
228+
ElementIdx == PV->Size ||
229+
(ElementIdx < PV->Size && PV->PageToDataPtrs[ElementIdx / PageSize]));
230+
}
231+
232+
bool equals(const MaterializedIterator &Other) const {
233+
assert(PV == Other.PV);
234+
verify();
235+
Other.verify();
236+
return ElementIdx == Other.ElementIdx;
237+
}
238+
};
239+
240+
/// Iterators over the materialized elements of the vector.
241+
///
242+
/// This includes all the elements belonging to allocated pages,
243+
/// even if they have not been accessed yet. It's enough to access
244+
/// one element of a page to materialize all the elements of the page.
245+
MaterializedIterator materialized_begin() const {
246+
// Look for the first valid page.
247+
for (size_t ElementIdx = 0; ElementIdx < Size; ElementIdx += PageSize)
248+
if (PageToDataPtrs[ElementIdx / PageSize])
249+
return MaterializedIterator(this, ElementIdx);
250+
251+
return MaterializedIterator(this, Size);
252+
}
253+
254+
MaterializedIterator materialized_end() const {
255+
return MaterializedIterator(this, Size);
256+
}
257+
258+
[[nodiscard]] llvm::iterator_range<MaterializedIterator>
259+
materialized() const {
260+
return {materialized_begin(), materialized_end()};
261+
}
262+
};
263+
} // namespace llvm
264+
#endif // LLVM_ADT_PAGEDVECTOR_H

interpreter/llvm-project/clang/include/clang/Basic/SourceManager.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include "clang/Basic/Diagnostic.h"
3838
#include "clang/Basic/FileEntry.h"
3939
#include "clang/Basic/FileManager.h"
40+
#include "clang/Basic/PagedVector.h"
4041
#include "clang/Basic/SourceLocation.h"
4142
#include "llvm/ADT/ArrayRef.h"
4243
#include "llvm/ADT/BitVector.h"
@@ -699,7 +700,7 @@ class SourceManager : public RefCountedBase<SourceManager> {
699700
///
700701
/// Negative FileIDs are indexes into this table. To get from ID to an index,
701702
/// use (-ID - 2).
702-
SmallVector<SrcMgr::SLocEntry, 0> LoadedSLocEntryTable;
703+
llvm::PagedVector<SrcMgr::SLocEntry> LoadedSLocEntryTable;
703704

704705
/// The starting offset of the next local SLocEntry.
705706
///

interpreter/llvm-project/clang/include/clang/Serialization/ASTReader.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "clang/Basic/DiagnosticOptions.h"
1919
#include "clang/Basic/IdentifierTable.h"
2020
#include "clang/Basic/OpenCLOptions.h"
21+
#include "clang/Basic/PagedVector.h"
2122
#include "clang/Basic/SourceLocation.h"
2223
#include "clang/Basic/Version.h"
2324
#include "clang/Lex/ExternalPreprocessorSource.h"
@@ -485,7 +486,7 @@ class ASTReader
485486
///
486487
/// When the pointer at index I is non-NULL, the type with
487488
/// ID = (I + 1) << FastQual::Width has already been loaded
488-
std::vector<QualType> TypesLoaded;
489+
llvm::PagedVector<QualType> TypesLoaded;
489490

490491
using GlobalTypeMapType =
491492
ContinuousRangeMap<serialization::TypeID, ModuleFile *, 4>;
@@ -499,7 +500,7 @@ class ASTReader
499500
///
500501
/// When the pointer at index I is non-NULL, the declaration with ID
501502
/// = I + 1 has already been loaded.
502-
std::vector<Decl *> DeclsLoaded;
503+
llvm::PagedVector<Decl *> DeclsLoaded;
503504

504505
using GlobalDeclMapType =
505506
ContinuousRangeMap<serialization::DeclID, ModuleFile *, 4>;

interpreter/llvm-project/clang/lib/Basic/SourceManager.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2373,11 +2373,11 @@ SourceManager::MemoryBufferSizes SourceManager::getMemoryBufferSizes() const {
23732373
}
23742374

23752375
size_t SourceManager::getDataStructureSizes() const {
2376-
size_t size = llvm::capacity_in_bytes(MemBufferInfos)
2377-
+ llvm::capacity_in_bytes(LocalSLocEntryTable)
2378-
+ llvm::capacity_in_bytes(LoadedSLocEntryTable)
2379-
+ llvm::capacity_in_bytes(SLocEntryLoaded)
2380-
+ llvm::capacity_in_bytes(FileInfos);
2376+
size_t size = llvm::capacity_in_bytes(MemBufferInfos) +
2377+
llvm::capacity_in_bytes(LocalSLocEntryTable) +
2378+
llvm::capacity_in_bytes(LoadedSLocEntryTable) +
2379+
llvm::capacity_in_bytes(SLocEntryLoaded) +
2380+
llvm::capacity_in_bytes(FileInfos);
23812381

23822382
if (OverriddenFilesInfo)
23832383
size += llvm::capacity_in_bytes(OverriddenFilesInfo->OverriddenFiles);

interpreter/llvm-project/clang/lib/Serialization/ASTReader.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7891,9 +7891,10 @@ void ASTReader::PrintStats() {
78917891
std::fprintf(stderr, "*** AST File Statistics:\n");
78927892

78937893
unsigned NumTypesLoaded =
7894-
TypesLoaded.size() - llvm::count(TypesLoaded, QualType());
7894+
TypesLoaded.size() - llvm::count(TypesLoaded.materialized(), QualType());
78957895
unsigned NumDeclsLoaded =
7896-
DeclsLoaded.size() - llvm::count(DeclsLoaded, (Decl *)nullptr);
7896+
DeclsLoaded.size() -
7897+
llvm::count(DeclsLoaded.materialized(), (Decl *)nullptr);
78977898
unsigned NumIdentifiersLoaded =
78987899
IdentifiersLoaded.size() -
78997900
llvm::count(IdentifiersLoaded, (IdentifierInfo *)nullptr);
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
ROOT-llvm16-20240115-01
1+
ROOT-llvm16-20240116-01

0 commit comments

Comments
 (0)