#include "sanitizer_common/sanitizer_allocator_internal.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_dense_map.h"
+ #include "sanitizer_common/sanitizer_libc.h"
#include "sanitizer_common/sanitizer_mutex.h"
#include "sanitizer_common/sanitizer_placement_new.h"
#include "sanitizer_common/sanitizer_thread_safety.h"
+ #include "sanitizer_common/sanitizer_vector.h"

#include <assert.h>

using namespace __ctx_profile;

+ namespace {
+ // Keep track of all the context roots we actually saw, so we can then traverse
+ // them when the user asks for the profile in __llvm_ctx_profile_fetch.
+ __sanitizer::SpinMutex AllContextsMutex;
+ SANITIZER_GUARDED_BY(AllContextsMutex)
+ __sanitizer::Vector<ContextRoot *> AllContextRoots;
+
+ // Utility to taint a pointer by setting the LSB. We assume throughout that
+ // context addresses are even (really, they should be align(8), but "even"-ness
+ // is the minimum assumption).
+ // "Scratch contexts" are buffers that we return in certain cases - they are
+ // large enough to allow memory-safe counter access, but they don't link
+ // subcontexts below them (the runtime recognizes them and enforces that).
+ ContextNode *markAsScratch(const ContextNode *Ctx) {
+   return reinterpret_cast<ContextNode *>(reinterpret_cast<uint64_t>(Ctx) | 1);
+ }
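+ // For illustration only: the runtime later distinguishes scratch contexts by
+ // checking that taint bit (see the isScratch() uses further down). A minimal
+ // sketch of such a check, assuming nothing beyond the LSB convention above:
+ //   bool looksLikeScratch(const void *Ctx) {
+ //     return reinterpret_cast<uint64_t>(Ctx) & 1;
+ //   }
+ // (looksLikeScratch is a hypothetical name; the real predicate is declared
+ // elsewhere in this runtime.)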
+
+ // Used when getting the data from TLS. We don't *really* need to reset, but
+ // it's a simpler system if we do.
+ template <typename T> inline T consume(T &V) {
+   auto R = V;
+   V = {0};
+   return R;
+ }
+
+ // We allocate at least kBuffSize Arena pages. The scratch buffer is also that
+ // large.
+ constexpr size_t kPower = 20;
+ constexpr size_t kBuffSize = 1 << kPower;
+
+ // Highly unlikely we need more than kBuffSize for a context.
+ size_t getArenaAllocSize(size_t Needed) {
+   if (Needed >= kBuffSize)
+     return 2 * Needed;
+   return kBuffSize;
+ }
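+ // Worked example (purely illustrative): with kPower == 20, kBuffSize is
+ // 1 << 20 bytes (1 MiB); a context needing, say, 1.5 MiB would get a 3 MiB
+ // arena, while anything under 1 MiB gets the default 1 MiB arena.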
+
+ // Verify the structural integrity of the context.
+ bool validate(const ContextRoot *Root) {
+   // All contexts should be laid out in some arena page. Go over each arena
+   // allocated for this Root, and jump over contained contexts based on
+   // self-reported sizes.
+   __sanitizer::DenseMap<uint64_t, bool> ContextStartAddrs;
+   for (const auto *Mem = Root->FirstMemBlock; Mem; Mem = Mem->next()) {
+     const auto *Pos = Mem->start();
+     while (Pos < Mem->pos()) {
+       const auto *Ctx = reinterpret_cast<const ContextNode *>(Pos);
+       if (!ContextStartAddrs.insert({reinterpret_cast<uint64_t>(Ctx), true})
+                .second)
+         return false;
+       Pos += Ctx->size();
+     }
+   }
+
+   // Now traverse the contexts again the same way, but validate that all
+   // non-null subcontext addresses appear in the set computed above.
+   for (const auto *Mem = Root->FirstMemBlock; Mem; Mem = Mem->next()) {
+     const auto *Pos = Mem->start();
+     while (Pos < Mem->pos()) {
+       const auto *Ctx = reinterpret_cast<const ContextNode *>(Pos);
+       for (uint32_t I = 0; I < Ctx->callsites_size(); ++I)
+         for (auto *Sub = Ctx->subContexts()[I]; Sub; Sub = Sub->next())
+           if (!ContextStartAddrs.find(reinterpret_cast<uint64_t>(Sub)))
+             return false;
+
+       Pos += Ctx->size();
+     }
+   }
+   return true;
+ }
+ } // namespace
+
+ // The scratch buffer - what we give when we can't produce a real context (the
+ // scratch isn't "real" in that it's expected to be clobbered carelessly - we
+ // don't read it). The other important thing is that the callees from a scratch
+ // context also get a scratch context.
+ // Eventually this can be replaced with per-function buffers, a la the typical
+ // (flat) instrumented FDO buffers. The clobbering aspect won't apply there, but
+ // the part about determining the nature of the subcontexts does.
+ __thread char __Buffer[kBuffSize] = {0};
+
+ #define TheScratchContext                                                      \
+   markAsScratch(reinterpret_cast<ContextNode *>(__Buffer))
+
+ // Initialize the TLSes.
+ __thread void *volatile __llvm_ctx_profile_expected_callee[2] = {nullptr,
+                                                                  nullptr};
+ __thread ContextNode **volatile __llvm_ctx_profile_callsite[2] = {0, 0};
+
+ __thread ContextRoot *volatile __llvm_ctx_profile_current_context_root =
+     nullptr;
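+ // Note: index 0 of the two TLS arrays above is used by normal execution;
+ // index 1 is reserved for code running in a signal handler (see the
+ // discussion in __llvm_ctx_profile_get_context below).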
+
// FIXME(mtrofin): use malloc / mmap instead of sanitizer common APIs to reduce
// the dependency on the latter.
Arena *Arena::allocateNewArena(size_t Size, Arena *Prev) {
  assert(!Prev || Prev->Next == nullptr);
-   Arena *NewArena =
-       new (__sanitizer::InternalAlloc(Size + sizeof(Arena))) Arena(Size);
+   Arena *NewArena = new (__sanitizer::InternalAlloc(
+       Size + sizeof(Arena), /*cache=*/nullptr, /*alignment=*/ExpectedAlignment))
+       Arena(Size);
  if (Prev)
    Prev->Next = NewArena;
  return NewArena;
@@ -38,3 +133,187 @@ void Arena::freeArenaList(Arena *&A) {
  }
  A = nullptr;
}
+
+ inline ContextNode *ContextNode::alloc(char *Place, GUID Guid,
+                                        uint32_t NrCounters,
+                                        uint32_t NrCallsites,
+                                        ContextNode *Next) {
+   assert(reinterpret_cast<uint64_t>(Place) % ExpectedAlignment == 0);
+   return new (Place) ContextNode(Guid, NrCounters, NrCallsites, Next);
+ }
+
+ void ContextNode::reset() {
+   // FIXME(mtrofin): this is std::memset, which we can probably use if we
+   // drop/reduce the dependency on sanitizer_common.
+   for (uint32_t I = 0; I < NrCounters; ++I)
+     counters()[I] = 0;
+   for (uint32_t I = 0; I < NrCallsites; ++I)
+     for (auto *Next = subContexts()[I]; Next; Next = Next->Next)
+       Next->reset();
+ }
+
+ // If this is the first time we hit a callsite with this particular callee
+ // (identified by Guid), we need to allocate.
+ ContextNode *getCallsiteSlow(uint64_t Guid, ContextNode **InsertionPoint,
+                              uint32_t NrCounters, uint32_t NrCallsites) {
+   auto AllocSize = ContextNode::getAllocSize(NrCounters, NrCallsites);
+   auto *Mem = __llvm_ctx_profile_current_context_root->CurrentMem;
+   char *AllocPlace = Mem->tryBumpAllocate(AllocSize);
+   if (!AllocPlace) {
+     // If we failed to allocate on the current arena, allocate a new arena,
+     // and place it on __llvm_ctx_profile_current_context_root->CurrentMem so
+     // we find it from now on for other cases when we need to getCallsiteSlow.
+     // Note that allocateNewArena will link the allocated memory in the list
+     // of Arenas.
+     __llvm_ctx_profile_current_context_root->CurrentMem = Mem =
+         Mem->allocateNewArena(getArenaAllocSize(AllocSize), Mem);
+     AllocPlace = Mem->tryBumpAllocate(AllocSize);
+   }
+   auto *Ret = ContextNode::alloc(AllocPlace, Guid, NrCounters, NrCallsites,
+                                  *InsertionPoint);
+   *InsertionPoint = Ret;
+   return Ret;
+ }
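+ // Note: the new node is linked at the head of the callsite's list (its Next
+ // is the previous *InsertionPoint). This is how multiple indirect-call
+ // targets at the same callsite end up forming the linked list that
+ // __llvm_ctx_profile_get_context searches below.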
+
+ ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
+                                             uint32_t NrCounters,
+                                             uint32_t NrCallsites) {
+   // Fast "out" if we're not even doing contextual collection.
+   if (!__llvm_ctx_profile_current_context_root)
+     return TheScratchContext;
+
+   // Also fast "out" if the caller is scratch. We can tell by looking at the
+   // interior pointer into the subcontexts vector that the caller provided:
+   // if the caller context is scratch, so is that interior pointer (because
+   // all the address calculations use even - or, more precisely, 8-aligned -
+   // values).
+   auto **CallsiteContext = consume(__llvm_ctx_profile_callsite[0]);
+   if (!CallsiteContext || isScratch(CallsiteContext))
+     return TheScratchContext;
+
+   // If the callee isn't the expected one, return scratch.
+   // Signal handler(s) could have been invoked at any point in the execution.
+   // Should that have happened, and had it (the handler) been built with
+   // instrumentation, its __llvm_ctx_profile_get_context would have failed
+   // here. Its sub call graph would have then populated
+   // __llvm_ctx_profile_{expected_callee | callsite} at index 1.
+   // The normal call graph may be impacted in that, if the signal handler
+   // happened somewhere before we read the TLS here, we'd see the TLS reset
+   // and we'd also fail here. That would just mean we would lose counter
+   // values for the normal subgraph, this time around. That should be very
+   // unlikely, but if it happens too frequently, we should be able to detect
+   // discrepancies in entry counts (caller-callee). At the moment, the design
+   // goes on the assumption that this is so infrequent that it's not worth
+   // doing more for that case.
+   auto *ExpectedCallee = consume(__llvm_ctx_profile_expected_callee[0]);
+   if (ExpectedCallee != Callee)
+     return TheScratchContext;
+
+   auto *Callsite = *CallsiteContext;
+   // In the case of indirect calls, all the targets seen so far form a linked
+   // list here. Find the one corresponding to this callee.
+   while (Callsite && Callsite->guid() != Guid) {
+     Callsite = Callsite->next();
+   }
+   auto *Ret = Callsite ? Callsite
+                        : getCallsiteSlow(Guid, CallsiteContext, NrCounters,
+                                          NrCallsites);
+   if (Ret->callsites_size() != NrCallsites ||
+       Ret->counters_size() != NrCounters)
+     __sanitizer::Printf("[ctxprof] Returned ctx differs from what's asked: "
+                         "Context: %p, Asked: %lu %u %u, Got: %lu %u %u\n",
+                         Ret, Guid, NrCallsites, NrCounters, Ret->guid(),
+                         Ret->callsites_size(), Ret->counters_size());
+   Ret->onEntry();
+   return Ret;
+ }
+
+ // This should be called once for a Root. Allocate the first arena, set up the
+ // first context.
+ void setupContext(ContextRoot *Root, GUID Guid, uint32_t NrCounters,
+                   uint32_t NrCallsites) {
+   __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
+       &AllContextsMutex);
+   // Re-check - we got here without having taken the lock.
+   if (Root->FirstMemBlock)
+     return;
+   const auto Needed = ContextNode::getAllocSize(NrCounters, NrCallsites);
+   auto *M = Arena::allocateNewArena(getArenaAllocSize(Needed));
+   Root->FirstMemBlock = M;
+   Root->CurrentMem = M;
+   Root->FirstNode = ContextNode::alloc(M->tryBumpAllocate(Needed), Guid,
+                                        NrCounters, NrCallsites);
+   AllContextRoots.PushBack(Root);
+ }
+
+ ContextNode *__llvm_ctx_profile_start_context(
+     ContextRoot *Root, GUID Guid, uint32_t Counters,
+     uint32_t Callsites) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+   if (!Root->FirstMemBlock) {
+     setupContext(Root, Guid, Counters, Callsites);
+   }
+   if (Root->Taken.TryLock()) {
+     __llvm_ctx_profile_current_context_root = Root;
+     Root->FirstNode->onEntry();
+     return Root->FirstNode;
+   }
+   // If this thread couldn't take the lock, return scratch context.
+   __llvm_ctx_profile_current_context_root = nullptr;
+   return TheScratchContext;
+ }
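+ // Rough illustration of how instrumented code is expected to drive the APIs
+ // above (a sketch only - the real calls are emitted by the compiler, and
+ // theRoot / someCallee / CallsiteIndex below are made-up names):
+ //   ContextNode *Ctx = __llvm_ctx_profile_start_context(
+ //       &theRoot, RootGuid, NumCounters, NumCallsites);
+ //   __llvm_ctx_profile_expected_callee[0] = (void *)someCallee;
+ //   __llvm_ctx_profile_callsite[0] = &Ctx->subContexts()[CallsiteIndex];
+ //   someCallee(...); // internally calls __llvm_ctx_profile_get_context
+ //   __llvm_ctx_profile_release_context(&theRoot);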
+
+ void __llvm_ctx_profile_release_context(ContextRoot *Root)
+     SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
+   if (__llvm_ctx_profile_current_context_root) {
+     __llvm_ctx_profile_current_context_root = nullptr;
+     Root->Taken.Unlock();
+   }
+ }
+
+ void __llvm_ctx_profile_start_collection() {
+   size_t NrMemUnits = 0;
+   __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
+       &AllContextsMutex);
+   for (uint32_t I = 0; I < AllContextRoots.Size(); ++I) {
+     auto *Root = AllContextRoots[I];
+     __sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> Lock(
+         &Root->Taken);
+     for (auto *Mem = Root->FirstMemBlock; Mem; Mem = Mem->next())
+       ++NrMemUnits;
+
+     Root->FirstNode->reset();
+   }
+   __sanitizer::Printf("[ctxprof] Initial NrMemUnits: %zu\n", NrMemUnits);
+ }
+
+ bool __llvm_ctx_profile_fetch(
+     void *Data, bool (*Writer)(void *W, const __ctx_profile::ContextNode &)) {
+   assert(Writer);
+   __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
+       &AllContextsMutex);
+
+   for (int I = 0, E = AllContextRoots.Size(); I < E; ++I) {
+     auto *Root = AllContextRoots[I];
+     __sanitizer::GenericScopedLock<__sanitizer::StaticSpinMutex> TakenLock(
+         &Root->Taken);
+     if (!validate(Root)) {
+       __sanitizer::Printf("[ctxprof] Contextual Profile is %s\n", "invalid");
+       return false;
+     }
+     if (!Writer(Data, *Root->FirstNode))
+       return false;
+   }
+   return true;
+ }
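+ // A minimal sketch of the kind of Writer callback a caller might pass to
+ // __llvm_ctx_profile_fetch (illustrative only; writeRoot and MyState are
+ // made-up names, and the serialization itself is left out):
+ //   bool writeRoot(void *W, const __ctx_profile::ContextNode &Node) {
+ //     auto *State = static_cast<MyState *>(W);
+ //     // ... serialize Node (guid, counters, subcontexts) into State ...
+ //     return true; // returning false aborts the fetch
+ //   }
+ //   bool Ok = __llvm_ctx_profile_fetch(&State, writeRoot);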
+
+ void __llvm_ctx_profile_free() {
+   __sanitizer::GenericScopedLock<__sanitizer::SpinMutex> Lock(
+       &AllContextsMutex);
+   for (int I = 0, E = AllContextRoots.Size(); I < E; ++I)
+     for (auto *A = AllContextRoots[I]->FirstMemBlock; A;) {
+       auto *C = A;
+       A = A->next();
+       __sanitizer::InternalFree(C);
+     }
+   AllContextRoots.Reset();
+ }