Skip to content

Commit 8df02bf

Browse files
perf(profiling): Additionl performance improvements to the profiler (getsentry#1991)
This change adds additional performance improvements to the profiler after observing the following: - extracting filename information is expensive, so add a cache on to allow reuse of results - extracting the full frame information is expensive, but we only need to do it once since the subsequent occurrences can reuse previous results - the abs_path + lineno is sufficient to uniquely identify a frame, so use that as the frame key Co-authored-by: Anton Pirker <[email protected]>
1 parent a7bcdc2 commit 8df02bf

File tree

2 files changed

+89
-261
lines changed

2 files changed

+89
-261
lines changed

sentry_sdk/profiler.py

Lines changed: 35 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -73,13 +73,10 @@
7373

7474
RawFrame = Tuple[
7575
str, # abs_path
76-
Optional[str], # module
77-
Optional[str], # filename
78-
str, # function
7976
int, # lineno
8077
]
8178
RawStack = Tuple[RawFrame, ...]
82-
RawSample = Sequence[Tuple[str, Tuple[RawStackId, RawStack]]]
79+
RawSample = Sequence[Tuple[str, Tuple[RawStackId, RawStack, Deque[FrameType]]]]
8380

8481
ProcessedSample = TypedDict(
8582
"ProcessedSample",
@@ -249,7 +246,6 @@ def teardown_profiler():
249246

250247
def extract_stack(
251248
frame, # type: Optional[FrameType]
252-
cwd, # type: str
253249
prev_cache=None, # type: Optional[Tuple[RawStackId, RawStack, Deque[FrameType]]]
254250
max_stack_depth=MAX_STACK_DEPTH, # type: int
255251
):
@@ -278,7 +274,7 @@ def extract_stack(
278274
frame = f_back
279275

280276
if prev_cache is None:
281-
stack = tuple(extract_frame(frame, cwd) for frame in frames)
277+
stack = tuple(frame_key(frame) for frame in frames)
282278
else:
283279
_, prev_stack, prev_frames = prev_cache
284280
prev_depth = len(prev_frames)
@@ -292,9 +288,7 @@ def extract_stack(
292288
# Make sure to keep in mind that the stack is ordered from the inner most
293289
# from to the outer most frame so be careful with the indexing.
294290
stack = tuple(
295-
prev_stack[i]
296-
if i >= 0 and frame is prev_frames[i]
297-
else extract_frame(frame, cwd)
291+
prev_stack[i] if i >= 0 and frame is prev_frames[i] else frame_key(frame)
298292
for i, frame in zip(range(prev_depth - depth, prev_depth), frames)
299293
)
300294

@@ -314,8 +308,13 @@ def extract_stack(
314308
return stack_id, stack, frames
315309

316310

311+
def frame_key(frame):
312+
# type: (FrameType) -> RawFrame
313+
return (frame.f_code.co_filename, frame.f_lineno)
314+
315+
317316
def extract_frame(frame, cwd):
318-
# type: (FrameType, str) -> RawFrame
317+
# type: (FrameType, str) -> ProcessedFrame
319318
abs_path = frame.f_code.co_filename
320319

321320
try:
@@ -325,7 +324,7 @@ def extract_frame(frame, cwd):
325324

326325
# namedtuples can be many times slower when initialing
327326
# and accessing attribute so we opt to use a tuple here instead
328-
return (
327+
return {
329328
# This originally was `os.path.abspath(abs_path)` but that had
330329
# a large performance overhead.
331330
#
@@ -335,12 +334,12 @@ def extract_frame(frame, cwd):
335334
#
336335
# Additionally, since we are using normalized path already,
337336
# we skip calling `os.path.normpath` entirely.
338-
os.path.join(cwd, abs_path),
339-
module,
340-
filename_for_module(module, abs_path) or None,
341-
get_frame_name(frame),
342-
frame.f_lineno,
343-
)
337+
"abs_path": os.path.join(cwd, abs_path),
338+
"module": module,
339+
"filename": filename_for_module(module, abs_path) or None,
340+
"function": get_frame_name(frame),
341+
"lineno": frame.f_lineno,
342+
}
344343

345344

346345
if PY311:
@@ -625,8 +624,8 @@ def __exit__(self, ty, value, tb):
625624

626625
scope.profile = old_profile
627626

628-
def write(self, ts, sample):
629-
# type: (int, RawSample) -> None
627+
def write(self, cwd, ts, sample, frame_cache):
628+
# type: (str, int, RawSample, Dict[RawFrame, ProcessedFrame]) -> None
630629
if not self.active:
631630
return
632631

@@ -642,25 +641,23 @@ def write(self, ts, sample):
642641

643642
elapsed_since_start_ns = str(offset)
644643

645-
for tid, (stack_id, stack) in sample:
644+
for tid, (stack_id, raw_stack, frames) in sample:
646645
# Check if the stack is indexed first, this lets us skip
647646
# indexing frames if it's not necessary
648647
if stack_id not in self.indexed_stacks:
649-
for frame in stack:
650-
if frame not in self.indexed_frames:
651-
self.indexed_frames[frame] = len(self.indexed_frames)
652-
self.frames.append(
653-
{
654-
"abs_path": frame[0],
655-
"module": frame[1],
656-
"filename": frame[2],
657-
"function": frame[3],
658-
"lineno": frame[4],
659-
}
660-
)
648+
for i, raw_frame in enumerate(raw_stack):
649+
if raw_frame not in self.indexed_frames:
650+
self.indexed_frames[raw_frame] = len(self.indexed_frames)
651+
processed_frame = frame_cache.get(raw_frame)
652+
if processed_frame is None:
653+
processed_frame = extract_frame(frames[i], cwd)
654+
frame_cache[raw_frame] = processed_frame
655+
self.frames.append(processed_frame)
661656

662657
self.indexed_stacks[stack_id] = len(self.indexed_stacks)
663-
self.stacks.append([self.indexed_frames[frame] for frame in stack])
658+
self.stacks.append(
659+
[self.indexed_frames[raw_frame] for raw_frame in raw_stack]
660+
)
664661

665662
self.samples.append(
666663
{
@@ -833,18 +830,15 @@ def _sample_stack(*args, **kwargs):
833830
now = nanosecond_time()
834831

835832
raw_sample = {
836-
tid: extract_stack(frame, cwd, last_sample[0].get(tid))
833+
tid: extract_stack(frame, last_sample[0].get(tid))
837834
for tid, frame in sys._current_frames().items()
838835
}
839836

840837
# make sure to update the last sample so the cache has
841838
# the most recent stack for better cache hits
842839
last_sample[0] = raw_sample
843840

844-
sample = [
845-
(str(tid), (stack_id, stack))
846-
for tid, (stack_id, stack, _) in raw_sample.items()
847-
]
841+
sample = [(str(tid), data) for tid, data in raw_sample.items()]
848842

849843
# Move the new profiles into the active_profiles set.
850844
#
@@ -861,9 +855,11 @@ def _sample_stack(*args, **kwargs):
861855

862856
inactive_profiles = []
863857

858+
frame_cache = {} # type: Dict[RawFrame, ProcessedFrame]
859+
864860
for profile in self.active_profiles:
865861
if profile.active:
866-
profile.write(now, sample)
862+
profile.write(cwd, now, sample, frame_cache)
867863
else:
868864
# If a thread is marked inactive, we buffer it
869865
# to `inactive_profiles` so it can be removed.

0 commit comments

Comments
 (0)