diff --git a/sentry_sdk/_copy.py b/sentry_sdk/_copy.py new file mode 100644 index 0000000000..d34061b1ff --- /dev/null +++ b/sentry_sdk/_copy.py @@ -0,0 +1,212 @@ +""" +A modified version of Python 3.11's copy.deepcopy (found in Python's 'cpython/Lib/copy.py') +that falls back to repr for non-datastructure types that we use for extracting frame local variables +in a safe way without holding references to the original objects. + +https://github.com/python/cpython/blob/v3.11.7/Lib/copy.py#L128-L241 + +Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023 Python Software Foundation; + +All Rights Reserved + + +PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 +-------------------------------------------- + +1. This LICENSE AGREEMENT is between the Python Software Foundation +("PSF"), and the Individual or Organization ("Licensee") accessing and +otherwise using this software ("Python") in source or binary form and +its associated documentation. + +2. Subject to the terms and conditions of this License Agreement, PSF hereby +grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, +analyze, test, perform and/or display publicly, prepare derivative works, +distribute, and otherwise use Python alone or in any derivative version, +provided, however, that PSF's License Agreement and PSF's notice of copyright, +i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023 Python Software Foundation; +All Rights Reserved" are retained in Python alone or in any derivative version +prepared by Licensee. + +3. 
In the event Licensee prepares a derivative work that is based on +or incorporates Python or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python. + +4. PSF is making Python available to Licensee on an "AS IS" +basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. Nothing in this License Agreement shall be deemed to create any +relationship of agency, partnership, or joint venture between PSF and +Licensee. This License Agreement does not grant permission to use PSF +trademarks or trade name in a trademark sense to endorse or promote +products or services of Licensee, or any third party. + +8. By copying, installing or otherwise using Python, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. 
+ +""" + +import types +import weakref +import sys +from collections.abc import Mapping, Sequence, Set + +from sentry_sdk.utils import ( + safe_repr, + serializable_str_types, + capture_internal_exception, + capture_event_disabled, +) +from sentry_sdk._types import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any, Optional, Union + + +# copying these over to avoid yet another circular dep +MAX_DATABAG_DEPTH = 5 +MAX_DATABAG_BREADTH = 10 + + +def deepcopy_fallback_repr(x, memo=None, _nil=[], stack_depth=0): # noqa: B006 + # type: (Any, Optional[dict[int, Any]], Any, int) -> Any + """Deep copy like operation on arbitrary Python objects that falls back to repr + for non-datastructure like objects. + Containers reached at stack_depth >= MAX_DATABAG_DEPTH are copied as empty and at most + MAX_DATABAG_BREADTH items per container are kept; the serializer would drop more anyway. + """ + with capture_event_disabled(): + try: + if memo is None: + memo = {} + + d = id(x) + y = memo.get(d, _nil) + if y is not _nil: + return y + + cls = type(x) + + copier = _deepcopy_dispatch.get(cls) + if copier is not None: + y = copier(x, memo, stack_depth=stack_depth + 1) + elif issubclass(cls, type): + y = _deepcopy_atomic(x, memo, stack_depth=stack_depth + 1) + elif isinstance(x, serializable_str_types): + y = safe_repr(x) + elif isinstance(x, Mapping): + y = _deepcopy_dict(x, memo, stack_depth=stack_depth + 1) + elif not isinstance(x, serializable_str_types) and isinstance( + x, (Set, Sequence) + ): + y = _deepcopy_list(x, memo, stack_depth=stack_depth + 1) + else: + y = safe_repr(x) + + # If y is its own copy, don't memoize.
+ if y is not x: + memo[d] = y + _keep_alive(x, memo) # Make sure x lives at least as long as d + return y + except BaseException: + capture_internal_exception(sys.exc_info()) + return "" + + +_deepcopy_dispatch = d = {} # type: dict[Any, Any] + + +def _deepcopy_atomic(x, memo, stack_depth=0): + # type: (Any, dict[int, Any], int) -> Any + return x + + +d[type(None)] = _deepcopy_atomic +d[type(Ellipsis)] = _deepcopy_atomic +d[type(NotImplemented)] = _deepcopy_atomic +d[int] = _deepcopy_atomic +d[float] = _deepcopy_atomic +d[bool] = _deepcopy_atomic +d[complex] = _deepcopy_atomic +d[bytes] = _deepcopy_atomic +d[str] = _deepcopy_atomic +d[types.CodeType] = _deepcopy_atomic +d[type] = _deepcopy_atomic +d[range] = _deepcopy_atomic +d[types.BuiltinFunctionType] = _deepcopy_atomic +d[types.FunctionType] = _deepcopy_atomic +d[weakref.ref] = _deepcopy_atomic +d[property] = _deepcopy_atomic + + +def _deepcopy_list(x, memo, stack_depth=0): + # type: (Union[Sequence[Any], Set[Any]], dict[int, Any], int) -> list[Any] + y = [] # type: list[Any] + memo[id(x)] = y + if stack_depth >= MAX_DATABAG_DEPTH: + return y + append = y.append + for i, a in enumerate(x): + if i >= MAX_DATABAG_BREADTH: + break + append(deepcopy_fallback_repr(a, memo, stack_depth=stack_depth + 1)) + return y + + +def _deepcopy_dict(x, memo, stack_depth=0): + # type: (Mapping[Any, Any], dict[int, Any], int) -> dict[Any, Any] + y = {} # type: dict[Any, Any] + memo[id(x)] = y + if stack_depth >= MAX_DATABAG_DEPTH: + return y + # Pass the depth on (as _deepcopy_list does); otherwise it resets to 0 and the limit never applies to mappings. + for i, (key, value) in enumerate(x.items()): + if i >= MAX_DATABAG_BREADTH: + break + copied_value = deepcopy_fallback_repr(value, memo, stack_depth=stack_depth + 1) + y[deepcopy_fallback_repr(key, memo, stack_depth=stack_depth + 1)] = copied_value + return y + + +def _deepcopy_method(x, memo, stack_depth=0): # Copy instance methods; stack_depth is required because the dispatcher passes it to every copier + # type: (types.MethodType, dict[int, Any], int) -> types.MethodType + return type(x)(x.__func__, deepcopy_fallback_repr(x.__self__, memo, stack_depth=stack_depth + 1)) + + +d[types.MethodType] = _deepcopy_method + +del d + + +def _keep_alive(x, memo): + # type: (Any, dict[int, Any]) -> None + 
"""Keeps a reference to the object x in the memo. + + Because we remember objects by their id, we have + to assure that possibly temporary objects are kept + alive by referencing them. + We store a reference at the id of the memo, which should + normally not be used unless someone tries to deepcopy + the memo itself... + """ + try: + memo[id(memo)].append(x) + except KeyError: + # aha, this is the first one :-) + memo[id(memo)] = [x] diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 6698ee527d..208e7fab5f 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -10,7 +10,6 @@ from sentry_sdk.utils import ( capture_internal_exceptions, current_stacktrace, - disable_capture_event, format_timestamp, get_sdk_name, get_type_name, @@ -726,9 +725,6 @@ def capture_event( :returns: An event ID. May be `None` if there is no DSN set or of if the SDK decided to discard the event for other reasons. In such situations setting `debug=True` on `init()` may help. """ - if disable_capture_event.get(False): - return None - if hint is None: hint = {} event_id = event.get("event_id") diff --git a/sentry_sdk/scope.py b/sentry_sdk/scope.py index 4e07e818c9..c3ea149d46 100644 --- a/sentry_sdk/scope.py +++ b/sentry_sdk/scope.py @@ -28,6 +28,7 @@ ) from sentry_sdk._types import TYPE_CHECKING from sentry_sdk.utils import ( + disable_capture_event, capture_internal_exception, capture_internal_exceptions, ContextVar, @@ -1130,6 +1131,9 @@ def capture_event(self, event, hint=None, scope=None, **scope_kwargs): :returns: An `event_id` if the SDK decided to send the event (see :py:meth:`sentry_sdk.client._Client.capture_event`). 
""" + if disable_capture_event.get(False): + return None + scope = self._merge_scopes(scope, scope_kwargs) event_id = self.get_client().capture_event(event=event, hint=hint, scope=scope) @@ -1157,6 +1161,9 @@ def capture_message(self, message, level=None, scope=None, **scope_kwargs): :returns: An `event_id` if the SDK decided to send the event (see :py:meth:`sentry_sdk.client._Client.capture_event`). """ + if disable_capture_event.get(False): + return None + if level is None: level = "info" @@ -1182,6 +1189,9 @@ def capture_exception(self, error=None, scope=None, **scope_kwargs): :returns: An `event_id` if the SDK decided to send the event (see :py:meth:`sentry_sdk.client._Client.capture_event`). """ + if disable_capture_event.get(False): + return None + if error is not None: exc_info = exc_info_from_error(error) else: diff --git a/sentry_sdk/serializer.py b/sentry_sdk/serializer.py index ff243eeadc..9768e05b79 100644 --- a/sentry_sdk/serializer.py +++ b/sentry_sdk/serializer.py @@ -10,6 +10,7 @@ format_timestamp, safe_repr, strip_string, + serializable_str_types, ) from sentry_sdk._types import TYPE_CHECKING @@ -33,10 +34,6 @@ Segment = Union[str, int] -# Bytes are technically not strings in Python 3, but we can serialize them -serializable_str_types = (str, bytes, bytearray, memoryview) - - # Maximum length of JSON-serialized event payloads that can be safely sent # before the server may reject the event due to its size. 
This is not intended # to reflect actual values defined server-side, but rather only be an upper diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py index 08d2768cde..096bcaa8da 100644 --- a/sentry_sdk/utils.py +++ b/sentry_sdk/utils.py @@ -10,6 +10,7 @@ import sys import threading import time +from contextlib import contextmanager from collections import namedtuple from datetime import datetime from decimal import Decimal @@ -50,6 +51,7 @@ Type, TypeVar, Union, + Generator, ) from gevent.hub import Hub @@ -71,6 +73,9 @@ SENSITIVE_DATA_SUBSTITUTE = "[Filtered]" +# Bytes are technically not strings in Python 3, but we can serialize them +serializable_str_types = (str, bytes, bytearray, memoryview) + def json_dumps(data): # type: (Any) -> bytes @@ -549,10 +554,11 @@ def safe_str(value): def safe_repr(value): # type: (Any) -> str - try: - return repr(value) - except Exception: - return "" + with capture_event_disabled(): + try: + return repr(value) + except Exception: + return "" def filename_for_module(module, abs_path): @@ -616,7 +622,9 @@ def serialize_frame( ) if include_local_variables: - rv["vars"] = frame.f_locals.copy() + from sentry_sdk._copy import deepcopy_fallback_repr + + rv["vars"] = deepcopy_fallback_repr(frame.f_locals) return rv @@ -1370,6 +1378,18 @@ def transaction_from_function(func): disable_capture_event = ContextVar("disable_capture_event") +@contextmanager +def capture_event_disabled(): + # type: () -> Generator[None, None, None] + """Set disable_capture_event to True inside the block and restore its previous value on exit.""" + previous = disable_capture_event.get(False) # remember the outer state: this manager is entered re-entrantly (safe_repr inside deepcopy_fallback_repr) + disable_capture_event.set(True) + try: + yield + finally: + disable_capture_event.set(previous) + + class ServerlessTimeoutWarning(Exception): # noqa: N818 """Raised when a serverless method is about to reach its timeout.""" diff --git a/tests/test_scrubber.py b/tests/test_scrubber.py index 2c4bd3aa90..5034121b83 100644 --- a/tests/test_scrubber.py +++ b/tests/test_scrubber.py @@ -187,3 +187,20 @@ def test_recursive_event_scrubber(sentry_init, capture_events): (event,) = events assert 
event["extra"]["deep"]["deeper"][0]["deepest"]["password"] == "'[Filtered]'" + + +def test_recursive_scrubber_does_not_override_original(sentry_init, capture_events): + sentry_init(event_scrubber=EventScrubber(recursive=True)) + events = capture_events() + + data = {"csrf": "secret"} + try: + raise RuntimeError("An error") + except Exception: + capture_exception() + + (event,) = events + frames = event["exception"]["values"][0]["stacktrace"]["frames"] + (frame,) = frames + assert data["csrf"] == "secret" + assert frame["vars"]["data"]["csrf"] == "[Filtered]"