Skip to content

feat(metrics): Stronger recursion protection #2426

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Oct 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 36 additions & 11 deletions sentry_sdk/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import zlib
from functools import wraps, partial
from threading import Event, Lock, Thread
from contextlib import contextmanager

from sentry_sdk._compat import text_type
from sentry_sdk.hub import Hub
Expand All @@ -26,6 +27,7 @@
from typing import Iterable
from typing import Callable
from typing import Optional
from typing import Generator
from typing import Tuple

from sentry_sdk._types import BucketKey
Expand Down Expand Up @@ -53,21 +55,33 @@
)


@contextmanager
def recursion_protection():
# type: () -> Generator[bool, None, None]
"""Enters recursion protection and returns the old flag."""
try:
in_metrics = _thread_local.in_metrics
except AttributeError:
in_metrics = False
_thread_local.in_metrics = True
try:
yield in_metrics
finally:
_thread_local.in_metrics = in_metrics


def metrics_noop(func):
# type: (Any) -> Any
"""Convenient decorator that uses `recursion_protection` to
make a function a noop.
"""

@wraps(func)
def new_func(*args, **kwargs):
# type: (*Any, **Any) -> Any
try:
in_metrics = _thread_local.in_metrics
except AttributeError:
in_metrics = False
_thread_local.in_metrics = True
try:
with recursion_protection() as in_metrics:
if not in_metrics:
return func(*args, **kwargs)
finally:
_thread_local.in_metrics = in_metrics

return new_func

Expand Down Expand Up @@ -449,7 +463,16 @@ def _emit(
encoded_metrics = _encode_metrics(flushable_buckets)
metric_item = Item(payload=encoded_metrics, type="statsd")
envelope = Envelope(items=[metric_item])
self._capture_func(envelope)

# A malfunctioning transport might create a forever loop of metric
# emission when it emits a metric in capture_envelope. We still
# allow the capture to take place, but interior metric incr calls
# or similar will be disabled. In the background thread this can
# never happen, but in the force flush case which happens in the
# foreground we might make it here unprotected.
with recursion_protection():
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the background thread, this is basically always a no-op as in_metrics is forcefully set to True. In the force flush case we might be flipping it here. Why is flush itself unprotected? Two reasons:

  1. on the sentry side we currently patch it, so making it a noop would break the logic there
  2. it's potentially brittle since _emit is invoked in more than one place, this should be safer.

self._capture_func(envelope)

return envelope

def _serialize_tags(
Expand Down Expand Up @@ -495,8 +518,10 @@ def _get_aggregator_and_update_tags(key, tags):

callback = client.options.get("_experiments", {}).get("before_emit_metric")
if callback is not None:
if not callback(key, updated_tags):
return None, updated_tags
with recursion_protection() as in_metrics:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Motivation here is to intentionally protect the callback. That way we only poke around on the thread local when we need to. This also means any user of the _get_aggregator_and_update_tags gets automatic protection and does not need to manually take care of marking their functions as noop.

if not in_metrics:
if not callback(key, updated_tags):
return None, updated_tags

return client.metrics_aggregator, updated_tags

Expand Down
31 changes: 31 additions & 0 deletions tests/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,8 @@ def before_emit(key, tags):
return False
tags["extra"] = "foo"
del tags["release"]
# this better be a noop!
metrics.incr("shitty-recursion")
return True

sentry_init(
Expand Down Expand Up @@ -501,3 +503,32 @@ def test_tag_serialization(sentry_init, capture_envelopes):
"release": "fun-release",
"environment": "not-fun-env",
}


def test_flush_recursion_protection(sentry_init, capture_envelopes, monkeypatch):
sentry_init(
release="fun-release",
environment="not-fun-env",
_experiments={"enable_metrics": True},
)
envelopes = capture_envelopes()
test_client = Hub.current.client

real_capture_envelope = test_client.transport.capture_envelope

def bad_capture_envelope(*args, **kwargs):
metrics.incr("bad-metric")
return real_capture_envelope(*args, **kwargs)

monkeypatch.setattr(test_client.transport, "capture_envelope", bad_capture_envelope)

metrics.incr("counter")

# flush twice to see the inner metric
Hub.current.flush()
Hub.current.flush()

(envelope,) = envelopes
m = parse_metrics(envelope.items[0].payload.get_bytes())
assert len(m) == 1
assert m[0][1] == "counter@none"