From c55b6066213d0b46fa894c5cc4aa8ccd66675d5a Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 31 Oct 2022 18:39:21 +0100 Subject: [PATCH 1/6] feat(metrics): add support to not share data among Metrics instance --- aws_lambda_powertools/metrics/base.py | 4 +- aws_lambda_powertools/metrics/metrics.py | 45 +++++++++++++++++----- tests/functional/test_metrics.py | 48 ++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 11 deletions(-) diff --git a/aws_lambda_powertools/metrics/base.py b/aws_lambda_powertools/metrics/base.py index 29a780d0af1..5dfaf85efb8 100644 --- a/aws_lambda_powertools/metrics/base.py +++ b/aws_lambda_powertools/metrics/base.py @@ -81,14 +81,16 @@ def __init__( namespace: Optional[str] = None, metadata_set: Optional[Dict[str, Any]] = None, service: Optional[str] = None, + default_dimensions: Optional[Dict[str, Any]] = None, ): self.metric_set = metric_set if metric_set is not None else {} self.dimension_set = dimension_set if dimension_set is not None else {} self.namespace = resolve_env_var_choice(choice=namespace, env=os.getenv(constants.METRICS_NAMESPACE_ENV)) self.service = resolve_env_var_choice(choice=service, env=os.getenv(constants.SERVICE_NAME_ENV)) + self.metadata_set = metadata_set if metadata_set is not None else {} + self.default_dimensions = default_dimensions if default_dimensions is not None else {} self._metric_units = [unit.value for unit in MetricUnit] self._metric_unit_options = list(MetricUnit.__members__) - self.metadata_set = metadata_set if metadata_set is not None else {} def add_metric(self, name: str, unit: Union[MetricUnit, str], value: float) -> None: """Adds given metric diff --git a/aws_lambda_powertools/metrics/metrics.py b/aws_lambda_powertools/metrics/metrics.py index cbf1d2eb2e2..801958394b6 100644 --- a/aws_lambda_powertools/metrics/metrics.py +++ b/aws_lambda_powertools/metrics/metrics.py @@ -55,6 +55,9 @@ def lambda_handler(): service name to be used as metric dimension, by default "service_undefined" namespace : str, optional Namespace for metrics + singleton : boolean, by default True + Whether to reuse metrics data across Metrics instances (isolation) + Raises ------ @@ -66,27 +69,49 @@ def lambda_handler(): When metric object fails EMF schema validation """ + # NOTE: We use class attrs to share metrics data across instances + # this allows customers to initialize Metrics() throughout their code base (and middlewares) + # and not get caught by accident with metrics data loss + # e.g., m1 and m2 add metric ProductCreated, however m1 has 'version' dimension but m2 doesn't + # Result: ProductCreated is created twice as we now have 2 different EMF blobs _metrics: Dict[str, Any] = {} _dimensions: Dict[str, str] = {} _metadata: Dict[str, Any] = {} _default_dimensions: Dict[str, Any] = {} - def __init__(self, service: Optional[str] = None, namespace: Optional[str] = None): + def __init__(self, service: Optional[str] = None, namespace: Optional[str] = None, singleton: bool = True): self.metric_set = self._metrics self.service = service self.namespace: Optional[str] = namespace self.metadata_set = self._metadata self.default_dimensions = self._default_dimensions self.dimension_set = self._dimensions - self.dimension_set.update(**self._default_dimensions) - - super().__init__( - metric_set=self.metric_set, - dimension_set=self.dimension_set, - namespace=self.namespace, - metadata_set=self.metadata_set, - service=self.service, - ) + + # We couldn't find a better name for a new class; 'singleton' param fits the purpose + # Customers 
can now disable data sharing with singleton=False + # It unlocks distinct namespace metrics, multi-EMF blobs and multi-tenant use cases + # See https://github.com/awslabs/aws-lambda-powertools-python/issues/1668 + if singleton: + self.dimension_set.update(**self._default_dimensions) + return super().__init__( + metric_set=self.metric_set, + dimension_set=self.dimension_set, + namespace=self.namespace, + metadata_set=self.metadata_set, + service=self.service, + default_dimensions=self.default_dimensions, + ) + + # NOTE: With class initialized, we can safely clean this instance attrs + # this ensures instantiating a non-singleton instance after a singleton instance + # won't affect metric data sets, as we have different memory pointers + # e.g., `self._metrics != Metrics._metrics` (instance attrs vs class attrs) + self._metrics = {} + self._dimensions = {} + self._metadata = {} + self._default_dimensions = {} + + return super().__init__() def set_default_dimensions(self, **dimensions) -> None: """Persist dimensions across Lambda invocations diff --git a/tests/functional/test_metrics.py b/tests/functional/test_metrics.py index 96dd3b41b25..e24952a64d6 100644 --- a/tests/functional/test_metrics.py +++ b/tests/functional/test_metrics.py @@ -925,3 +925,51 @@ def test_metrics_reuse_metadata_set(metric, dimension, namespace): # THEN both class instances should have the same metadata set assert my_metrics_2.metadata_set == my_metrics.metadata_set + + +def test_metrics_singleton_disabled_isolates_data_set(metric, dimension, namespace, metadata): + # GIVEN two Metrics instance are initialized, but one has singleton disabled + my_metrics = Metrics(namespace=namespace) + isolated_metrics = Metrics(namespace=namespace, singleton=False) + + # WHEN metrics, dimensions and metadata are added to the first instance + my_metrics.add_dimension(**dimension) + my_metrics.add_metric(**metric) + my_metrics.add_metadata(**metadata) + + # THEN the non-singleton instance should not have them + assert my_metrics.metric_set != isolated_metrics.metric_set + assert my_metrics.metadata_set != isolated_metrics.metadata_set + assert my_metrics.dimension_set != isolated_metrics.dimension_set + + +def test_metrics_singleton_disabled_do_not_share_default_dimensions(dimension, namespace): + # GIVEN Metrics is initialized with a default dimension + my_metrics = Metrics(namespace=namespace) + my_metrics.set_default_dimensions(**dimension) + + # WHEN a non-singleton Metrics instance is initialized thereafter + isolated_metrics = Metrics(namespace=namespace, singleton=False) + + # THEN the non-singleton instance should not have them + assert my_metrics.default_dimensions != isolated_metrics.default_dimensions + + +def test_metrics_singleton_disabled_do_not_clear_existing_data_set(metric, dimension, namespace, metadata): + # GIVEN Metrics is initialized with some data + my_metrics = Metrics(namespace=namespace) + my_metrics.add_dimension(**dimension) + my_metrics.add_metric(**metric) + my_metrics.add_metadata(**metadata) + + # WHEN a non-singleton Metrics instance is initialized thereafter + _ = Metrics(namespace=namespace, singleton=False) + my_metrics_2 = Metrics(namespace=namespace) + + # THEN the existing metrics instance should still have their data + expected = serialize_single_metric(metric=metric, dimension=dimension, namespace=namespace) + my_metrics_output = my_metrics.serialize_metric_set() + my_metrics_2_output = my_metrics_2.serialize_metric_set() + + remove_timestamp(metrics=[my_metrics_output, my_metrics_2_output, 
expected]) + assert my_metrics_output == my_metrics_2_output From e3cc9f62a782d4e11b34e0398d211ebd1a817b99 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Tue, 1 Nov 2022 09:00:30 +0100 Subject: [PATCH 2/6] chore(metrics): add test for nested metric blobs --- tests/functional/test_metrics.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/functional/test_metrics.py b/tests/functional/test_metrics.py index e24952a64d6..eff951f30ba 100644 --- a/tests/functional/test_metrics.py +++ b/tests/functional/test_metrics.py @@ -973,3 +973,29 @@ def test_metrics_singleton_disabled_do_not_clear_existing_data_set(metric, dimen remove_timestamp(metrics=[my_metrics_output, my_metrics_2_output, expected]) assert my_metrics_output == my_metrics_2_output + + +def test_nested_log_metrics(metric, dimension, namespace, metadata, capsys): + # GIVEN two distinct Metrics are initialized + my_metrics = Metrics(namespace=namespace) + isolated_metrics = Metrics(namespace=namespace, singleton=False) + + my_metrics.add_metric(**metric) + my_metrics.add_dimension(**dimension) + my_metrics.add_metadata(**metadata) + + isolated_metrics.add_metric(**metric) + isolated_metrics.add_dimension(**dimension) + isolated_metrics.add_metadata(**metadata) + + # WHEN we nest log_metrics to serialize + # and flush all metrics at the end of a function execution + @isolated_metrics.log_metrics + @my_metrics.log_metrics + def lambda_handler(evt, ctx): + pass + + lambda_handler({}, {}) + + output = capture_metrics_output_multiple_emf_objects(capsys) + assert len(output) == 2 From ce7f87b74a134a74ff11e207e2930fd5a9bac2ba Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Tue, 1 Nov 2022 10:15:21 +0100 Subject: [PATCH 3/6] refactor(metrics): move common logic to MetricManagaer base class --- aws_lambda_powertools/metrics/base.py | 222 +++++++++++++++++++++- aws_lambda_powertools/metrics/metric.py | 123 +----------- aws_lambda_powertools/metrics/metrics.py | 156 ++------------- aws_lambda_powertools/shared/functions.py | 17 +- tests/functional/test_metrics.py | 2 +- 5 files changed, 256 insertions(+), 264 deletions(-) diff --git a/aws_lambda_powertools/metrics/base.py b/aws_lambda_powertools/metrics/base.py index 5dfaf85efb8..2d6dca8dc1f 100644 --- a/aws_lambda_powertools/metrics/base.py +++ b/aws_lambda_powertools/metrics/base.py @@ -1,11 +1,14 @@ import datetime +import functools import json import logging import numbers import os +import warnings from collections import defaultdict +from contextlib import contextmanager from enum import Enum -from typing import Any, Dict, List, Optional, Union +from typing import Any, Callable, Dict, Generator, List, Optional, Union from ..shared import constants from ..shared.functions import resolve_env_var_choice @@ -16,6 +19,8 @@ MAX_METRICS = 100 MAX_DIMENSIONS = 29 +is_cold_start = True + class MetricUnit(Enum): Seconds = "Seconds" @@ -122,7 +127,7 @@ def add_metric(self, name: str, unit: Union[MetricUnit, str], value: float) -> N if not isinstance(value, numbers.Number): raise MetricValueError(f"{value} is not a valid number") - unit = self.__extract_metric_unit_value(unit=unit) + unit = self._extract_metric_unit_value(unit=unit) metric: Dict = self.metric_set.get(name, defaultdict(list)) metric["Unit"] = unit metric["Value"].append(float(value)) @@ -181,7 +186,7 @@ def serialize_metric_set( if self.service and not self.dimension_set.get("service"): # self.service won't be a float - self.add_dimension(name="service", value=self.service) # type: ignore[arg-type] + 
self.add_dimension(name="service", value=self.service) if len(metrics) == 0: raise SchemaValidationError("Must contain at least one metric.") @@ -276,7 +281,86 @@ def add_metadata(self, key: str, value: Any) -> None: else: self.metadata_set[str(key)] = value - def __extract_metric_unit_value(self, unit: Union[str, MetricUnit]) -> str: + def clear_metrics(self) -> None: + logger.debug("Clearing out existing metric set from memory") + self.metric_set.clear() + self.dimension_set.clear() + self.metadata_set.clear() + + def log_metrics( + self, + lambda_handler: Union[Callable[[Dict, Any], Any], Optional[Callable[[Dict, Any, Optional[Dict]], Any]]] = None, + capture_cold_start_metric: bool = False, + raise_on_empty_metrics: bool = False, + default_dimensions: Optional[Dict[str, str]] = None, + ): + """Decorator to serialize and publish metrics at the end of a function execution. + + Be aware that the log_metrics **does call* the decorated function (e.g. lambda_handler). + + Example + ------- + **Lambda function using tracer and metrics decorators** + + from aws_lambda_powertools import Metrics, Tracer + + metrics = Metrics(service="payment") + tracer = Tracer(service="payment") + + @tracer.capture_lambda_handler + @metrics.log_metrics + def handler(event, context): + ... + + Parameters + ---------- + lambda_handler : Callable[[Any, Any], Any], optional + lambda function handler, by default None + capture_cold_start_metric : bool, optional + captures cold start metric, by default False + raise_on_empty_metrics : bool, optional + raise exception if no metrics are emitted, by default False + default_dimensions: Dict[str, str], optional + metric dimensions as key=value that will always be present + + Raises + ------ + e + Propagate error received + """ + + # If handler is None we've been called with parameters + # Return a partial function with args filled + if lambda_handler is None: + logger.debug("Decorator called with parameters") + return functools.partial( + self.log_metrics, + capture_cold_start_metric=capture_cold_start_metric, + raise_on_empty_metrics=raise_on_empty_metrics, + default_dimensions=default_dimensions, + ) + + @functools.wraps(lambda_handler) + def decorate(event, context): + try: + if default_dimensions: + self.set_default_dimensions(**default_dimensions) + response = lambda_handler(event, context) + if capture_cold_start_metric: + self._add_cold_start_metric(context=context) + finally: + if not raise_on_empty_metrics and not self.metric_set: + warnings.warn("No metrics to publish, skipping") + else: + metrics = self.serialize_metric_set() + self.clear_metrics() + print(json.dumps(metrics, separators=(",", ":"))) + + return response + + return decorate + + def _extract_metric_unit_value(self, unit: Union[str, MetricUnit]) -> str: """Return metric value from metric unit whether that's str or MetricUnit enum Parameters @@ -308,3 +392,133 @@ def __extract_metric_unit_value(self, unit: Union[str, MetricUnit]) -> str: unit = unit.value return unit + + def _add_cold_start_metric(self, context: Any) -> None: + """Add cold start metric and function_name dimension + + Parameters + ---------- + context : Any + Lambda context + """ + global is_cold_start + if is_cold_start: + logger.debug("Adding cold start metric and function_name dimension") + with single_metric(name="ColdStart", unit=MetricUnit.Count, value=1, namespace=self.namespace) as metric: + metric.add_dimension(name="function_name", value=context.function_name) + if self.service: + metric.add_dimension(name="service", 
value=str(self.service)) + is_cold_start = False + + +class SingleMetric(MetricManager): + """SingleMetric creates an EMF object with a single metric. + + EMF specification doesn't allow metrics with different dimensions. + SingleMetric overrides MetricManager's add_metric method to do just that. + + Use `single_metric` when you need to create metrics with different dimensions, + otherwise `aws_lambda_powertools.metrics.metrics.Metrics` is + a more cost effective option + + Environment variables + --------------------- + POWERTOOLS_METRICS_NAMESPACE : str + metric namespace + + Example + ------- + **Creates cold start metric with function_version as dimension** + + import json + from aws_lambda_powertools.metrics import single_metric, MetricUnit + metric = single_metric(namespace="ServerlessAirline") + + metric.add_metric(name="ColdStart", unit=MetricUnit.Count, value=1) + metric.add_dimension(name="function_version", value=47) + + print(json.dumps(metric.serialize_metric_set(), indent=4)) + + Parameters + ---------- + MetricManager : MetricManager + Inherits from `aws_lambda_powertools.metrics.base.MetricManager` + """ + + def add_metric(self, name: str, unit: Union[MetricUnit, str], value: float) -> None: + """Method to prevent more than one metric being created + + Parameters + ---------- + name : str + Metric name (e.g. BookingConfirmation) + unit : MetricUnit + Metric unit (e.g. "Seconds", MetricUnit.Seconds) + value : float + Metric value + """ + if len(self.metric_set) > 0: + logger.debug(f"Metric {name} already set, skipping...") + return + return super().add_metric(name, unit, value) + + +@contextmanager +def single_metric( + name: str, unit: MetricUnit, value: float, namespace: Optional[str] = None +) -> Generator[SingleMetric, None, None]: + """Context manager to simplify creation of a single metric + + Example + ------- + **Creates cold start metric with function_version as dimension** + + from aws_lambda_powertools import single_metric + from aws_lambda_powertools.metrics import MetricUnit + + with single_metric(name="ColdStart", unit=MetricUnit.Count, value=1, namespace="ServerlessAirline") as metric: + metric.add_dimension(name="function_version", value="47") + + **Same as above but set namespace using environment variable** + + $ export POWERTOOLS_METRICS_NAMESPACE="ServerlessAirline" + + from aws_lambda_powertools import single_metric + from aws_lambda_powertools.metrics import MetricUnit + + with single_metric(name="ColdStart", unit=MetricUnit.Count, value=1) as metric: + metric.add_dimension(name="function_version", value="47") + + Parameters + ---------- + name : str + Metric name + unit : MetricUnit + `aws_lambda_powertools.helper.models.MetricUnit` + value : float + Metric value + namespace: str + Namespace for metrics + + Yields + ------- + SingleMetric + SingleMetric class instance + + Raises + ------ + MetricUnitError + When metric metric isn't supported by CloudWatch + MetricValueError + When metric value isn't a number + SchemaValidationError + When metric object fails EMF schema validation + """ + metric_set: Optional[Dict] = None + try: + metric: SingleMetric = SingleMetric(namespace=namespace) + metric.add_metric(name=name, unit=unit, value=value) + yield metric + metric_set = metric.serialize_metric_set() + finally: + print(json.dumps(metric_set, separators=(",", ":"))) diff --git a/aws_lambda_powertools/metrics/metric.py b/aws_lambda_powertools/metrics/metric.py index 94b427738a1..5465889f1f0 100644 --- a/aws_lambda_powertools/metrics/metric.py +++ 
b/aws_lambda_powertools/metrics/metric.py @@ -1,121 +1,4 @@ -import json -import logging -from contextlib import contextmanager -from typing import Dict, Generator, Optional, Union +# NOTE: prevents circular inheritance import +from .base import SingleMetric, single_metric -from .base import MetricManager, MetricUnit - -logger = logging.getLogger(__name__) - - -class SingleMetric(MetricManager): - """SingleMetric creates an EMF object with a single metric. - - EMF specification doesn't allow metrics with different dimensions. - SingleMetric overrides MetricManager's add_metric method to do just that. - - Use `single_metric` when you need to create metrics with different dimensions, - otherwise `aws_lambda_powertools.metrics.metrics.Metrics` is - a more cost effective option - - Environment variables - --------------------- - POWERTOOLS_METRICS_NAMESPACE : str - metric namespace - - Example - ------- - **Creates cold start metric with function_version as dimension** - - import json - from aws_lambda_powertools.metrics import single_metric, MetricUnit - metric = single_metric(namespace="ServerlessAirline") - - metric.add_metric(name="ColdStart", unit=MetricUnit.Count, value=1) - metric.add_dimension(name="function_version", value=47) - - print(json.dumps(metric.serialize_metric_set(), indent=4)) - - Parameters - ---------- - MetricManager : MetricManager - Inherits from `aws_lambda_powertools.metrics.base.MetricManager` - """ - - def add_metric(self, name: str, unit: Union[MetricUnit, str], value: float) -> None: - """Method to prevent more than one metric being created - - Parameters - ---------- - name : str - Metric name (e.g. BookingConfirmation) - unit : MetricUnit - Metric unit (e.g. "Seconds", MetricUnit.Seconds) - value : float - Metric value - """ - if len(self.metric_set) > 0: - logger.debug(f"Metric {name} already set, skipping...") - return - return super().add_metric(name, unit, value) - - -@contextmanager -def single_metric( - name: str, unit: MetricUnit, value: float, namespace: Optional[str] = None -) -> Generator[SingleMetric, None, None]: - """Context manager to simplify creation of a single metric - - Example - ------- - **Creates cold start metric with function_version as dimension** - - from aws_lambda_powertools import single_metric - from aws_lambda_powertools.metrics import MetricUnit - - with single_metric(name="ColdStart", unit=MetricUnit.Count, value=1, namespace="ServerlessAirline") as metric: - metric.add_dimension(name="function_version", value="47") - - **Same as above but set namespace using environment variable** - - $ export POWERTOOLS_METRICS_NAMESPACE="ServerlessAirline" - - from aws_lambda_powertools import single_metric - from aws_lambda_powertools.metrics import MetricUnit - - with single_metric(name="ColdStart", unit=MetricUnit.Count, value=1) as metric: - metric.add_dimension(name="function_version", value="47") - - Parameters - ---------- - name : str - Metric name - unit : MetricUnit - `aws_lambda_powertools.helper.models.MetricUnit` - value : float - Metric value - namespace: str - Namespace for metrics - - Yields - ------- - SingleMetric - SingleMetric class instance - - Raises - ------ - MetricUnitError - When metric metric isn't supported by CloudWatch - MetricValueError - When metric value isn't a number - SchemaValidationError - When metric object fails EMF schema validation - """ - metric_set: Optional[Dict] = None - try: - metric: SingleMetric = SingleMetric(namespace=namespace) - metric.add_metric(name=name, unit=unit, value=value) - yield 
metric - metric_set = metric.serialize_metric_set() - finally: - print(json.dumps(metric_set, separators=(",", ":"))) +__all__ = ["SingleMetric", "single_metric"] diff --git a/aws_lambda_powertools/metrics/metrics.py b/aws_lambda_powertools/metrics/metrics.py index 801958394b6..0899456cf23 100644 --- a/aws_lambda_powertools/metrics/metrics.py +++ b/aws_lambda_powertools/metrics/metrics.py @@ -1,15 +1,6 @@ -import functools -import json -import logging -import warnings -from typing import Any, Callable, Dict, Optional, Union +from typing import Any, Dict -from .base import MetricManager, MetricUnit -from .metric import single_metric - -logger = logging.getLogger(__name__) - -is_cold_start = True +from .base import MetricManager class Metrics(MetricManager): @@ -55,9 +46,6 @@ def lambda_handler(): service name to be used as metric dimension, by default "service_undefined" namespace : str, optional Namespace for metrics - singleton : boolean, by default True - Whether to reuse metrics data across Metrics instances (isolation) - Raises ------ @@ -71,7 +59,7 @@ def lambda_handler(): # NOTE: We use class attrs to share metrics data across instances # this allows customers to initialize Metrics() throughout their code base (and middlewares) - # and not get caught by accident with metrics data loss + # and not get caught by accident with metrics data loss, or data deduplication # e.g., m1 and m2 add metric ProductCreated, however m1 has 'version' dimension but m2 doesn't # Result: ProductCreated is created twice as we now have 2 different EMF blobs _metrics: Dict[str, Any] = {} @@ -79,39 +67,23 @@ def lambda_handler(): _metadata: Dict[str, Any] = {} _default_dimensions: Dict[str, Any] = {} - def __init__(self, service: Optional[str] = None, namespace: Optional[str] = None, singleton: bool = True): + def __init__(self, service: str = "", namespace: str = ""): self.metric_set = self._metrics self.service = service - self.namespace: Optional[str] = namespace + self.namespace = namespace self.metadata_set = self._metadata self.default_dimensions = self._default_dimensions self.dimension_set = self._dimensions - # We couldn't find a better name for a new class; 'singleton' param fits the purpose - # Customers can now disable data sharing with singleton=False - # It unlocks distinct namespace metrics, multi-EMF blobs and multi-tenant use cases - # See https://github.com/awslabs/aws-lambda-powertools-python/issues/1668 - if singleton: - self.dimension_set.update(**self._default_dimensions) - return super().__init__( - metric_set=self.metric_set, - dimension_set=self.dimension_set, - namespace=self.namespace, - metadata_set=self.metadata_set, - service=self.service, - default_dimensions=self.default_dimensions, - ) - - # NOTE: With class initialized, we can safely clean this instance attrs - # this ensures instantiating a non-singleton instance after a singleton instance - # won't affect metric data sets, as we have different memory pointers - # e.g., `self._metrics != Metrics._metrics` (instance attrs vs class attrs) - self._metrics = {} - self._dimensions = {} - self._metadata = {} - self._default_dimensions = {} - - return super().__init__() + self.dimension_set.update(**self._default_dimensions) + return super().__init__( + metric_set=self.metric_set, + dimension_set=self.dimension_set, + namespace=self.namespace, + metadata_set=self.metadata_set, + service=self.service, + default_dimensions=self.default_dimensions, + ) def set_default_dimensions(self, **dimensions) -> None: """Persist dimensions 
across Lambda invocations @@ -132,7 +104,7 @@ def set_default_dimensions(self, **dimensions) -> None: @metrics.log_metrics() def lambda_handler(): - return True + return True """ for name, value in dimensions.items(): self.add_dimension(name, value) @@ -143,98 +115,6 @@ def clear_default_dimensions(self) -> None: self.default_dimensions.clear() def clear_metrics(self) -> None: - logger.debug("Clearing out existing metric set from memory") - self.metric_set.clear() - self.dimension_set.clear() - self.metadata_set.clear() - self.set_default_dimensions(**self.default_dimensions) # re-add default dimensions - - def log_metrics( - self, - lambda_handler: Union[Callable[[Dict, Any], Any], Optional[Callable[[Dict, Any, Optional[Dict]], Any]]] = None, - capture_cold_start_metric: bool = False, - raise_on_empty_metrics: bool = False, - default_dimensions: Optional[Dict[str, str]] = None, - ): - """Decorator to serialize and publish metrics at the end of a function execution. - - Be aware that the log_metrics **does call* the decorated function (e.g. lambda_handler). - - Example - ------- - **Lambda function using tracer and metrics decorators** - - from aws_lambda_powertools import Metrics, Tracer - - metrics = Metrics(service="payment") - tracer = Tracer(service="payment") - - @tracer.capture_lambda_handler - @metrics.log_metrics - def handler(event, context): - ... - - Parameters - ---------- - lambda_handler : Callable[[Any, Any], Any], optional - lambda function handler, by default None - capture_cold_start_metric : bool, optional - captures cold start metric, by default False - raise_on_empty_metrics : bool, optional - raise exception if no metrics are emitted, by default False - default_dimensions: Dict[str, str], optional - metric dimensions as key=value that will always be present - - Raises - ------ - e - Propagate error received - """ - - # If handler is None we've been called with parameters - # Return a partial function with args filled - if lambda_handler is None: - logger.debug("Decorator called with parameters") - return functools.partial( - self.log_metrics, - capture_cold_start_metric=capture_cold_start_metric, - raise_on_empty_metrics=raise_on_empty_metrics, - default_dimensions=default_dimensions, - ) - - @functools.wraps(lambda_handler) - def decorate(event, context): - try: - if default_dimensions: - self.set_default_dimensions(**default_dimensions) - response = lambda_handler(event, context) - if capture_cold_start_metric: - self.__add_cold_start_metric(context=context) - finally: - if not raise_on_empty_metrics and not self.metric_set: - warnings.warn("No metrics to publish, skipping") - else: - metrics = self.serialize_metric_set() - self.clear_metrics() - print(json.dumps(metrics, separators=(",", ":"))) - - return response - - return decorate - - def __add_cold_start_metric(self, context: Any) -> None: - """Add cold start metric and function_name dimension - - Parameters - ---------- - context : Any - Lambda context - """ - global is_cold_start - if is_cold_start: - logger.debug("Adding cold start metric and function_name dimension") - with single_metric(name="ColdStart", unit=MetricUnit.Count, value=1, namespace=self.namespace) as metric: - metric.add_dimension(name="function_name", value=context.function_name) - if self.service: - metric.add_dimension(name="service", value=str(self.service)) - is_cold_start = False + super().clear_metrics() + # re-add default dimensions + self.set_default_dimensions(**self.default_dimensions) diff --git 
a/aws_lambda_powertools/shared/functions.py b/aws_lambda_powertools/shared/functions.py index 30070382d31..fb4eedb7f36 100644 --- a/aws_lambda_powertools/shared/functions.py +++ b/aws_lambda_powertools/shared/functions.py @@ -3,7 +3,7 @@ import os import warnings from binascii import Error as BinAsciiError -from typing import Optional, Union +from typing import Optional, Union, overload from aws_lambda_powertools.shared import constants @@ -47,6 +47,21 @@ def resolve_truthy_env_var_choice(env: str, choice: Optional[bool] = None) -> bo return choice if choice is not None else strtobool(env) +@overload +def resolve_env_var_choice(env: Optional[str], choice: float) -> float: + ... + + +@overload +def resolve_env_var_choice(env: Optional[str], choice: str) -> str: + ... + + +@overload +def resolve_env_var_choice(env: Optional[str], choice: Optional[str]) -> str: + ... + + def resolve_env_var_choice( env: Optional[str] = None, choice: Optional[Union[str, float]] = None ) -> Optional[Union[str, float]]: diff --git a/tests/functional/test_metrics.py b/tests/functional/test_metrics.py index eff951f30ba..218dca341ef 100644 --- a/tests/functional/test_metrics.py +++ b/tests/functional/test_metrics.py @@ -12,7 +12,7 @@ MetricValueError, SchemaValidationError, ) -from aws_lambda_powertools.metrics import metrics as metrics_global +from aws_lambda_powertools.metrics import base as metrics_global from aws_lambda_powertools.metrics.base import MAX_DIMENSIONS, MetricManager From 2c5a74bf512acd6a3585b01ad10cf5e0f5f04fbb Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Tue, 1 Nov 2022 10:27:43 +0100 Subject: [PATCH 4/6] chore(tests): ensure env_var continues to work for refactored Metrics --- aws_lambda_powertools/metrics/metrics.py | 10 ++++------ tests/functional/test_metrics.py | 23 +++++++++++++++++++++++ 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/aws_lambda_powertools/metrics/metrics.py b/aws_lambda_powertools/metrics/metrics.py index 0899456cf23..47e3255381b 100644 --- a/aws_lambda_powertools/metrics/metrics.py +++ b/aws_lambda_powertools/metrics/metrics.py @@ -1,4 +1,4 @@ -from typing import Any, Dict +from typing import Any, Dict, Optional from .base import MetricManager @@ -67,21 +67,19 @@ def lambda_handler(): _metadata: Dict[str, Any] = {} _default_dimensions: Dict[str, Any] = {} - def __init__(self, service: str = "", namespace: str = ""): + def __init__(self, service: Optional[str] = None, namespace: Optional[str] = None): self.metric_set = self._metrics - self.service = service - self.namespace = namespace self.metadata_set = self._metadata self.default_dimensions = self._default_dimensions self.dimension_set = self._dimensions self.dimension_set.update(**self._default_dimensions) return super().__init__( + namespace=namespace, + service=service, metric_set=self.metric_set, dimension_set=self.dimension_set, - namespace=self.namespace, metadata_set=self.metadata_set, - service=self.service, default_dimensions=self.default_dimensions, ) diff --git a/tests/functional/test_metrics.py b/tests/functional/test_metrics.py index 218dca341ef..bc6b6f116fa 100644 --- a/tests/functional/test_metrics.py +++ b/tests/functional/test_metrics.py @@ -209,6 +209,29 @@ def test_service_env_var(monkeypatch, capsys, metric, namespace): assert expected == output +def test_service_env_var_with_metrics_instance(monkeypatch, capsys, metric, namespace, service): + # GIVEN we use POWERTOOLS_SERVICE_NAME + monkeypatch.setenv("POWERTOOLS_SERVICE_NAME", service) + + # WHEN initializing Metrics without an 
explicit service name + metrics = Metrics(namespace=namespace) + metrics.add_metric(**metric) + + @metrics.log_metrics + def lambda_handler(_, __): + pass + + lambda_handler({}, {}) + + output = capture_metrics_output(capsys) + expected_dimension = {"name": "service", "value": service} + expected = serialize_single_metric(metric=metric, dimension=expected_dimension, namespace=namespace) + + # THEN a metric should be logged using the implicitly created "service" dimension + remove_timestamp(metrics=[output, expected]) + assert expected == output + + def test_metrics_spillover(monkeypatch, capsys, metric, dimension, namespace, a_hundred_metrics): # GIVEN Metrics is initialized and we have over a hundred metrics to add my_metrics = Metrics(namespace=namespace) From 83be0fcc035aa676cca4249d1140138d2b324b26 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Tue, 1 Nov 2022 10:58:49 +0100 Subject: [PATCH 5/6] feat(metrics): add EphemeralMetrics as alternative to singleton Metrics class --- aws_lambda_powertools/metrics/__init__.py | 3 +- aws_lambda_powertools/metrics/base.py | 8 +++- aws_lambda_powertools/metrics/metrics.py | 14 +++++- tests/functional/test_metrics.py | 54 +++++++++-------------- 4 files changed, 42 insertions(+), 37 deletions(-) diff --git a/aws_lambda_powertools/metrics/__init__.py b/aws_lambda_powertools/metrics/__init__.py index 7379dad8b88..3315899da0b 100644 --- a/aws_lambda_powertools/metrics/__init__.py +++ b/aws_lambda_powertools/metrics/__init__.py @@ -3,10 +3,11 @@ from .base import MetricUnit from .exceptions import MetricUnitError, MetricValueError, SchemaValidationError from .metric import single_metric -from .metrics import Metrics +from .metrics import EphemeralMetrics, Metrics __all__ = [ "Metrics", + "EphemeralMetrics", "single_metric", "MetricUnit", "MetricUnitError", diff --git a/aws_lambda_powertools/metrics/base.py b/aws_lambda_powertools/metrics/base.py index 2d6dca8dc1f..b032d181811 100644 --- a/aws_lambda_powertools/metrics/base.py +++ b/aws_lambda_powertools/metrics/base.py @@ -86,14 +86,12 @@ def __init__( namespace: Optional[str] = None, metadata_set: Optional[Dict[str, Any]] = None, service: Optional[str] = None, - default_dimensions: Optional[Dict[str, Any]] = None, ): self.metric_set = metric_set if metric_set is not None else {} self.dimension_set = dimension_set if dimension_set is not None else {} self.namespace = resolve_env_var_choice(choice=namespace, env=os.getenv(constants.METRICS_NAMESPACE_ENV)) self.service = resolve_env_var_choice(choice=service, env=os.getenv(constants.SERVICE_NAME_ENV)) self.metadata_set = metadata_set if metadata_set is not None else {} - self.default_dimensions = default_dimensions if default_dimensions is not None else {} self._metric_units = [unit.value for unit in MetricUnit] self._metric_unit_options = list(MetricUnit.__members__) @@ -522,3 +520,9 @@ def single_metric( metric_set = metric.serialize_metric_set() finally: print(json.dumps(metric_set, separators=(",", ":"))) + + +def reset_cold_start_flag(): + global is_cold_start + if not is_cold_start: + is_cold_start = True diff --git a/aws_lambda_powertools/metrics/metrics.py b/aws_lambda_powertools/metrics/metrics.py index 47e3255381b..43a45ff885d 100644 --- a/aws_lambda_powertools/metrics/metrics.py +++ b/aws_lambda_powertools/metrics/metrics.py @@ -80,7 +80,6 @@ def __init__(self, service: Optional[str] = None, namespace: Optional[str] = Non metric_set=self.metric_set, dimension_set=self.dimension_set, metadata_set=self.metadata_set, - 
default_dimensions=self.default_dimensions, ) def set_default_dimensions(self, **dimensions) -> None: @@ -116,3 +115,16 @@ def clear_metrics(self) -> None: super().clear_metrics() # re-add default dimensions self.set_default_dimensions(**self.default_dimensions) + + +class EphemeralMetrics(MetricManager): + """Non-singleton version of Metrics to not persist metrics across instances + + NOTE: This is useful when you want to: + + - Create metrics for distinct namespaces + - Create the same metrics with different dimensions more than once + """ + + def __init__(self, service: Optional[str] = None, namespace: Optional[str] = None): + super().__init__(namespace=namespace, service=service) diff --git a/tests/functional/test_metrics.py b/tests/functional/test_metrics.py index bc6b6f116fa..c45c138ad59 100644 --- a/tests/functional/test_metrics.py +++ b/tests/functional/test_metrics.py @@ -7,13 +7,17 @@ from aws_lambda_powertools import Metrics, single_metric from aws_lambda_powertools.metrics import ( + EphemeralMetrics, MetricUnit, MetricUnitError, MetricValueError, SchemaValidationError, ) -from aws_lambda_powertools.metrics import base as metrics_global -from aws_lambda_powertools.metrics.base import MAX_DIMENSIONS, MetricManager +from aws_lambda_powertools.metrics.base import ( + MAX_DIMENSIONS, + MetricManager, + reset_cold_start_flag, +) @pytest.fixture(scope="function", autouse=True) @@ -21,7 +25,7 @@ def reset_metric_set(): metrics = Metrics() metrics.clear_metrics() metrics.clear_default_dimensions() - metrics_global.is_cold_start = True # ensure each test has cold start + reset_cold_start_flag() # ensure each test has cold start yield @@ -950,10 +954,10 @@ def test_metrics_reuse_metadata_set(metric, dimension, namespace): assert my_metrics_2.metadata_set == my_metrics.metadata_set -def test_metrics_singleton_disabled_isolates_data_set(metric, dimension, namespace, metadata): - # GIVEN two Metrics instance are initialized, but one has singleton disabled - my_metrics = Metrics(namespace=namespace) - isolated_metrics = Metrics(namespace=namespace, singleton=False) +def test_ephemeral_metrics_isolates_data_set(metric, dimension, namespace, metadata): + # GIVEN two EphemeralMetrics instances are initialized + my_metrics = EphemeralMetrics(namespace=namespace) + isolated_metrics = EphemeralMetrics(namespace=namespace) # WHEN metrics, dimensions and metadata are added to the first instance my_metrics.add_dimension(**dimension) @@ -966,42 +970,26 @@ def test_metrics_singleton_disabled_isolates_data_set(metric, dimension, namespa assert my_metrics.dimension_set != isolated_metrics.dimension_set -def test_metrics_singleton_disabled_do_not_share_default_dimensions(dimension, namespace): - # GIVEN Metrics is initialized with a default dimension +def test_ephemeral_metrics_combined_with_metrics(metric, dimension, namespace, metadata): + # GIVEN Metrics and EphemeralMetrics instances are initialized my_metrics = Metrics(namespace=namespace) - my_metrics.set_default_dimensions(**dimension) - - # WHEN a non-singleton Metrics instance is initialized thereafter - isolated_metrics = Metrics(namespace=namespace, singleton=False) - - # THEN the non-singleton instance should not have them - assert my_metrics.default_dimensions != isolated_metrics.default_dimensions - + isolated_metrics = EphemeralMetrics(namespace=namespace) -def test_metrics_singleton_disabled_do_not_clear_existing_data_set(metric, dimension, namespace, metadata): - # GIVEN Metrics is initialized with some data - my_metrics = 
Metrics(namespace=namespace) + # WHEN metrics, dimensions and metadata are added to the first instance my_metrics.add_dimension(**dimension) my_metrics.add_metric(**metric) my_metrics.add_metadata(**metadata) - # WHEN a non-singleton Metrics instance is initialized thereafter - _ = Metrics(namespace=namespace, singleton=False) - my_metrics_2 = Metrics(namespace=namespace) - - # THEN the existing metrics instance should still have their data - expected = serialize_single_metric(metric=metric, dimension=dimension, namespace=namespace) - my_metrics_output = my_metrics.serialize_metric_set() - my_metrics_2_output = my_metrics_2.serialize_metric_set() - - remove_timestamp(metrics=[my_metrics_output, my_metrics_2_output, expected]) - assert my_metrics_output == my_metrics_2_output + # THEN EphemeralMetrics instance should not have them + assert my_metrics.metric_set != isolated_metrics.metric_set + assert my_metrics.metadata_set != isolated_metrics.metadata_set + assert my_metrics.dimension_set != isolated_metrics.dimension_set -def test_nested_log_metrics(metric, dimension, namespace, metadata, capsys): +def test_ephemeral_metrics_nested_log_metrics(metric, dimension, namespace, metadata, capsys): # GIVEN two distinct Metrics are initialized my_metrics = Metrics(namespace=namespace) - isolated_metrics = Metrics(namespace=namespace, singleton=False) + isolated_metrics = EphemeralMetrics(namespace=namespace) my_metrics.add_metric(**metric) my_metrics.add_dimension(**dimension) From 777166f7747f1c8d7e5943f35edece0b38cff403 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Tue, 1 Nov 2022 12:38:56 +0100 Subject: [PATCH 6/6] docs(metrics): metrics isolation section and design explanation --- docs/core/metrics.md | 55 +++++++++++++++++++++++ examples/metrics/src/ephemeral_metrics.py | 9 ++++ 2 files changed, 64 insertions(+) create mode 100644 examples/metrics/src/ephemeral_metrics.py diff --git a/docs/core/metrics.md b/docs/core/metrics.md index 45e3ce1a4c0..e02b247f117 100644 --- a/docs/core/metrics.md +++ b/docs/core/metrics.md @@ -227,6 +227,61 @@ If you prefer not to use `log_metrics` because you might want to encapsulate add --8<-- "examples/metrics/src/single_metric.py" ``` +### Metrics isolation + +You can use `EphemeralMetrics` class when looking to isolate multiple instances of metrics with distinct namespaces and/or dimensions. + +!!! note "This is a typical use case is for multi-tenant, or emitting same metrics for distinct applications." + +```python hl_lines="1 4" title="EphemeralMetrics usage" +--8<-- "examples/metrics/src/ephemeral_metrics.py" +``` + +**Differences between `EphemeralMetrics` and `Metrics`** + +`EphemeralMetrics` has only two differences while keeping nearly the exact same set of features: + +| Feature | Metrics | EphemeralMetrics | +| ----------------------------------------------------------------------------------------------------------- | ------- | ---------------- | +| **Share data across instances** (metrics, dimensions, metadata, etc.) | Yes | - | +| **[Default dimensions](#adding-default-dimensions) that persists across Lambda invocations** (metric flush) | Yes | - | + +!!! question "Why not changing the default `Metrics` behaviour to not share data across instances?" + +This is an intentional design to prevent accidental data deduplication or data loss issues due to [CloudWatch EMF](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch_Embedded_Metric_Format_Specification.html){target="_blank"} metric dimension constraint. 

In CloudWatch, there are two metric ingestion mechanisms: [EMF (async)](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch_Embedded_Metric_Format_Specification.html){target="_blank"} and [`PutMetricData` API (sync)](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/cloudwatch.html#CloudWatch.Client.put_metric_data){target="_blank"}.

The former creates metrics asynchronously via CloudWatch Logs, and the latter uses a synchronous and more flexible ingestion API.

!!! important "Key concept"
    CloudWatch [considers a metric unique](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/cloudwatch_concepts.html#Metric){target="_blank"} by a combination of metric **name**, metric **namespace**, and zero or more metric **dimensions**.

With EMF, metric dimensions are shared with any metrics you define. With `PutMetricData` API, you can set a [list](https://docs.aws.amazon.com/AmazonCloudWatch/latest/APIReference/API_MetricDatum.html) defining one or more metrics with distinct dimensions.

This is a subtle yet important distinction. Imagine you had the following metrics to emit:

| Metric Name            | Dimension                                 | Intent             |
| ---------------------- | ----------------------------------------- | ------------------ |
| **SuccessfulBooking**  | service="booking", **tenant_id**="sample" | Application metric |
| **IntegrationLatency** | service="booking", function_name="sample" | Operational metric |
| **ColdStart**          | service="booking", function_name="sample" | Operational metric |

The `tenant_id` dimension could vary, leading to two common issues:

1. `ColdStart` metric will be created multiple times (N * number of unique `tenant_id` dimension values), despite the `function_name` being the same
2. `IntegrationLatency` metric will also be created multiple times due to `tenant_id` as well as `function_name` (may or may not be intentional)

These issues are exacerbated when you create **(A)** metric dimensions conditionally, and **(B)** multiple `Metrics` instances throughout your code instead of reusing them (globals). Subsequent `Metrics` instances will have (or lack) different metric dimensions, resulting in different metrics and data points with the same name.

!!! note "Intentional design to address these scenarios"

**On 1**, when you enable the [`capture_cold_start_metric` feature](#capturing-cold-start-metric), we transparently create and flush an additional EMF JSON blob that is independent of your application metrics. This prevents data pollution.

**On 2**, you can use `EphemeralMetrics` to create an additional EMF JSON blob from your application metric (`SuccessfulBooking`). This ensures that `IntegrationLatency` operational metric data points aren't tied to any dynamic dimension values like `tenant_id`.

That is why `Metrics` shares data across instances by default, as that covers 80% of use cases and different personas using Powertools. This allows them to instantiate `Metrics` in multiple places throughout their code - be it a separate file, a middleware, or an abstraction that sets default dimensions.
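
The sketch below shows one way to apply this split; it is illustrative only, and the namespace, service name, `tenant_id` value, and latency value are placeholders rather than code shipped with this change:

```python title="Keeping tenant-specific dimensions out of operational metrics (sketch)"
from aws_lambda_powertools import Metrics
from aws_lambda_powertools.metrics import EphemeralMetrics, MetricUnit

metrics = Metrics(namespace="ServerlessAirline", service="booking")  # data shared across instances
tenant_metrics = EphemeralMetrics(namespace="ServerlessAirline", service="booking")  # isolated instance


@metrics.log_metrics(capture_cold_start_metric=True)
@tenant_metrics.log_metrics
def lambda_handler(event: dict, context):
    # tenant_id only lands in the EphemeralMetrics EMF blob, so operational
    # metrics like ColdStart and IntegrationLatency keep stable dimensions
    tenant_metrics.add_dimension(name="tenant_id", value=event.get("tenant_id", "unknown"))
    tenant_metrics.add_metric(name="SuccessfulBooking", unit=MetricUnit.Count, value=1)

    metrics.add_metric(name="IntegrationLatency", unit=MetricUnit.Milliseconds, value=100)
```

Each decorator serializes and flushes its own EMF blob at the end of the invocation, which is the behaviour asserted by the new `test_ephemeral_metrics_nested_log_metrics` functional test.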
+ ## Testing your code ### Environment variables diff --git a/examples/metrics/src/ephemeral_metrics.py b/examples/metrics/src/ephemeral_metrics.py new file mode 100644 index 00000000000..930404a563f --- /dev/null +++ b/examples/metrics/src/ephemeral_metrics.py @@ -0,0 +1,9 @@ +from aws_lambda_powertools.metrics import EphemeralMetrics, MetricUnit +from aws_lambda_powertools.utilities.typing import LambdaContext + +metrics = EphemeralMetrics() + + +@metrics.log_metrics +def lambda_handler(event: dict, context: LambdaContext): + metrics.add_metric(name="SuccessfulBooking", unit=MetricUnit.Count, value=1)