diff --git a/.gitignore b/.gitignore index 2a814459161..990f6517fe9 100644 --- a/.gitignore +++ b/.gitignore @@ -314,4 +314,4 @@ examples/**/sam/.aws-sam cdk.out # NOTE: different accounts will be used for E2E thus creating unnecessary git clutter -cdk.context.json +cdk.context.json \ No newline at end of file diff --git a/.markdownlint.yaml b/.markdownlint.yaml index 4d571206e07..4529480ad19 100644 --- a/.markdownlint.yaml +++ b/.markdownlint.yaml @@ -73,8 +73,6 @@ MD013: tables: false # Include headings headings: true - # Include headings - headers: true # Strict length checking strict: false # Stern length checking @@ -107,8 +105,6 @@ MD023: true # MD024/no-duplicate-heading/no-duplicate-header - Multiple headings with the same content MD024: - # Only check sibling headings - allow_different_nesting: false # Only check sibling headings siblings_only: false diff --git a/aws_lambda_powertools/logging/constants.py b/aws_lambda_powertools/logging/constants.py new file mode 100644 index 00000000000..1845828d79a --- /dev/null +++ b/aws_lambda_powertools/logging/constants.py @@ -0,0 +1,5 @@ +# logger.powertools_handler is set with Powertools Logger handler; useful when there are many handlers +LOGGER_ATTRIBUTE_POWERTOOLS_HANDLER = "powertools_handler" +# logger.init attribute is set when Logger has been configured +LOGGER_ATTRIBUTE_PRECONFIGURED = "init" +LOGGER_ATTRIBUTE_HANDLER = "logger_handler" diff --git a/aws_lambda_powertools/logging/exceptions.py b/aws_lambda_powertools/logging/exceptions.py index 65b30906edf..db2fb0f04ba 100644 --- a/aws_lambda_powertools/logging/exceptions.py +++ b/aws_lambda_powertools/logging/exceptions.py @@ -1,2 +1,6 @@ class InvalidLoggerSamplingRateError(Exception): pass + + +class OrphanedChildLoggerError(Exception): + pass diff --git a/aws_lambda_powertools/logging/logger.py b/aws_lambda_powertools/logging/logger.py index dc03e1af8eb..5eadfecc6fa 100644 --- a/aws_lambda_powertools/logging/logger.py +++ b/aws_lambda_powertools/logging/logger.py @@ -19,9 +19,15 @@ Optional, TypeVar, Union, + cast, overload, ) +from aws_lambda_powertools.logging.constants import ( + LOGGER_ATTRIBUTE_HANDLER, + LOGGER_ATTRIBUTE_POWERTOOLS_HANDLER, + LOGGER_ATTRIBUTE_PRECONFIGURED, +) from aws_lambda_powertools.shared import constants from aws_lambda_powertools.shared.functions import ( extract_event_from_common_models, @@ -31,7 +37,7 @@ from aws_lambda_powertools.utilities import jmespath_utils from ..shared.types import AnyCallableT -from .exceptions import InvalidLoggerSamplingRateError +from .exceptions import InvalidLoggerSamplingRateError, OrphanedChildLoggerError from .filters import SuppressFilter from .formatter import ( RESERVED_FORMATTER_CUSTOM_KEYS, @@ -233,7 +239,6 @@ def __init__( self.child = child self.logger_formatter = logger_formatter self._stream = stream or sys.stdout - self.logger_handler = logger_handler or logging.StreamHandler(self._stream) self.log_uncaught_exceptions = log_uncaught_exceptions self._is_deduplication_disabled = resolve_truthy_env_var_choice( @@ -241,6 +246,7 @@ def __init__( ) self._default_log_keys = {"service": self.service, "sampling_rate": self.sampling_rate} self._logger = self._get_logger() + self.logger_handler = logger_handler or self._get_handler() # NOTE: This is primarily to improve UX, so IDEs can autocomplete LambdaPowertoolsFormatter options # previously, we masked all of them as kwargs thus limiting feature discovery @@ -264,7 +270,7 @@ def __init__( # Prevent __getattr__ from shielding unknown attribute errors in type checkers # https://github.com/aws-powertools/powertools-lambda-python/issues/1660 - if not TYPE_CHECKING: + if not TYPE_CHECKING: # pragma: no cover def __getattr__(self, name): # Proxy attributes not found to actual logger to support backward compatibility @@ -279,6 +285,18 @@ def _get_logger(self) -> logging.Logger: return logging.getLogger(logger_name) + def _get_handler(self) -> logging.Handler: + # is a logger handler already configured? + if getattr(self, LOGGER_ATTRIBUTE_HANDLER, None): + return self.logger_handler + + # for children, use parent's handler + if self.child: + return getattr(self._logger.parent, LOGGER_ATTRIBUTE_POWERTOOLS_HANDLER, None) # type: ignore[return-value] # always checked in formatting + + # otherwise, create a new stream handler (first time init) + return logging.StreamHandler(self._stream) + def _init_logger( self, formatter_options: Optional[Dict] = None, @@ -292,7 +310,7 @@ def _init_logger( # a) multiple handlers being attached # b) different sampling mechanisms # c) multiple messages from being logged as handlers can be duplicated - is_logger_preconfigured = getattr(self._logger, "init", False) + is_logger_preconfigured = getattr(self._logger, LOGGER_ATTRIBUTE_PRECONFIGURED, False) if self.child or is_logger_preconfigured: return @@ -317,6 +335,7 @@ def _init_logger( # std logging will return the same Logger with our attribute if name is reused logger.debug(f"Marking logger {self.service} as preconfigured") self._logger.init = True # type: ignore[attr-defined] + self._logger.powertools_handler = self.logger_handler # type: ignore[attr-defined] def _configure_sampling(self) -> None: """Dynamically set log level based on sampling rate @@ -613,7 +632,7 @@ def structure_logs(self, append: bool = False, formatter_options: Optional[Dict] # Mode 1 log_keys = {**self._default_log_keys, **keys} - is_logger_preconfigured = getattr(self._logger, "init", False) + is_logger_preconfigured = getattr(self._logger, LOGGER_ATTRIBUTE_PRECONFIGURED, False) if not is_logger_preconfigured: formatter = self.logger_formatter or LambdaPowertoolsFormatter(**formatter_options, **log_keys) self.registered_handler.setFormatter(formatter) @@ -672,15 +691,20 @@ def removeFilter(self, filter: logging._FilterType) -> None: # noqa: A002 # fil @property def registered_handler(self) -> logging.Handler: """Convenience property to access the first logger handler""" - # We ignore mypy here because self.child encodes whether or not self._logger.parent is - # None, mypy can't see this from context but we can - handlers = self._logger.parent.handlers if self.child else self._logger.handlers # type: ignore[union-attr] - return handlers[0] + return self._get_handler() @property def registered_formatter(self) -> BasePowertoolsFormatter: """Convenience property to access the first logger formatter""" - return self.registered_handler.formatter # type: ignore[return-value] + handler = self.registered_handler + if handler is None: + raise OrphanedChildLoggerError( + "Orphan child loggers cannot append nor remove keys until a parent is initialized first. " + "To solve this issue, you can A) make sure a parent logger is initialized first, or B) move append/remove keys operations to a later stage." # noqa: E501 + "Reference: https://docs.powertools.aws.dev/lambda/python/latest/core/logger/#reusing-logger-across-your-code", + ) + + return cast(BasePowertoolsFormatter, handler.formatter) @property def log_level(self) -> int: diff --git a/aws_lambda_powertools/shared/constants.py b/aws_lambda_powertools/shared/constants.py index babf834e0b4..9652e09a0b2 100644 --- a/aws_lambda_powertools/shared/constants.py +++ b/aws_lambda_powertools/shared/constants.py @@ -9,6 +9,7 @@ INVALID_XRAY_NAME_CHARACTERS = r"[?;*()!$~^<>]" # Logger constants +# maintenance: future major version should start having localized `constants.py` to ease future modularization LOGGER_LOG_SAMPLING_RATE: str = "POWERTOOLS_LOGGER_SAMPLE_RATE" LOGGER_LOG_EVENT_ENV: str = "POWERTOOLS_LOGGER_LOG_EVENT" LOGGER_LOG_DEDUPLICATION_ENV: str = "POWERTOOLS_LOG_DEDUPLICATION_DISABLED" diff --git a/docs/core/logger.md b/docs/core/logger.md index bf600e285ca..758762b547c 100644 --- a/docs/core/logger.md +++ b/docs/core/logger.md @@ -475,9 +475,9 @@ You can use any of the following built-in JMESPath expressions as part of [injec ### Reusing Logger across your code -Similar to [Tracer](./tracer.md#reusing-tracer-across-your-code){target="_blank"}, a new instance that uses the same `service` name - env var or explicit parameter - will reuse a previous Logger instance. Just like `logging.getLogger("logger_name")` would in the standard library if called with the same logger name. +Similar to [Tracer](./tracer.md#reusing-tracer-across-your-code){target="_blank"}, a new instance that uses the same `service` name will reuse a previous Logger instance. -Notice in the CloudWatch Logs output how `payment_id` appeared as expected when logging in `collect.py`. +Notice in the CloudWatch Logs output how `payment_id` appears as expected when logging in `collect.py`. === "logger_reuse.py" @@ -497,17 +497,6 @@ Notice in the CloudWatch Logs output how `payment_id` appeared as expected when --8<-- "examples/logger/src/logger_reuse_output.json" ``` -???+ note "Note: About Child Loggers" - Coming from standard library, you might be used to use `logging.getLogger(__name__)`. This will create a new instance of a Logger with a different name. - - In Powertools, you can have the same effect by using `child=True` parameter: `Logger(child=True)`. This creates a new Logger instance named after `service.`. All state changes will be propagated bi-directionally between Child and Parent. - - For that reason, there could be side effects depending on the order the Child Logger is instantiated, because Child Loggers don't have a handler. - - For example, if you instantiated a Child Logger and immediately used `logger.append_keys/remove_keys/set_correlation_id` to update logging state, this might fail if the Parent Logger wasn't instantiated. - - In this scenario, you can either ensure any calls manipulating state are only called when a Parent Logger is instantiated (example above), or refrain from using `child=True` parameter altogether. - ### Sampling debug logs Use sampling when you want to dynamically change your log level to **DEBUG** based on a **percentage of your concurrent/cold start invocations**. @@ -582,7 +571,7 @@ You can use import and use them as any other Logger formatter via `logger_format ### Migrating from other Loggers -If you're migrating from other Loggers, there are few key points to be aware of: [Service parameter](#the-service-parameter), [Inheriting Loggers](#inheriting-loggers), [Overriding Log records](#overriding-log-records), and [Logging exceptions](#logging-exceptions). +If you're migrating from other Loggers, there are few key points to be aware of: [Service parameter](#the-service-parameter), [Child Loggers](#child-loggers), [Overriding Log records](#overriding-log-records), and [Logging exceptions](#logging-exceptions). #### The service parameter @@ -590,23 +579,29 @@ Service is what defines the Logger name, including what the Lambda function is r For Logger, the `service` is the logging key customers can use to search log operations for one or more functions - For example, **search for all errors, or messages like X, where service is payment**. -#### Inheriting Loggers +#### Child Loggers -??? tip "Tip: Prefer [Logger Reuse feature](#reusing-logger-across-your-code) over inheritance unless strictly necessary, [see caveats.](#reusing-logger-across-your-code)" - -> Python Logging hierarchy happens via the dot notation: `service`, `service.child`, `service.child_2` - -For inheritance, Logger uses a `child=True` parameter along with `service` being the same value across Loggers. +
+```mermaid +stateDiagram-v2 + direction LR + Parent: Logger() + Child: Logger(child=True) + Parent --> Child: bi-directional updates + Note right of Child + Both have the same service + end note +``` +
-For child Loggers, we introspect the name of your module where `Logger(child=True, service="name")` is called, and we name your Logger as **{service}.{filename}**. +For inheritance, Logger uses `child` parameter to ensure we don't compete with its parents config. We name child Loggers following Python's convention: _`{service}`.`{filename}`_. -???+ danger - A common issue when migrating from other Loggers is that `service` might be defined in the parent Logger (no child param), and not defined in the child Logger: +Changes are bidirectional between parents and loggers. That is, appending a key in a child or parent will ensure both have them. This means, having the same `service` name is important when instantiating them. -=== "logging_inheritance_bad.py" +=== "logging_inheritance_good.py" ```python hl_lines="1 9" - --8<-- "examples/logger/src/logging_inheritance_bad.py" + --8<-- "examples/logger/src/logging_inheritance_good.py" ``` === "logging_inheritance_module.py" @@ -615,17 +610,17 @@ For child Loggers, we introspect the name of your module where `Logger(child=Tru --8<-- "examples/logger/src/logging_inheritance_module.py" ``` -In this case, Logger will register a Logger named `payment`, and a Logger named `service_undefined`. The latter isn't inheriting from the parent, and will have no handler, resulting in no message being logged to standard output. - -???+ tip - This can be fixed by either ensuring both has the `service` value as `payment`, or simply use the environment variable `POWERTOOLS_SERVICE_NAME` to ensure service value will be the same across all Loggers when not explicitly set. +There are two important side effects when using child loggers: -Do this instead: +1. **Service name mismatch**. Logging messages will be dropped as child loggers don't have logging handlers. + * Solution: use `POWERTOOLS_SERVICE_NAME` env var. Alternatively, use the same service explicit value. +2. **Changing state before a parent instantiate**. Using `logger.append_keys` or `logger.remove_keys` without a parent Logger will lead to `OrphanedChildLoggerError` exception. + * Solution: always initialize parent Loggers first. Alternatively, move calls to `append_keys`/`remove_keys` from the child at a later stage. -=== "logging_inheritance_good.py" +=== "logging_inheritance_bad.py" ```python hl_lines="1 9" - --8<-- "examples/logger/src/logging_inheritance_good.py" + --8<-- "examples/logger/src/logging_inheritance_bad.py" ``` === "logging_inheritance_module.py" diff --git a/tests/functional/test_logger.py b/tests/functional/test_logger.py index 7aa4037cb9c..b59ce604d98 100644 --- a/tests/functional/test_logger.py +++ b/tests/functional/test_logger.py @@ -17,7 +17,7 @@ from aws_lambda_powertools import Logger, Tracer, set_package_logger_handler from aws_lambda_powertools.logging import correlation_paths -from aws_lambda_powertools.logging.exceptions import InvalidLoggerSamplingRateError +from aws_lambda_powertools.logging.exceptions import InvalidLoggerSamplingRateError, OrphanedChildLoggerError from aws_lambda_powertools.logging.formatter import ( BasePowertoolsFormatter, LambdaPowertoolsFormatter, @@ -1176,3 +1176,63 @@ def test_logger_json_unicode(stdout, service_name): assert log["message"] == non_ascii_chars assert log[japanese_field] == japanese_string + + +def test_logger_registered_handler_is_custom_handler(service_name): + # GIVEN a library or environment pre-setup a logger for us using the same name (see #4277) + class ForeignHandler(logging.StreamHandler): ... + + foreign_handler = ForeignHandler() + logging.getLogger(service_name).addHandler(foreign_handler) + + # WHEN Logger init with a custom handler + custom_handler = logging.StreamHandler() + logger = Logger(service=service_name, logger_handler=custom_handler) + + # THEN registered handler should always return what we provided + assert logger.registered_handler is not foreign_handler + assert logger.registered_handler is custom_handler + assert logger.logger_handler is custom_handler + assert logger.handlers == [foreign_handler, custom_handler] + + +def test_child_logger_registered_handler_is_custom_handler(service_name): + # GIVEN + class ForeignHandler(logging.StreamHandler): ... + + foreign_handler = ForeignHandler() + logging.getLogger(service_name).addHandler(foreign_handler) + + custom_handler = logging.StreamHandler() + custom_handler.name = "CUSTOM HANDLER" + parent = Logger(service=service_name, logger_handler=custom_handler) + + # WHEN a child Logger init + child = Logger(service=service_name, child=True) + + # THEN child registered handler should always return what we provided in the parent + assert child.registered_handler is not foreign_handler + assert child.registered_handler is custom_handler + assert child.registered_handler is parent.registered_handler + + +def test_logger_handler_is_created_despite_env_pre_setup(service_name): + # GIVEN a library or environment pre-setup a logger for us using the same name + environment_handler = logging.StreamHandler() + logging.getLogger(service_name).addHandler(environment_handler) + + # WHEN Logger init without a custom handler + logger = Logger(service=service_name) + + # THEN registered handler should be Powertools default handler, not env + assert logger.registered_handler is not environment_handler + + +def test_child_logger_append_keys_before_parent(stdout, service_name): + # GIVEN a child Logger is initialized before its/without parent + child = Logger(stream=stdout, service=service_name, child=True) + + # WHEN a child Logger appends a key + # THEN it will raise an AttributeError + with pytest.raises(OrphanedChildLoggerError): + child.append_keys(customer_id="value")